agno 1.8.0__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (583) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +19 -27
  3. agno/agent/agent.py +2781 -4126
  4. agno/api/agent.py +9 -65
  5. agno/api/api.py +5 -46
  6. agno/api/evals.py +6 -17
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -41
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +5 -21
  11. agno/api/schemas/evals.py +7 -16
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +5 -21
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +11 -7
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +9 -64
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/db/__init__.py +24 -0
  25. agno/db/base.py +245 -0
  26. agno/db/dynamo/__init__.py +3 -0
  27. agno/db/dynamo/dynamo.py +1749 -0
  28. agno/db/dynamo/schemas.py +278 -0
  29. agno/db/dynamo/utils.py +684 -0
  30. agno/db/firestore/__init__.py +3 -0
  31. agno/db/firestore/firestore.py +1438 -0
  32. agno/db/firestore/schemas.py +130 -0
  33. agno/db/firestore/utils.py +278 -0
  34. agno/db/gcs_json/__init__.py +3 -0
  35. agno/db/gcs_json/gcs_json_db.py +1001 -0
  36. agno/db/gcs_json/utils.py +194 -0
  37. agno/db/in_memory/__init__.py +3 -0
  38. agno/db/in_memory/in_memory_db.py +888 -0
  39. agno/db/in_memory/utils.py +172 -0
  40. agno/db/json/__init__.py +3 -0
  41. agno/db/json/json_db.py +1051 -0
  42. agno/db/json/utils.py +196 -0
  43. agno/db/migrations/v1_to_v2.py +162 -0
  44. agno/db/mongo/__init__.py +3 -0
  45. agno/db/mongo/mongo.py +1417 -0
  46. agno/db/mongo/schemas.py +77 -0
  47. agno/db/mongo/utils.py +204 -0
  48. agno/db/mysql/__init__.py +3 -0
  49. agno/db/mysql/mysql.py +1719 -0
  50. agno/db/mysql/schemas.py +124 -0
  51. agno/db/mysql/utils.py +298 -0
  52. agno/db/postgres/__init__.py +3 -0
  53. agno/db/postgres/postgres.py +1720 -0
  54. agno/db/postgres/schemas.py +124 -0
  55. agno/db/postgres/utils.py +281 -0
  56. agno/db/redis/__init__.py +3 -0
  57. agno/db/redis/redis.py +1371 -0
  58. agno/db/redis/schemas.py +109 -0
  59. agno/db/redis/utils.py +288 -0
  60. agno/db/schemas/__init__.py +3 -0
  61. agno/db/schemas/evals.py +33 -0
  62. agno/db/schemas/knowledge.py +40 -0
  63. agno/db/schemas/memory.py +46 -0
  64. agno/db/singlestore/__init__.py +3 -0
  65. agno/db/singlestore/schemas.py +116 -0
  66. agno/db/singlestore/singlestore.py +1722 -0
  67. agno/db/singlestore/utils.py +327 -0
  68. agno/db/sqlite/__init__.py +3 -0
  69. agno/db/sqlite/schemas.py +119 -0
  70. agno/db/sqlite/sqlite.py +1680 -0
  71. agno/db/sqlite/utils.py +269 -0
  72. agno/db/utils.py +88 -0
  73. agno/eval/__init__.py +14 -0
  74. agno/eval/accuracy.py +142 -43
  75. agno/eval/performance.py +88 -23
  76. agno/eval/reliability.py +73 -20
  77. agno/eval/utils.py +23 -13
  78. agno/integrations/discord/__init__.py +3 -0
  79. agno/{app → integrations}/discord/client.py +10 -10
  80. agno/knowledge/__init__.py +2 -2
  81. agno/{document → knowledge}/chunking/agentic.py +2 -2
  82. agno/{document → knowledge}/chunking/document.py +2 -2
  83. agno/{document → knowledge}/chunking/fixed.py +3 -3
  84. agno/{document → knowledge}/chunking/markdown.py +2 -2
  85. agno/{document → knowledge}/chunking/recursive.py +2 -2
  86. agno/{document → knowledge}/chunking/row.py +2 -2
  87. agno/knowledge/chunking/semantic.py +59 -0
  88. agno/knowledge/chunking/strategy.py +121 -0
  89. agno/knowledge/content.py +74 -0
  90. agno/knowledge/document/__init__.py +5 -0
  91. agno/{document → knowledge/document}/base.py +12 -2
  92. agno/knowledge/embedder/__init__.py +5 -0
  93. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  94. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  95. agno/{embedder → knowledge/embedder}/base.py +6 -0
  96. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  97. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  98. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  99. agno/{embedder → knowledge/embedder}/google.py +74 -1
  100. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  101. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  102. agno/knowledge/embedder/langdb.py +22 -0
  103. agno/knowledge/embedder/mistral.py +139 -0
  104. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  105. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  106. agno/knowledge/embedder/openai.py +223 -0
  107. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  108. agno/{embedder → knowledge/embedder}/together.py +1 -1
  109. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  110. agno/knowledge/knowledge.py +1515 -0
  111. agno/knowledge/reader/__init__.py +7 -0
  112. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  113. agno/knowledge/reader/base.py +88 -0
  114. agno/{document → knowledge}/reader/csv_reader.py +68 -15
  115. agno/knowledge/reader/docx_reader.py +83 -0
  116. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  117. agno/knowledge/reader/gcs_reader.py +67 -0
  118. agno/{document → knowledge}/reader/json_reader.py +30 -9
  119. agno/{document → knowledge}/reader/markdown_reader.py +36 -9
  120. agno/{document → knowledge}/reader/pdf_reader.py +79 -21
  121. agno/knowledge/reader/reader_factory.py +275 -0
  122. agno/knowledge/reader/s3_reader.py +171 -0
  123. agno/{document → knowledge}/reader/text_reader.py +31 -10
  124. agno/knowledge/reader/url_reader.py +84 -0
  125. agno/knowledge/reader/web_search_reader.py +389 -0
  126. agno/{document → knowledge}/reader/website_reader.py +37 -10
  127. agno/knowledge/reader/wikipedia_reader.py +59 -0
  128. agno/knowledge/reader/youtube_reader.py +78 -0
  129. agno/knowledge/remote_content/remote_content.py +88 -0
  130. agno/{reranker → knowledge/reranker}/base.py +1 -1
  131. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  132. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  133. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  134. agno/knowledge/types.py +30 -0
  135. agno/knowledge/utils.py +169 -0
  136. agno/media.py +2 -2
  137. agno/memory/__init__.py +2 -10
  138. agno/memory/manager.py +1003 -148
  139. agno/models/aimlapi/__init__.py +2 -2
  140. agno/models/aimlapi/aimlapi.py +6 -6
  141. agno/models/anthropic/claude.py +129 -82
  142. agno/models/aws/bedrock.py +107 -175
  143. agno/models/aws/claude.py +64 -18
  144. agno/models/azure/ai_foundry.py +73 -23
  145. agno/models/base.py +347 -287
  146. agno/models/cerebras/cerebras.py +84 -27
  147. agno/models/cohere/chat.py +106 -98
  148. agno/models/dashscope/dashscope.py +14 -5
  149. agno/models/google/gemini.py +123 -53
  150. agno/models/groq/groq.py +97 -35
  151. agno/models/huggingface/huggingface.py +92 -27
  152. agno/models/ibm/watsonx.py +72 -13
  153. agno/models/litellm/chat.py +85 -13
  154. agno/models/message.py +38 -144
  155. agno/models/meta/llama.py +85 -49
  156. agno/models/metrics.py +120 -0
  157. agno/models/mistral/mistral.py +90 -21
  158. agno/models/ollama/__init__.py +0 -2
  159. agno/models/ollama/chat.py +84 -46
  160. agno/models/openai/chat.py +135 -27
  161. agno/models/openai/responses.py +233 -115
  162. agno/models/perplexity/perplexity.py +26 -2
  163. agno/models/portkey/portkey.py +0 -7
  164. agno/models/response.py +14 -8
  165. agno/models/utils.py +20 -0
  166. agno/models/vercel/__init__.py +2 -2
  167. agno/models/vercel/v0.py +1 -1
  168. agno/models/vllm/__init__.py +2 -2
  169. agno/models/vllm/vllm.py +3 -3
  170. agno/models/xai/xai.py +10 -10
  171. agno/os/__init__.py +3 -0
  172. agno/os/app.py +393 -0
  173. agno/os/auth.py +47 -0
  174. agno/os/config.py +103 -0
  175. agno/os/interfaces/agui/__init__.py +3 -0
  176. agno/os/interfaces/agui/agui.py +31 -0
  177. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  178. agno/{app → os/interfaces}/agui/utils.py +65 -28
  179. agno/os/interfaces/base.py +21 -0
  180. agno/os/interfaces/slack/__init__.py +3 -0
  181. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  182. agno/os/interfaces/slack/slack.py +33 -0
  183. agno/os/interfaces/whatsapp/__init__.py +3 -0
  184. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  185. agno/os/interfaces/whatsapp/whatsapp.py +30 -0
  186. agno/os/router.py +843 -0
  187. agno/os/routers/__init__.py +3 -0
  188. agno/os/routers/evals/__init__.py +3 -0
  189. agno/os/routers/evals/evals.py +204 -0
  190. agno/os/routers/evals/schemas.py +142 -0
  191. agno/os/routers/evals/utils.py +161 -0
  192. agno/os/routers/knowledge/__init__.py +3 -0
  193. agno/os/routers/knowledge/knowledge.py +413 -0
  194. agno/os/routers/knowledge/schemas.py +118 -0
  195. agno/os/routers/memory/__init__.py +3 -0
  196. agno/os/routers/memory/memory.py +179 -0
  197. agno/os/routers/memory/schemas.py +58 -0
  198. agno/os/routers/metrics/__init__.py +3 -0
  199. agno/os/routers/metrics/metrics.py +58 -0
  200. agno/os/routers/metrics/schemas.py +47 -0
  201. agno/os/routers/session/__init__.py +3 -0
  202. agno/os/routers/session/session.py +163 -0
  203. agno/os/schema.py +892 -0
  204. agno/{app/playground → os}/settings.py +8 -15
  205. agno/os/utils.py +270 -0
  206. agno/reasoning/azure_ai_foundry.py +4 -4
  207. agno/reasoning/deepseek.py +4 -4
  208. agno/reasoning/default.py +6 -11
  209. agno/reasoning/groq.py +4 -4
  210. agno/reasoning/helpers.py +4 -6
  211. agno/reasoning/ollama.py +4 -4
  212. agno/reasoning/openai.py +4 -4
  213. agno/run/{response.py → agent.py} +144 -72
  214. agno/run/base.py +44 -58
  215. agno/run/cancel.py +83 -0
  216. agno/run/team.py +133 -77
  217. agno/run/workflow.py +537 -12
  218. agno/session/__init__.py +10 -0
  219. agno/session/agent.py +244 -0
  220. agno/session/summary.py +225 -0
  221. agno/session/team.py +262 -0
  222. agno/{storage/session/v2 → session}/workflow.py +47 -24
  223. agno/team/__init__.py +15 -16
  224. agno/team/team.py +2967 -4243
  225. agno/tools/agentql.py +14 -5
  226. agno/tools/airflow.py +9 -4
  227. agno/tools/api.py +7 -3
  228. agno/tools/apify.py +2 -46
  229. agno/tools/arxiv.py +8 -3
  230. agno/tools/aws_lambda.py +7 -5
  231. agno/tools/aws_ses.py +7 -1
  232. agno/tools/baidusearch.py +4 -1
  233. agno/tools/bitbucket.py +4 -4
  234. agno/tools/brandfetch.py +14 -11
  235. agno/tools/bravesearch.py +4 -1
  236. agno/tools/brightdata.py +42 -22
  237. agno/tools/browserbase.py +13 -4
  238. agno/tools/calcom.py +12 -10
  239. agno/tools/calculator.py +10 -27
  240. agno/tools/cartesia.py +18 -13
  241. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  242. agno/tools/confluence.py +71 -18
  243. agno/tools/crawl4ai.py +7 -1
  244. agno/tools/csv_toolkit.py +9 -8
  245. agno/tools/dalle.py +18 -11
  246. agno/tools/daytona.py +13 -16
  247. agno/tools/decorator.py +6 -3
  248. agno/tools/desi_vocal.py +16 -7
  249. agno/tools/discord.py +11 -8
  250. agno/tools/docker.py +30 -42
  251. agno/tools/duckdb.py +34 -53
  252. agno/tools/duckduckgo.py +8 -7
  253. agno/tools/e2b.py +62 -62
  254. agno/tools/eleven_labs.py +35 -28
  255. agno/tools/email.py +4 -1
  256. agno/tools/evm.py +7 -1
  257. agno/tools/exa.py +19 -14
  258. agno/tools/fal.py +29 -29
  259. agno/tools/file.py +9 -8
  260. agno/tools/financial_datasets.py +25 -44
  261. agno/tools/firecrawl.py +22 -22
  262. agno/tools/function.py +68 -17
  263. agno/tools/giphy.py +22 -10
  264. agno/tools/github.py +48 -126
  265. agno/tools/gmail.py +46 -62
  266. agno/tools/google_bigquery.py +7 -6
  267. agno/tools/google_maps.py +11 -26
  268. agno/tools/googlesearch.py +7 -2
  269. agno/tools/googlesheets.py +21 -17
  270. agno/tools/hackernews.py +9 -5
  271. agno/tools/jina.py +5 -4
  272. agno/tools/jira.py +18 -9
  273. agno/tools/knowledge.py +31 -32
  274. agno/tools/linear.py +18 -33
  275. agno/tools/linkup.py +5 -1
  276. agno/tools/local_file_system.py +8 -5
  277. agno/tools/lumalab.py +31 -19
  278. agno/tools/mem0.py +18 -12
  279. agno/tools/memori.py +14 -10
  280. agno/tools/mlx_transcribe.py +3 -2
  281. agno/tools/models/azure_openai.py +32 -14
  282. agno/tools/models/gemini.py +58 -31
  283. agno/tools/models/groq.py +29 -20
  284. agno/tools/models/nebius.py +27 -11
  285. agno/tools/models_labs.py +39 -15
  286. agno/tools/moviepy_video.py +7 -6
  287. agno/tools/neo4j.py +134 -0
  288. agno/tools/newspaper.py +7 -2
  289. agno/tools/newspaper4k.py +8 -3
  290. agno/tools/openai.py +57 -26
  291. agno/tools/openbb.py +12 -11
  292. agno/tools/opencv.py +62 -46
  293. agno/tools/openweather.py +14 -12
  294. agno/tools/pandas.py +11 -3
  295. agno/tools/postgres.py +4 -12
  296. agno/tools/pubmed.py +4 -1
  297. agno/tools/python.py +9 -22
  298. agno/tools/reasoning.py +35 -27
  299. agno/tools/reddit.py +11 -26
  300. agno/tools/replicate.py +54 -41
  301. agno/tools/resend.py +4 -1
  302. agno/tools/scrapegraph.py +15 -14
  303. agno/tools/searxng.py +10 -23
  304. agno/tools/serpapi.py +6 -3
  305. agno/tools/serper.py +13 -4
  306. agno/tools/shell.py +9 -2
  307. agno/tools/slack.py +12 -11
  308. agno/tools/sleep.py +3 -2
  309. agno/tools/spider.py +24 -4
  310. agno/tools/sql.py +7 -6
  311. agno/tools/tavily.py +6 -4
  312. agno/tools/telegram.py +12 -4
  313. agno/tools/todoist.py +11 -31
  314. agno/tools/toolkit.py +1 -1
  315. agno/tools/trafilatura.py +22 -6
  316. agno/tools/trello.py +9 -22
  317. agno/tools/twilio.py +10 -3
  318. agno/tools/user_control_flow.py +6 -1
  319. agno/tools/valyu.py +34 -5
  320. agno/tools/visualization.py +19 -28
  321. agno/tools/webbrowser.py +4 -3
  322. agno/tools/webex.py +11 -7
  323. agno/tools/website.py +15 -46
  324. agno/tools/webtools.py +12 -4
  325. agno/tools/whatsapp.py +5 -9
  326. agno/tools/wikipedia.py +20 -13
  327. agno/tools/x.py +14 -13
  328. agno/tools/yfinance.py +13 -40
  329. agno/tools/youtube.py +26 -20
  330. agno/tools/zendesk.py +7 -2
  331. agno/tools/zep.py +10 -7
  332. agno/tools/zoom.py +10 -9
  333. agno/utils/common.py +1 -19
  334. agno/utils/events.py +95 -118
  335. agno/utils/knowledge.py +29 -0
  336. agno/utils/location.py +2 -2
  337. agno/utils/log.py +2 -2
  338. agno/utils/mcp.py +11 -5
  339. agno/utils/media.py +39 -0
  340. agno/utils/message.py +12 -1
  341. agno/utils/models/claude.py +6 -4
  342. agno/utils/models/mistral.py +8 -7
  343. agno/utils/models/schema_utils.py +3 -3
  344. agno/utils/pprint.py +33 -32
  345. agno/utils/print_response/agent.py +779 -0
  346. agno/utils/print_response/team.py +1565 -0
  347. agno/utils/print_response/workflow.py +1451 -0
  348. agno/utils/prompts.py +14 -14
  349. agno/utils/reasoning.py +87 -0
  350. agno/utils/response.py +42 -42
  351. agno/utils/string.py +8 -22
  352. agno/utils/team.py +50 -0
  353. agno/utils/timer.py +2 -2
  354. agno/vectordb/base.py +33 -21
  355. agno/vectordb/cassandra/cassandra.py +287 -23
  356. agno/vectordb/chroma/chromadb.py +482 -59
  357. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  358. agno/vectordb/couchbase/couchbase.py +309 -29
  359. agno/vectordb/lancedb/lance_db.py +360 -21
  360. agno/vectordb/langchaindb/__init__.py +5 -0
  361. agno/vectordb/langchaindb/langchaindb.py +145 -0
  362. agno/vectordb/lightrag/__init__.py +5 -0
  363. agno/vectordb/lightrag/lightrag.py +374 -0
  364. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  365. agno/vectordb/milvus/milvus.py +242 -32
  366. agno/vectordb/mongodb/mongodb.py +200 -24
  367. agno/vectordb/pgvector/pgvector.py +319 -37
  368. agno/vectordb/pineconedb/pineconedb.py +221 -27
  369. agno/vectordb/qdrant/qdrant.py +356 -14
  370. agno/vectordb/singlestore/singlestore.py +286 -29
  371. agno/vectordb/surrealdb/surrealdb.py +187 -7
  372. agno/vectordb/upstashdb/upstashdb.py +342 -26
  373. agno/vectordb/weaviate/weaviate.py +227 -165
  374. agno/workflow/__init__.py +17 -13
  375. agno/workflow/{v2/condition.py → condition.py} +135 -32
  376. agno/workflow/{v2/loop.py → loop.py} +115 -28
  377. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  378. agno/workflow/{v2/router.py → router.py} +133 -32
  379. agno/workflow/{v2/step.py → step.py} +200 -42
  380. agno/workflow/{v2/steps.py → steps.py} +147 -66
  381. agno/workflow/types.py +482 -0
  382. agno/workflow/workflow.py +2394 -696
  383. agno-2.0.0a1.dist-info/METADATA +355 -0
  384. agno-2.0.0a1.dist-info/RECORD +514 -0
  385. agno/agent/metrics.py +0 -107
  386. agno/api/app.py +0 -35
  387. agno/api/playground.py +0 -92
  388. agno/api/schemas/app.py +0 -12
  389. agno/api/schemas/playground.py +0 -22
  390. agno/api/schemas/user.py +0 -35
  391. agno/api/schemas/workspace.py +0 -46
  392. agno/api/user.py +0 -160
  393. agno/api/workflows.py +0 -33
  394. agno/api/workspace.py +0 -175
  395. agno/app/agui/__init__.py +0 -3
  396. agno/app/agui/app.py +0 -17
  397. agno/app/agui/sync_router.py +0 -120
  398. agno/app/base.py +0 -186
  399. agno/app/discord/__init__.py +0 -3
  400. agno/app/fastapi/__init__.py +0 -3
  401. agno/app/fastapi/app.py +0 -107
  402. agno/app/fastapi/async_router.py +0 -457
  403. agno/app/fastapi/sync_router.py +0 -448
  404. agno/app/playground/app.py +0 -228
  405. agno/app/playground/async_router.py +0 -1050
  406. agno/app/playground/deploy.py +0 -249
  407. agno/app/playground/operator.py +0 -183
  408. agno/app/playground/schemas.py +0 -220
  409. agno/app/playground/serve.py +0 -55
  410. agno/app/playground/sync_router.py +0 -1042
  411. agno/app/playground/utils.py +0 -46
  412. agno/app/settings.py +0 -15
  413. agno/app/slack/__init__.py +0 -3
  414. agno/app/slack/app.py +0 -19
  415. agno/app/slack/sync_router.py +0 -92
  416. agno/app/utils.py +0 -54
  417. agno/app/whatsapp/__init__.py +0 -3
  418. agno/app/whatsapp/app.py +0 -15
  419. agno/app/whatsapp/sync_router.py +0 -197
  420. agno/cli/auth_server.py +0 -249
  421. agno/cli/config.py +0 -274
  422. agno/cli/console.py +0 -88
  423. agno/cli/credentials.py +0 -23
  424. agno/cli/entrypoint.py +0 -571
  425. agno/cli/operator.py +0 -357
  426. agno/cli/settings.py +0 -96
  427. agno/cli/ws/ws_cli.py +0 -817
  428. agno/constants.py +0 -13
  429. agno/document/__init__.py +0 -5
  430. agno/document/chunking/semantic.py +0 -45
  431. agno/document/chunking/strategy.py +0 -31
  432. agno/document/reader/__init__.py +0 -5
  433. agno/document/reader/base.py +0 -47
  434. agno/document/reader/docx_reader.py +0 -60
  435. agno/document/reader/gcs/pdf_reader.py +0 -44
  436. agno/document/reader/s3/pdf_reader.py +0 -59
  437. agno/document/reader/s3/text_reader.py +0 -63
  438. agno/document/reader/url_reader.py +0 -59
  439. agno/document/reader/youtube_reader.py +0 -58
  440. agno/embedder/__init__.py +0 -5
  441. agno/embedder/langdb.py +0 -80
  442. agno/embedder/mistral.py +0 -82
  443. agno/embedder/openai.py +0 -78
  444. agno/file/__init__.py +0 -5
  445. agno/file/file.py +0 -16
  446. agno/file/local/csv.py +0 -32
  447. agno/file/local/txt.py +0 -19
  448. agno/infra/app.py +0 -240
  449. agno/infra/base.py +0 -144
  450. agno/infra/context.py +0 -20
  451. agno/infra/db_app.py +0 -52
  452. agno/infra/resource.py +0 -205
  453. agno/infra/resources.py +0 -55
  454. agno/knowledge/agent.py +0 -698
  455. agno/knowledge/arxiv.py +0 -33
  456. agno/knowledge/combined.py +0 -36
  457. agno/knowledge/csv.py +0 -144
  458. agno/knowledge/csv_url.py +0 -124
  459. agno/knowledge/document.py +0 -223
  460. agno/knowledge/docx.py +0 -137
  461. agno/knowledge/firecrawl.py +0 -34
  462. agno/knowledge/gcs/__init__.py +0 -0
  463. agno/knowledge/gcs/base.py +0 -39
  464. agno/knowledge/gcs/pdf.py +0 -125
  465. agno/knowledge/json.py +0 -137
  466. agno/knowledge/langchain.py +0 -71
  467. agno/knowledge/light_rag.py +0 -273
  468. agno/knowledge/llamaindex.py +0 -66
  469. agno/knowledge/markdown.py +0 -154
  470. agno/knowledge/pdf.py +0 -164
  471. agno/knowledge/pdf_bytes.py +0 -42
  472. agno/knowledge/pdf_url.py +0 -148
  473. agno/knowledge/s3/__init__.py +0 -0
  474. agno/knowledge/s3/base.py +0 -64
  475. agno/knowledge/s3/pdf.py +0 -33
  476. agno/knowledge/s3/text.py +0 -34
  477. agno/knowledge/text.py +0 -141
  478. agno/knowledge/url.py +0 -46
  479. agno/knowledge/website.py +0 -179
  480. agno/knowledge/wikipedia.py +0 -32
  481. agno/knowledge/youtube.py +0 -35
  482. agno/memory/agent.py +0 -423
  483. agno/memory/classifier.py +0 -104
  484. agno/memory/db/__init__.py +0 -5
  485. agno/memory/db/base.py +0 -42
  486. agno/memory/db/mongodb.py +0 -189
  487. agno/memory/db/postgres.py +0 -203
  488. agno/memory/db/sqlite.py +0 -193
  489. agno/memory/memory.py +0 -22
  490. agno/memory/row.py +0 -36
  491. agno/memory/summarizer.py +0 -201
  492. agno/memory/summary.py +0 -19
  493. agno/memory/team.py +0 -415
  494. agno/memory/v2/__init__.py +0 -2
  495. agno/memory/v2/db/__init__.py +0 -1
  496. agno/memory/v2/db/base.py +0 -42
  497. agno/memory/v2/db/firestore.py +0 -339
  498. agno/memory/v2/db/mongodb.py +0 -196
  499. agno/memory/v2/db/postgres.py +0 -214
  500. agno/memory/v2/db/redis.py +0 -187
  501. agno/memory/v2/db/schema.py +0 -54
  502. agno/memory/v2/db/sqlite.py +0 -209
  503. agno/memory/v2/manager.py +0 -437
  504. agno/memory/v2/memory.py +0 -1097
  505. agno/memory/v2/schema.py +0 -55
  506. agno/memory/v2/summarizer.py +0 -215
  507. agno/memory/workflow.py +0 -38
  508. agno/models/ollama/tools.py +0 -430
  509. agno/models/qwen/__init__.py +0 -5
  510. agno/playground/__init__.py +0 -10
  511. agno/playground/deploy.py +0 -3
  512. agno/playground/playground.py +0 -3
  513. agno/playground/serve.py +0 -3
  514. agno/playground/settings.py +0 -3
  515. agno/reranker/__init__.py +0 -0
  516. agno/run/v2/__init__.py +0 -0
  517. agno/run/v2/workflow.py +0 -567
  518. agno/storage/__init__.py +0 -0
  519. agno/storage/agent/__init__.py +0 -0
  520. agno/storage/agent/dynamodb.py +0 -1
  521. agno/storage/agent/json.py +0 -1
  522. agno/storage/agent/mongodb.py +0 -1
  523. agno/storage/agent/postgres.py +0 -1
  524. agno/storage/agent/singlestore.py +0 -1
  525. agno/storage/agent/sqlite.py +0 -1
  526. agno/storage/agent/yaml.py +0 -1
  527. agno/storage/base.py +0 -60
  528. agno/storage/dynamodb.py +0 -673
  529. agno/storage/firestore.py +0 -297
  530. agno/storage/gcs_json.py +0 -261
  531. agno/storage/in_memory.py +0 -234
  532. agno/storage/json.py +0 -237
  533. agno/storage/mongodb.py +0 -328
  534. agno/storage/mysql.py +0 -685
  535. agno/storage/postgres.py +0 -682
  536. agno/storage/redis.py +0 -336
  537. agno/storage/session/__init__.py +0 -16
  538. agno/storage/session/agent.py +0 -64
  539. agno/storage/session/team.py +0 -63
  540. agno/storage/session/v2/__init__.py +0 -5
  541. agno/storage/session/workflow.py +0 -61
  542. agno/storage/singlestore.py +0 -606
  543. agno/storage/sqlite.py +0 -646
  544. agno/storage/workflow/__init__.py +0 -0
  545. agno/storage/workflow/mongodb.py +0 -1
  546. agno/storage/workflow/postgres.py +0 -1
  547. agno/storage/workflow/sqlite.py +0 -1
  548. agno/storage/yaml.py +0 -241
  549. agno/tools/thinking.py +0 -73
  550. agno/utils/defaults.py +0 -57
  551. agno/utils/filesystem.py +0 -39
  552. agno/utils/git.py +0 -52
  553. agno/utils/json_io.py +0 -30
  554. agno/utils/load_env.py +0 -19
  555. agno/utils/py_io.py +0 -19
  556. agno/utils/pyproject.py +0 -18
  557. agno/utils/resource_filter.py +0 -31
  558. agno/workflow/v2/__init__.py +0 -21
  559. agno/workflow/v2/types.py +0 -357
  560. agno/workflow/v2/workflow.py +0 -3312
  561. agno/workspace/__init__.py +0 -0
  562. agno/workspace/config.py +0 -325
  563. agno/workspace/enums.py +0 -6
  564. agno/workspace/helpers.py +0 -52
  565. agno/workspace/operator.py +0 -757
  566. agno/workspace/settings.py +0 -158
  567. agno-1.8.0.dist-info/METADATA +0 -979
  568. agno-1.8.0.dist-info/RECORD +0 -565
  569. agno-1.8.0.dist-info/entry_points.txt +0 -3
  570. /agno/{app → db/migrations}/__init__.py +0 -0
  571. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  572. /agno/{cli → integrations}/__init__.py +0 -0
  573. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  574. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  575. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  576. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  577. /agno/{app → os/interfaces}/slack/security.py +0 -0
  578. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  579. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  580. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  581. {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/WHEEL +0 -0
  582. {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/licenses/LICENSE +0 -0
  583. {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/top_level.txt +0 -0
agno/knowledge/agent.py DELETED
@@ -1,698 +0,0 @@
1
- import asyncio
2
- from pathlib import Path
3
- from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Set, Tuple
4
-
5
- from pydantic import BaseModel, ConfigDict, model_validator
6
-
7
- from agno.document import Document
8
- from agno.document.chunking.fixed import FixedSizeChunking
9
- from agno.document.chunking.strategy import ChunkingStrategy
10
- from agno.document.reader.base import Reader
11
- from agno.utils.log import log_debug, log_info, logger
12
- from agno.vectordb import VectorDb
13
-
14
-
15
- class AgentKnowledge(BaseModel):
16
- """Base class for Agent knowledge"""
17
-
18
- # Reader for reading documents from files, pdfs, urls, etc.
19
- reader: Optional[Reader] = None
20
- # Vector db for storing knowledge
21
- vector_db: Optional[VectorDb] = None
22
- # Number of relevant documents to return on search
23
- num_documents: int = 5
24
- # Number of documents to optimize the vector db on
25
- optimize_on: Optional[int] = 1000
26
-
27
- chunking_strategy: Optional[ChunkingStrategy] = None
28
- model_config = ConfigDict(arbitrary_types_allowed=True)
29
-
30
- valid_metadata_filters: Set[str] = None # type: ignore
31
-
32
- @model_validator(mode="after")
33
- def update_reader(self) -> "AgentKnowledge":
34
- if self.reader is not None and self.reader.chunking_strategy is None:
35
- self.reader.chunking_strategy = self.chunking_strategy or FixedSizeChunking()
36
- return self
37
-
38
- @property
39
- def document_lists(self) -> Iterator[List[Document]]:
40
- """Iterator that yields lists of documents in the knowledge base
41
- Each object yielded by the iterator is a list of documents.
42
- """
43
- raise NotImplementedError
44
-
45
- @property
46
- async def async_document_lists(self) -> AsyncIterator[List[Document]]:
47
- """Iterator that yields lists of documents in the knowledge base
48
- Each object yielded by the iterator is a list of documents.
49
- """
50
- raise NotImplementedError
51
-
52
- def _upsert_warning(self, upsert) -> None:
53
- """Log a warning if upsert is not available"""
54
- if upsert and self.vector_db is not None and not self.vector_db.upsert_available():
55
- log_info(
56
- f"Vector db '{self.vector_db.__class__.__module__}' does not support upsert. Falling back to insert."
57
- )
58
-
59
- def _load_init(self, recreate: bool, upsert: bool) -> None:
60
- """Initial setup for loading knowledge base"""
61
- if self.vector_db is None:
62
- logger.warning("No vector db provided")
63
- return
64
-
65
- if recreate:
66
- log_info("Dropping collection")
67
- self.vector_db.drop()
68
-
69
- if not self.vector_db.exists():
70
- log_info("Creating collection")
71
- self.vector_db.create()
72
-
73
- self._upsert_warning(upsert)
74
-
75
- async def _aload_init(self, recreate: bool, upsert: bool) -> None:
76
- """Initial async setup for loading knowledge base"""
77
- if self.vector_db is None:
78
- logger.warning("No vector db provided")
79
- return
80
-
81
- if recreate:
82
- log_info("Dropping collection")
83
- try:
84
- await self.vector_db.async_drop()
85
- except NotImplementedError:
86
- logger.warning("Vector db does not support async drop, falling back to sync drop")
87
- self.vector_db.drop()
88
-
89
- if not self.vector_db.exists():
90
- log_info("Creating collection")
91
- try:
92
- await self.vector_db.async_create()
93
- except NotImplementedError:
94
- logger.warning("Vector db does not support async create, falling back to sync create")
95
- self.vector_db.create()
96
-
97
- self._upsert_warning(upsert)
98
-
99
- def search(
100
- self, query: str, num_documents: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
101
- ) -> List[Document]:
102
- """Returns relevant documents matching a query"""
103
- try:
104
- if self.vector_db is None:
105
- logger.warning("No vector db provided")
106
- return []
107
-
108
- _num_documents = num_documents or self.num_documents
109
- log_debug(f"Getting {_num_documents} relevant documents for query: {query}")
110
- return self.vector_db.search(query=query, limit=_num_documents, filters=filters)
111
- except Exception as e:
112
- logger.error(f"Error searching for documents: {e}")
113
- return []
114
-
115
- async def async_search(
116
- self, query: str, num_documents: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
117
- ) -> List[Document]:
118
- """Returns relevant documents matching a query"""
119
- try:
120
- if self.vector_db is None:
121
- logger.warning("No vector db provided")
122
- return []
123
-
124
- _num_documents = num_documents or self.num_documents
125
- log_debug(f"Getting {_num_documents} relevant documents for query: {query}")
126
- try:
127
- return await self.vector_db.async_search(query=query, limit=_num_documents, filters=filters)
128
- except NotImplementedError:
129
- log_info("Vector db does not support async search")
130
- return self.search(query=query, num_documents=_num_documents, filters=filters)
131
- except Exception as e:
132
- logger.error(f"Error searching for documents: {e}")
133
- return []
134
-
135
def load(
    self,
    recreate: bool = False,
    upsert: bool = False,
    skip_existing: bool = True,
) -> None:
    """Load the knowledge base to the vector db.

    Iterates over ``self.document_lists`` and, for every batch, records the
    metadata keys of its documents and then upserts or inserts the batch.

    Args:
        recreate (bool): If True, recreates the collection in the vector db. Defaults to False.
        upsert (bool): If True, upserts documents to the vector db. Defaults to False.
        skip_existing (bool): If True, skips documents which already exist in the vector db
            when inserting. Defaults to True.
    """
    self._load_init(recreate, upsert)
    if self.vector_db is None:
        return

    log_info("Loading knowledge base")
    num_documents = 0
    for document_list in self.document_lists:
        documents_to_load = document_list

        # Track metadata for filtering capabilities
        for doc in document_list:
            if doc.meta_data:
                self._track_metadata_structure(doc.meta_data)

        # The batch is tagged with the metadata of its last document. Resolve it
        # explicitly instead of reading the leaked loop variable: an empty batch
        # would otherwise raise UnboundLocalError (first batch) or silently reuse
        # the metadata of the previous batch.
        batch_filters = document_list[-1].meta_data if document_list else None

        # Upsert documents if upsert is True and vector db supports upsert
        if upsert and self.vector_db.upsert_available():
            self.vector_db.upsert(documents=documents_to_load, filters=batch_filters)
        # Insert documents
        else:
            # Filter out documents which already exist in the vector db
            if skip_existing:
                log_debug("Filtering out existing documents before insertion.")
                documents_to_load = self.filter_existing_documents(document_list)

            if documents_to_load:
                self.vector_db.insert(documents=documents_to_load, filters=batch_filters)

        num_documents += len(documents_to_load)
    log_info(f"Added {num_documents} documents to knowledge base")
177
-
178
async def aload(
    self,
    recreate: bool = False,
    upsert: bool = False,
    skip_existing: bool = True,
) -> None:
    """Load the knowledge base to the vector db asynchronously.

    Iterates over ``self.async_document_lists`` and, for every batch, records
    the metadata keys of its documents and then upserts or inserts the batch
    through the vector db's async API.

    Args:
        recreate (bool): If True, recreates the collection in the vector db. Defaults to False.
        upsert (bool): If True, upserts documents to the vector db. Defaults to False.
        skip_existing (bool): If True, skips documents which already exist in the vector db
            when inserting. Defaults to True.
    """
    await self._aload_init(recreate, upsert)
    if self.vector_db is None:
        return

    log_info("Loading knowledge base")
    num_documents = 0
    document_iterator = self.async_document_lists
    async for document_list in document_iterator:  # type: ignore
        documents_to_load = document_list

        # Track metadata for filtering capabilities
        for doc in document_list:
            if doc.meta_data:
                self._track_metadata_structure(doc.meta_data)

        # The batch is tagged with the metadata of its last document. Resolve it
        # explicitly instead of reading the leaked loop variable: an empty batch
        # would otherwise raise UnboundLocalError (first batch) or silently reuse
        # the metadata of the previous batch.
        batch_filters = document_list[-1].meta_data if document_list else None

        # Upsert documents if upsert is True and vector db supports upsert
        if upsert and self.vector_db.upsert_available():
            await self.vector_db.async_upsert(documents=documents_to_load, filters=batch_filters)
        # Insert documents
        else:
            # Filter out documents which already exist in the vector db
            if skip_existing:
                log_debug("Filtering out existing documents before insertion.")
                documents_to_load = await self.async_filter_existing_documents(document_list)

            if documents_to_load:
                await self.vector_db.async_insert(documents=documents_to_load, filters=batch_filters)

        num_documents += len(documents_to_load)
    log_info(f"Added {num_documents} documents to knowledge base")
220
-
221
def load_documents(
    self,
    documents: List[Document],
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Write an explicit list of documents to the vector db.

    Args:
        documents (List[Document]): Documents to store.
        upsert (bool): Upsert instead of insert when the vector db supports it. Defaults to False.
        skip_existing (bool): When inserting, drop documents the vector db already reports as
            existing. Defaults to True.
        filters (Optional[Dict[str, Any]]): Passed through to the vector db together with the
            documents. Defaults to None.
    """
    self._load_init(recreate=False, upsert=upsert)
    if self.vector_db is None:
        return

    log_info("Loading knowledge base")

    # Upsert path: everything is written, nothing is filtered.
    if upsert and self.vector_db.upsert_available():
        self.vector_db.upsert(documents=documents, filters=filters)
        log_info(f"Loaded {len(documents)} documents to knowledge base")
        return

    # Insert path: optionally drop documents that are already stored.
    if skip_existing:
        pending = [candidate for candidate in documents if not self.vector_db.doc_exists(candidate)]
    else:
        pending = documents

    if len(pending) > 0:
        self.vector_db.insert(documents=pending, filters=filters)
        log_info(f"Loaded {len(pending)} documents to knowledge base")
    else:
        log_info("No new documents to load")
259
-
260
async def async_load_documents(
    self,
    documents: List[Document],
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Load documents to the knowledge base asynchronously.

    Each async vector db call falls back to its synchronous counterpart when
    the backend raises ``NotImplementedError``.

    Args:
        documents (List[Document]): List of documents to load
        upsert (bool): If True, upserts documents to the vector db. Defaults to False.
        skip_existing (bool): If True, skips documents which already exist in the vector db
            when inserting. Defaults to True.
        filters (Optional[Dict[str, Any]]): Filters to add to each row that can be used to
            limit results during querying. Defaults to None.
    """
    await self._aload_init(recreate=False, upsert=upsert)
    if self.vector_db is None:
        return

    log_info("Loading knowledge base")

    # Upsert documents if upsert is True
    if upsert and self.vector_db.upsert_available():
        try:
            await self.vector_db.async_upsert(documents=documents, filters=filters)
        except NotImplementedError:
            logger.warning("Vector db does not support async upsert")
            self.vector_db.upsert(documents=documents, filters=filters)
        log_info(f"Loaded {len(documents)} documents to knowledge base")
        return

    # Filter out documents which already exist in the vector db
    if skip_existing:
        try:
            # Parallelize existence checks using asyncio.gather
            existence_checks = await asyncio.gather(
                *[self.vector_db.async_doc_exists(document) for document in documents],
                return_exceptions=True,
            )

            # With return_exceptions=True a NotImplementedError raised inside the
            # coroutine comes back as a *result* instead of propagating, so it has
            # to be detected here; otherwise every document would be treated as
            # new and the sync fallback below could never run.
            if any(isinstance(outcome, NotImplementedError) for outcome in existence_checks):
                raise NotImplementedError

            # Any other failed check is still treated as "does not exist".
            documents_to_load = [
                doc for doc, exists in zip(documents, existence_checks) if not (isinstance(exists, bool) and exists)
            ]
        except NotImplementedError:
            logger.warning("Vector db does not support async doc_exists")
            documents_to_load = [document for document in documents if not self.vector_db.doc_exists(document)]
    else:
        documents_to_load = documents

    # Insert documents
    if len(documents_to_load) > 0:
        try:
            await self.vector_db.async_insert(documents=documents_to_load, filters=filters)
        except NotImplementedError:
            logger.warning("Vector db does not support async insert")
            self.vector_db.insert(documents=documents_to_load, filters=filters)
        log_info(f"Loaded {len(documents_to_load)} documents to knowledge base")
    else:
        log_info("No new documents to load")
319
-
320
def load_document(
    self,
    document: Document,
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Store a single document by delegating to ``load_documents``.

    Args:
        document (Document): The document to store.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    # Wrap the document in a one-element batch so the bulk loader can be reused.
    single_batch = [document]
    self.load_documents(
        documents=single_batch,
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
336
-
337
async def async_load_document(
    self,
    document: Document,
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Store a single document by awaiting ``async_load_documents``.

    Args:
        document (Document): The document to store.
        upsert (bool): Forwarded to ``async_load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``async_load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``async_load_documents``. Defaults to None.
    """
    # Wrap the document in a one-element batch so the bulk loader can be reused.
    single_batch = [document]
    await self.async_load_documents(
        documents=single_batch,
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
355
-
356
def load_dict(
    self,
    document: Dict[str, Any],
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Build a ``Document`` from a dictionary and store it.

    Args:
        document (Dict[str, Any]): Dictionary representation handed to ``Document.from_dict``.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    parsed = Document.from_dict(document)
    self.load_documents(
        documents=[parsed],
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
374
-
375
def load_json(
    self,
    document: str,
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Build a ``Document`` from a JSON string and store it.

    Args:
        document (str): JSON representation handed to ``Document.from_json``.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    parsed = Document.from_json(document)
    self.load_documents(
        documents=[parsed],
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
389
-
390
def load_text(
    self,
    text: str,
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Wrap raw text in a ``Document`` and store it.

    Args:
        text (str): Text used as the content of the new document.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    wrapped = Document(content=text)
    self.load_documents(
        documents=[wrapped],
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
404
-
405
def exists(self) -> bool:
    """Return whatever the vector db reports for ``exists()``.

    Returns:
        bool: False (after logging a warning) when no vector db is set,
        otherwise the result of ``self.vector_db.exists()``.
    """
    backend = self.vector_db
    if backend is not None:
        return backend.exists()

    logger.warning("No vector db provided")
    return False
411
-
412
def delete(self) -> bool:
    """Clear the knowledge base by delegating to the vector db.

    Returns:
        bool: True (after logging a warning) when no vector db is set,
        otherwise the result of ``self.vector_db.delete()``.
    """
    backend = self.vector_db
    if backend is not None:
        return backend.delete()

    logger.warning("No vector db available")
    return True
419
-
420
def filter_existing_documents(self, documents: List[Document]) -> List[Document]:
    """Drop documents that are duplicated in the batch or already stored.

    A document is kept only when its ``content`` has not been seen earlier in
    the same batch and the vector db does not report it as existing.

    Args:
        documents (List[Document]): Candidate documents.

    Returns:
        List[Document]: The candidates that survived filtering, in their
        original order. The input list is returned untouched when no vector
        db is configured.
    """
    if not self.vector_db:
        log_debug("No vector database configured, skipping document filtering")
        return documents

    total = len(documents)
    kept = []
    contents_seen = set()  # contents already accepted from this batch

    for candidate in documents:
        content_key = candidate.content  # the content itself is the dedupe key
        # The db is only queried when the content was not already seen in this batch.
        if content_key in contents_seen or self.vector_db.doc_exists(candidate):
            log_debug(f"Skipping existing document: {candidate.name} (or duplicate content)")
            continue
        contents_seen.add(content_key)
        kept.append(candidate)

    skipped = total - len(kept)
    if skipped > 0:
        log_info(f"Skipped {skipped} existing/duplicate documents.")

    return kept
454
-
455
async def async_filter_existing_documents(self, documents: List[Document]) -> List[Document]:
    """Filter out documents that already exist in the vector database.

    Async counterpart of ``filter_existing_documents``: a document is kept only
    when its ``content`` has not been seen earlier in the same batch and
    ``async_doc_exists`` does not report it as existing.

    Args:
        documents (List[Document]): List of documents to filter

    Returns:
        List[Document]: Filtered list of documents that don't exist in the database.
        The input list is returned untouched when no vector db is configured.
    """
    # The logging helpers come from module scope, exactly as in the sync
    # sibling; the former function-local re-import was redundant.
    if not self.vector_db:
        log_debug("No vector database configured, skipping document filtering")
        return documents

    # Use set for O(1) lookups
    seen_content = set()
    original_count = len(documents)
    filtered_documents = []

    for doc in documents:
        content_hash = doc.content  # the content itself is used as the dedupe key
        # Checks run one by one on purpose: the db is only queried for content
        # that was not already accepted earlier in this batch.
        if content_hash not in seen_content and not await self.vector_db.async_doc_exists(doc):
            seen_content.add(content_hash)
            filtered_documents.append(doc)
        else:
            log_debug(f"Skipping existing document: {doc.name} (or duplicate content)")

    if len(filtered_documents) < original_count:
        log_info(f"Skipped {original_count - len(filtered_documents)} existing/duplicate documents.")

    return filtered_documents
491
-
492
def _track_metadata_structure(self, metadata: Optional[Dict[str, Any]]) -> None:
    """Remember the top-level keys of ``metadata`` as valid filter fields.

    Args:
        metadata (Optional[Dict[str, Any]]): Metadata whose keys should be
            recorded. ``None`` or an empty dict leaves the tracked set untouched.
    """
    if not metadata:
        return

    # The set is created lazily, on the first non-empty metadata dict.
    if self.valid_metadata_filters is None:
        self.valid_metadata_filters = set()

    self.valid_metadata_filters.update(metadata.keys())
505
-
506
def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
    """Split ``filters`` into entries with a tracked key and the keys that are unknown.

    A key is accepted when either the key itself or its last dot-separated
    segment is present in ``self.valid_metadata_filters``.

    Args:
        filters (Optional[Dict[str, Any]]): Filters to check.

    Returns:
        Tuple[Dict[str, Any], List[str]]: The accepted filters and the list of
        rejected keys. Both are empty when ``filters`` is empty or None; every
        key is rejected when no metadata keys have been tracked yet.
    """
    if not filters:
        return {}, []

    known_keys = self.valid_metadata_filters

    # If no metadata filters tracked yet, all keys are considered invalid
    if known_keys is None:
        invalid_keys = list(filters.keys())
        log_debug(f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}")
        return {}, invalid_keys

    accepted = {}
    rejected = []
    for key, value in filters.items():
        # Prefixed keys such as meta_data.key are matched on their last segment as well
        leaf = key if "." not in key else key.split(".")[-1]
        if leaf not in known_keys and key not in known_keys:
            rejected.append(key)
            log_debug(f"Invalid filter key: {key} - not present in knowledge base")
            continue
        accepted[key] = value

    return accepted, rejected
529
-
530
def initialize_valid_filters(self) -> None:
    """Populate the valid metadata filters from ``self.document_lists``.

    Intended for the case where load/aload was never called but the filter
    keys are still needed. Does nothing when filters have already been tracked.
    """
    if self.valid_metadata_filters is not None:
        return

    every_document = (doc for batch in self.document_lists for doc in batch)
    for doc in every_document:
        if doc.meta_data:
            self._track_metadata_structure(doc.meta_data)
539
-
540
def prepare_load(
    self,
    file_path: Path,
    allowed_formats: Optional[List[str]],
    metadata: Optional[Dict[str, Any]] = None,
    recreate: bool = False,
    is_url: bool = False,
) -> bool:
    """Check the source, record its metadata keys and initialise the vector db.

    Args:
        file_path (Path): Source to validate.
        allowed_formats (List[str]): File suffixes that are accepted.
        metadata (Optional[Dict[str, Any]]): Metadata whose keys should be tracked.
        recreate (bool): Forwarded to ``_load_init``.
        is_url (bool): When True the local-file checks are skipped.

    Returns:
        bool: True when validation passed and a vector db is available, False otherwise.
    """
    local_source = not is_url

    # 1. Validate file path
    if local_source and not file_path.exists():
        logger.error(f"File not found: {file_path}")
        return False

    if local_source and file_path.suffix not in allowed_formats:  # type: ignore
        logger.error(f"Unsupported file format: {file_path.suffix}")
        return False

    # 2. Track metadata
    if metadata:
        self._track_metadata_structure(metadata)

    # 3. Prepare vector DB
    self._load_init(recreate, upsert=False)
    return self.vector_db is not None
576
-
577
async def aprepare_load(
    self,
    file_path: Path,
    allowed_formats: List[str],
    metadata: Optional[Dict[str, Any]] = None,
    recreate: bool = False,
    is_url: bool = False,
) -> bool:
    """Check the source, record its metadata keys and initialise the vector db (async).

    Args:
        file_path (Path): Source to validate.
        allowed_formats (List[str]): File suffixes that are accepted.
        metadata (Optional[Dict[str, Any]]): Metadata whose keys should be tracked.
        recreate (bool): Forwarded to ``_aload_init``.
        is_url (bool): When True the local-file checks are skipped.

    Returns:
        bool: True when validation passed and a vector db is available, False otherwise.
    """
    local_source = not is_url

    # 1. Validate file path
    if local_source and not file_path.exists():
        logger.error(f"File not found: {file_path}")
        return False

    if local_source and file_path.suffix not in allowed_formats:
        logger.error(f"Unsupported file format: {file_path.suffix}")
        return False

    # 2. Track metadata
    if metadata:
        self._track_metadata_structure(metadata)

    # 3. Prepare vector DB
    await self._aload_init(recreate, upsert=False)
    return self.vector_db is not None
613
-
614
def process_documents(
    self,
    documents: List[Document],
    metadata: Optional[Dict[str, Any]] = None,
    upsert: bool = False,
    skip_existing: bool = True,
    source_info: str = "documents",
) -> None:
    """Write already-read documents to the vector db (synchronous variant).

    Args:
        documents (List[Document]): Documents to store; an empty list only logs a warning.
        metadata (Optional[Dict[str, Any]]): Passed to the vector db as ``filters``.
        upsert (bool): Upsert instead of insert when the vector db supports it.
        skip_existing (bool): When inserting, filter out existing/duplicate documents first.
        source_info (str): Description of the document source, used in log messages only.
    """
    if not documents:
        logger.warning(f"No documents were read from {source_info}")
        return

    log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")

    self._upsert_warning(upsert)

    # Decide loading strategy: upsert or insert (with optional skip)
    use_upsert = upsert and self.vector_db.upsert_available()  # type: ignore
    if use_upsert:
        log_debug(f"Upserting {len(documents)} documents.")
        self.vector_db.upsert(documents=documents, filters=metadata)  # type: ignore
    else:
        pending = documents
        if skip_existing:
            log_debug("Filtering out existing documents before insertion.")
            pending = self.filter_existing_documents(documents)

        if not pending:
            log_info("No new documents to insert after filtering.")
        else:
            log_debug(f"Inserting {len(pending)} new documents.")
            self.vector_db.insert(documents=pending, filters=metadata)  # type: ignore

    log_info(f"Finished loading documents from {source_info}.")
656
-
657
async def aprocess_documents(
    self,
    documents: List[Document],
    metadata: Optional[Dict[str, Any]] = None,
    upsert: bool = False,
    skip_existing: bool = True,
    source_info: str = "documents",
) -> None:
    """Write already-read documents to the vector db through its async API.

    Args:
        documents (List[Document]): Documents to store; an empty list only logs a warning.
        metadata (Optional[Dict[str, Any]]): Passed to the vector db as ``filters``.
        upsert (bool): Upsert instead of insert when the vector db supports it.
        skip_existing (bool): When inserting, filter out existing/duplicate documents first.
        source_info (str): Description of the document source, used in log messages only.
    """
    if not documents:
        logger.warning(f"No documents were read from {source_info}")
        return

    self._upsert_warning(upsert)

    log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")

    # Decide loading strategy: upsert or insert (with optional skip)
    use_upsert = upsert and self.vector_db.upsert_available()  # type: ignore
    if use_upsert:
        log_debug(f"Upserting {len(documents)} documents.")
        await self.vector_db.async_upsert(documents=documents, filters=metadata)  # type: ignore
    else:
        pending = documents
        if skip_existing:
            log_debug("Filtering out existing documents before insertion.")
            pending = await self.async_filter_existing_documents(documents)

        if not pending:
            log_info("No new documents to insert after filtering.")
        else:
            log_debug(f"Inserting {len(pending)} new documents.")
            await self.vector_db.async_insert(documents=pending, filters=metadata)  # type: ignore

    log_info(f"Finished loading documents from {source_info}.")