agno 1.8.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (589) hide show
  1. agno/agent/__init__.py +19 -27
  2. agno/agent/agent.py +3143 -4170
  3. agno/api/agent.py +11 -67
  4. agno/api/api.py +5 -46
  5. agno/api/evals.py +8 -19
  6. agno/api/os.py +17 -0
  7. agno/api/routes.py +6 -41
  8. agno/api/schemas/__init__.py +9 -0
  9. agno/api/schemas/agent.py +5 -21
  10. agno/api/schemas/evals.py +7 -16
  11. agno/api/schemas/os.py +14 -0
  12. agno/api/schemas/team.py +5 -21
  13. agno/api/schemas/utils.py +21 -0
  14. agno/api/schemas/workflows.py +11 -7
  15. agno/api/settings.py +53 -0
  16. agno/api/team.py +11 -66
  17. agno/api/workflow.py +28 -0
  18. agno/cloud/aws/base.py +214 -0
  19. agno/cloud/aws/s3/__init__.py +2 -0
  20. agno/cloud/aws/s3/api_client.py +43 -0
  21. agno/cloud/aws/s3/bucket.py +195 -0
  22. agno/cloud/aws/s3/object.py +57 -0
  23. agno/db/__init__.py +24 -0
  24. agno/db/base.py +245 -0
  25. agno/db/dynamo/__init__.py +3 -0
  26. agno/db/dynamo/dynamo.py +1743 -0
  27. agno/db/dynamo/schemas.py +278 -0
  28. agno/db/dynamo/utils.py +684 -0
  29. agno/db/firestore/__init__.py +3 -0
  30. agno/db/firestore/firestore.py +1432 -0
  31. agno/db/firestore/schemas.py +130 -0
  32. agno/db/firestore/utils.py +278 -0
  33. agno/db/gcs_json/__init__.py +3 -0
  34. agno/db/gcs_json/gcs_json_db.py +1001 -0
  35. agno/db/gcs_json/utils.py +194 -0
  36. agno/db/in_memory/__init__.py +3 -0
  37. agno/db/in_memory/in_memory_db.py +882 -0
  38. agno/db/in_memory/utils.py +172 -0
  39. agno/db/json/__init__.py +3 -0
  40. agno/db/json/json_db.py +1045 -0
  41. agno/db/json/utils.py +196 -0
  42. agno/db/migrations/v1_to_v2.py +162 -0
  43. agno/db/mongo/__init__.py +3 -0
  44. agno/db/mongo/mongo.py +1416 -0
  45. agno/db/mongo/schemas.py +77 -0
  46. agno/db/mongo/utils.py +204 -0
  47. agno/db/mysql/__init__.py +3 -0
  48. agno/db/mysql/mysql.py +1719 -0
  49. agno/db/mysql/schemas.py +124 -0
  50. agno/db/mysql/utils.py +297 -0
  51. agno/db/postgres/__init__.py +3 -0
  52. agno/db/postgres/postgres.py +1710 -0
  53. agno/db/postgres/schemas.py +124 -0
  54. agno/db/postgres/utils.py +280 -0
  55. agno/db/redis/__init__.py +3 -0
  56. agno/db/redis/redis.py +1367 -0
  57. agno/db/redis/schemas.py +109 -0
  58. agno/db/redis/utils.py +288 -0
  59. agno/db/schemas/__init__.py +3 -0
  60. agno/db/schemas/evals.py +33 -0
  61. agno/db/schemas/knowledge.py +40 -0
  62. agno/db/schemas/memory.py +46 -0
  63. agno/db/singlestore/__init__.py +3 -0
  64. agno/db/singlestore/schemas.py +116 -0
  65. agno/db/singlestore/singlestore.py +1712 -0
  66. agno/db/singlestore/utils.py +326 -0
  67. agno/db/sqlite/__init__.py +3 -0
  68. agno/db/sqlite/schemas.py +119 -0
  69. agno/db/sqlite/sqlite.py +1676 -0
  70. agno/db/sqlite/utils.py +268 -0
  71. agno/db/utils.py +88 -0
  72. agno/eval/__init__.py +14 -0
  73. agno/eval/accuracy.py +154 -48
  74. agno/eval/performance.py +88 -23
  75. agno/eval/reliability.py +73 -20
  76. agno/eval/utils.py +23 -13
  77. agno/integrations/discord/__init__.py +3 -0
  78. agno/{app → integrations}/discord/client.py +10 -10
  79. agno/knowledge/__init__.py +2 -2
  80. agno/{document → knowledge}/chunking/agentic.py +2 -2
  81. agno/{document → knowledge}/chunking/document.py +2 -2
  82. agno/{document → knowledge}/chunking/fixed.py +3 -3
  83. agno/{document → knowledge}/chunking/markdown.py +2 -2
  84. agno/{document → knowledge}/chunking/recursive.py +2 -2
  85. agno/{document → knowledge}/chunking/row.py +2 -2
  86. agno/knowledge/chunking/semantic.py +59 -0
  87. agno/knowledge/chunking/strategy.py +121 -0
  88. agno/knowledge/content.py +74 -0
  89. agno/knowledge/document/__init__.py +5 -0
  90. agno/{document → knowledge/document}/base.py +12 -2
  91. agno/knowledge/embedder/__init__.py +5 -0
  92. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  93. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  94. agno/{embedder → knowledge/embedder}/base.py +6 -0
  95. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  96. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  97. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  98. agno/{embedder → knowledge/embedder}/google.py +74 -1
  99. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  100. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  101. agno/knowledge/embedder/langdb.py +22 -0
  102. agno/knowledge/embedder/mistral.py +139 -0
  103. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  104. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  105. agno/knowledge/embedder/openai.py +223 -0
  106. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  107. agno/{embedder → knowledge/embedder}/together.py +1 -1
  108. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  109. agno/knowledge/knowledge.py +1551 -0
  110. agno/knowledge/reader/__init__.py +7 -0
  111. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  112. agno/knowledge/reader/base.py +88 -0
  113. agno/{document → knowledge}/reader/csv_reader.py +47 -65
  114. agno/knowledge/reader/docx_reader.py +83 -0
  115. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  116. agno/{document → knowledge}/reader/json_reader.py +30 -9
  117. agno/{document → knowledge}/reader/markdown_reader.py +58 -9
  118. agno/{document → knowledge}/reader/pdf_reader.py +71 -126
  119. agno/knowledge/reader/reader_factory.py +268 -0
  120. agno/knowledge/reader/s3_reader.py +101 -0
  121. agno/{document → knowledge}/reader/text_reader.py +31 -10
  122. agno/knowledge/reader/url_reader.py +128 -0
  123. agno/knowledge/reader/web_search_reader.py +366 -0
  124. agno/{document → knowledge}/reader/website_reader.py +37 -10
  125. agno/knowledge/reader/wikipedia_reader.py +59 -0
  126. agno/knowledge/reader/youtube_reader.py +78 -0
  127. agno/knowledge/remote_content/remote_content.py +88 -0
  128. agno/{reranker → knowledge/reranker}/base.py +1 -1
  129. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  130. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  131. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  132. agno/knowledge/types.py +30 -0
  133. agno/knowledge/utils.py +169 -0
  134. agno/media.py +269 -268
  135. agno/memory/__init__.py +2 -10
  136. agno/memory/manager.py +1003 -148
  137. agno/models/aimlapi/__init__.py +2 -2
  138. agno/models/aimlapi/aimlapi.py +6 -6
  139. agno/models/anthropic/claude.py +128 -72
  140. agno/models/aws/bedrock.py +107 -175
  141. agno/models/aws/claude.py +64 -18
  142. agno/models/azure/ai_foundry.py +73 -23
  143. agno/models/base.py +346 -290
  144. agno/models/cerebras/cerebras.py +84 -27
  145. agno/models/cohere/chat.py +106 -98
  146. agno/models/google/gemini.py +105 -46
  147. agno/models/groq/groq.py +97 -35
  148. agno/models/huggingface/huggingface.py +92 -27
  149. agno/models/ibm/watsonx.py +72 -13
  150. agno/models/litellm/chat.py +85 -13
  151. agno/models/message.py +46 -151
  152. agno/models/meta/llama.py +85 -49
  153. agno/models/metrics.py +120 -0
  154. agno/models/mistral/mistral.py +90 -21
  155. agno/models/ollama/__init__.py +0 -2
  156. agno/models/ollama/chat.py +85 -47
  157. agno/models/openai/chat.py +154 -37
  158. agno/models/openai/responses.py +178 -105
  159. agno/models/perplexity/perplexity.py +26 -2
  160. agno/models/portkey/portkey.py +0 -7
  161. agno/models/response.py +15 -9
  162. agno/models/utils.py +20 -0
  163. agno/models/vercel/__init__.py +2 -2
  164. agno/models/vercel/v0.py +1 -1
  165. agno/models/vllm/__init__.py +2 -2
  166. agno/models/vllm/vllm.py +3 -3
  167. agno/models/xai/xai.py +10 -10
  168. agno/os/__init__.py +3 -0
  169. agno/os/app.py +497 -0
  170. agno/os/auth.py +47 -0
  171. agno/os/config.py +103 -0
  172. agno/os/interfaces/agui/__init__.py +3 -0
  173. agno/os/interfaces/agui/agui.py +31 -0
  174. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  175. agno/{app → os/interfaces}/agui/utils.py +65 -28
  176. agno/os/interfaces/base.py +21 -0
  177. agno/os/interfaces/slack/__init__.py +3 -0
  178. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  179. agno/os/interfaces/slack/slack.py +32 -0
  180. agno/os/interfaces/whatsapp/__init__.py +3 -0
  181. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  182. agno/os/interfaces/whatsapp/whatsapp.py +29 -0
  183. agno/os/mcp.py +235 -0
  184. agno/os/router.py +1400 -0
  185. agno/os/routers/__init__.py +3 -0
  186. agno/os/routers/evals/__init__.py +3 -0
  187. agno/os/routers/evals/evals.py +393 -0
  188. agno/os/routers/evals/schemas.py +142 -0
  189. agno/os/routers/evals/utils.py +161 -0
  190. agno/os/routers/knowledge/__init__.py +3 -0
  191. agno/os/routers/knowledge/knowledge.py +850 -0
  192. agno/os/routers/knowledge/schemas.py +118 -0
  193. agno/os/routers/memory/__init__.py +3 -0
  194. agno/os/routers/memory/memory.py +410 -0
  195. agno/os/routers/memory/schemas.py +58 -0
  196. agno/os/routers/metrics/__init__.py +3 -0
  197. agno/os/routers/metrics/metrics.py +178 -0
  198. agno/os/routers/metrics/schemas.py +47 -0
  199. agno/os/routers/session/__init__.py +3 -0
  200. agno/os/routers/session/session.py +536 -0
  201. agno/os/schema.py +945 -0
  202. agno/{app/playground → os}/settings.py +7 -15
  203. agno/os/utils.py +270 -0
  204. agno/reasoning/azure_ai_foundry.py +4 -4
  205. agno/reasoning/deepseek.py +4 -4
  206. agno/reasoning/default.py +6 -11
  207. agno/reasoning/groq.py +4 -4
  208. agno/reasoning/helpers.py +4 -6
  209. agno/reasoning/ollama.py +4 -4
  210. agno/reasoning/openai.py +4 -4
  211. agno/run/agent.py +633 -0
  212. agno/run/base.py +53 -77
  213. agno/run/cancel.py +81 -0
  214. agno/run/team.py +243 -96
  215. agno/run/workflow.py +550 -12
  216. agno/session/__init__.py +10 -0
  217. agno/session/agent.py +244 -0
  218. agno/session/summary.py +225 -0
  219. agno/session/team.py +262 -0
  220. agno/{storage/session/v2 → session}/workflow.py +47 -24
  221. agno/team/__init__.py +15 -16
  222. agno/team/team.py +3260 -4824
  223. agno/tools/agentql.py +14 -5
  224. agno/tools/airflow.py +9 -4
  225. agno/tools/api.py +7 -3
  226. agno/tools/apify.py +2 -46
  227. agno/tools/arxiv.py +8 -3
  228. agno/tools/aws_lambda.py +7 -5
  229. agno/tools/aws_ses.py +7 -1
  230. agno/tools/baidusearch.py +4 -1
  231. agno/tools/bitbucket.py +4 -4
  232. agno/tools/brandfetch.py +14 -11
  233. agno/tools/bravesearch.py +4 -1
  234. agno/tools/brightdata.py +43 -23
  235. agno/tools/browserbase.py +13 -4
  236. agno/tools/calcom.py +12 -10
  237. agno/tools/calculator.py +10 -27
  238. agno/tools/cartesia.py +20 -17
  239. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  240. agno/tools/confluence.py +8 -8
  241. agno/tools/crawl4ai.py +7 -1
  242. agno/tools/csv_toolkit.py +9 -8
  243. agno/tools/dalle.py +22 -12
  244. agno/tools/daytona.py +13 -16
  245. agno/tools/decorator.py +6 -3
  246. agno/tools/desi_vocal.py +17 -8
  247. agno/tools/discord.py +11 -8
  248. agno/tools/docker.py +30 -42
  249. agno/tools/duckdb.py +34 -53
  250. agno/tools/duckduckgo.py +8 -7
  251. agno/tools/e2b.py +62 -62
  252. agno/tools/eleven_labs.py +36 -29
  253. agno/tools/email.py +4 -1
  254. agno/tools/evm.py +7 -1
  255. agno/tools/exa.py +19 -14
  256. agno/tools/fal.py +30 -30
  257. agno/tools/file.py +9 -8
  258. agno/tools/financial_datasets.py +25 -44
  259. agno/tools/firecrawl.py +17 -18
  260. agno/tools/function.py +127 -18
  261. agno/tools/giphy.py +23 -11
  262. agno/tools/github.py +48 -126
  263. agno/tools/gmail.py +45 -61
  264. agno/tools/google_bigquery.py +7 -6
  265. agno/tools/google_maps.py +11 -26
  266. agno/tools/googlesearch.py +7 -2
  267. agno/tools/googlesheets.py +21 -17
  268. agno/tools/hackernews.py +9 -5
  269. agno/tools/jina.py +5 -4
  270. agno/tools/jira.py +18 -9
  271. agno/tools/knowledge.py +31 -32
  272. agno/tools/linear.py +18 -33
  273. agno/tools/linkup.py +5 -1
  274. agno/tools/local_file_system.py +8 -5
  275. agno/tools/lumalab.py +32 -20
  276. agno/tools/mcp.py +1 -2
  277. agno/tools/mem0.py +18 -12
  278. agno/tools/memori.py +14 -10
  279. agno/tools/mlx_transcribe.py +3 -2
  280. agno/tools/models/azure_openai.py +33 -15
  281. agno/tools/models/gemini.py +59 -32
  282. agno/tools/models/groq.py +30 -23
  283. agno/tools/models/nebius.py +28 -12
  284. agno/tools/models_labs.py +40 -16
  285. agno/tools/moviepy_video.py +7 -6
  286. agno/tools/neo4j.py +10 -8
  287. agno/tools/newspaper.py +7 -2
  288. agno/tools/newspaper4k.py +8 -3
  289. agno/tools/openai.py +58 -32
  290. agno/tools/openbb.py +12 -11
  291. agno/tools/opencv.py +63 -47
  292. agno/tools/openweather.py +14 -12
  293. agno/tools/pandas.py +11 -3
  294. agno/tools/postgres.py +4 -12
  295. agno/tools/pubmed.py +4 -1
  296. agno/tools/python.py +9 -22
  297. agno/tools/reasoning.py +35 -27
  298. agno/tools/reddit.py +11 -26
  299. agno/tools/replicate.py +55 -42
  300. agno/tools/resend.py +4 -1
  301. agno/tools/scrapegraph.py +15 -14
  302. agno/tools/searxng.py +10 -23
  303. agno/tools/serpapi.py +6 -3
  304. agno/tools/serper.py +13 -4
  305. agno/tools/shell.py +9 -2
  306. agno/tools/slack.py +12 -11
  307. agno/tools/sleep.py +3 -2
  308. agno/tools/spider.py +24 -4
  309. agno/tools/sql.py +7 -6
  310. agno/tools/tavily.py +6 -4
  311. agno/tools/telegram.py +12 -4
  312. agno/tools/todoist.py +11 -31
  313. agno/tools/toolkit.py +1 -1
  314. agno/tools/trafilatura.py +22 -6
  315. agno/tools/trello.py +9 -22
  316. agno/tools/twilio.py +10 -3
  317. agno/tools/user_control_flow.py +6 -1
  318. agno/tools/valyu.py +34 -5
  319. agno/tools/visualization.py +19 -28
  320. agno/tools/webbrowser.py +4 -3
  321. agno/tools/webex.py +11 -7
  322. agno/tools/website.py +15 -46
  323. agno/tools/webtools.py +12 -4
  324. agno/tools/whatsapp.py +5 -9
  325. agno/tools/wikipedia.py +20 -13
  326. agno/tools/x.py +14 -13
  327. agno/tools/yfinance.py +13 -40
  328. agno/tools/youtube.py +26 -20
  329. agno/tools/zendesk.py +7 -2
  330. agno/tools/zep.py +10 -7
  331. agno/tools/zoom.py +10 -9
  332. agno/utils/common.py +1 -19
  333. agno/utils/events.py +100 -123
  334. agno/utils/gemini.py +1 -1
  335. agno/utils/knowledge.py +29 -0
  336. agno/utils/log.py +54 -4
  337. agno/utils/mcp.py +68 -10
  338. agno/utils/media.py +39 -0
  339. agno/utils/message.py +12 -1
  340. agno/utils/models/aws_claude.py +1 -1
  341. agno/utils/models/claude.py +6 -12
  342. agno/utils/models/cohere.py +1 -1
  343. agno/utils/models/mistral.py +8 -7
  344. agno/utils/models/schema_utils.py +3 -3
  345. agno/utils/models/watsonx.py +1 -1
  346. agno/utils/openai.py +1 -1
  347. agno/utils/pprint.py +33 -32
  348. agno/utils/print_response/agent.py +779 -0
  349. agno/utils/print_response/team.py +1669 -0
  350. agno/utils/print_response/workflow.py +1451 -0
  351. agno/utils/prompts.py +14 -14
  352. agno/utils/reasoning.py +87 -0
  353. agno/utils/response.py +42 -42
  354. agno/utils/streamlit.py +481 -0
  355. agno/utils/string.py +8 -22
  356. agno/utils/team.py +50 -0
  357. agno/utils/timer.py +2 -2
  358. agno/vectordb/base.py +33 -21
  359. agno/vectordb/cassandra/cassandra.py +287 -23
  360. agno/vectordb/chroma/chromadb.py +482 -59
  361. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  362. agno/vectordb/couchbase/couchbase.py +309 -29
  363. agno/vectordb/lancedb/lance_db.py +360 -21
  364. agno/vectordb/langchaindb/__init__.py +5 -0
  365. agno/vectordb/langchaindb/langchaindb.py +145 -0
  366. agno/vectordb/lightrag/__init__.py +5 -0
  367. agno/vectordb/lightrag/lightrag.py +374 -0
  368. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  369. agno/vectordb/milvus/milvus.py +242 -32
  370. agno/vectordb/mongodb/mongodb.py +200 -24
  371. agno/vectordb/pgvector/pgvector.py +319 -37
  372. agno/vectordb/pineconedb/pineconedb.py +221 -27
  373. agno/vectordb/qdrant/qdrant.py +334 -14
  374. agno/vectordb/singlestore/singlestore.py +286 -29
  375. agno/vectordb/surrealdb/surrealdb.py +187 -7
  376. agno/vectordb/upstashdb/upstashdb.py +342 -26
  377. agno/vectordb/weaviate/weaviate.py +227 -165
  378. agno/workflow/__init__.py +17 -13
  379. agno/workflow/{v2/condition.py → condition.py} +135 -32
  380. agno/workflow/{v2/loop.py → loop.py} +115 -28
  381. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  382. agno/workflow/{v2/router.py → router.py} +133 -32
  383. agno/workflow/{v2/step.py → step.py} +207 -49
  384. agno/workflow/{v2/steps.py → steps.py} +147 -66
  385. agno/workflow/types.py +482 -0
  386. agno/workflow/workflow.py +2410 -696
  387. agno-2.0.0.dist-info/METADATA +494 -0
  388. agno-2.0.0.dist-info/RECORD +515 -0
  389. agno-2.0.0.dist-info/licenses/LICENSE +201 -0
  390. agno/agent/metrics.py +0 -110
  391. agno/api/app.py +0 -35
  392. agno/api/playground.py +0 -92
  393. agno/api/schemas/app.py +0 -12
  394. agno/api/schemas/playground.py +0 -22
  395. agno/api/schemas/user.py +0 -35
  396. agno/api/schemas/workspace.py +0 -46
  397. agno/api/user.py +0 -160
  398. agno/api/workflows.py +0 -33
  399. agno/api/workspace.py +0 -175
  400. agno/app/agui/__init__.py +0 -3
  401. agno/app/agui/app.py +0 -17
  402. agno/app/agui/sync_router.py +0 -120
  403. agno/app/base.py +0 -186
  404. agno/app/discord/__init__.py +0 -3
  405. agno/app/fastapi/__init__.py +0 -3
  406. agno/app/fastapi/app.py +0 -107
  407. agno/app/fastapi/async_router.py +0 -457
  408. agno/app/fastapi/sync_router.py +0 -448
  409. agno/app/playground/app.py +0 -228
  410. agno/app/playground/async_router.py +0 -1053
  411. agno/app/playground/deploy.py +0 -249
  412. agno/app/playground/operator.py +0 -183
  413. agno/app/playground/schemas.py +0 -223
  414. agno/app/playground/serve.py +0 -55
  415. agno/app/playground/sync_router.py +0 -1045
  416. agno/app/playground/utils.py +0 -46
  417. agno/app/settings.py +0 -15
  418. agno/app/slack/__init__.py +0 -3
  419. agno/app/slack/app.py +0 -19
  420. agno/app/slack/sync_router.py +0 -92
  421. agno/app/utils.py +0 -54
  422. agno/app/whatsapp/__init__.py +0 -3
  423. agno/app/whatsapp/app.py +0 -15
  424. agno/app/whatsapp/sync_router.py +0 -197
  425. agno/cli/auth_server.py +0 -249
  426. agno/cli/config.py +0 -274
  427. agno/cli/console.py +0 -88
  428. agno/cli/credentials.py +0 -23
  429. agno/cli/entrypoint.py +0 -571
  430. agno/cli/operator.py +0 -357
  431. agno/cli/settings.py +0 -96
  432. agno/cli/ws/ws_cli.py +0 -817
  433. agno/constants.py +0 -13
  434. agno/document/__init__.py +0 -5
  435. agno/document/chunking/semantic.py +0 -45
  436. agno/document/chunking/strategy.py +0 -31
  437. agno/document/reader/__init__.py +0 -5
  438. agno/document/reader/base.py +0 -47
  439. agno/document/reader/docx_reader.py +0 -60
  440. agno/document/reader/gcs/pdf_reader.py +0 -44
  441. agno/document/reader/s3/pdf_reader.py +0 -59
  442. agno/document/reader/s3/text_reader.py +0 -63
  443. agno/document/reader/url_reader.py +0 -59
  444. agno/document/reader/youtube_reader.py +0 -58
  445. agno/embedder/__init__.py +0 -5
  446. agno/embedder/langdb.py +0 -80
  447. agno/embedder/mistral.py +0 -82
  448. agno/embedder/openai.py +0 -78
  449. agno/file/__init__.py +0 -5
  450. agno/file/file.py +0 -16
  451. agno/file/local/csv.py +0 -32
  452. agno/file/local/txt.py +0 -19
  453. agno/infra/app.py +0 -240
  454. agno/infra/base.py +0 -144
  455. agno/infra/context.py +0 -20
  456. agno/infra/db_app.py +0 -52
  457. agno/infra/resource.py +0 -205
  458. agno/infra/resources.py +0 -55
  459. agno/knowledge/agent.py +0 -702
  460. agno/knowledge/arxiv.py +0 -33
  461. agno/knowledge/combined.py +0 -36
  462. agno/knowledge/csv.py +0 -144
  463. agno/knowledge/csv_url.py +0 -124
  464. agno/knowledge/document.py +0 -223
  465. agno/knowledge/docx.py +0 -137
  466. agno/knowledge/firecrawl.py +0 -34
  467. agno/knowledge/gcs/__init__.py +0 -0
  468. agno/knowledge/gcs/base.py +0 -39
  469. agno/knowledge/gcs/pdf.py +0 -125
  470. agno/knowledge/json.py +0 -137
  471. agno/knowledge/langchain.py +0 -71
  472. agno/knowledge/light_rag.py +0 -273
  473. agno/knowledge/llamaindex.py +0 -66
  474. agno/knowledge/markdown.py +0 -154
  475. agno/knowledge/pdf.py +0 -164
  476. agno/knowledge/pdf_bytes.py +0 -42
  477. agno/knowledge/pdf_url.py +0 -148
  478. agno/knowledge/s3/__init__.py +0 -0
  479. agno/knowledge/s3/base.py +0 -64
  480. agno/knowledge/s3/pdf.py +0 -33
  481. agno/knowledge/s3/text.py +0 -34
  482. agno/knowledge/text.py +0 -141
  483. agno/knowledge/url.py +0 -46
  484. agno/knowledge/website.py +0 -179
  485. agno/knowledge/wikipedia.py +0 -32
  486. agno/knowledge/youtube.py +0 -35
  487. agno/memory/agent.py +0 -423
  488. agno/memory/classifier.py +0 -104
  489. agno/memory/db/__init__.py +0 -5
  490. agno/memory/db/base.py +0 -42
  491. agno/memory/db/mongodb.py +0 -189
  492. agno/memory/db/postgres.py +0 -203
  493. agno/memory/db/sqlite.py +0 -193
  494. agno/memory/memory.py +0 -22
  495. agno/memory/row.py +0 -36
  496. agno/memory/summarizer.py +0 -201
  497. agno/memory/summary.py +0 -19
  498. agno/memory/team.py +0 -415
  499. agno/memory/v2/__init__.py +0 -2
  500. agno/memory/v2/db/__init__.py +0 -1
  501. agno/memory/v2/db/base.py +0 -42
  502. agno/memory/v2/db/firestore.py +0 -339
  503. agno/memory/v2/db/mongodb.py +0 -196
  504. agno/memory/v2/db/postgres.py +0 -214
  505. agno/memory/v2/db/redis.py +0 -187
  506. agno/memory/v2/db/schema.py +0 -54
  507. agno/memory/v2/db/sqlite.py +0 -209
  508. agno/memory/v2/manager.py +0 -437
  509. agno/memory/v2/memory.py +0 -1097
  510. agno/memory/v2/schema.py +0 -55
  511. agno/memory/v2/summarizer.py +0 -215
  512. agno/memory/workflow.py +0 -38
  513. agno/models/ollama/tools.py +0 -430
  514. agno/models/qwen/__init__.py +0 -5
  515. agno/playground/__init__.py +0 -10
  516. agno/playground/deploy.py +0 -3
  517. agno/playground/playground.py +0 -3
  518. agno/playground/serve.py +0 -3
  519. agno/playground/settings.py +0 -3
  520. agno/reranker/__init__.py +0 -0
  521. agno/run/response.py +0 -467
  522. agno/run/v2/__init__.py +0 -0
  523. agno/run/v2/workflow.py +0 -567
  524. agno/storage/__init__.py +0 -0
  525. agno/storage/agent/__init__.py +0 -0
  526. agno/storage/agent/dynamodb.py +0 -1
  527. agno/storage/agent/json.py +0 -1
  528. agno/storage/agent/mongodb.py +0 -1
  529. agno/storage/agent/postgres.py +0 -1
  530. agno/storage/agent/singlestore.py +0 -1
  531. agno/storage/agent/sqlite.py +0 -1
  532. agno/storage/agent/yaml.py +0 -1
  533. agno/storage/base.py +0 -60
  534. agno/storage/dynamodb.py +0 -673
  535. agno/storage/firestore.py +0 -297
  536. agno/storage/gcs_json.py +0 -261
  537. agno/storage/in_memory.py +0 -234
  538. agno/storage/json.py +0 -237
  539. agno/storage/mongodb.py +0 -328
  540. agno/storage/mysql.py +0 -685
  541. agno/storage/postgres.py +0 -682
  542. agno/storage/redis.py +0 -336
  543. agno/storage/session/__init__.py +0 -16
  544. agno/storage/session/agent.py +0 -64
  545. agno/storage/session/team.py +0 -63
  546. agno/storage/session/v2/__init__.py +0 -5
  547. agno/storage/session/workflow.py +0 -61
  548. agno/storage/singlestore.py +0 -606
  549. agno/storage/sqlite.py +0 -646
  550. agno/storage/workflow/__init__.py +0 -0
  551. agno/storage/workflow/mongodb.py +0 -1
  552. agno/storage/workflow/postgres.py +0 -1
  553. agno/storage/workflow/sqlite.py +0 -1
  554. agno/storage/yaml.py +0 -241
  555. agno/tools/thinking.py +0 -73
  556. agno/utils/defaults.py +0 -57
  557. agno/utils/filesystem.py +0 -39
  558. agno/utils/git.py +0 -52
  559. agno/utils/json_io.py +0 -30
  560. agno/utils/load_env.py +0 -19
  561. agno/utils/py_io.py +0 -19
  562. agno/utils/pyproject.py +0 -18
  563. agno/utils/resource_filter.py +0 -31
  564. agno/workflow/v2/__init__.py +0 -21
  565. agno/workflow/v2/types.py +0 -357
  566. agno/workflow/v2/workflow.py +0 -3313
  567. agno/workspace/__init__.py +0 -0
  568. agno/workspace/config.py +0 -325
  569. agno/workspace/enums.py +0 -6
  570. agno/workspace/helpers.py +0 -52
  571. agno/workspace/operator.py +0 -757
  572. agno/workspace/settings.py +0 -158
  573. agno-1.8.2.dist-info/METADATA +0 -982
  574. agno-1.8.2.dist-info/RECORD +0 -566
  575. agno-1.8.2.dist-info/entry_points.txt +0 -3
  576. agno-1.8.2.dist-info/licenses/LICENSE +0 -375
  577. /agno/{app → db/migrations}/__init__.py +0 -0
  578. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  579. /agno/{cli → integrations}/__init__.py +0 -0
  580. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  581. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  582. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  583. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  584. /agno/{app → os/interfaces}/slack/security.py +0 -0
  585. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  586. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  587. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  588. {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
  589. {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,76 @@
1
1
  import asyncio
2
2
  import uuid
3
3
  from pathlib import Path
4
- from typing import IO, Any, List, Union
4
+ from typing import IO, Any, List, Optional, Union
5
5
 
6
- from agno.document.base import Document
7
- from agno.document.reader.base import Reader
6
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
7
+ from agno.knowledge.document.base import Document
8
+ from agno.knowledge.reader.base import Reader
9
+ from agno.knowledge.types import ContentType
8
10
  from agno.utils.log import log_info, logger
9
11
 
12
+ DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy
13
+
14
+ # Try to import MarkdownChunking, fallback to FixedSizeChunking if not available
15
+ try:
16
+ from agno.knowledge.chunking.markdown import MarkdownChunking
17
+
18
+ DEFAULT_CHUNKER_STRATEGY = MarkdownChunking()
19
+ MARKDOWN_CHUNKER_AVAILABLE = True
20
+ except ImportError:
21
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
22
+
23
+ DEFAULT_CHUNKER_STRATEGY = FixedSizeChunking()
24
+ MARKDOWN_CHUNKER_AVAILABLE = False
25
+
10
26
 
11
27
  class MarkdownReader(Reader):
12
28
  """Reader for Markdown files"""
13
29
 
14
- def read(self, file: Union[Path, IO[Any]]) -> List[Document]:
30
+ @classmethod
31
+ def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
32
+ """Get the list of supported chunking strategies for Markdown readers."""
33
+ strategies = [
34
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
35
+ ChunkingStrategyType.AGENTIC_CHUNKER,
36
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
37
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
38
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
39
+ ]
40
+
41
+ # Only include MarkdownChunking if it's available
42
+ if MARKDOWN_CHUNKER_AVAILABLE:
43
+ strategies.insert(0, ChunkingStrategyType.MARKDOWN_CHUNKER)
44
+
45
+ return strategies
46
+
47
+ @classmethod
48
+ def get_supported_content_types(self) -> List[ContentType]:
49
+ return [ContentType.MARKDOWN]
50
+
51
+ def __init__(
52
+ self,
53
+ chunking_strategy: Optional[ChunkingStrategy] = None,
54
+ name: Optional[str] = None,
55
+ description: Optional[str] = None,
56
+ ) -> None:
57
+ # Use the default chunking strategy if none provided
58
+ if chunking_strategy is None:
59
+ chunking_strategy = DEFAULT_CHUNKER_STRATEGY
60
+
61
+ super().__init__(chunking_strategy=chunking_strategy, name=name, description=description)
62
+
63
+ def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
15
64
  try:
16
65
  if isinstance(file, Path):
17
66
  if not file.exists():
18
67
  raise FileNotFoundError(f"Could not find file: {file}")
19
68
  log_info(f"Reading: {file}")
20
- file_name = file.stem
69
+ file_name = name or file.stem
21
70
  file_contents = file.read_text("utf-8")
22
71
  else:
23
72
  log_info(f"Reading uploaded file: {file.name}")
24
- file_name = file.name.split(".")[0]
73
+ file_name = name or file.name.split(".")[0]
25
74
  file.seek(0)
26
75
  file_contents = file.read().decode("utf-8")
27
76
 
@@ -36,14 +85,14 @@ class MarkdownReader(Reader):
36
85
  logger.error(f"Error reading: {file}: {e}")
37
86
  return []
38
87
 
39
- async def async_read(self, file: Union[Path, IO[Any]]) -> List[Document]:
88
+ async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
40
89
  try:
41
90
  if isinstance(file, Path):
42
91
  if not file.exists():
43
92
  raise FileNotFoundError(f"Could not find file: {file}")
44
93
 
45
94
  log_info(f"Reading asynchronously: {file}")
46
- file_name = file.stem
95
+ file_name = name or file.stem
47
96
 
48
97
  try:
49
98
  import aiofiles
@@ -55,7 +104,7 @@ class MarkdownReader(Reader):
55
104
  file_contents = file.read_text("utf-8")
56
105
  else:
57
106
  log_info(f"Reading uploaded file asynchronously: {file.name}")
58
- file_name = file.name.split(".")[0]
107
+ file_name = name or file.name.split(".")[0]
59
108
  file.seek(0)
60
109
  file_contents = file.read().decode("utf-8")
61
110
 
@@ -4,9 +4,10 @@ from pathlib import Path
4
4
  from typing import IO, Any, List, Optional, Tuple, Union
5
5
  from uuid import uuid4
6
6
 
7
- from agno.document.base import Document
8
- from agno.document.reader.base import Reader
9
- from agno.utils.http import async_fetch_with_retry, fetch_with_retry
7
+ from agno.knowledge.chunking.strategy import ChunkingStrategyType
8
+ from agno.knowledge.document.base import Document
9
+ from agno.knowledge.reader.base import Reader
10
+ from agno.knowledge.types import ContentType
10
11
  from agno.utils.log import log_error, log_info, logger
11
12
 
12
13
  try:
@@ -95,9 +96,9 @@ def _clean_page_numbers(
95
96
  Notes:
96
97
  - The function scans for page numbers using a regular expression that matches digits at the start or end of a string.
97
98
  - It evaluates several potential starting points for numbering (-2, -1, 0, 1, 2 shifts) to determine the most consistent sequence.
98
- - If at least a specified ratio of pages (defined by `PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL`) has correct sequential numbering,
99
+ - If at least a specified ratio of pages (defined by `PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL`) has correct sequential numbering,
99
100
  the page numbers are processed.
100
- - If page numbers are found, the function will add formatted page numbers to each page's content if `page_start_numbering_format` or
101
+ - If page numbers are found, the function will add formatted page numbers to each page's content if `page_start_numbering_format` or
101
102
  `page_end_numbering_format` is provided.
102
103
  """
103
104
  assert len(extra_content) == 0 or len(extra_content) == len(page_content_list), (
@@ -190,8 +191,23 @@ class BasePDFReader(Reader):
190
191
  self.page_end_numbering_format = page_end_numbering_format
191
192
  self.password = password
192
193
 
194
+ if self.chunking_strategy is None:
195
+ from agno.knowledge.chunking.document import DocumentChunking
196
+
197
+ self.chunking_strategy = DocumentChunking(chunk_size=5000)
193
198
  super().__init__(**kwargs)
194
199
 
200
+ @classmethod
201
+ def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
202
+ """Get the list of supported chunking strategies for PDF readers."""
203
+ return [
204
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
205
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
206
+ ChunkingStrategyType.AGENTIC_CHUNKER,
207
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
208
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
209
+ ]
210
+
195
211
  def _build_chunked_documents(self, documents: List[Document]) -> List[Document]:
196
212
  chunked_documents: List[Document] = []
197
213
  for document in documents:
@@ -205,19 +221,19 @@ class BasePDFReader(Reader):
205
221
  # Use provided password or fall back to instance password
206
222
  pdf_password = password or self.password
207
223
  if not pdf_password:
208
- logger.error(f"PDF {doc_name} is password protected but no password provided")
224
+ logger.error(f'PDF file "{doc_name}" is password protected but no password provided')
209
225
  return False
210
226
 
211
227
  try:
212
228
  decrypted_pdf = doc_reader.decrypt(pdf_password)
213
229
  if decrypted_pdf:
214
- log_info(f"Successfully decrypted PDF {doc_name} with user password")
230
+ log_info(f'Successfully decrypted PDF file "{doc_name}" with user password')
215
231
  return True
216
232
  else:
217
- log_error(f"Failed to decrypt PDF {doc_name}: incorrect password")
233
+ log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
218
234
  return False
219
235
  except Exception as e:
220
- log_error(f"Error decrypting PDF {doc_name}: {e}")
236
+ log_error(f'Error decrypting PDF file "{doc_name}": {e}')
221
237
  return False
222
238
 
223
239
  def _create_documents(self, pdf_content: List[str], doc_name: str, use_uuid_for_id: bool, page_number_shift):
@@ -306,9 +322,17 @@ class BasePDFReader(Reader):
306
322
  class PDFReader(BasePDFReader):
307
323
  """Reader for PDF files"""
308
324
 
309
- def read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
325
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Return the content types this reader handles (PDF only).

    This is a classmethod, so the first parameter is the class itself and is
    named ``cls`` (it was previously misnamed ``self``).
    """
    return [ContentType.PDF]
328
+
329
+ def read(
330
+ self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
331
+ ) -> List[Document]:
310
332
  try:
311
- if isinstance(pdf, str):
333
+ if name:
334
+ doc_name = name
335
+ elif isinstance(pdf, str):
312
336
  doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
313
337
  else:
314
338
  doc_name = pdf.name.split(".")[0]
@@ -318,26 +342,18 @@ class PDFReader(BasePDFReader):
318
342
  log_info(f"Reading: {doc_name}")
319
343
 
320
344
  try:
321
- pdf_reader = DocumentReader(pdf)
345
+ DocumentReader(pdf)
322
346
  except PdfStreamError as e:
323
347
  logger.error(f"Error reading PDF: {e}")
324
348
  return []
325
349
 
326
- # Handle PDF decryption
327
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
328
- return []
329
-
330
- # Read and chunk.
331
- return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
332
-
333
- async def async_read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
334
350
  try:
335
351
  if isinstance(pdf, str):
336
- doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
352
+ doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
337
353
  else:
338
- doc_name = pdf.name.split(".")[0]
354
+ doc_name = name or pdf.name.split(".")[0]
339
355
  except Exception:
340
- doc_name = "pdf"
356
+ doc_name = name or "pdf"
341
357
 
342
358
  log_info(f"Reading: {doc_name}")
343
359
 
@@ -351,73 +367,55 @@ class PDFReader(BasePDFReader):
351
367
  if not self._decrypt_pdf(pdf_reader, doc_name, password):
352
368
  return []
353
369
 
354
- # Read and chunk.
355
- return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
356
-
357
-
358
- class PDFUrlReader(BasePDFReader):
359
- """Reader for PDF files from URL"""
360
-
361
- def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
362
- super().__init__(password=password, **kwargs)
363
- self.proxy = proxy
364
-
365
- def read(self, url: str, password: Optional[str] = None) -> List[Document]:
366
- if not url:
367
- raise ValueError("No url provided")
368
-
369
- from io import BytesIO
370
-
371
- log_info(f"Reading: {url}")
372
-
373
- # Retry the request up to 3 times with exponential backoff
374
- response = fetch_with_retry(url, proxy=self.proxy)
375
-
376
- doc_name = url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
377
- pdf_reader = DocumentReader(BytesIO(response.content))
370
+ # Read and chunk
371
+ return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
378
372
 
379
- # Handle PDF decryption
380
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
373
+ async def async_read(
374
+ self,
375
+ pdf: Optional[Union[str, Path, IO[Any]]] = None,
376
+ name: Optional[str] = None,
377
+ password: Optional[str] = None,
378
+ ) -> List[Document]:
379
+ if pdf is None:
380
+ log_error("No pdf provided")
381
381
  return []
382
382
 
383
- # Read and chunk.
384
- return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
385
-
386
- async def async_read(self, url: str, password: Optional[str] = None) -> List[Document]:
387
- if not url:
388
- raise ValueError("No url provided")
389
-
390
- from io import BytesIO
391
-
392
- import httpx
393
-
394
- log_info(f"Reading: {url}")
383
+ try:
384
+ if isinstance(pdf, str):
385
+ doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
386
+ else:
387
+ doc_name = pdf.name.split(".")[0]
388
+ except Exception:
389
+ doc_name = name or "pdf"
395
390
 
396
- client_args = {"proxy": self.proxy} if self.proxy else {}
397
- async with httpx.AsyncClient(**client_args) as client: # type: ignore
398
- response = await async_fetch_with_retry(url, client=client)
391
+ log_info(f"Reading: {doc_name}")
399
392
 
400
- doc_name = url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
401
- pdf_reader = DocumentReader(BytesIO(response.content))
393
+ try:
394
+ pdf_reader = DocumentReader(pdf)
395
+ except PdfStreamError as e:
396
+ logger.error(f"Error reading PDF: {e}")
397
+ return []
402
398
 
403
399
  # Handle PDF decryption
404
400
  if not self._decrypt_pdf(pdf_reader, doc_name, password):
405
401
  return []
406
402
 
407
403
  # Read and chunk.
408
- return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
404
+ return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
409
405
 
410
406
 
411
407
  class PDFImageReader(BasePDFReader):
412
408
  """Reader for PDF files with text and images extraction"""
413
409
 
414
- def read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
410
+ def read(
411
+ self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
412
+ ) -> List[Document]:
415
413
  if not pdf:
416
414
  raise ValueError("No pdf provided")
417
415
 
418
416
  try:
419
417
  if isinstance(pdf, str):
420
- doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
418
+ doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
421
419
  else:
422
420
  doc_name = pdf.name.split(".")[0]
423
421
  except Exception:
@@ -433,13 +431,15 @@ class PDFImageReader(BasePDFReader):
433
431
  # Read and chunk.
434
432
  return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
435
433
 
436
- async def async_read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
434
+ async def async_read(
435
+ self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
436
+ ) -> List[Document]:
437
437
  if not pdf:
438
438
  raise ValueError("No pdf provided")
439
439
 
440
440
  try:
441
441
  if isinstance(pdf, str):
442
- doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
442
+ doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
443
443
  else:
444
444
  doc_name = pdf.name.split(".")[0]
445
445
  except Exception:
@@ -454,58 +454,3 @@ class PDFImageReader(BasePDFReader):
454
454
 
455
455
  # Read and chunk.
456
456
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
457
-
458
-
459
- class PDFUrlImageReader(BasePDFReader):
460
- """Reader for PDF files from URL with text and images extraction"""
461
-
462
- def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
463
- super().__init__(password=password, **kwargs)
464
- self.proxy = proxy
465
-
466
- def read(self, url: str, password: Optional[str] = None) -> List[Document]:
467
- if not url:
468
- raise ValueError("No url provided")
469
-
470
- from io import BytesIO
471
-
472
- import httpx
473
-
474
- # Read the PDF from the URL
475
- log_info(f"Reading: {url}")
476
- response = httpx.get(url, proxy=self.proxy) if self.proxy else httpx.get(url)
477
-
478
- doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
479
- pdf_reader = DocumentReader(BytesIO(response.content))
480
-
481
- # Handle PDF decryption
482
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
483
- return []
484
-
485
- # Read and chunk.
486
- return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
487
-
488
- async def async_read(self, url: str, password: Optional[str] = None) -> List[Document]:
489
- if not url:
490
- raise ValueError("No url provided")
491
-
492
- from io import BytesIO
493
-
494
- import httpx
495
-
496
- log_info(f"Reading: {url}")
497
-
498
- client_args = {"proxy": self.proxy} if self.proxy else {}
499
- async with httpx.AsyncClient(**client_args) as client: # type: ignore
500
- response = await client.get(url)
501
- response.raise_for_status()
502
-
503
- doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
504
- pdf_reader = DocumentReader(BytesIO(response.content))
505
-
506
- # Handle PDF decryption
507
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
508
- return []
509
-
510
- # Read and chunk.
511
- return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
@@ -0,0 +1,268 @@
1
+ import os
2
+ from typing import Any, Callable, Dict, List, Optional
3
+
4
+ from agno.knowledge.reader.base import Reader
5
+
6
+
7
class ReaderFactory:
    """Factory for creating and managing document readers with lazy loading.

    Every reader type is exposed through a ``_get_<key>_reader`` classmethod
    that imports its reader module only when called, so optional dependencies
    are not loaded until a reader of that type is requested. Readers built
    with default settings are cached per key; readers built with keyword
    overrides are never cached (see ``create_reader``).
    """

    # Cache of default-configured readers, keyed by reader key
    _reader_cache: Dict[str, Reader] = {}

    # File extension / MIME type -> reader key. Anything not listed here is
    # handled by the "text" reader (see get_reader_for_extension).
    _EXTENSION_TO_READER_KEY: Dict[str, str] = {
        ".pdf": "pdf",
        "application/pdf": "pdf",
        ".csv": "csv",
        "text/csv": "csv",
        ".docx": "docx",
        ".doc": "docx",
        ".json": "json",
        ".md": "markdown",
        ".markdown": "markdown",
        ".txt": "text",
        ".text": "text",
    }

    @classmethod
    def _get_pdf_reader(cls, **kwargs) -> Reader:
        """Get PDF reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.pdf_reader import PDFReader

        config: Dict[str, Any] = {
            "chunk": True,
            "chunk_size": 100,
            "description": "Processes PDF documents with OCR support for images and text extraction",
        }
        config.update(kwargs)
        return PDFReader(**config)

    @classmethod
    def _get_csv_reader(cls, **kwargs) -> Reader:
        """Get CSV reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.csv_reader import CSVReader

        config: Dict[str, Any] = {
            "name": "CSV Reader",
            "description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
        }
        config.update(kwargs)
        return CSVReader(**config)

    @classmethod
    def _get_docx_reader(cls, **kwargs) -> Reader:
        """Get Docx reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.docx_reader import DocxReader

        config: Dict[str, Any] = {
            "name": "Docx Reader",
            "description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
        }
        config.update(kwargs)
        return DocxReader(**config)

    @classmethod
    def _get_json_reader(cls, **kwargs) -> Reader:
        """Get JSON reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.json_reader import JSONReader

        config: Dict[str, Any] = {
            "name": "JSON Reader",
            "description": "Processes JSON data structures and API responses with nested object handling",
        }
        config.update(kwargs)
        return JSONReader(**config)

    @classmethod
    def _get_markdown_reader(cls, **kwargs) -> Reader:
        """Get Markdown reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.markdown_reader import MarkdownReader

        config: Dict[str, Any] = {
            "name": "Markdown Reader",
            "description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
        }
        config.update(kwargs)
        return MarkdownReader(**config)

    @classmethod
    def _get_text_reader(cls, **kwargs) -> Reader:
        """Get Text reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.text_reader import TextReader

        config: Dict[str, Any] = {
            "name": "Text Reader",
            "description": "Handles plain text files with customizable chunking strategies and encoding detection",
        }
        config.update(kwargs)
        return TextReader(**config)

    @classmethod
    def _get_website_reader(cls, **kwargs) -> Reader:
        """Get Website reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.website_reader import WebsiteReader

        config: Dict[str, Any] = {
            "name": "Website Reader",
            "description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
        }
        config.update(kwargs)
        return WebsiteReader(**config)

    @classmethod
    def _get_firecrawl_reader(cls, **kwargs) -> Reader:
        """Get Firecrawl reader instance. ``kwargs`` override the defaults below.

        The API key defaults to the ``FIRECRAWL_API_KEY`` environment variable
        when no truthy ``api_key`` keyword is supplied.
        """
        from agno.knowledge.reader.firecrawl_reader import FirecrawlReader

        config: Dict[str, Any] = {
            "api_key": kwargs.get("api_key") or os.getenv("FIRECRAWL_API_KEY"),
            "mode": "crawl",
            "name": "Firecrawl Reader",
            "description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
        }
        config.update(kwargs)
        return FirecrawlReader(**config)

    @classmethod
    def _get_youtube_reader(cls, **kwargs) -> Reader:
        """Get YouTube reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.youtube_reader import YouTubeReader

        config: Dict[str, Any] = {
            "name": "YouTube Reader",
            "description": "Extracts transcripts and metadata from YouTube videos and playlists",
        }
        config.update(kwargs)
        return YouTubeReader(**config)

    @classmethod
    def _get_arxiv_reader(cls, **kwargs) -> Reader:
        """Get Arxiv reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.arxiv_reader import ArxivReader

        config: Dict[str, Any] = {
            "name": "Arxiv Reader",
            "description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
        }
        config.update(kwargs)
        return ArxivReader(**config)

    @classmethod
    def _get_wikipedia_reader(cls, **kwargs) -> Reader:
        """Get Wikipedia reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.wikipedia_reader import WikipediaReader

        config: Dict[str, Any] = {
            "name": "Wikipedia Reader",
            "description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
        }
        config.update(kwargs)
        return WikipediaReader(**config)

    @classmethod
    def _get_web_search_reader(cls, **kwargs) -> Reader:
        """Get Web Search reader instance. ``kwargs`` override the defaults below."""
        from agno.knowledge.reader.web_search_reader import WebSearchReader

        config: Dict[str, Any] = {
            "name": "Web Search Reader",
            "description": "Executes web searches and processes results with relevance ranking and content extraction",
        }
        config.update(kwargs)
        return WebSearchReader(**config)

    @classmethod
    def _get_reader_method(cls, reader_key: str) -> Callable[..., Reader]:
        """Return the ``_get_<reader_key>_reader`` builder for the given key.

        Raises:
            ValueError: If no builder with that name exists on the class.
        """
        method_name = f"_get_{reader_key}_reader"
        if not hasattr(cls, method_name):
            raise ValueError(f"Unknown reader: {reader_key}")
        return getattr(cls, method_name)

    @classmethod
    def create_reader(cls, reader_key: str, **kwargs) -> Reader:
        """Create a reader instance with the given key and optional overrides.

        Without ``kwargs`` the default-configured reader is built once and
        then served from the cache. With ``kwargs`` a fresh reader is built
        on every call and the cache is neither read nor written: previously
        the overrides were silently dropped on a cache hit, and a customised
        instance could be cached and handed out as the shared default.

        Raises:
            ValueError: If ``reader_key`` has no matching builder.
        """
        if kwargs:
            return cls._get_reader_method(reader_key)(**kwargs)

        if reader_key in cls._reader_cache:
            return cls._reader_cache[reader_key]

        reader = cls._get_reader_method(reader_key)()
        cls._reader_cache[reader_key] = reader
        return reader

    @classmethod
    def get_reader_for_extension(cls, extension: str) -> Reader:
        """Get the appropriate reader for a file extension or MIME type.

        Matching is case-insensitive. Unknown extensions fall back to the
        text reader.
        """
        reader_key = cls._EXTENSION_TO_READER_KEY.get(extension.lower(), "text")
        return cls.create_reader(reader_key)

    @classmethod
    def get_reader_for_url(cls, url: str) -> Reader:
        """Get the appropriate reader for a URL.

        YouTube URLs get the YouTube reader. Every other URL gets a reader
        registered under the key ``"url"`` if one exists, otherwise the
        built-in website reader. The class ships no ``_get_url_reader``, so
        the previous unconditional ``"url"`` lookup raised ``ValueError`` for
        every non-YouTube URL.
        """
        url_lower = url.lower()

        if any(domain in url_lower for domain in ("youtube.com", "youtu.be")):
            return cls.create_reader("youtube")

        fallback_key = "url" if hasattr(cls, "_get_url_reader") else "website"
        return cls.create_reader(fallback_key)

    @classmethod
    def get_all_reader_keys(cls) -> List[str]:
        """Get all available reader keys in priority order.

        Keys are discovered from the ``_get_<key>_reader`` attribute names.
        URL-oriented readers come first in the fixed priority order below;
        all remaining keys follow alphabetically.
        """
        prefix = "_get_"
        suffix = "_reader"

        reader_keys = [
            attr_name[len(prefix) : -len(suffix)]
            for attr_name in dir(cls)
            if attr_name.startswith(prefix) and attr_name.endswith(suffix)
        ]

        # Priority order for URL readers; keys without a builder are simply absent
        url_reader_priority = ["url", "website", "firecrawl", "pdf_url", "csv_url", "youtube", "web_search"]

        def sort_key(reader_key):
            if reader_key in url_reader_priority:
                return (0, url_reader_priority.index(reader_key))
            return (1, reader_key)

        reader_keys.sort(key=sort_key)
        return reader_keys

    @classmethod
    def create_all_readers(cls) -> Dict[str, Reader]:
        """Create (or fetch from cache) every known reader, keyed by reader key."""
        return {reader_key: cls.create_reader(reader_key) for reader_key in cls.get_all_reader_keys()}

    @classmethod
    def clear_cache(cls):
        """Clear the reader cache."""
        cls._reader_cache.clear()

    @classmethod
    def register_reader(
        cls,
        key: str,
        reader_method,
        name: str,
        description: str,
        extensions: Optional[List[str]] = None,
    ):
        """Register a new reader type, or replace an existing one.

        Args:
            key: Reader key; the builder is installed as ``_get_<key>_reader``.
            reader_method: Callable invoked as ``reader_method(cls, **kwargs)``
                that returns the reader instance.
            name: Accepted as part of the signature; not used by this method.
            description: Accepted as part of the signature; not used by this method.
            extensions: Accepted as part of the signature; not used by this method.
        """
        method_name = f"_get_{key}_reader"
        setattr(cls, method_name, classmethod(reader_method))

        # Evict any reader built by a previous builder for this key, so the
        # new registration takes effect on the next create_reader() call.
        cls._reader_cache.pop(key, None)