agno 1.8.0__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (583) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +19 -27
  3. agno/agent/agent.py +2781 -4126
  4. agno/api/agent.py +9 -65
  5. agno/api/api.py +5 -46
  6. agno/api/evals.py +6 -17
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -41
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +5 -21
  11. agno/api/schemas/evals.py +7 -16
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +5 -21
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +11 -7
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +9 -64
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/db/__init__.py +24 -0
  25. agno/db/base.py +245 -0
  26. agno/db/dynamo/__init__.py +3 -0
  27. agno/db/dynamo/dynamo.py +1749 -0
  28. agno/db/dynamo/schemas.py +278 -0
  29. agno/db/dynamo/utils.py +684 -0
  30. agno/db/firestore/__init__.py +3 -0
  31. agno/db/firestore/firestore.py +1438 -0
  32. agno/db/firestore/schemas.py +130 -0
  33. agno/db/firestore/utils.py +278 -0
  34. agno/db/gcs_json/__init__.py +3 -0
  35. agno/db/gcs_json/gcs_json_db.py +1001 -0
  36. agno/db/gcs_json/utils.py +194 -0
  37. agno/db/in_memory/__init__.py +3 -0
  38. agno/db/in_memory/in_memory_db.py +888 -0
  39. agno/db/in_memory/utils.py +172 -0
  40. agno/db/json/__init__.py +3 -0
  41. agno/db/json/json_db.py +1051 -0
  42. agno/db/json/utils.py +196 -0
  43. agno/db/migrations/v1_to_v2.py +162 -0
  44. agno/db/mongo/__init__.py +3 -0
  45. agno/db/mongo/mongo.py +1417 -0
  46. agno/db/mongo/schemas.py +77 -0
  47. agno/db/mongo/utils.py +204 -0
  48. agno/db/mysql/__init__.py +3 -0
  49. agno/db/mysql/mysql.py +1719 -0
  50. agno/db/mysql/schemas.py +124 -0
  51. agno/db/mysql/utils.py +298 -0
  52. agno/db/postgres/__init__.py +3 -0
  53. agno/db/postgres/postgres.py +1720 -0
  54. agno/db/postgres/schemas.py +124 -0
  55. agno/db/postgres/utils.py +281 -0
  56. agno/db/redis/__init__.py +3 -0
  57. agno/db/redis/redis.py +1371 -0
  58. agno/db/redis/schemas.py +109 -0
  59. agno/db/redis/utils.py +288 -0
  60. agno/db/schemas/__init__.py +3 -0
  61. agno/db/schemas/evals.py +33 -0
  62. agno/db/schemas/knowledge.py +40 -0
  63. agno/db/schemas/memory.py +46 -0
  64. agno/db/singlestore/__init__.py +3 -0
  65. agno/db/singlestore/schemas.py +116 -0
  66. agno/db/singlestore/singlestore.py +1722 -0
  67. agno/db/singlestore/utils.py +327 -0
  68. agno/db/sqlite/__init__.py +3 -0
  69. agno/db/sqlite/schemas.py +119 -0
  70. agno/db/sqlite/sqlite.py +1680 -0
  71. agno/db/sqlite/utils.py +269 -0
  72. agno/db/utils.py +88 -0
  73. agno/eval/__init__.py +14 -0
  74. agno/eval/accuracy.py +142 -43
  75. agno/eval/performance.py +88 -23
  76. agno/eval/reliability.py +73 -20
  77. agno/eval/utils.py +23 -13
  78. agno/integrations/discord/__init__.py +3 -0
  79. agno/{app → integrations}/discord/client.py +10 -10
  80. agno/knowledge/__init__.py +2 -2
  81. agno/{document → knowledge}/chunking/agentic.py +2 -2
  82. agno/{document → knowledge}/chunking/document.py +2 -2
  83. agno/{document → knowledge}/chunking/fixed.py +3 -3
  84. agno/{document → knowledge}/chunking/markdown.py +2 -2
  85. agno/{document → knowledge}/chunking/recursive.py +2 -2
  86. agno/{document → knowledge}/chunking/row.py +2 -2
  87. agno/knowledge/chunking/semantic.py +59 -0
  88. agno/knowledge/chunking/strategy.py +121 -0
  89. agno/knowledge/content.py +74 -0
  90. agno/knowledge/document/__init__.py +5 -0
  91. agno/{document → knowledge/document}/base.py +12 -2
  92. agno/knowledge/embedder/__init__.py +5 -0
  93. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  94. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  95. agno/{embedder → knowledge/embedder}/base.py +6 -0
  96. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  97. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  98. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  99. agno/{embedder → knowledge/embedder}/google.py +74 -1
  100. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  101. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  102. agno/knowledge/embedder/langdb.py +22 -0
  103. agno/knowledge/embedder/mistral.py +139 -0
  104. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  105. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  106. agno/knowledge/embedder/openai.py +223 -0
  107. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  108. agno/{embedder → knowledge/embedder}/together.py +1 -1
  109. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  110. agno/knowledge/knowledge.py +1515 -0
  111. agno/knowledge/reader/__init__.py +7 -0
  112. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  113. agno/knowledge/reader/base.py +88 -0
  114. agno/{document → knowledge}/reader/csv_reader.py +68 -15
  115. agno/knowledge/reader/docx_reader.py +83 -0
  116. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  117. agno/knowledge/reader/gcs_reader.py +67 -0
  118. agno/{document → knowledge}/reader/json_reader.py +30 -9
  119. agno/{document → knowledge}/reader/markdown_reader.py +36 -9
  120. agno/{document → knowledge}/reader/pdf_reader.py +79 -21
  121. agno/knowledge/reader/reader_factory.py +275 -0
  122. agno/knowledge/reader/s3_reader.py +171 -0
  123. agno/{document → knowledge}/reader/text_reader.py +31 -10
  124. agno/knowledge/reader/url_reader.py +84 -0
  125. agno/knowledge/reader/web_search_reader.py +389 -0
  126. agno/{document → knowledge}/reader/website_reader.py +37 -10
  127. agno/knowledge/reader/wikipedia_reader.py +59 -0
  128. agno/knowledge/reader/youtube_reader.py +78 -0
  129. agno/knowledge/remote_content/remote_content.py +88 -0
  130. agno/{reranker → knowledge/reranker}/base.py +1 -1
  131. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  132. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  133. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  134. agno/knowledge/types.py +30 -0
  135. agno/knowledge/utils.py +169 -0
  136. agno/media.py +2 -2
  137. agno/memory/__init__.py +2 -10
  138. agno/memory/manager.py +1003 -148
  139. agno/models/aimlapi/__init__.py +2 -2
  140. agno/models/aimlapi/aimlapi.py +6 -6
  141. agno/models/anthropic/claude.py +129 -82
  142. agno/models/aws/bedrock.py +107 -175
  143. agno/models/aws/claude.py +64 -18
  144. agno/models/azure/ai_foundry.py +73 -23
  145. agno/models/base.py +347 -287
  146. agno/models/cerebras/cerebras.py +84 -27
  147. agno/models/cohere/chat.py +106 -98
  148. agno/models/dashscope/dashscope.py +14 -5
  149. agno/models/google/gemini.py +123 -53
  150. agno/models/groq/groq.py +97 -35
  151. agno/models/huggingface/huggingface.py +92 -27
  152. agno/models/ibm/watsonx.py +72 -13
  153. agno/models/litellm/chat.py +85 -13
  154. agno/models/message.py +38 -144
  155. agno/models/meta/llama.py +85 -49
  156. agno/models/metrics.py +120 -0
  157. agno/models/mistral/mistral.py +90 -21
  158. agno/models/ollama/__init__.py +0 -2
  159. agno/models/ollama/chat.py +84 -46
  160. agno/models/openai/chat.py +135 -27
  161. agno/models/openai/responses.py +233 -115
  162. agno/models/perplexity/perplexity.py +26 -2
  163. agno/models/portkey/portkey.py +0 -7
  164. agno/models/response.py +14 -8
  165. agno/models/utils.py +20 -0
  166. agno/models/vercel/__init__.py +2 -2
  167. agno/models/vercel/v0.py +1 -1
  168. agno/models/vllm/__init__.py +2 -2
  169. agno/models/vllm/vllm.py +3 -3
  170. agno/models/xai/xai.py +10 -10
  171. agno/os/__init__.py +3 -0
  172. agno/os/app.py +393 -0
  173. agno/os/auth.py +47 -0
  174. agno/os/config.py +103 -0
  175. agno/os/interfaces/agui/__init__.py +3 -0
  176. agno/os/interfaces/agui/agui.py +31 -0
  177. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  178. agno/{app → os/interfaces}/agui/utils.py +65 -28
  179. agno/os/interfaces/base.py +21 -0
  180. agno/os/interfaces/slack/__init__.py +3 -0
  181. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  182. agno/os/interfaces/slack/slack.py +33 -0
  183. agno/os/interfaces/whatsapp/__init__.py +3 -0
  184. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  185. agno/os/interfaces/whatsapp/whatsapp.py +30 -0
  186. agno/os/router.py +843 -0
  187. agno/os/routers/__init__.py +3 -0
  188. agno/os/routers/evals/__init__.py +3 -0
  189. agno/os/routers/evals/evals.py +204 -0
  190. agno/os/routers/evals/schemas.py +142 -0
  191. agno/os/routers/evals/utils.py +161 -0
  192. agno/os/routers/knowledge/__init__.py +3 -0
  193. agno/os/routers/knowledge/knowledge.py +413 -0
  194. agno/os/routers/knowledge/schemas.py +118 -0
  195. agno/os/routers/memory/__init__.py +3 -0
  196. agno/os/routers/memory/memory.py +179 -0
  197. agno/os/routers/memory/schemas.py +58 -0
  198. agno/os/routers/metrics/__init__.py +3 -0
  199. agno/os/routers/metrics/metrics.py +58 -0
  200. agno/os/routers/metrics/schemas.py +47 -0
  201. agno/os/routers/session/__init__.py +3 -0
  202. agno/os/routers/session/session.py +163 -0
  203. agno/os/schema.py +892 -0
  204. agno/{app/playground → os}/settings.py +8 -15
  205. agno/os/utils.py +270 -0
  206. agno/reasoning/azure_ai_foundry.py +4 -4
  207. agno/reasoning/deepseek.py +4 -4
  208. agno/reasoning/default.py +6 -11
  209. agno/reasoning/groq.py +4 -4
  210. agno/reasoning/helpers.py +4 -6
  211. agno/reasoning/ollama.py +4 -4
  212. agno/reasoning/openai.py +4 -4
  213. agno/run/{response.py → agent.py} +144 -72
  214. agno/run/base.py +44 -58
  215. agno/run/cancel.py +83 -0
  216. agno/run/team.py +133 -77
  217. agno/run/workflow.py +537 -12
  218. agno/session/__init__.py +10 -0
  219. agno/session/agent.py +244 -0
  220. agno/session/summary.py +225 -0
  221. agno/session/team.py +262 -0
  222. agno/{storage/session/v2 → session}/workflow.py +47 -24
  223. agno/team/__init__.py +15 -16
  224. agno/team/team.py +2967 -4243
  225. agno/tools/agentql.py +14 -5
  226. agno/tools/airflow.py +9 -4
  227. agno/tools/api.py +7 -3
  228. agno/tools/apify.py +2 -46
  229. agno/tools/arxiv.py +8 -3
  230. agno/tools/aws_lambda.py +7 -5
  231. agno/tools/aws_ses.py +7 -1
  232. agno/tools/baidusearch.py +4 -1
  233. agno/tools/bitbucket.py +4 -4
  234. agno/tools/brandfetch.py +14 -11
  235. agno/tools/bravesearch.py +4 -1
  236. agno/tools/brightdata.py +42 -22
  237. agno/tools/browserbase.py +13 -4
  238. agno/tools/calcom.py +12 -10
  239. agno/tools/calculator.py +10 -27
  240. agno/tools/cartesia.py +18 -13
  241. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  242. agno/tools/confluence.py +71 -18
  243. agno/tools/crawl4ai.py +7 -1
  244. agno/tools/csv_toolkit.py +9 -8
  245. agno/tools/dalle.py +18 -11
  246. agno/tools/daytona.py +13 -16
  247. agno/tools/decorator.py +6 -3
  248. agno/tools/desi_vocal.py +16 -7
  249. agno/tools/discord.py +11 -8
  250. agno/tools/docker.py +30 -42
  251. agno/tools/duckdb.py +34 -53
  252. agno/tools/duckduckgo.py +8 -7
  253. agno/tools/e2b.py +62 -62
  254. agno/tools/eleven_labs.py +35 -28
  255. agno/tools/email.py +4 -1
  256. agno/tools/evm.py +7 -1
  257. agno/tools/exa.py +19 -14
  258. agno/tools/fal.py +29 -29
  259. agno/tools/file.py +9 -8
  260. agno/tools/financial_datasets.py +25 -44
  261. agno/tools/firecrawl.py +22 -22
  262. agno/tools/function.py +68 -17
  263. agno/tools/giphy.py +22 -10
  264. agno/tools/github.py +48 -126
  265. agno/tools/gmail.py +46 -62
  266. agno/tools/google_bigquery.py +7 -6
  267. agno/tools/google_maps.py +11 -26
  268. agno/tools/googlesearch.py +7 -2
  269. agno/tools/googlesheets.py +21 -17
  270. agno/tools/hackernews.py +9 -5
  271. agno/tools/jina.py +5 -4
  272. agno/tools/jira.py +18 -9
  273. agno/tools/knowledge.py +31 -32
  274. agno/tools/linear.py +18 -33
  275. agno/tools/linkup.py +5 -1
  276. agno/tools/local_file_system.py +8 -5
  277. agno/tools/lumalab.py +31 -19
  278. agno/tools/mem0.py +18 -12
  279. agno/tools/memori.py +14 -10
  280. agno/tools/mlx_transcribe.py +3 -2
  281. agno/tools/models/azure_openai.py +32 -14
  282. agno/tools/models/gemini.py +58 -31
  283. agno/tools/models/groq.py +29 -20
  284. agno/tools/models/nebius.py +27 -11
  285. agno/tools/models_labs.py +39 -15
  286. agno/tools/moviepy_video.py +7 -6
  287. agno/tools/neo4j.py +134 -0
  288. agno/tools/newspaper.py +7 -2
  289. agno/tools/newspaper4k.py +8 -3
  290. agno/tools/openai.py +57 -26
  291. agno/tools/openbb.py +12 -11
  292. agno/tools/opencv.py +62 -46
  293. agno/tools/openweather.py +14 -12
  294. agno/tools/pandas.py +11 -3
  295. agno/tools/postgres.py +4 -12
  296. agno/tools/pubmed.py +4 -1
  297. agno/tools/python.py +9 -22
  298. agno/tools/reasoning.py +35 -27
  299. agno/tools/reddit.py +11 -26
  300. agno/tools/replicate.py +54 -41
  301. agno/tools/resend.py +4 -1
  302. agno/tools/scrapegraph.py +15 -14
  303. agno/tools/searxng.py +10 -23
  304. agno/tools/serpapi.py +6 -3
  305. agno/tools/serper.py +13 -4
  306. agno/tools/shell.py +9 -2
  307. agno/tools/slack.py +12 -11
  308. agno/tools/sleep.py +3 -2
  309. agno/tools/spider.py +24 -4
  310. agno/tools/sql.py +7 -6
  311. agno/tools/tavily.py +6 -4
  312. agno/tools/telegram.py +12 -4
  313. agno/tools/todoist.py +11 -31
  314. agno/tools/toolkit.py +1 -1
  315. agno/tools/trafilatura.py +22 -6
  316. agno/tools/trello.py +9 -22
  317. agno/tools/twilio.py +10 -3
  318. agno/tools/user_control_flow.py +6 -1
  319. agno/tools/valyu.py +34 -5
  320. agno/tools/visualization.py +19 -28
  321. agno/tools/webbrowser.py +4 -3
  322. agno/tools/webex.py +11 -7
  323. agno/tools/website.py +15 -46
  324. agno/tools/webtools.py +12 -4
  325. agno/tools/whatsapp.py +5 -9
  326. agno/tools/wikipedia.py +20 -13
  327. agno/tools/x.py +14 -13
  328. agno/tools/yfinance.py +13 -40
  329. agno/tools/youtube.py +26 -20
  330. agno/tools/zendesk.py +7 -2
  331. agno/tools/zep.py +10 -7
  332. agno/tools/zoom.py +10 -9
  333. agno/utils/common.py +1 -19
  334. agno/utils/events.py +95 -118
  335. agno/utils/knowledge.py +29 -0
  336. agno/utils/location.py +2 -2
  337. agno/utils/log.py +2 -2
  338. agno/utils/mcp.py +11 -5
  339. agno/utils/media.py +39 -0
  340. agno/utils/message.py +12 -1
  341. agno/utils/models/claude.py +6 -4
  342. agno/utils/models/mistral.py +8 -7
  343. agno/utils/models/schema_utils.py +3 -3
  344. agno/utils/pprint.py +33 -32
  345. agno/utils/print_response/agent.py +779 -0
  346. agno/utils/print_response/team.py +1565 -0
  347. agno/utils/print_response/workflow.py +1451 -0
  348. agno/utils/prompts.py +14 -14
  349. agno/utils/reasoning.py +87 -0
  350. agno/utils/response.py +42 -42
  351. agno/utils/string.py +8 -22
  352. agno/utils/team.py +50 -0
  353. agno/utils/timer.py +2 -2
  354. agno/vectordb/base.py +33 -21
  355. agno/vectordb/cassandra/cassandra.py +287 -23
  356. agno/vectordb/chroma/chromadb.py +482 -59
  357. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  358. agno/vectordb/couchbase/couchbase.py +309 -29
  359. agno/vectordb/lancedb/lance_db.py +360 -21
  360. agno/vectordb/langchaindb/__init__.py +5 -0
  361. agno/vectordb/langchaindb/langchaindb.py +145 -0
  362. agno/vectordb/lightrag/__init__.py +5 -0
  363. agno/vectordb/lightrag/lightrag.py +374 -0
  364. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  365. agno/vectordb/milvus/milvus.py +242 -32
  366. agno/vectordb/mongodb/mongodb.py +200 -24
  367. agno/vectordb/pgvector/pgvector.py +319 -37
  368. agno/vectordb/pineconedb/pineconedb.py +221 -27
  369. agno/vectordb/qdrant/qdrant.py +356 -14
  370. agno/vectordb/singlestore/singlestore.py +286 -29
  371. agno/vectordb/surrealdb/surrealdb.py +187 -7
  372. agno/vectordb/upstashdb/upstashdb.py +342 -26
  373. agno/vectordb/weaviate/weaviate.py +227 -165
  374. agno/workflow/__init__.py +17 -13
  375. agno/workflow/{v2/condition.py → condition.py} +135 -32
  376. agno/workflow/{v2/loop.py → loop.py} +115 -28
  377. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  378. agno/workflow/{v2/router.py → router.py} +133 -32
  379. agno/workflow/{v2/step.py → step.py} +200 -42
  380. agno/workflow/{v2/steps.py → steps.py} +147 -66
  381. agno/workflow/types.py +482 -0
  382. agno/workflow/workflow.py +2394 -696
  383. agno-2.0.0a1.dist-info/METADATA +355 -0
  384. agno-2.0.0a1.dist-info/RECORD +514 -0
  385. agno/agent/metrics.py +0 -107
  386. agno/api/app.py +0 -35
  387. agno/api/playground.py +0 -92
  388. agno/api/schemas/app.py +0 -12
  389. agno/api/schemas/playground.py +0 -22
  390. agno/api/schemas/user.py +0 -35
  391. agno/api/schemas/workspace.py +0 -46
  392. agno/api/user.py +0 -160
  393. agno/api/workflows.py +0 -33
  394. agno/api/workspace.py +0 -175
  395. agno/app/agui/__init__.py +0 -3
  396. agno/app/agui/app.py +0 -17
  397. agno/app/agui/sync_router.py +0 -120
  398. agno/app/base.py +0 -186
  399. agno/app/discord/__init__.py +0 -3
  400. agno/app/fastapi/__init__.py +0 -3
  401. agno/app/fastapi/app.py +0 -107
  402. agno/app/fastapi/async_router.py +0 -457
  403. agno/app/fastapi/sync_router.py +0 -448
  404. agno/app/playground/app.py +0 -228
  405. agno/app/playground/async_router.py +0 -1050
  406. agno/app/playground/deploy.py +0 -249
  407. agno/app/playground/operator.py +0 -183
  408. agno/app/playground/schemas.py +0 -220
  409. agno/app/playground/serve.py +0 -55
  410. agno/app/playground/sync_router.py +0 -1042
  411. agno/app/playground/utils.py +0 -46
  412. agno/app/settings.py +0 -15
  413. agno/app/slack/__init__.py +0 -3
  414. agno/app/slack/app.py +0 -19
  415. agno/app/slack/sync_router.py +0 -92
  416. agno/app/utils.py +0 -54
  417. agno/app/whatsapp/__init__.py +0 -3
  418. agno/app/whatsapp/app.py +0 -15
  419. agno/app/whatsapp/sync_router.py +0 -197
  420. agno/cli/auth_server.py +0 -249
  421. agno/cli/config.py +0 -274
  422. agno/cli/console.py +0 -88
  423. agno/cli/credentials.py +0 -23
  424. agno/cli/entrypoint.py +0 -571
  425. agno/cli/operator.py +0 -357
  426. agno/cli/settings.py +0 -96
  427. agno/cli/ws/ws_cli.py +0 -817
  428. agno/constants.py +0 -13
  429. agno/document/__init__.py +0 -5
  430. agno/document/chunking/semantic.py +0 -45
  431. agno/document/chunking/strategy.py +0 -31
  432. agno/document/reader/__init__.py +0 -5
  433. agno/document/reader/base.py +0 -47
  434. agno/document/reader/docx_reader.py +0 -60
  435. agno/document/reader/gcs/pdf_reader.py +0 -44
  436. agno/document/reader/s3/pdf_reader.py +0 -59
  437. agno/document/reader/s3/text_reader.py +0 -63
  438. agno/document/reader/url_reader.py +0 -59
  439. agno/document/reader/youtube_reader.py +0 -58
  440. agno/embedder/__init__.py +0 -5
  441. agno/embedder/langdb.py +0 -80
  442. agno/embedder/mistral.py +0 -82
  443. agno/embedder/openai.py +0 -78
  444. agno/file/__init__.py +0 -5
  445. agno/file/file.py +0 -16
  446. agno/file/local/csv.py +0 -32
  447. agno/file/local/txt.py +0 -19
  448. agno/infra/app.py +0 -240
  449. agno/infra/base.py +0 -144
  450. agno/infra/context.py +0 -20
  451. agno/infra/db_app.py +0 -52
  452. agno/infra/resource.py +0 -205
  453. agno/infra/resources.py +0 -55
  454. agno/knowledge/agent.py +0 -698
  455. agno/knowledge/arxiv.py +0 -33
  456. agno/knowledge/combined.py +0 -36
  457. agno/knowledge/csv.py +0 -144
  458. agno/knowledge/csv_url.py +0 -124
  459. agno/knowledge/document.py +0 -223
  460. agno/knowledge/docx.py +0 -137
  461. agno/knowledge/firecrawl.py +0 -34
  462. agno/knowledge/gcs/__init__.py +0 -0
  463. agno/knowledge/gcs/base.py +0 -39
  464. agno/knowledge/gcs/pdf.py +0 -125
  465. agno/knowledge/json.py +0 -137
  466. agno/knowledge/langchain.py +0 -71
  467. agno/knowledge/light_rag.py +0 -273
  468. agno/knowledge/llamaindex.py +0 -66
  469. agno/knowledge/markdown.py +0 -154
  470. agno/knowledge/pdf.py +0 -164
  471. agno/knowledge/pdf_bytes.py +0 -42
  472. agno/knowledge/pdf_url.py +0 -148
  473. agno/knowledge/s3/__init__.py +0 -0
  474. agno/knowledge/s3/base.py +0 -64
  475. agno/knowledge/s3/pdf.py +0 -33
  476. agno/knowledge/s3/text.py +0 -34
  477. agno/knowledge/text.py +0 -141
  478. agno/knowledge/url.py +0 -46
  479. agno/knowledge/website.py +0 -179
  480. agno/knowledge/wikipedia.py +0 -32
  481. agno/knowledge/youtube.py +0 -35
  482. agno/memory/agent.py +0 -423
  483. agno/memory/classifier.py +0 -104
  484. agno/memory/db/__init__.py +0 -5
  485. agno/memory/db/base.py +0 -42
  486. agno/memory/db/mongodb.py +0 -189
  487. agno/memory/db/postgres.py +0 -203
  488. agno/memory/db/sqlite.py +0 -193
  489. agno/memory/memory.py +0 -22
  490. agno/memory/row.py +0 -36
  491. agno/memory/summarizer.py +0 -201
  492. agno/memory/summary.py +0 -19
  493. agno/memory/team.py +0 -415
  494. agno/memory/v2/__init__.py +0 -2
  495. agno/memory/v2/db/__init__.py +0 -1
  496. agno/memory/v2/db/base.py +0 -42
  497. agno/memory/v2/db/firestore.py +0 -339
  498. agno/memory/v2/db/mongodb.py +0 -196
  499. agno/memory/v2/db/postgres.py +0 -214
  500. agno/memory/v2/db/redis.py +0 -187
  501. agno/memory/v2/db/schema.py +0 -54
  502. agno/memory/v2/db/sqlite.py +0 -209
  503. agno/memory/v2/manager.py +0 -437
  504. agno/memory/v2/memory.py +0 -1097
  505. agno/memory/v2/schema.py +0 -55
  506. agno/memory/v2/summarizer.py +0 -215
  507. agno/memory/workflow.py +0 -38
  508. agno/models/ollama/tools.py +0 -430
  509. agno/models/qwen/__init__.py +0 -5
  510. agno/playground/__init__.py +0 -10
  511. agno/playground/deploy.py +0 -3
  512. agno/playground/playground.py +0 -3
  513. agno/playground/serve.py +0 -3
  514. agno/playground/settings.py +0 -3
  515. agno/reranker/__init__.py +0 -0
  516. agno/run/v2/__init__.py +0 -0
  517. agno/run/v2/workflow.py +0 -567
  518. agno/storage/__init__.py +0 -0
  519. agno/storage/agent/__init__.py +0 -0
  520. agno/storage/agent/dynamodb.py +0 -1
  521. agno/storage/agent/json.py +0 -1
  522. agno/storage/agent/mongodb.py +0 -1
  523. agno/storage/agent/postgres.py +0 -1
  524. agno/storage/agent/singlestore.py +0 -1
  525. agno/storage/agent/sqlite.py +0 -1
  526. agno/storage/agent/yaml.py +0 -1
  527. agno/storage/base.py +0 -60
  528. agno/storage/dynamodb.py +0 -673
  529. agno/storage/firestore.py +0 -297
  530. agno/storage/gcs_json.py +0 -261
  531. agno/storage/in_memory.py +0 -234
  532. agno/storage/json.py +0 -237
  533. agno/storage/mongodb.py +0 -328
  534. agno/storage/mysql.py +0 -685
  535. agno/storage/postgres.py +0 -682
  536. agno/storage/redis.py +0 -336
  537. agno/storage/session/__init__.py +0 -16
  538. agno/storage/session/agent.py +0 -64
  539. agno/storage/session/team.py +0 -63
  540. agno/storage/session/v2/__init__.py +0 -5
  541. agno/storage/session/workflow.py +0 -61
  542. agno/storage/singlestore.py +0 -606
  543. agno/storage/sqlite.py +0 -646
  544. agno/storage/workflow/__init__.py +0 -0
  545. agno/storage/workflow/mongodb.py +0 -1
  546. agno/storage/workflow/postgres.py +0 -1
  547. agno/storage/workflow/sqlite.py +0 -1
  548. agno/storage/yaml.py +0 -241
  549. agno/tools/thinking.py +0 -73
  550. agno/utils/defaults.py +0 -57
  551. agno/utils/filesystem.py +0 -39
  552. agno/utils/git.py +0 -52
  553. agno/utils/json_io.py +0 -30
  554. agno/utils/load_env.py +0 -19
  555. agno/utils/py_io.py +0 -19
  556. agno/utils/pyproject.py +0 -18
  557. agno/utils/resource_filter.py +0 -31
  558. agno/workflow/v2/__init__.py +0 -21
  559. agno/workflow/v2/types.py +0 -357
  560. agno/workflow/v2/workflow.py +0 -3312
  561. agno/workspace/__init__.py +0 -0
  562. agno/workspace/config.py +0 -325
  563. agno/workspace/enums.py +0 -6
  564. agno/workspace/helpers.py +0 -52
  565. agno/workspace/operator.py +0 -757
  566. agno/workspace/settings.py +0 -158
  567. agno-1.8.0.dist-info/METADATA +0 -979
  568. agno-1.8.0.dist-info/RECORD +0 -565
  569. agno-1.8.0.dist-info/entry_points.txt +0 -3
  570. /agno/{app → db/migrations}/__init__.py +0 -0
  571. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  572. /agno/{cli → integrations}/__init__.py +0 -0
  573. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  574. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  575. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  576. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  577. /agno/{app → os/interfaces}/slack/security.py +0 -0
  578. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  579. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  580. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  581. {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/WHEEL +0 -0
  582. {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/licenses/LICENSE +0 -0
  583. {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,389 @@
1
+ import asyncio
2
+ import random
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Literal, Optional, Set
6
+ from urllib.parse import urlparse
7
+
8
+ import httpx
9
+
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.reader.url_reader import URLReader
15
+ from agno.knowledge.types import ContentType
16
+ from agno.utils.log import log_debug, logger
17
+
18
+ try:
19
+ from bs4 import BeautifulSoup, Tag # noqa: F401
20
+ except ImportError:
21
+ raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
22
+
23
+ try:
24
+ from ddgs import DDGS
25
+ except ImportError:
26
+ raise ImportError("The `ddgs` package is not installed. Please install it via `pip install ddgs`.")
27
+
28
+
29
+ @dataclass
30
+ class WebSearchReader(Reader):
31
+ """Reader that uses web search to find content for a given query"""
32
+
33
+ search_timeout: int = 10
34
+
35
+ request_timeout: int = 30
36
+ delay_between_requests: float = 2.0 # Increased default delay
37
+ max_retries: int = 3
38
+ user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
39
+
40
+ # Search engine configuration
41
+ search_engine: Literal["duckduckgo", "google"] = "duckduckgo"
42
+ search_delay: float = 3.0 # Delay between search requests
43
+ max_search_retries: int = 2 # Retries for search operations
44
+
45
+ # Rate limiting
46
+ rate_limit_delay: float = 5.0 # Delay when rate limited
47
+ exponential_backoff: bool = True
48
+
49
+ # Internal state
50
+ _visited_urls: Set[str] = field(default_factory=set)
51
+ _url_reader: Optional[URLReader] = None
52
+ _last_search_time: float = field(default=0.0, init=False)
53
+
54
+ # Override default chunking strategy
55
+ chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()
56
+
57
def __post_init__(self):
    """Create the URL reader helper after dataclass initialization.

    Only ``_url_reader`` is assigned here; the chunking strategy is left at
    whatever the ``chunking_strategy`` field already holds.
    """
    self._url_reader = URLReader()
60
+
61
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Get the list of supported chunking strategies for Web Search readers.

    Returns:
        A new list containing the agentic, document, recursive, semantic and
        fixed-size chunking strategy types, in that order.
    """
    # The implicit first argument of a classmethod is the class, hence ``cls``.
    return [
        ChunkingStrategyType.AGENTIC_CHUNKING,
        ChunkingStrategyType.DOCUMENT_CHUNKING,
        ChunkingStrategyType.RECURSIVE_CHUNKING,
        ChunkingStrategyType.SEMANTIC_CHUNKING,
        ChunkingStrategyType.FIXED_SIZE_CHUNKING,
    ]
71
+
72
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Get the list of content types this reader accepts.

    Returns:
        A new list containing ``ContentType.URL`` and ``ContentType.TEXT``.
    """
    # The implicit first argument of a classmethod is the class, hence ``cls``.
    return [ContentType.URL, ContentType.TEXT]
75
+
76
+ def _respect_rate_limits(self):
77
+ """Ensure we don't exceed rate limits"""
78
+ current_time = time.time()
79
+ time_since_last_search = current_time - self._last_search_time
80
+
81
+ if time_since_last_search < self.search_delay:
82
+ sleep_time = self.search_delay - time_since_last_search
83
+ log_debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
84
+ time.sleep(sleep_time)
85
+
86
+ self._last_search_time = time.time()
87
+
88
def _perform_duckduckgo_search(self, query: str) -> List[Dict[str, str]]:
    """Perform web search using DuckDuckGo with rate limiting.

    Makes up to ``max_search_retries`` attempts. Each attempt first waits for
    the rate limiter, then asks DDGS for at most ``max_results`` text results.

    Args:
        query: The search query string.

    Returns:
        A list of dicts with the keys ``title``, ``url`` and ``description``.
        An empty list is returned when every attempt fails.
    """
    log_debug(f"Performing DuckDuckGo search for: {query}")

    for attempt in range(self.max_search_retries):
        try:
            self._respect_rate_limits()

            ddgs = DDGS(timeout=self.search_timeout)
            search_results = ddgs.text(query=query, max_results=self.max_results)

            results = [
                {
                    "title": result.get("title", ""),
                    # The ddgs package documents the result URL under "href";
                    # "link" is kept only as a fallback for other result shapes.
                    "url": result.get("href") or result.get("link", ""),
                    "description": result.get("body", ""),
                }
                for result in search_results or []
            ]

            log_debug(f"Found {len(results)} search results")
            return results

        except Exception as e:
            logger.warning(f"DuckDuckGo search attempt {attempt + 1} failed: {e}")

            # Nothing left to retry: report and stop without a pointless sleep.
            if attempt >= self.max_search_retries - 1:
                logger.error(f"All DuckDuckGo search attempts failed: {e}")
                return []

            message = str(e)
            if "rate limit" in message.lower() or "429" in message:
                # Rate limited - wait longer, doubling per attempt when enabled
                wait_time = (
                    self.rate_limit_delay * (2**attempt) if self.exponential_backoff else self.rate_limit_delay
                )
                logger.info(f"Rate limited, waiting {wait_time} seconds before retry")
                time.sleep(wait_time)
            else:
                # Other error - shorter wait
                time.sleep(self.search_delay)

    # Reached only when max_search_retries is zero or negative.
    return []
129
+
130
def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
    """Perform web search using Google (requires googlesearch-python).

    Makes up to ``max_search_retries`` attempts, waiting for the rate limiter
    before each one and sleeping ``search_delay`` seconds between failures.

    Args:
        query: The search query string.

    Returns:
        A list of dicts with the keys ``title``, ``url`` and ``description``.
        An empty list is returned when the package is missing or every
        attempt fails.
    """
    log_debug(f"Performing Google search for: {query}")

    try:
        from googlesearch import search
    except ImportError:
        logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
        return []

    for attempt in range(self.max_search_retries):
        try:
            self._respect_rate_limits()

            results = []
            # googlesearch-python's search() has no `stop` keyword, so it is not
            # passed. advanced=True makes it yield result objects exposing
            # title/url/description instead of bare URL strings.
            search_results = search(query, num_results=self.max_results, advanced=True)

            for result in search_results:
                if isinstance(result, str):
                    # Defensive: a plain-string result is just the URL.
                    results.append({"title": "", "url": result, "description": ""})
                    continue
                results.append(
                    {
                        "title": getattr(result, "title", "") or "",
                        "url": getattr(result, "url", "") or "",
                        "description": getattr(result, "description", "") or "",
                    }
                )

            log_debug(f"Found {len(results)} Google search results")
            return results

        except Exception as e:
            logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
            if attempt < self.max_search_retries - 1:
                time.sleep(self.search_delay)
            else:
                logger.error(f"All Google search attempts failed: {e}")
                return []

    # Reached only when max_search_retries is zero or negative.
    return []
168
+
169
+ def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
170
+ """Perform web search using the configured search engine"""
171
+ if self.search_engine == "duckduckgo":
172
+ return self._perform_duckduckgo_search(query)
173
+ elif self.search_engine == "google":
174
+ return self._perform_google_search(query)
175
+ else:
176
+ logger.error(f"Unsupported search engine: {self.search_engine}")
177
+ return []
178
+
179
+ def _is_valid_url(self, url: str) -> bool:
180
+ """Check if URL is valid and not already visited"""
181
+ try:
182
+ parsed = urlparse(url)
183
+ return bool(parsed.scheme in ["http", "https"] and parsed.netloc and url not in self._visited_urls)
184
+ except Exception:
185
+ return False
186
+
187
def _extract_text_from_html(self, html_content: str, url: str) -> str:
    """Strip markup from an HTML page and return its whitespace-normalised text.

    Args:
        html_content: Raw HTML of the page.
        url: Source URL, used only in the warning logged on failure.

    Returns:
        The visible text joined by single spaces. If parsing fails, the
        original ``html_content`` is returned unchanged.
    """
    try:
        parsed = BeautifulSoup(html_content, "html.parser")

        # <script> and <style> bodies are not readable content; drop them.
        for tag in parsed(["script", "style"]):
            tag.decompose()

        # Walk every line, split it on spaces and keep the non-empty pieces.
        fragments = []
        for raw_line in parsed.get_text().splitlines():
            for piece in raw_line.strip().split(" "):
                piece = piece.strip()
                if piece:
                    fragments.append(piece)

        return " ".join(fragments)

    except Exception as exc:
        logger.warning(f"Error extracting text from {url}: {exc}")
        return html_content
209
+
210
def _fetch_url_content(self, url: str) -> Optional[str]:
    """Download a URL, retrying up to ``self.max_retries`` times.

    Returns:
        Extracted text for HTML responses, the raw response text for any
        other content type, or None when every attempt failed.
    """
    request_headers = {"User-Agent": self.user_agent}

    for attempt_no in range(1, self.max_retries + 1):
        try:
            reply = httpx.get(url, headers=request_headers, timeout=self.request_timeout, follow_redirects=True)
            reply.raise_for_status()

            # Only HTML is cleaned up; everything else is passed through as-is.
            is_html = "text/html" in reply.headers.get("content-type", "").lower()
            return self._extract_text_from_html(reply.text, url) if is_html else reply.text

        except Exception as exc:
            logger.warning(f"Attempt {attempt_no} failed for {url}: {exc}")
            if attempt_no < self.max_retries:
                # Random pause between retries; skipped after the final attempt.
                time.sleep(random.uniform(1, 3))

    logger.error(f"Failed to fetch content from {url} after {self.max_retries} attempts")
    return None
235
+
236
def _create_document_from_url(self, url: str, content: str, search_result: Dict[str, str]) -> Document:
    """Create a Document from fetched URL content and its search-result metadata.

    Args:
        url: The page URL; also used as the document id.
        content: The text content fetched from the URL.
        search_result: The search hit for this URL ("title" / "description" keys are read).

    Returns:
        A Document whose name is the search title, falling back to the URL's
        host when the title is missing or empty.
    """
    title = search_result.get("title", "")

    # Search results can carry an empty-string title, so fall back on falsiness
    # rather than only on a missing key; otherwise the document name would be "".
    doc_name = title or urlparse(url).netloc

    meta_data = {
        "url": url,
        "search_title": title,
        "search_description": search_result.get("description", ""),
        "source": "web_search",
        "search_engine": self.search_engine,
    }

    return Document(id=url, name=doc_name, content=content, meta_data=meta_data)
254
+
255
def read(self, query: str) -> List[Document]:
    """Search the web for ``query`` and turn the fetched pages into Documents.

    Raises:
        ValueError: If ``query`` is empty.

    Returns:
        The collected documents (chunked when ``self.chunk`` is set), or an
        empty list when the search produced no results.
    """
    if not query:
        raise ValueError("Query cannot be empty")

    log_debug(f"Starting web search reader for query: {query}")

    hits = self._perform_web_search(query)
    if not hits:
        logger.warning(f"No search results found for query: {query}")
        return []

    documents: List[Document] = []

    for hit in hits:
        target_url = hit.get("url", "")

        # Invalid or previously visited URLs are ignored.
        if not self._is_valid_url(target_url):
            continue
        self._visited_urls.add(target_url)

        # Pause between fetches once at least one document has been collected.
        if documents:
            time.sleep(self.delay_between_requests)

        page_text = self._fetch_url_content(target_url)
        if page_text is None:
            continue

        page_doc = self._create_document_from_url(target_url, page_text, hit)
        documents.extend(self.chunk_document(page_doc) if self.chunk else [page_doc])

        # Stop as soon as the number of collected documents reaches max_results.
        if len(documents) >= self.max_results:
            break

    log_debug(f"Created {len(documents)} documents from web search")
    return documents
305
+
306
async def async_read(self, query: str) -> List[Document]:
    """Asynchronously read content for a query by searching the web and fetching pages.

    The search itself is a synchronous call; the page fetches are awaited one
    after another with ``self.delay_between_requests`` between them.

    Raises:
        ValueError: If ``query`` is empty.

    Returns:
        The collected documents (chunked when ``self.chunk`` is set), or an
        empty list when the search produced no results.
    """
    if not query:
        raise ValueError("Query cannot be empty")

    log_debug(f"Starting async web search reader for query: {query}")

    # Perform web search (synchronous operation)
    search_results = self._perform_web_search(query)
    if not search_results:
        logger.warning(f"No search results found for query: {query}")
        return []

    async def fetch_url_async(result: Dict[str, str]) -> Optional[Document]:
        """Fetch one search hit and return it as a Document, or None on skip/failure."""
        url = result.get("url", "")

        # Skip if URL is invalid or already visited
        if not self._is_valid_url(url):
            return None

        # Mark URL as visited
        self._visited_urls.add(url)

        try:
            # Prefer the configured URL reader when one is available
            if self._url_reader:
                docs = await self._url_reader.async_read(url)
                if docs:
                    # Use the first document and add search metadata
                    doc = docs[0]
                    doc.meta_data.update(
                        {
                            "search_title": result.get("title", ""),
                            "search_description": result.get("description", ""),
                            "source": "web_search",
                            "search_engine": self.search_engine,
                        }
                    )
                    return doc

            # Fallback to manual async fetching
            headers = {"User-Agent": self.user_agent}
            async with httpx.AsyncClient(timeout=self.request_timeout) as client:
                response = await client.get(url, headers=headers, follow_redirects=True)
                response.raise_for_status()

                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    content = self._extract_text_from_html(response.text, url)
                else:
                    content = response.text

                return self._create_document_from_url(url, content, result)

        except Exception as e:
            logger.warning(f"Error fetching {url}: {e}")
            return None

    documents: List[Document] = []
    for i, result in enumerate(search_results):
        if i > 0:  # Add delay between requests (except for the first one)
            await asyncio.sleep(self.delay_between_requests)

        # Create each coroutine only when it is about to be awaited. Building them
        # all up front and then breaking early leaves the rest un-awaited, which
        # triggers "coroutine ... was never awaited" RuntimeWarnings.
        doc = await fetch_url_async(result)
        if doc is None:
            continue

        # Apply chunking if enabled
        if self.chunk:
            documents.extend(await self.chunk_documents_async([doc]))
        else:
            documents.append(doc)

        # Stop if we've reached max_results
        if len(documents) >= self.max_results:
            break

    log_debug(f"Created {len(documents)} documents from async web search")
    return documents
@@ -7,8 +7,11 @@ from urllib.parse import urljoin, urlparse
7
7
 
8
8
  import httpx
9
9
 
10
- from agno.document.base import Document
11
- from agno.document.reader.base import Reader
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.types import ContentType
12
15
  from agno.utils.log import log_debug, logger
13
16
 
14
17
  try:
@@ -28,9 +31,15 @@ class WebsiteReader(Reader):
28
31
  _urls_to_crawl: List[Tuple[str, int]] = field(default_factory=list)
29
32
 
30
33
  def __init__(
31
- self, max_depth: int = 3, max_links: int = 10, timeout: int = 10, proxy: Optional[str] = None, **kwargs
34
+ self,
35
+ chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
36
+ max_depth: int = 3,
37
+ max_links: int = 10,
38
+ timeout: int = 10,
39
+ proxy: Optional[str] = None,
40
+ **kwargs,
32
41
  ):
33
- super().__init__(**kwargs)
42
+ super().__init__(chunking_strategy=chunking_strategy, **kwargs)
34
43
  self.max_depth = max_depth
35
44
  self.max_links = max_links
36
45
  self.proxy = proxy
@@ -39,6 +48,21 @@ class WebsiteReader(Reader):
39
48
  self._visited = set()
40
49
  self._urls_to_crawl = []
41
50
 
51
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Get the list of supported chunking strategies for Website readers."""
    # The first parameter of a classmethod is the class, so it is named `cls` (PEP 8).
    return [
        ChunkingStrategyType.AGENTIC_CHUNKING,
        ChunkingStrategyType.DOCUMENT_CHUNKING,
        ChunkingStrategyType.RECURSIVE_CHUNKING,
        ChunkingStrategyType.SEMANTIC_CHUNKING,
        ChunkingStrategyType.FIXED_SIZE_CHUNKING,
    ]
61
+
62
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Get the list of content types this reader accepts (URLs only)."""
    # The first parameter of a classmethod is the class, so it is named `cls` (PEP 8).
    return [ContentType.URL]
65
+
42
66
  def delay(self, min_seconds=1, max_seconds=3):
43
67
  """
44
68
  Introduce a random delay.
@@ -316,7 +340,7 @@ class WebsiteReader(Reader):
316
340
 
317
341
  return crawler_result
318
342
 
319
- def read(self, url: str) -> List[Document]:
343
+ def read(self, url: str, name: Optional[str] = None) -> List[Document]:
320
344
  """
321
345
  Reads a website and returns a list of documents.
322
346
 
@@ -338,7 +362,7 @@ class WebsiteReader(Reader):
338
362
  documents.extend(
339
363
  self.chunk_document(
340
364
  Document(
341
- name=url,
365
+ name=name or url,
342
366
  id=str(crawled_url),
343
367
  meta_data={"url": str(crawled_url)},
344
368
  content=crawled_content,
@@ -348,7 +372,7 @@ class WebsiteReader(Reader):
348
372
  else:
349
373
  documents.append(
350
374
  Document(
351
- name=url,
375
+ name=name or url,
352
376
  id=str(crawled_url),
353
377
  meta_data={"url": str(crawled_url)},
354
378
  content=crawled_content,
@@ -359,7 +383,7 @@ class WebsiteReader(Reader):
359
383
  logger.error(f"Error reading website {url}: {e}")
360
384
  raise
361
385
 
362
- async def async_read(self, url: str) -> List[Document]:
386
+ async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
363
387
  """
364
388
  Asynchronously reads a website and returns a list of documents.
365
389
 
@@ -380,13 +404,16 @@ class WebsiteReader(Reader):
380
404
  async def process_document(crawled_url, crawled_content):
381
405
  if self.chunk:
382
406
  doc = Document(
383
- name=url, id=str(crawled_url), meta_data={"url": str(crawled_url)}, content=crawled_content
407
+ name=name or url,
408
+ id=str(crawled_url),
409
+ meta_data={"url": str(crawled_url)},
410
+ content=crawled_content,
384
411
  )
385
412
  return self.chunk_document(doc)
386
413
  else:
387
414
  return [
388
415
  Document(
389
- name=url,
416
+ name=name or url,
390
417
  id=str(crawled_url),
391
418
  meta_data={"url": str(crawled_url)},
392
419
  content=crawled_content,
@@ -0,0 +1,59 @@
1
+ from typing import List, Optional
2
+
3
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
4
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
5
+ from agno.knowledge.document import Document
6
+ from agno.knowledge.reader.base import Reader
7
+ from agno.knowledge.types import ContentType
8
+ from agno.utils.log import log_debug, log_info
9
+
10
+ try:
11
+ import wikipedia # noqa: F401
12
+ except ImportError:
13
+ raise ImportError("The `wikipedia` package is not installed. Please install it via `pip install wikipedia`.")
14
+
15
+
16
class WikipediaReader(Reader):
    """Reader that returns the Wikipedia summary of a topic as a Document."""

    # Forwarded to ``wikipedia.summary(auto_suggest=...)``.
    auto_suggest: bool = True

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), auto_suggest: bool = True, **kwargs
    ):
        # NOTE(review): the default FixedSizeChunking() is evaluated once at definition
        # time, so every reader built without an explicit strategy shares that instance.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.auto_suggest = auto_suggest

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Wikipedia readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
            ChunkingStrategyType.AGENTIC_CHUNKING,
            ChunkingStrategyType.DOCUMENT_CHUNKING,
            ChunkingStrategyType.RECURSIVE_CHUNKING,
            ChunkingStrategyType.SEMANTIC_CHUNKING,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts (topics only)."""
        return [ContentType.TOPIC]

    def read(self, topic: str) -> List[Document]:
        """Fetch the Wikipedia summary for ``topic``.

        Returns:
            A single-element list holding the summary Document, or an empty
            list when the page is not found, the topic is ambiguous, or the
            summary is empty.
        """
        log_debug(f"Reading Wikipedia topic: {topic}")
        try:
            summary = wikipedia.summary(topic, auto_suggest=self.auto_suggest)
        except wikipedia.exceptions.PageError:
            log_info("PageError: Page not found.")
            return []
        except wikipedia.exceptions.DisambiguationError:
            # An ambiguous topic has no single summary; treat it like a missing
            # page instead of letting the exception escape to the caller.
            log_info(f"DisambiguationError: Topic '{topic}' is ambiguous.")
            return []

        # Only create Document if we successfully got a summary
        if not summary:
            return []

        return [
            Document(
                name=topic,
                meta_data={"topic": topic},
                content=summary,
            )
        ]
@@ -0,0 +1,78 @@
1
+ import asyncio
2
+ from typing import List, Optional
3
+
4
+ from agno.knowledge.chunking.recursive import RecursiveChunking
5
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
6
+ from agno.knowledge.document.base import Document
7
+ from agno.knowledge.reader.base import Reader
8
+ from agno.knowledge.types import ContentType
9
+ from agno.utils.log import log_debug, log_error, log_info
10
+
11
+ try:
12
+ from youtube_transcript_api import YouTubeTranscriptApi
13
+ except ImportError:
14
+ raise ImportError(
15
+ "`youtube_transcript_api` not installed. Please install it via `pip install youtube_transcript_api`."
16
+ )
17
+
18
+
19
class YouTubeReader(Reader):
    """Reader for YouTube video transcripts"""

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RecursiveChunking(), **kwargs):
        # NOTE(review): the default RecursiveChunking() is evaluated once at definition
        # time, so every reader built without an explicit strategy shares that instance.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for YouTube readers."""
        return [
            ChunkingStrategyType.RECURSIVE_CHUNKING,
            ChunkingStrategyType.AGENTIC_CHUNKING,
            ChunkingStrategyType.DOCUMENT_CHUNKING,
            ChunkingStrategyType.SEMANTIC_CHUNKING,
            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts."""
        return [ContentType.URL, ContentType.YOUTUBE]

    @staticmethod
    def _extract_video_id(url: str) -> str:
        """Extract the video id from a YouTube URL.

        Handles ``watch?v=<id>`` query URLs, ``youtu.be/<id>`` short links and
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>`` paths.
        Anything else falls back to the original ``v=`` string split, so a
        bare video id is returned unchanged.
        """
        # Imported locally so the block does not depend on file-level imports.
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(url)

        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            return query_ids[0]

        host = (parsed.hostname or "").lower()
        segments = [segment for segment in parsed.path.split("/") if segment]
        if host.endswith("youtu.be") and segments:
            return segments[0]
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

        return url.split("v=")[-1].split("&")[0]

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Read the transcript of a YouTube video into Documents.

        Args:
            url: The video URL.
            name: Optional document name; defaults to ``youtube_<video_id>``.

        Returns:
            The transcript as one Document, or its chunks when ``self.chunk``
            is set. Any error is logged and an empty list is returned.
        """
        try:
            # Extract video ID from URL
            video_id = self._extract_video_id(url)
            log_info(f"Reading transcript for video: {video_id}")

            # Get transcript
            log_debug(f"Fetching transcript for video: {video_id}")
            ytt_api = YouTubeTranscriptApi()
            transcript_data = ytt_api.fetch(video_id)

            # Combine transcript segments into full text (join avoids repeated concatenation)
            transcript_text = " ".join(f"{segment.text}" for segment in transcript_data)

            documents = [
                Document(
                    name=name or f"youtube_{video_id}",
                    id=f"youtube_{video_id}",
                    meta_data={"video_url": url, "video_id": video_id},
                    content=transcript_text.strip(),
                )
            ]

            if self.chunk:
                chunked_documents = []
                for document in documents:
                    chunked_documents.extend(self.chunk_document(document))
                return chunked_documents
            return documents

        except Exception as e:
            log_error(f"Error reading transcript for {url}: {e}")
            return []

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Run :meth:`read` in the default executor so it does not block the event loop.

        Accepts the same optional ``name`` as :meth:`read` and forwards it.
        """
        # get_running_loop() is the supported way to obtain the loop inside a coroutine.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.read, url, name)