agno 1.8.1__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (580) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +19 -27
  3. agno/agent/agent.py +2778 -4123
  4. agno/api/agent.py +9 -65
  5. agno/api/api.py +5 -46
  6. agno/api/evals.py +6 -17
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -41
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +5 -21
  11. agno/api/schemas/evals.py +7 -16
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +5 -21
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +11 -7
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +9 -64
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/db/__init__.py +24 -0
  25. agno/db/base.py +245 -0
  26. agno/db/dynamo/__init__.py +3 -0
  27. agno/db/dynamo/dynamo.py +1749 -0
  28. agno/db/dynamo/schemas.py +278 -0
  29. agno/db/dynamo/utils.py +684 -0
  30. agno/db/firestore/__init__.py +3 -0
  31. agno/db/firestore/firestore.py +1438 -0
  32. agno/db/firestore/schemas.py +130 -0
  33. agno/db/firestore/utils.py +278 -0
  34. agno/db/gcs_json/__init__.py +3 -0
  35. agno/db/gcs_json/gcs_json_db.py +1001 -0
  36. agno/db/gcs_json/utils.py +194 -0
  37. agno/db/in_memory/__init__.py +3 -0
  38. agno/db/in_memory/in_memory_db.py +888 -0
  39. agno/db/in_memory/utils.py +172 -0
  40. agno/db/json/__init__.py +3 -0
  41. agno/db/json/json_db.py +1051 -0
  42. agno/db/json/utils.py +196 -0
  43. agno/db/migrations/v1_to_v2.py +162 -0
  44. agno/db/mongo/__init__.py +3 -0
  45. agno/db/mongo/mongo.py +1417 -0
  46. agno/db/mongo/schemas.py +77 -0
  47. agno/db/mongo/utils.py +204 -0
  48. agno/db/mysql/__init__.py +3 -0
  49. agno/db/mysql/mysql.py +1719 -0
  50. agno/db/mysql/schemas.py +124 -0
  51. agno/db/mysql/utils.py +298 -0
  52. agno/db/postgres/__init__.py +3 -0
  53. agno/db/postgres/postgres.py +1720 -0
  54. agno/db/postgres/schemas.py +124 -0
  55. agno/db/postgres/utils.py +281 -0
  56. agno/db/redis/__init__.py +3 -0
  57. agno/db/redis/redis.py +1371 -0
  58. agno/db/redis/schemas.py +109 -0
  59. agno/db/redis/utils.py +288 -0
  60. agno/db/schemas/__init__.py +3 -0
  61. agno/db/schemas/evals.py +33 -0
  62. agno/db/schemas/knowledge.py +40 -0
  63. agno/db/schemas/memory.py +46 -0
  64. agno/db/singlestore/__init__.py +3 -0
  65. agno/db/singlestore/schemas.py +116 -0
  66. agno/db/singlestore/singlestore.py +1722 -0
  67. agno/db/singlestore/utils.py +327 -0
  68. agno/db/sqlite/__init__.py +3 -0
  69. agno/db/sqlite/schemas.py +119 -0
  70. agno/db/sqlite/sqlite.py +1680 -0
  71. agno/db/sqlite/utils.py +269 -0
  72. agno/db/utils.py +88 -0
  73. agno/eval/__init__.py +14 -0
  74. agno/eval/accuracy.py +142 -43
  75. agno/eval/performance.py +88 -23
  76. agno/eval/reliability.py +73 -20
  77. agno/eval/utils.py +23 -13
  78. agno/integrations/discord/__init__.py +3 -0
  79. agno/{app → integrations}/discord/client.py +10 -10
  80. agno/knowledge/__init__.py +2 -2
  81. agno/{document → knowledge}/chunking/agentic.py +2 -2
  82. agno/{document → knowledge}/chunking/document.py +2 -2
  83. agno/{document → knowledge}/chunking/fixed.py +3 -3
  84. agno/{document → knowledge}/chunking/markdown.py +2 -2
  85. agno/{document → knowledge}/chunking/recursive.py +2 -2
  86. agno/{document → knowledge}/chunking/row.py +2 -2
  87. agno/knowledge/chunking/semantic.py +59 -0
  88. agno/knowledge/chunking/strategy.py +121 -0
  89. agno/knowledge/content.py +74 -0
  90. agno/knowledge/document/__init__.py +5 -0
  91. agno/{document → knowledge/document}/base.py +12 -2
  92. agno/knowledge/embedder/__init__.py +5 -0
  93. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  94. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  95. agno/{embedder → knowledge/embedder}/base.py +6 -0
  96. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  97. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  98. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  99. agno/{embedder → knowledge/embedder}/google.py +74 -1
  100. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  101. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  102. agno/knowledge/embedder/langdb.py +22 -0
  103. agno/knowledge/embedder/mistral.py +139 -0
  104. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  105. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  106. agno/knowledge/embedder/openai.py +223 -0
  107. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  108. agno/{embedder → knowledge/embedder}/together.py +1 -1
  109. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  110. agno/knowledge/knowledge.py +1515 -0
  111. agno/knowledge/reader/__init__.py +7 -0
  112. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  113. agno/knowledge/reader/base.py +88 -0
  114. agno/{document → knowledge}/reader/csv_reader.py +68 -15
  115. agno/knowledge/reader/docx_reader.py +83 -0
  116. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  117. agno/knowledge/reader/gcs_reader.py +67 -0
  118. agno/{document → knowledge}/reader/json_reader.py +30 -9
  119. agno/{document → knowledge}/reader/markdown_reader.py +36 -9
  120. agno/{document → knowledge}/reader/pdf_reader.py +79 -21
  121. agno/knowledge/reader/reader_factory.py +275 -0
  122. agno/knowledge/reader/s3_reader.py +171 -0
  123. agno/{document → knowledge}/reader/text_reader.py +31 -10
  124. agno/knowledge/reader/url_reader.py +84 -0
  125. agno/knowledge/reader/web_search_reader.py +389 -0
  126. agno/{document → knowledge}/reader/website_reader.py +37 -10
  127. agno/knowledge/reader/wikipedia_reader.py +59 -0
  128. agno/knowledge/reader/youtube_reader.py +78 -0
  129. agno/knowledge/remote_content/remote_content.py +88 -0
  130. agno/{reranker → knowledge/reranker}/base.py +1 -1
  131. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  132. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  133. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  134. agno/knowledge/types.py +30 -0
  135. agno/knowledge/utils.py +169 -0
  136. agno/memory/__init__.py +2 -10
  137. agno/memory/manager.py +1003 -148
  138. agno/models/aimlapi/__init__.py +2 -2
  139. agno/models/aimlapi/aimlapi.py +6 -6
  140. agno/models/anthropic/claude.py +129 -82
  141. agno/models/aws/bedrock.py +107 -175
  142. agno/models/aws/claude.py +64 -18
  143. agno/models/azure/ai_foundry.py +73 -23
  144. agno/models/base.py +347 -287
  145. agno/models/cerebras/cerebras.py +84 -27
  146. agno/models/cohere/chat.py +106 -98
  147. agno/models/google/gemini.py +100 -42
  148. agno/models/groq/groq.py +97 -35
  149. agno/models/huggingface/huggingface.py +92 -27
  150. agno/models/ibm/watsonx.py +72 -13
  151. agno/models/litellm/chat.py +85 -13
  152. agno/models/message.py +38 -144
  153. agno/models/meta/llama.py +85 -49
  154. agno/models/metrics.py +120 -0
  155. agno/models/mistral/mistral.py +90 -21
  156. agno/models/ollama/__init__.py +0 -2
  157. agno/models/ollama/chat.py +84 -46
  158. agno/models/openai/chat.py +121 -23
  159. agno/models/openai/responses.py +178 -105
  160. agno/models/perplexity/perplexity.py +26 -2
  161. agno/models/portkey/portkey.py +0 -7
  162. agno/models/response.py +14 -8
  163. agno/models/utils.py +20 -0
  164. agno/models/vercel/__init__.py +2 -2
  165. agno/models/vercel/v0.py +1 -1
  166. agno/models/vllm/__init__.py +2 -2
  167. agno/models/vllm/vllm.py +3 -3
  168. agno/models/xai/xai.py +10 -10
  169. agno/os/__init__.py +3 -0
  170. agno/os/app.py +393 -0
  171. agno/os/auth.py +47 -0
  172. agno/os/config.py +103 -0
  173. agno/os/interfaces/agui/__init__.py +3 -0
  174. agno/os/interfaces/agui/agui.py +31 -0
  175. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  176. agno/{app → os/interfaces}/agui/utils.py +65 -28
  177. agno/os/interfaces/base.py +21 -0
  178. agno/os/interfaces/slack/__init__.py +3 -0
  179. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  180. agno/os/interfaces/slack/slack.py +33 -0
  181. agno/os/interfaces/whatsapp/__init__.py +3 -0
  182. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  183. agno/os/interfaces/whatsapp/whatsapp.py +30 -0
  184. agno/os/router.py +843 -0
  185. agno/os/routers/__init__.py +3 -0
  186. agno/os/routers/evals/__init__.py +3 -0
  187. agno/os/routers/evals/evals.py +204 -0
  188. agno/os/routers/evals/schemas.py +142 -0
  189. agno/os/routers/evals/utils.py +161 -0
  190. agno/os/routers/knowledge/__init__.py +3 -0
  191. agno/os/routers/knowledge/knowledge.py +413 -0
  192. agno/os/routers/knowledge/schemas.py +118 -0
  193. agno/os/routers/memory/__init__.py +3 -0
  194. agno/os/routers/memory/memory.py +179 -0
  195. agno/os/routers/memory/schemas.py +58 -0
  196. agno/os/routers/metrics/__init__.py +3 -0
  197. agno/os/routers/metrics/metrics.py +58 -0
  198. agno/os/routers/metrics/schemas.py +47 -0
  199. agno/os/routers/session/__init__.py +3 -0
  200. agno/os/routers/session/session.py +163 -0
  201. agno/os/schema.py +892 -0
  202. agno/{app/playground → os}/settings.py +8 -15
  203. agno/os/utils.py +270 -0
  204. agno/reasoning/azure_ai_foundry.py +4 -4
  205. agno/reasoning/deepseek.py +4 -4
  206. agno/reasoning/default.py +6 -11
  207. agno/reasoning/groq.py +4 -4
  208. agno/reasoning/helpers.py +4 -6
  209. agno/reasoning/ollama.py +4 -4
  210. agno/reasoning/openai.py +4 -4
  211. agno/run/{response.py → agent.py} +144 -72
  212. agno/run/base.py +44 -58
  213. agno/run/cancel.py +83 -0
  214. agno/run/team.py +133 -77
  215. agno/run/workflow.py +537 -12
  216. agno/session/__init__.py +10 -0
  217. agno/session/agent.py +244 -0
  218. agno/session/summary.py +225 -0
  219. agno/session/team.py +262 -0
  220. agno/{storage/session/v2 → session}/workflow.py +47 -24
  221. agno/team/__init__.py +15 -16
  222. agno/team/team.py +2961 -4253
  223. agno/tools/agentql.py +14 -5
  224. agno/tools/airflow.py +9 -4
  225. agno/tools/api.py +7 -3
  226. agno/tools/apify.py +2 -46
  227. agno/tools/arxiv.py +8 -3
  228. agno/tools/aws_lambda.py +7 -5
  229. agno/tools/aws_ses.py +7 -1
  230. agno/tools/baidusearch.py +4 -1
  231. agno/tools/bitbucket.py +4 -4
  232. agno/tools/brandfetch.py +14 -11
  233. agno/tools/bravesearch.py +4 -1
  234. agno/tools/brightdata.py +42 -22
  235. agno/tools/browserbase.py +13 -4
  236. agno/tools/calcom.py +12 -10
  237. agno/tools/calculator.py +10 -27
  238. agno/tools/cartesia.py +18 -13
  239. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  240. agno/tools/confluence.py +8 -8
  241. agno/tools/crawl4ai.py +7 -1
  242. agno/tools/csv_toolkit.py +9 -8
  243. agno/tools/dalle.py +18 -11
  244. agno/tools/daytona.py +13 -16
  245. agno/tools/decorator.py +6 -3
  246. agno/tools/desi_vocal.py +16 -7
  247. agno/tools/discord.py +11 -8
  248. agno/tools/docker.py +30 -42
  249. agno/tools/duckdb.py +34 -53
  250. agno/tools/duckduckgo.py +8 -7
  251. agno/tools/e2b.py +61 -61
  252. agno/tools/eleven_labs.py +35 -28
  253. agno/tools/email.py +4 -1
  254. agno/tools/evm.py +7 -1
  255. agno/tools/exa.py +19 -14
  256. agno/tools/fal.py +29 -29
  257. agno/tools/file.py +9 -8
  258. agno/tools/financial_datasets.py +25 -44
  259. agno/tools/firecrawl.py +22 -22
  260. agno/tools/function.py +68 -17
  261. agno/tools/giphy.py +22 -10
  262. agno/tools/github.py +48 -126
  263. agno/tools/gmail.py +45 -61
  264. agno/tools/google_bigquery.py +7 -6
  265. agno/tools/google_maps.py +11 -26
  266. agno/tools/googlesearch.py +7 -2
  267. agno/tools/googlesheets.py +21 -17
  268. agno/tools/hackernews.py +9 -5
  269. agno/tools/jina.py +5 -4
  270. agno/tools/jira.py +18 -9
  271. agno/tools/knowledge.py +31 -32
  272. agno/tools/linear.py +18 -33
  273. agno/tools/linkup.py +5 -1
  274. agno/tools/local_file_system.py +8 -5
  275. agno/tools/lumalab.py +31 -19
  276. agno/tools/mem0.py +18 -12
  277. agno/tools/memori.py +14 -10
  278. agno/tools/mlx_transcribe.py +3 -2
  279. agno/tools/models/azure_openai.py +32 -14
  280. agno/tools/models/gemini.py +58 -31
  281. agno/tools/models/groq.py +29 -20
  282. agno/tools/models/nebius.py +27 -11
  283. agno/tools/models_labs.py +39 -15
  284. agno/tools/moviepy_video.py +7 -6
  285. agno/tools/neo4j.py +10 -8
  286. agno/tools/newspaper.py +7 -2
  287. agno/tools/newspaper4k.py +8 -3
  288. agno/tools/openai.py +57 -26
  289. agno/tools/openbb.py +12 -11
  290. agno/tools/opencv.py +62 -46
  291. agno/tools/openweather.py +14 -12
  292. agno/tools/pandas.py +11 -3
  293. agno/tools/postgres.py +4 -12
  294. agno/tools/pubmed.py +4 -1
  295. agno/tools/python.py +9 -22
  296. agno/tools/reasoning.py +35 -27
  297. agno/tools/reddit.py +11 -26
  298. agno/tools/replicate.py +54 -41
  299. agno/tools/resend.py +4 -1
  300. agno/tools/scrapegraph.py +15 -14
  301. agno/tools/searxng.py +10 -23
  302. agno/tools/serpapi.py +6 -3
  303. agno/tools/serper.py +13 -4
  304. agno/tools/shell.py +9 -2
  305. agno/tools/slack.py +12 -11
  306. agno/tools/sleep.py +3 -2
  307. agno/tools/spider.py +24 -4
  308. agno/tools/sql.py +7 -6
  309. agno/tools/tavily.py +6 -4
  310. agno/tools/telegram.py +12 -4
  311. agno/tools/todoist.py +11 -31
  312. agno/tools/toolkit.py +1 -1
  313. agno/tools/trafilatura.py +22 -6
  314. agno/tools/trello.py +9 -22
  315. agno/tools/twilio.py +10 -3
  316. agno/tools/user_control_flow.py +6 -1
  317. agno/tools/valyu.py +34 -5
  318. agno/tools/visualization.py +19 -28
  319. agno/tools/webbrowser.py +4 -3
  320. agno/tools/webex.py +11 -7
  321. agno/tools/website.py +15 -46
  322. agno/tools/webtools.py +12 -4
  323. agno/tools/whatsapp.py +5 -9
  324. agno/tools/wikipedia.py +20 -13
  325. agno/tools/x.py +14 -13
  326. agno/tools/yfinance.py +13 -40
  327. agno/tools/youtube.py +26 -20
  328. agno/tools/zendesk.py +7 -2
  329. agno/tools/zep.py +10 -7
  330. agno/tools/zoom.py +10 -9
  331. agno/utils/common.py +1 -19
  332. agno/utils/events.py +95 -118
  333. agno/utils/knowledge.py +29 -0
  334. agno/utils/log.py +2 -2
  335. agno/utils/mcp.py +11 -5
  336. agno/utils/media.py +39 -0
  337. agno/utils/message.py +12 -1
  338. agno/utils/models/claude.py +6 -4
  339. agno/utils/models/mistral.py +8 -7
  340. agno/utils/models/schema_utils.py +3 -3
  341. agno/utils/pprint.py +33 -32
  342. agno/utils/print_response/agent.py +779 -0
  343. agno/utils/print_response/team.py +1565 -0
  344. agno/utils/print_response/workflow.py +1451 -0
  345. agno/utils/prompts.py +14 -14
  346. agno/utils/reasoning.py +87 -0
  347. agno/utils/response.py +42 -42
  348. agno/utils/string.py +8 -22
  349. agno/utils/team.py +50 -0
  350. agno/utils/timer.py +2 -2
  351. agno/vectordb/base.py +33 -21
  352. agno/vectordb/cassandra/cassandra.py +287 -23
  353. agno/vectordb/chroma/chromadb.py +482 -59
  354. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  355. agno/vectordb/couchbase/couchbase.py +309 -29
  356. agno/vectordb/lancedb/lance_db.py +360 -21
  357. agno/vectordb/langchaindb/__init__.py +5 -0
  358. agno/vectordb/langchaindb/langchaindb.py +145 -0
  359. agno/vectordb/lightrag/__init__.py +5 -0
  360. agno/vectordb/lightrag/lightrag.py +374 -0
  361. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  362. agno/vectordb/milvus/milvus.py +242 -32
  363. agno/vectordb/mongodb/mongodb.py +200 -24
  364. agno/vectordb/pgvector/pgvector.py +319 -37
  365. agno/vectordb/pineconedb/pineconedb.py +221 -27
  366. agno/vectordb/qdrant/qdrant.py +334 -14
  367. agno/vectordb/singlestore/singlestore.py +286 -29
  368. agno/vectordb/surrealdb/surrealdb.py +187 -7
  369. agno/vectordb/upstashdb/upstashdb.py +342 -26
  370. agno/vectordb/weaviate/weaviate.py +227 -165
  371. agno/workflow/__init__.py +17 -13
  372. agno/workflow/{v2/condition.py → condition.py} +135 -32
  373. agno/workflow/{v2/loop.py → loop.py} +115 -28
  374. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  375. agno/workflow/{v2/router.py → router.py} +133 -32
  376. agno/workflow/{v2/step.py → step.py} +200 -42
  377. agno/workflow/{v2/steps.py → steps.py} +147 -66
  378. agno/workflow/types.py +482 -0
  379. agno/workflow/workflow.py +2394 -696
  380. agno-2.0.0a1.dist-info/METADATA +355 -0
  381. agno-2.0.0a1.dist-info/RECORD +514 -0
  382. agno/agent/metrics.py +0 -107
  383. agno/api/app.py +0 -35
  384. agno/api/playground.py +0 -92
  385. agno/api/schemas/app.py +0 -12
  386. agno/api/schemas/playground.py +0 -22
  387. agno/api/schemas/user.py +0 -35
  388. agno/api/schemas/workspace.py +0 -46
  389. agno/api/user.py +0 -160
  390. agno/api/workflows.py +0 -33
  391. agno/api/workspace.py +0 -175
  392. agno/app/agui/__init__.py +0 -3
  393. agno/app/agui/app.py +0 -17
  394. agno/app/agui/sync_router.py +0 -120
  395. agno/app/base.py +0 -186
  396. agno/app/discord/__init__.py +0 -3
  397. agno/app/fastapi/__init__.py +0 -3
  398. agno/app/fastapi/app.py +0 -107
  399. agno/app/fastapi/async_router.py +0 -457
  400. agno/app/fastapi/sync_router.py +0 -448
  401. agno/app/playground/app.py +0 -228
  402. agno/app/playground/async_router.py +0 -1050
  403. agno/app/playground/deploy.py +0 -249
  404. agno/app/playground/operator.py +0 -183
  405. agno/app/playground/schemas.py +0 -220
  406. agno/app/playground/serve.py +0 -55
  407. agno/app/playground/sync_router.py +0 -1042
  408. agno/app/playground/utils.py +0 -46
  409. agno/app/settings.py +0 -15
  410. agno/app/slack/__init__.py +0 -3
  411. agno/app/slack/app.py +0 -19
  412. agno/app/slack/sync_router.py +0 -92
  413. agno/app/utils.py +0 -54
  414. agno/app/whatsapp/__init__.py +0 -3
  415. agno/app/whatsapp/app.py +0 -15
  416. agno/app/whatsapp/sync_router.py +0 -197
  417. agno/cli/auth_server.py +0 -249
  418. agno/cli/config.py +0 -274
  419. agno/cli/console.py +0 -88
  420. agno/cli/credentials.py +0 -23
  421. agno/cli/entrypoint.py +0 -571
  422. agno/cli/operator.py +0 -357
  423. agno/cli/settings.py +0 -96
  424. agno/cli/ws/ws_cli.py +0 -817
  425. agno/constants.py +0 -13
  426. agno/document/__init__.py +0 -5
  427. agno/document/chunking/semantic.py +0 -45
  428. agno/document/chunking/strategy.py +0 -31
  429. agno/document/reader/__init__.py +0 -5
  430. agno/document/reader/base.py +0 -47
  431. agno/document/reader/docx_reader.py +0 -60
  432. agno/document/reader/gcs/pdf_reader.py +0 -44
  433. agno/document/reader/s3/pdf_reader.py +0 -59
  434. agno/document/reader/s3/text_reader.py +0 -63
  435. agno/document/reader/url_reader.py +0 -59
  436. agno/document/reader/youtube_reader.py +0 -58
  437. agno/embedder/__init__.py +0 -5
  438. agno/embedder/langdb.py +0 -80
  439. agno/embedder/mistral.py +0 -82
  440. agno/embedder/openai.py +0 -78
  441. agno/file/__init__.py +0 -5
  442. agno/file/file.py +0 -16
  443. agno/file/local/csv.py +0 -32
  444. agno/file/local/txt.py +0 -19
  445. agno/infra/app.py +0 -240
  446. agno/infra/base.py +0 -144
  447. agno/infra/context.py +0 -20
  448. agno/infra/db_app.py +0 -52
  449. agno/infra/resource.py +0 -205
  450. agno/infra/resources.py +0 -55
  451. agno/knowledge/agent.py +0 -702
  452. agno/knowledge/arxiv.py +0 -33
  453. agno/knowledge/combined.py +0 -36
  454. agno/knowledge/csv.py +0 -144
  455. agno/knowledge/csv_url.py +0 -124
  456. agno/knowledge/document.py +0 -223
  457. agno/knowledge/docx.py +0 -137
  458. agno/knowledge/firecrawl.py +0 -34
  459. agno/knowledge/gcs/__init__.py +0 -0
  460. agno/knowledge/gcs/base.py +0 -39
  461. agno/knowledge/gcs/pdf.py +0 -125
  462. agno/knowledge/json.py +0 -137
  463. agno/knowledge/langchain.py +0 -71
  464. agno/knowledge/light_rag.py +0 -273
  465. agno/knowledge/llamaindex.py +0 -66
  466. agno/knowledge/markdown.py +0 -154
  467. agno/knowledge/pdf.py +0 -164
  468. agno/knowledge/pdf_bytes.py +0 -42
  469. agno/knowledge/pdf_url.py +0 -148
  470. agno/knowledge/s3/__init__.py +0 -0
  471. agno/knowledge/s3/base.py +0 -64
  472. agno/knowledge/s3/pdf.py +0 -33
  473. agno/knowledge/s3/text.py +0 -34
  474. agno/knowledge/text.py +0 -141
  475. agno/knowledge/url.py +0 -46
  476. agno/knowledge/website.py +0 -179
  477. agno/knowledge/wikipedia.py +0 -32
  478. agno/knowledge/youtube.py +0 -35
  479. agno/memory/agent.py +0 -423
  480. agno/memory/classifier.py +0 -104
  481. agno/memory/db/__init__.py +0 -5
  482. agno/memory/db/base.py +0 -42
  483. agno/memory/db/mongodb.py +0 -189
  484. agno/memory/db/postgres.py +0 -203
  485. agno/memory/db/sqlite.py +0 -193
  486. agno/memory/memory.py +0 -22
  487. agno/memory/row.py +0 -36
  488. agno/memory/summarizer.py +0 -201
  489. agno/memory/summary.py +0 -19
  490. agno/memory/team.py +0 -415
  491. agno/memory/v2/__init__.py +0 -2
  492. agno/memory/v2/db/__init__.py +0 -1
  493. agno/memory/v2/db/base.py +0 -42
  494. agno/memory/v2/db/firestore.py +0 -339
  495. agno/memory/v2/db/mongodb.py +0 -196
  496. agno/memory/v2/db/postgres.py +0 -214
  497. agno/memory/v2/db/redis.py +0 -187
  498. agno/memory/v2/db/schema.py +0 -54
  499. agno/memory/v2/db/sqlite.py +0 -209
  500. agno/memory/v2/manager.py +0 -437
  501. agno/memory/v2/memory.py +0 -1097
  502. agno/memory/v2/schema.py +0 -55
  503. agno/memory/v2/summarizer.py +0 -215
  504. agno/memory/workflow.py +0 -38
  505. agno/models/ollama/tools.py +0 -430
  506. agno/models/qwen/__init__.py +0 -5
  507. agno/playground/__init__.py +0 -10
  508. agno/playground/deploy.py +0 -3
  509. agno/playground/playground.py +0 -3
  510. agno/playground/serve.py +0 -3
  511. agno/playground/settings.py +0 -3
  512. agno/reranker/__init__.py +0 -0
  513. agno/run/v2/__init__.py +0 -0
  514. agno/run/v2/workflow.py +0 -567
  515. agno/storage/__init__.py +0 -0
  516. agno/storage/agent/__init__.py +0 -0
  517. agno/storage/agent/dynamodb.py +0 -1
  518. agno/storage/agent/json.py +0 -1
  519. agno/storage/agent/mongodb.py +0 -1
  520. agno/storage/agent/postgres.py +0 -1
  521. agno/storage/agent/singlestore.py +0 -1
  522. agno/storage/agent/sqlite.py +0 -1
  523. agno/storage/agent/yaml.py +0 -1
  524. agno/storage/base.py +0 -60
  525. agno/storage/dynamodb.py +0 -673
  526. agno/storage/firestore.py +0 -297
  527. agno/storage/gcs_json.py +0 -261
  528. agno/storage/in_memory.py +0 -234
  529. agno/storage/json.py +0 -237
  530. agno/storage/mongodb.py +0 -328
  531. agno/storage/mysql.py +0 -685
  532. agno/storage/postgres.py +0 -682
  533. agno/storage/redis.py +0 -336
  534. agno/storage/session/__init__.py +0 -16
  535. agno/storage/session/agent.py +0 -64
  536. agno/storage/session/team.py +0 -63
  537. agno/storage/session/v2/__init__.py +0 -5
  538. agno/storage/session/workflow.py +0 -61
  539. agno/storage/singlestore.py +0 -606
  540. agno/storage/sqlite.py +0 -646
  541. agno/storage/workflow/__init__.py +0 -0
  542. agno/storage/workflow/mongodb.py +0 -1
  543. agno/storage/workflow/postgres.py +0 -1
  544. agno/storage/workflow/sqlite.py +0 -1
  545. agno/storage/yaml.py +0 -241
  546. agno/tools/thinking.py +0 -73
  547. agno/utils/defaults.py +0 -57
  548. agno/utils/filesystem.py +0 -39
  549. agno/utils/git.py +0 -52
  550. agno/utils/json_io.py +0 -30
  551. agno/utils/load_env.py +0 -19
  552. agno/utils/py_io.py +0 -19
  553. agno/utils/pyproject.py +0 -18
  554. agno/utils/resource_filter.py +0 -31
  555. agno/workflow/v2/__init__.py +0 -21
  556. agno/workflow/v2/types.py +0 -357
  557. agno/workflow/v2/workflow.py +0 -3312
  558. agno/workspace/__init__.py +0 -0
  559. agno/workspace/config.py +0 -325
  560. agno/workspace/enums.py +0 -6
  561. agno/workspace/helpers.py +0 -52
  562. agno/workspace/operator.py +0 -757
  563. agno/workspace/settings.py +0 -158
  564. agno-1.8.1.dist-info/METADATA +0 -982
  565. agno-1.8.1.dist-info/RECORD +0 -566
  566. agno-1.8.1.dist-info/entry_points.txt +0 -3
  567. /agno/{app → db/migrations}/__init__.py +0 -0
  568. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  569. /agno/{cli → integrations}/__init__.py +0 -0
  570. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  571. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  572. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  573. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  574. /agno/{app → os/interfaces}/slack/security.py +0 -0
  575. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  576. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  577. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  578. {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/WHEEL +0 -0
  579. {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/licenses/LICENSE +0 -0
  580. {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,389 @@
1
+ import asyncio
2
+ import random
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Literal, Optional, Set
6
+ from urllib.parse import urlparse
7
+
8
+ import httpx
9
+
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.reader.url_reader import URLReader
15
+ from agno.knowledge.types import ContentType
16
+ from agno.utils.log import log_debug, logger
17
+
18
+ try:
19
+ from bs4 import BeautifulSoup, Tag # noqa: F401
20
+ except ImportError:
21
+ raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
22
+
23
+ try:
24
+ from ddgs import DDGS
25
+ except ImportError:
26
+ raise ImportError("The `ddgs` package is not installed. Please install it via `pip install ddgs`.")
27
+
28
+
29
+ @dataclass
30
+ class WebSearchReader(Reader):
31
+ """Reader that uses web search to find content for a given query"""
32
+
33
+ search_timeout: int = 10
34
+
35
+ request_timeout: int = 30
36
+ delay_between_requests: float = 2.0 # Increased default delay
37
+ max_retries: int = 3
38
+ user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
39
+
40
+ # Search engine configuration
41
+ search_engine: Literal["duckduckgo", "google"] = "duckduckgo"
42
+ search_delay: float = 3.0 # Delay between search requests
43
+ max_search_retries: int = 2 # Retries for search operations
44
+
45
+ # Rate limiting
46
+ rate_limit_delay: float = 5.0 # Delay when rate limited
47
+ exponential_backoff: bool = True
48
+
49
+ # Internal state
50
+ _visited_urls: Set[str] = field(default_factory=set)
51
+ _url_reader: Optional[URLReader] = None
52
+ _last_search_time: float = field(default=0.0, init=False)
53
+
54
+ # Override default chunking strategy
55
+ chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()
56
+
57
def __post_init__(self):
    """Create the helper ``URLReader`` once the dataclass fields have been set."""
    # Stored on the instance, replacing the ``None`` field default.
    self._url_reader = URLReader()
60
+
61
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Return the chunking strategies that Web Search readers support.

    Returns:
        A new list of ``ChunkingStrategyType`` members on every call.
    """
    # The first argument of a classmethod is the class itself, so it is
    # named ``cls`` rather than ``self``.
    return [
        ChunkingStrategyType.AGENTIC_CHUNKING,
        ChunkingStrategyType.DOCUMENT_CHUNKING,
        ChunkingStrategyType.RECURSIVE_CHUNKING,
        ChunkingStrategyType.SEMANTIC_CHUNKING,
        ChunkingStrategyType.FIXED_SIZE_CHUNKING,
    ]
71
+
72
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Return the content types this reader accepts (URL and plain text).

    Returns:
        A new list of ``ContentType`` members on every call.
    """
    # The first argument of a classmethod is the class itself, so it is
    # named ``cls`` rather than ``self``.
    return [ContentType.URL, ContentType.TEXT]
75
+
76
+ def _respect_rate_limits(self):
77
+ """Ensure we don't exceed rate limits"""
78
+ current_time = time.time()
79
+ time_since_last_search = current_time - self._last_search_time
80
+
81
+ if time_since_last_search < self.search_delay:
82
+ sleep_time = self.search_delay - time_since_last_search
83
+ log_debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
84
+ time.sleep(sleep_time)
85
+
86
+ self._last_search_time = time.time()
87
+
88
def _perform_duckduckgo_search(self, query: str) -> List[Dict[str, str]]:
    """Perform a web search using DuckDuckGo, with rate limiting and retries.

    Args:
        query: The search query string.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and
        ``"description"``; an empty list when every attempt fails.

    Up to ``max_search_retries`` attempts are made. A failure that looks like
    rate limiting waits ``rate_limit_delay`` (doubled per attempt when
    ``exponential_backoff`` is set); any other failure waits ``search_delay``.
    No wait is performed after the final attempt.
    """
    log_debug(f"Performing DuckDuckGo search for: {query}")

    for attempt in range(self.max_search_retries):
        try:
            self._respect_rate_limits()

            ddgs = DDGS(timeout=self.search_timeout)
            # NOTE(review): ``max_results`` is not declared in the visible part of
            # this class; presumably it comes from the ``Reader`` base - confirm.
            search_results = ddgs.text(query=query, max_results=self.max_results)

            # ddgs text results carry the page URL under "href"; "link" is kept
            # as a fallback so a backend using that key still works.
            results = [
                {
                    "title": result.get("title", ""),
                    "url": result.get("href") or result.get("link", ""),
                    "description": result.get("body", ""),
                }
                for result in search_results or []
            ]

            log_debug(f"Found {len(results)} search results")
            return results

        except Exception as e:
            logger.warning(f"DuckDuckGo search attempt {attempt + 1} failed: {e}")

            # Last attempt: report and stop without sleeping for a retry that
            # will never happen.
            if attempt >= self.max_search_retries - 1:
                logger.error(f"All DuckDuckGo search attempts failed: {e}")
                return []

            message = str(e)
            if "rate limit" in message.lower() or "429" in message:
                # Rate limited - wait longer
                wait_time = (
                    self.rate_limit_delay * (2**attempt) if self.exponential_backoff else self.rate_limit_delay
                )
                logger.info(f"Rate limited, waiting {wait_time} seconds before retry")
                time.sleep(wait_time)
            else:
                # Other error - shorter wait
                time.sleep(self.search_delay)

    # Reached only when max_search_retries is zero or negative.
    return []
129
+
130
def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
    """Perform a web search using Google (requires ``googlesearch-python``).

    Args:
        query: The search query string.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and
        ``"description"``; an empty list when the optional dependency is
        missing or every attempt fails.
    """
    log_debug(f"Performing Google search for: {query}")

    try:
        from googlesearch import search
    except ImportError:
        logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
        return []

    for attempt in range(self.max_search_retries):
        try:
            self._respect_rate_limits()

            results = []
            # googlesearch-python's ``search`` has no ``stop`` keyword, so passing
            # it raised TypeError on every attempt. ``advanced=True`` makes it
            # yield result objects with title/url/description, not bare URLs.
            # NOTE(review): ``max_results`` is not declared in the visible part of
            # this class; presumably it comes from the ``Reader`` base - confirm.
            search_results = search(query, num_results=self.max_results, advanced=True)

            for result in search_results:
                if isinstance(result, str):
                    # Defensive: a plain URL string still yields a usable entry.
                    results.append({"title": "", "url": result, "description": ""})
                    continue
                results.append(
                    {
                        "title": getattr(result, "title", "") or "",
                        "url": getattr(result, "url", "") or "",
                        "description": getattr(result, "description", "") or "",
                    }
                )

            log_debug(f"Found {len(results)} Google search results")
            return results

        except Exception as e:
            logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
            if attempt < self.max_search_retries - 1:
                time.sleep(self.search_delay)
            else:
                logger.error(f"All Google search attempts failed: {e}")
                return []

    # Reached only when max_search_retries is zero or negative.
    return []
168
+
169
+ def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
170
+ """Perform web search using the configured search engine"""
171
+ if self.search_engine == "duckduckgo":
172
+ return self._perform_duckduckgo_search(query)
173
+ elif self.search_engine == "google":
174
+ return self._perform_google_search(query)
175
+ else:
176
+ logger.error(f"Unsupported search engine: {self.search_engine}")
177
+ return []
178
+
179
+ def _is_valid_url(self, url: str) -> bool:
180
+ """Check if URL is valid and not already visited"""
181
+ try:
182
+ parsed = urlparse(url)
183
+ return bool(parsed.scheme in ["http", "https"] and parsed.netloc and url not in self._visited_urls)
184
+ except Exception:
185
+ return False
186
+
187
def _extract_text_from_html(self, html_content: str, url: str) -> str:
    """Strip markup from an HTML page and return its text with whitespace collapsed.

    ``<script>`` and ``<style>`` elements are removed before the text is
    extracted. If anything goes wrong, a warning mentioning ``url`` is logged
    and the raw ``html_content`` is returned unchanged.
    """
    try:
        soup = BeautifulSoup(html_content, "html.parser")

        # Drop elements whose text is code rather than page content
        for tag in soup(["script", "style"]):
            tag.decompose()

        raw_text = soup.get_text()

        # Trim every line, break it on single spaces and keep the non-empty pieces
        pieces = []
        for raw_line in raw_text.splitlines():
            for fragment in raw_line.strip().split(" "):
                cleaned = fragment.strip()
                if cleaned:
                    pieces.append(cleaned)

        return " ".join(pieces)

    except Exception as e:
        logger.warning(f"Error extracting text from {url}: {e}")
        return html_content
209
+
210
def _fetch_url_content(self, url: str) -> Optional[str]:
    """Download ``url`` and return its text, retrying up to ``self.max_retries`` times.

    HTML responses are passed through ``_extract_text_from_html``; any other
    content type is returned as the raw response text. Returns ``None`` once
    every attempt has failed.
    """
    request_headers = {"User-Agent": self.user_agent}
    final_attempt = self.max_retries - 1

    for attempt_index in range(self.max_retries):
        try:
            response = httpx.get(
                url,
                headers=request_headers,
                timeout=self.request_timeout,
                follow_redirects=True,
            )
            response.raise_for_status()

            # Only HTML is cleaned up; everything else is returned as-is
            is_html = "text/html" in response.headers.get("content-type", "").lower()
            return self._extract_text_from_html(response.text, url) if is_html else response.text

        except Exception as e:
            logger.warning(f"Attempt {attempt_index + 1} failed for {url}: {e}")
            if attempt_index < final_attempt:
                # Random pause before the next attempt
                time.sleep(random.uniform(1, 3))

    logger.error(f"Failed to fetch content from {url} after {self.max_retries} attempts")
    return None
235
+
236
def _create_document_from_url(self, url: str, content: str, search_result: Dict[str, str]) -> Document:
    """Create a Document from fetched URL content and its search-result metadata.

    Args:
        url: The page URL; also used as the document id.
        content: The text fetched from the page.
        search_result: The search hit the URL came from ("title" and
            "description" keys are read when present).

    Returns:
        A Document named after the search title, or after the URL's host when
        the title is missing or empty.
    """
    # Use the URL as the document ID
    doc_id = url

    # `dict.get(key, default)` only falls back when the key is absent, but the
    # search helpers populate "title" with "" when the engine gives none, so
    # use `or` to fall back on empty titles as well.
    doc_name = search_result.get("title") or urlparse(url).netloc

    # Create metadata with search information
    meta_data = {
        "url": url,
        "search_title": search_result.get("title", ""),
        "search_description": search_result.get("description", ""),
        "source": "web_search",
        "search_engine": self.search_engine,
    }

    return Document(id=doc_id, name=doc_name, content=content, meta_data=meta_data)
254
+
255
def read(self, query: str) -> List[Document]:
    """Search the web for ``query`` and turn the fetched pages into documents.

    Each search hit whose URL passes ``_is_valid_url`` is recorded as visited,
    fetched, wrapped in a Document and, when ``self.chunk`` is set, chunked.
    Collection stops once at least ``self.max_results`` documents exist.

    Raises:
        ValueError: If ``query`` is empty.
    """
    if not query:
        raise ValueError("Query cannot be empty")

    log_debug(f"Starting web search reader for query: {query}")

    hits = self._perform_web_search(query)
    if not hits:
        logger.warning(f"No search results found for query: {query}")
        return []

    documents: List[Document] = []

    for hit in hits:
        url = hit.get("url", "")

        # Ignore malformed or previously visited URLs
        if not self._is_valid_url(url):
            continue
        self._visited_urls.add(url)

        # Pause between requests once at least one document has been collected
        if documents:
            time.sleep(self.delay_between_requests)

        page_text = self._fetch_url_content(url)
        if page_text is None:
            continue

        page_doc = self._create_document_from_url(url, page_text, hit)

        # Either the chunks of the page or the page itself
        documents.extend(self.chunk_document(page_doc) if self.chunk else [page_doc])

        if len(documents) >= self.max_results:
            break

    log_debug(f"Created {len(documents)} documents from web search")
    return documents
305
+
306
async def async_read(self, query: str) -> List[Document]:
    """Asynchronously read content for a given query.

    The search itself is performed with the synchronous ``_perform_web_search``;
    the resulting pages are then fetched one after another with a delay of
    ``self.delay_between_requests`` between them.

    Args:
        query: The search query string.

    Returns:
        The collected documents (chunked when ``self.chunk`` is set). Collection
        stops once at least ``self.max_results`` documents exist.

    Raises:
        ValueError: If ``query`` is empty.
    """
    if not query:
        raise ValueError("Query cannot be empty")

    log_debug(f"Starting async web search reader for query: {query}")

    # Perform web search (synchronous operation)
    search_results = self._perform_web_search(query)
    if not search_results:
        logger.warning(f"No search results found for query: {query}")
        return []

    async def fetch_url_async(result: Dict[str, str]) -> Optional[Document]:
        """Fetch one search hit and return it as a Document, or None on skip/failure."""
        url = result.get("url", "")

        # Skip if URL is invalid or already visited
        if not self._is_valid_url(url):
            return None

        # Mark URL as visited
        self._visited_urls.add(url)

        try:
            # Use the URL reader for async fetching
            if self._url_reader:
                docs = await self._url_reader.async_read(url)
                if docs:
                    # Use the first document and add search metadata
                    doc = docs[0]
                    doc.meta_data.update(
                        {
                            "search_title": result.get("title", ""),
                            "search_description": result.get("description", ""),
                            "source": "web_search",
                            "search_engine": self.search_engine,
                        }
                    )
                    return doc

            # Fallback to manual async fetching
            headers = {"User-Agent": self.user_agent}
            async with httpx.AsyncClient(timeout=self.request_timeout) as client:
                response = await client.get(url, headers=headers, follow_redirects=True)
                response.raise_for_status()

                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    content = self._extract_text_from_html(response.text, url)
                else:
                    content = response.text

                return self._create_document_from_url(url, content, result)

        except Exception as e:
            logger.warning(f"Error fetching {url}: {e}")
            return None

    documents: List[Document] = []
    for i, result in enumerate(search_results):
        if i > 0:  # Add delay between requests (except for the first one)
            await asyncio.sleep(self.delay_between_requests)

        # Create each coroutine only when it is about to be awaited: building
        # them all up front and then breaking out early would leave coroutine
        # objects that are never awaited (RuntimeWarning).
        doc = await fetch_url_async(result)
        if doc is None:
            continue

        # Apply chunking if enabled
        if self.chunk:
            documents.extend(await self.chunk_documents_async([doc]))
        else:
            documents.append(doc)

        # Stop if we've reached max_results
        if len(documents) >= self.max_results:
            break

    log_debug(f"Created {len(documents)} documents from async web search")
    return documents
@@ -7,8 +7,11 @@ from urllib.parse import urljoin, urlparse
7
7
 
8
8
  import httpx
9
9
 
10
- from agno.document.base import Document
11
- from agno.document.reader.base import Reader
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.types import ContentType
12
15
  from agno.utils.log import log_debug, logger
13
16
 
14
17
  try:
@@ -28,9 +31,15 @@ class WebsiteReader(Reader):
28
31
  _urls_to_crawl: List[Tuple[str, int]] = field(default_factory=list)
29
32
 
30
33
  def __init__(
31
- self, max_depth: int = 3, max_links: int = 10, timeout: int = 10, proxy: Optional[str] = None, **kwargs
34
+ self,
35
+ chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
36
+ max_depth: int = 3,
37
+ max_links: int = 10,
38
+ timeout: int = 10,
39
+ proxy: Optional[str] = None,
40
+ **kwargs,
32
41
  ):
33
- super().__init__(**kwargs)
42
+ super().__init__(chunking_strategy=chunking_strategy, **kwargs)
34
43
  self.max_depth = max_depth
35
44
  self.max_links = max_links
36
45
  self.proxy = proxy
@@ -39,6 +48,21 @@ class WebsiteReader(Reader):
39
48
  self._visited = set()
40
49
  self._urls_to_crawl = []
41
50
 
51
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Get the list of supported chunking strategies for Website readers."""
    # First parameter is named `cls` because this is a classmethod (PEP 8)
    return [
        ChunkingStrategyType.AGENTIC_CHUNKING,
        ChunkingStrategyType.DOCUMENT_CHUNKING,
        ChunkingStrategyType.RECURSIVE_CHUNKING,
        ChunkingStrategyType.SEMANTIC_CHUNKING,
        ChunkingStrategyType.FIXED_SIZE_CHUNKING,
    ]
61
+
62
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Get the list of content types Website readers accept (URLs only)."""
    # First parameter is named `cls` because this is a classmethod (PEP 8)
    return [ContentType.URL]
65
+
42
66
  def delay(self, min_seconds=1, max_seconds=3):
43
67
  """
44
68
  Introduce a random delay.
@@ -316,7 +340,7 @@ class WebsiteReader(Reader):
316
340
 
317
341
  return crawler_result
318
342
 
319
- def read(self, url: str) -> List[Document]:
343
+ def read(self, url: str, name: Optional[str] = None) -> List[Document]:
320
344
  """
321
345
  Reads a website and returns a list of documents.
322
346
 
@@ -338,7 +362,7 @@ class WebsiteReader(Reader):
338
362
  documents.extend(
339
363
  self.chunk_document(
340
364
  Document(
341
- name=url,
365
+ name=name or url,
342
366
  id=str(crawled_url),
343
367
  meta_data={"url": str(crawled_url)},
344
368
  content=crawled_content,
@@ -348,7 +372,7 @@ class WebsiteReader(Reader):
348
372
  else:
349
373
  documents.append(
350
374
  Document(
351
- name=url,
375
+ name=name or url,
352
376
  id=str(crawled_url),
353
377
  meta_data={"url": str(crawled_url)},
354
378
  content=crawled_content,
@@ -359,7 +383,7 @@ class WebsiteReader(Reader):
359
383
  logger.error(f"Error reading website {url}: {e}")
360
384
  raise
361
385
 
362
- async def async_read(self, url: str) -> List[Document]:
386
+ async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
363
387
  """
364
388
  Asynchronously reads a website and returns a list of documents.
365
389
 
@@ -380,13 +404,16 @@ class WebsiteReader(Reader):
380
404
  async def process_document(crawled_url, crawled_content):
381
405
  if self.chunk:
382
406
  doc = Document(
383
- name=url, id=str(crawled_url), meta_data={"url": str(crawled_url)}, content=crawled_content
407
+ name=name or url,
408
+ id=str(crawled_url),
409
+ meta_data={"url": str(crawled_url)},
410
+ content=crawled_content,
384
411
  )
385
412
  return self.chunk_document(doc)
386
413
  else:
387
414
  return [
388
415
  Document(
389
- name=url,
416
+ name=name or url,
390
417
  id=str(crawled_url),
391
418
  meta_data={"url": str(crawled_url)},
392
419
  content=crawled_content,
@@ -0,0 +1,59 @@
1
+ from typing import List, Optional
2
+
3
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
4
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
5
+ from agno.knowledge.document import Document
6
+ from agno.knowledge.reader.base import Reader
7
+ from agno.knowledge.types import ContentType
8
+ from agno.utils.log import log_debug, log_info
9
+
10
+ try:
11
+ import wikipedia # noqa: F401
12
+ except ImportError:
13
+ raise ImportError("The `wikipedia` package is not installed. Please install it via `pip install wikipedia`.")
14
+
15
+
16
class WikipediaReader(Reader):
    """Reader that turns the Wikipedia summary of a topic into a Document."""

    # Passed to `wikipedia.summary` as its `auto_suggest` argument
    auto_suggest: bool = True

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), auto_suggest: bool = True, **kwargs
    ):
        """Create the reader.

        Args:
            chunking_strategy: Strategy forwarded to the base ``Reader``.
            auto_suggest: Forwarded to ``wikipedia.summary`` on every read.
            **kwargs: Extra keyword arguments forwarded to the base ``Reader``.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.auto_suggest = auto_suggest

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Wikipedia readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
            ChunkingStrategyType.AGENTIC_CHUNKING,
            ChunkingStrategyType.DOCUMENT_CHUNKING,
            ChunkingStrategyType.RECURSIVE_CHUNKING,
            ChunkingStrategyType.SEMANTIC_CHUNKING,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types Wikipedia readers accept (topics only)."""
        return [ContentType.TOPIC]

    def read(self, topic: str) -> List[Document]:
        """Fetch the Wikipedia summary for ``topic``.

        Args:
            topic: The page title (or search phrase) to summarise.

        Returns:
            A single-element list holding the summary Document, or an empty
            list when the page does not exist, the topic is ambiguous, or the
            summary is empty.
        """
        log_debug(f"Reading Wikipedia topic: {topic}")
        summary = None
        try:
            summary = wikipedia.summary(topic, auto_suggest=self.auto_suggest)

        except wikipedia.exceptions.PageError:
            log_info("PageError: Page not found.")

        except wikipedia.exceptions.DisambiguationError:
            # An ambiguous title is a "no usable page" outcome just like a
            # missing page, so report it and return nothing instead of crashing.
            log_info(f"DisambiguationError: Topic '{topic}' is ambiguous.")

        # Only create Document if we successfully got a summary
        if summary:
            return [
                Document(
                    name=topic,
                    meta_data={"topic": topic},
                    content=summary,
                )
            ]
        return []
@@ -0,0 +1,78 @@
1
+ import asyncio
2
+ from typing import List, Optional
3
+
4
+ from agno.knowledge.chunking.recursive import RecursiveChunking
5
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
6
+ from agno.knowledge.document.base import Document
7
+ from agno.knowledge.reader.base import Reader
8
+ from agno.knowledge.types import ContentType
9
+ from agno.utils.log import log_debug, log_error, log_info
10
+
11
+ try:
12
+ from youtube_transcript_api import YouTubeTranscriptApi
13
+ except ImportError:
14
+ raise ImportError(
15
+ "`youtube_transcript_api` not installed. Please install it via `pip install youtube_transcript_api`."
16
+ )
17
+
18
+
19
class YouTubeReader(Reader):
    """Reader for YouTube video transcripts"""

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RecursiveChunking(), **kwargs):
        """Create the reader, forwarding the chunking strategy and kwargs to ``Reader``."""
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for YouTube readers."""
        return [
            ChunkingStrategyType.RECURSIVE_CHUNKING,
            ChunkingStrategyType.AGENTIC_CHUNKING,
            ChunkingStrategyType.DOCUMENT_CHUNKING,
            ChunkingStrategyType.SEMANTIC_CHUNKING,
            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types YouTube readers accept."""
        return [ContentType.URL, ContentType.YOUTUBE]

    @staticmethod
    def _extract_video_id(url: str) -> str:
        """Extract the video id from the common YouTube URL shapes.

        Handles ``watch?v=<id>``, ``youtu.be/<id>`` and ``/embed|shorts|live|v/<id>``
        paths. Anything else falls back to the plain ``v=`` split, which returns
        a bare id unchanged.
        """
        # Local import keeps this block self-contained
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        segments = [segment for segment in parsed.path.split("/") if segment]

        # Short links: https://youtu.be/<id>
        if host.endswith("youtu.be") and segments:
            return segments[0]

        # Standard links: https://www.youtube.com/watch?v=<id>&...
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            return query_ids[0]

        # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

        return url.split("v=")[-1].split("&")[0]

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Fetch the transcript of a YouTube video and return it as documents.

        Args:
            url: The video URL.
            name: Optional document name; defaults to ``youtube_<video_id>``.

        Returns:
            The transcript as one Document, or its chunks when ``self.chunk``
            is set. An empty list is returned (and the error logged) on any
            failure.
        """
        try:
            # Extract video ID from URL
            video_id = self._extract_video_id(url)
            log_info(f"Reading transcript for video: {video_id}")

            # Get transcript
            log_debug(f"Fetching transcript for video: {video_id}")
            ytt_api = YouTubeTranscriptApi()
            transcript_data = ytt_api.fetch(video_id)

            # Combine transcript segments into full text (join avoids quadratic +=)
            transcript_text = " ".join(f"{segment.text}" for segment in transcript_data)

            documents = [
                Document(
                    name=name or f"youtube_{video_id}",
                    id=f"youtube_{video_id}",
                    meta_data={"video_url": url, "video_id": video_id},
                    content=transcript_text.strip(),
                )
            ]

            if self.chunk:
                chunked_documents = []
                for document in documents:
                    chunked_documents.extend(self.chunk_document(document))
                return chunked_documents
            return documents

        except Exception as e:
            log_error(f"Error reading transcript for {url}: {e}")
            return []

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Run ``read`` in the default executor so it does not block the event loop.

        Accepts the same ``name`` argument as ``read`` and forwards it.
        """
        # get_running_loop() is the supported way to obtain the loop inside a coroutine
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.read, url, name)