agno 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,455 @@
1
+ import asyncio
2
+ import random
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Optional, Set, Tuple
6
+ from urllib.parse import urljoin, urlparse
7
+
8
+ import httpx
9
+
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.types import ContentType
15
+ from agno.utils.log import log_debug, logger
16
+
17
+ try:
18
+ from bs4 import BeautifulSoup, Tag # noqa: F401
19
+ except ImportError:
20
+ raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
21
+
22
+
23
+ @dataclass
24
+ class WebsiteReader(Reader):
25
+ """Reader for Websites"""
26
+
27
+ max_depth: int = 3
28
+ max_links: int = 10
29
+
30
+ _visited: Set[str] = field(default_factory=set)
31
+ _urls_to_crawl: List[Tuple[str, int]] = field(default_factory=list)
32
+
33
def __init__(
    self,
    chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
    max_depth: int = 3,
    max_links: int = 10,
    timeout: int = 10,
    proxy: Optional[str] = None,
    **kwargs,
):
    """
    Configure a website reader and reset its crawl bookkeeping.

    :param chunking_strategy: Chunking strategy handed to the base reader.
    :param max_depth: Depth limit consulted while crawling. Default is 3.
    :param max_links: Cap on the number of pages collected per crawl. Default is 10.
    :param timeout: Timeout passed to each HTTP request. Default is 10.
    :param proxy: Proxy passed to each HTTP request when set. Default is None.
    :param kwargs: Extra keyword arguments forwarded to the base reader.
    """
    # NOTE(review): the default `SemanticChunking()` object is built once, when
    # this function is defined, so every reader created without an explicit
    # strategy receives that same instance.
    super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    # HTTP request options
    self.timeout = timeout
    self.proxy = proxy

    # Crawl limits
    self.max_links = max_links
    self.max_depth = max_depth

    # Per-instance crawl state: pending (url, depth) pairs and URLs already seen
    self._urls_to_crawl = []
    self._visited = set()
50
+
51
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """
    Get the list of supported chunking strategies for Website readers.

    :return: The strategy types accepted by this reader: agentic, document,
        recursive, semantic and fixed-size chunkers.
    """
    # The implicit first argument of a classmethod is the class, hence `cls`.
    return [
        ChunkingStrategyType.AGENTIC_CHUNKER,
        ChunkingStrategyType.DOCUMENT_CHUNKER,
        ChunkingStrategyType.RECURSIVE_CHUNKER,
        ChunkingStrategyType.SEMANTIC_CHUNKER,
        ChunkingStrategyType.FIXED_SIZE_CHUNKER,
    ]
61
+
62
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """
    Get the list of content types this reader accepts.

    :return: A single-element list containing ``ContentType.URL``.
    """
    # The implicit first argument of a classmethod is the class, hence `cls`.
    return [ContentType.URL]
65
+
66
def delay(self, min_seconds=1, max_seconds=3):
    """
    Pause the calling thread for a randomly chosen amount of time.

    :param min_seconds: Lower bound of the pause, in seconds. Default is 1.
    :param max_seconds: Upper bound of the pause, in seconds. Default is 3.
    """
    # Draw the pause length uniformly between the two bounds and block for it.
    time.sleep(random.uniform(min_seconds, max_seconds))
75
+
76
async def async_delay(self, min_seconds=1, max_seconds=3):
    """
    Suspend the current coroutine for a randomly chosen amount of time.

    :param min_seconds: Lower bound of the pause, in seconds. Default is 1.
    :param max_seconds: Upper bound of the pause, in seconds. Default is 3.
    """
    # Draw the pause length uniformly between the two bounds, then yield to
    # the event loop for that long instead of blocking the thread.
    await asyncio.sleep(random.uniform(min_seconds, max_seconds))
85
+
86
+ def _get_primary_domain(self, url: str) -> str:
87
+ """
88
+ Extract primary domain from the given URL.
89
+
90
+ :param url: The URL to extract the primary domain from.
91
+ :return: The primary domain.
92
+ """
93
+ domain_parts = urlparse(url).netloc.split(".")
94
+ # Return primary domain (excluding subdomains)
95
+ return ".".join(domain_parts[-2:])
96
+
97
def _extract_main_content(self, soup: BeautifulSoup) -> str:
    """
    Pull the readable text out of a parsed page.

    The first element that looks like a content container is used when one is
    found; otherwise the whole document is used. Either way, script, style,
    nav, header and footer elements are removed before the text is collected.

    :param soup: The BeautifulSoup object to read. Unwanted elements are
        removed from it in place.
    :return: The extracted text, stripped and joined with a space separator.
    """
    container_tags = ("article", "main", "section")
    container_classes = ("content", "main-content", "post-content", "entry-content", "article-body")
    container_ids = ("content", "main", "article")
    boilerplate_tags = ["script", "style", "nav", "header", "footer"]

    def is_content_container(node: Tag) -> bool:
        """Report whether ``node`` qualifies by tag name, CSS class, or id."""
        if not isinstance(node, Tag):
            return False

        if node.name in container_tags:
            return True

        # The class attribute is only inspected when it is a list of names.
        raw_classes = node.get("class")
        if isinstance(raw_classes, list):
            for class_name in raw_classes:
                if class_name in container_classes:
                    return True

        # Last chance: one of the common content ids.
        return node.get("id", "") in container_ids

    def text_without_boilerplate(node) -> str:
        """Drop unwanted descendants of ``node`` and return its remaining text."""
        for junk in node.find_all(boilerplate_tags):
            junk.decompose()
        return node.get_text(strip=True, separator=" ")

    # Prefer the first content container in the document.
    candidate = soup.find(is_content_container)
    if candidate and hasattr(candidate, "find_all"):
        return text_without_boilerplate(candidate)

    # Fallback: use the full page.
    return text_without_boilerplate(soup)
140
+
141
+ def crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
142
+ """
143
+ Crawls a website and returns a dictionary of URLs and their corresponding content.
144
+
145
+ Parameters:
146
+ - url (str): The starting URL to begin the crawl.
147
+ - starting_depth (int, optional): The starting depth level for the crawl. Defaults to 1.
148
+
149
+ Returns:
150
+ - Dict[str, str]: A dictionary where each key is a URL and the corresponding value is the main
151
+ content extracted from that URL.
152
+
153
+ Raises:
154
+ - httpx.HTTPStatusError: If there's an HTTP status error.
155
+ - httpx.RequestError: If there's a request-related error (connection, timeout, etc).
156
+
157
+ Note:
158
+ The function focuses on extracting the main content by prioritizing content inside common HTML tags
159
+ like `<article>`, `<main>`, and `<div>` with class names such as "content", "main-content", etc.
160
+ The crawler will also respect the `max_depth` attribute of the WebCrawler class, ensuring it does not
161
+ crawl deeper than the specified depth.
162
+ """
163
+ num_links = 0
164
+ crawler_result: Dict[str, str] = {}
165
+ primary_domain = self._get_primary_domain(url)
166
+ # Add starting URL with its depth to the global list
167
+ self._urls_to_crawl.append((url, starting_depth))
168
+ while self._urls_to_crawl:
169
+ # Unpack URL and depth from the global list
170
+ current_url, current_depth = self._urls_to_crawl.pop(0)
171
+
172
+ # Skip if
173
+ # - URL is already visited
174
+ # - does not end with the primary domain,
175
+ # - exceeds max depth
176
+ # - exceeds max links
177
+ if (
178
+ current_url in self._visited
179
+ or not urlparse(current_url).netloc.endswith(primary_domain)
180
+ or (current_depth > self.max_depth and current_url != url)
181
+ or num_links >= self.max_links
182
+ ):
183
+ continue
184
+
185
+ self._visited.add(current_url)
186
+ self.delay()
187
+
188
+ try:
189
+ log_debug(f"Crawling: {current_url}")
190
+
191
+ response = (
192
+ httpx.get(current_url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True)
193
+ if self.proxy
194
+ else httpx.get(current_url, timeout=self.timeout, follow_redirects=True)
195
+ )
196
+ response.raise_for_status()
197
+
198
+ soup = BeautifulSoup(response.content, "html.parser")
199
+
200
+ # Extract main content
201
+ main_content = self._extract_main_content(soup)
202
+ if main_content:
203
+ crawler_result[current_url] = main_content
204
+ num_links += 1
205
+
206
+ # Add found URLs to the global list, with incremented depth
207
+ for link in soup.find_all("a", href=True):
208
+ if not isinstance(link, Tag):
209
+ continue
210
+
211
+ href_str = str(link["href"])
212
+ full_url = urljoin(current_url, href_str)
213
+
214
+ if not isinstance(full_url, str):
215
+ continue
216
+
217
+ parsed_url = urlparse(full_url)
218
+ if parsed_url.netloc.endswith(primary_domain) and not any(
219
+ parsed_url.path.endswith(ext) for ext in [".pdf", ".jpg", ".png"]
220
+ ):
221
+ full_url_str = str(full_url)
222
+ if (
223
+ full_url_str not in self._visited
224
+ and (full_url_str, current_depth + 1) not in self._urls_to_crawl
225
+ ):
226
+ self._urls_to_crawl.append((full_url_str, current_depth + 1))
227
+
228
+ except httpx.HTTPStatusError as e:
229
+ # Log HTTP status errors but continue crawling other pages
230
+ # Skip redirect errors (3xx) as they should be handled by follow_redirects
231
+ if e.response.status_code >= 300 and e.response.status_code < 400:
232
+ logger.debug(f"Redirect encountered for {current_url}, skipping: {e}")
233
+ else:
234
+ logger.warning(f"HTTP status error while crawling {current_url}: {e}")
235
+ # For the initial URL, we should raise the error only if it's not a redirect
236
+ if current_url == url and not crawler_result and not (300 <= e.response.status_code < 400):
237
+ raise
238
+ except httpx.RequestError as e:
239
+ # Log request errors but continue crawling other pages
240
+ logger.warning(f"Request error while crawling {current_url}: {e}")
241
+ # For the initial URL, we should raise the error
242
+ if current_url == url and not crawler_result:
243
+ raise
244
+ except Exception as e:
245
+ # Log other exceptions but continue crawling other pages
246
+ logger.warning(f"Failed to crawl {current_url}: {e}")
247
+ # For the initial URL, we should raise the error
248
+ if current_url == url and not crawler_result:
249
+ # Wrap non-HTTP exceptions in a RequestError
250
+ raise httpx.RequestError(f"Failed to crawl starting URL {url}: {str(e)}", request=None) from e
251
+
252
+ # If we couldn't crawl any pages, raise an error
253
+ if not crawler_result:
254
+ raise httpx.RequestError(f"Failed to extract any content from {url}", request=None)
255
+
256
+ return crawler_result
257
+
258
async def async_crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
    """
    Asynchronously crawl a website and map every crawled URL to its extracted content.

    Each call resets the visited set and the pending-URL queue, then walks the queue in
    FIFO order using one `httpx.AsyncClient` (created with `self.proxy` when it is set).
    A page is stored, and counted against `self.max_links`, only when
    `_extract_main_content` returns something truthy for it.

    Parameters:
    - url (str): The starting URL to begin the crawl.
    - starting_depth (int, optional): The depth assigned to the starting URL. Defaults to 1.

    Returns:
    - Dict[str, str]: A dictionary where each key is a URL and the corresponding value is the
      main content extracted from that URL.

    Raises:
    - httpx.HTTPStatusError: If the starting URL returns an error status while nothing has
      been stored yet.
    - httpx.RequestError: If the starting URL cannot be fetched or processed while nothing has
      been stored yet, or if no content could be extracted from any page.
    """
    extracted: Dict[str, str] = {}
    stored_count = 0
    site_domain = self._get_primary_domain(url)
    ignored_suffixes = (".pdf", ".jpg", ".png")

    # Clear previously visited URLs and URLs to crawl
    self._visited = set()
    self._urls_to_crawl = [(url, starting_depth)]

    client_kwargs = {"proxy": self.proxy} if self.proxy else {}
    async with httpx.AsyncClient(**client_kwargs) as client:  # type: ignore
        # The loop condition enforces the max_links cap, so it is not re-checked below.
        while self._urls_to_crawl and stored_count < self.max_links:
            current_url, current_depth = self._urls_to_crawl.pop(0)

            # Skip pages that were already fetched, are off-site, or are too deep.
            if current_url in self._visited:
                continue
            if not urlparse(current_url).netloc.endswith(site_domain):
                continue
            if current_depth > self.max_depth:
                continue

            self._visited.add(current_url)
            await self.async_delay()

            try:
                log_debug(f"Crawling asynchronously: {current_url}")
                response = await client.get(current_url, timeout=self.timeout, follow_redirects=True)
                response.raise_for_status()

                soup = BeautifulSoup(response.content, "html.parser")

                # Store the page only when some main content was found.
                page_text = self._extract_main_content(soup)
                if page_text:
                    extracted[current_url] = page_text
                    stored_count += 1

                # Queue same-site links one level deeper than the current page.
                child_depth = current_depth + 1
                for anchor in soup.find_all("a", href=True):
                    if not isinstance(anchor, Tag):
                        continue

                    candidate = urljoin(current_url, str(anchor["href"]))
                    if not isinstance(candidate, str):
                        continue

                    target = urlparse(candidate)
                    if not target.netloc.endswith(site_domain):
                        continue
                    if target.path.endswith(ignored_suffixes):
                        continue

                    queue_entry = (candidate, child_depth)
                    if candidate not in self._visited and queue_entry not in self._urls_to_crawl:
                        self._urls_to_crawl.append(queue_entry)

            except httpx.HTTPStatusError as e:
                # Log HTTP status errors but continue crawling other pages
                logger.warning(f"HTTP status error while crawling asynchronously {current_url}: {e}")
                # A failure on the starting URL is fatal while nothing has been stored.
                if current_url == url and not extracted:
                    raise
            except httpx.RequestError as e:
                # Log request errors but continue crawling other pages
                logger.warning(f"Request error while crawling asynchronously {current_url}: {e}")
                if current_url == url and not extracted:
                    raise
            except Exception as e:
                # Log other exceptions but continue crawling other pages
                logger.warning(f"Failed to crawl asynchronously {current_url}: {e}")
                if current_url == url and not extracted:
                    # Wrap non-HTTP exceptions in a RequestError
                    raise httpx.RequestError(
                        f"Failed to crawl starting URL {url} asynchronously: {str(e)}", request=None
                    ) from e

    # If we couldn't crawl any pages, raise an error
    if not extracted:
        raise httpx.RequestError(f"Failed to extract any content from {url} asynchronously", request=None)

    return extracted
360
+
361
def read(self, url: str, name: Optional[str] = None) -> List[Document]:
    """
    Crawl a website and turn every crawled page into documents.

    The site is first crawled into a mapping of URL -> extracted content. Each entry then
    becomes one Document, which is split into chunks when `self.chunk` is truthy.

    :param url: The URL of the website to read.
    :param name: Name given to every document; `url` is used when it is omitted or empty.
    :return: A list of documents.
    :raises httpx.HTTPStatusError: If there's an HTTP status error.
    :raises httpx.RequestError: If there's a request-related error.
    """
    log_debug(f"Reading: {url}")
    try:
        pages = self.crawl(url)
        collected = []
        for page_url, page_text in pages.items():
            page_document = Document(
                name=name or url,
                id=str(page_url),
                meta_data={"url": str(page_url)},
                content=page_text,
            )
            if self.chunk:
                collected.extend(self.chunk_document(page_document))
            else:
                collected.append(page_document)
        return collected
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        # Log at this level, then let the caller decide how to handle the failure.
        logger.error(f"Error reading website {url}: {e}")
        raise
403
+
404
async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
    """
    Asynchronously crawl a website and turn every crawled page into documents.

    The site is first crawled into a mapping of URL -> extracted content. Each entry then
    becomes one Document, which is split into chunks when `self.chunk` is truthy.

    :param url: The URL of the website to read.
    :param name: Name given to every document; `url` is used when it is omitted or empty.
    :return: A list of documents.
    :raises httpx.HTTPStatusError: If there's an HTTP status error.
    :raises httpx.RequestError: If there's a request-related error.
    """
    log_debug(f"Reading asynchronously: {url}")
    try:
        pages = await self.async_crawl(url)

        async def process_document(crawled_url, crawled_content):
            # Always yields a list so the gathered results can be flattened uniformly.
            page_document = Document(
                name=name or url,
                id=str(crawled_url),
                meta_data={"url": str(crawled_url)},
                content=crawled_content,
            )
            if self.chunk:
                return self.chunk_document(page_document)
            return [page_document]

        # One coroutine per crawled page, awaited together.
        batches = await asyncio.gather(
            *(process_document(page_url, page_text) for page_url, page_text in pages.items())
        )

        # Flatten the per-page lists into a single list, keeping crawl order.
        return [document for batch in batches for document in batch]
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        # Log at this level, then let the caller decide how to handle the failure.
        logger.error(f"Error reading website asynchronously {url}: {e}")
        raise
@@ -0,0 +1,59 @@
1
+ from typing import List, Optional
2
+
3
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
4
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
5
+ from agno.knowledge.document import Document
6
+ from agno.knowledge.reader.base import Reader
7
+ from agno.knowledge.types import ContentType
8
+ from agno.utils.log import log_debug, log_info
9
+
10
+ try:
11
+ import wikipedia # noqa: F401
12
+ except ImportError:
13
+ raise ImportError("The `wikipedia` package is not installed. Please install it via `pip install wikipedia`.")
14
+
15
+
16
class WikipediaReader(Reader):
    """Reader that turns the Wikipedia summary of a topic into a single Document."""

    # Class-level default; each instance overrides it in __init__.
    auto_suggest: bool = True

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), auto_suggest: bool = True, **kwargs
    ):
        """
        :param chunking_strategy: Strategy forwarded to the base Reader.
        :param auto_suggest: Value forwarded as `auto_suggest` to `wikipedia.summary`.
        :param kwargs: Extra keyword arguments forwarded to the base Reader.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.auto_suggest = auto_suggest

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Wikipedia readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader handles."""
        return [ContentType.TOPIC]

    def read(self, topic: str) -> List[Document]:
        """
        Fetch the Wikipedia summary for `topic`.

        :param topic: The topic to look up.
        :return: A one-element list holding the summary document, or an empty list when the
            page is not found or the summary is empty.

        Only `wikipedia.exceptions.PageError` is handled here; any other exception raised by
        `wikipedia.summary` propagates to the caller.
        """
        log_debug(f"Reading Wikipedia topic: {topic}")
        try:
            summary = wikipedia.summary(topic, auto_suggest=self.auto_suggest)
        except wikipedia.exceptions.PageError:
            log_info("PageError: Page not found.")
            return []

        # Only create Document if we successfully got a summary
        if not summary:
            return []
        return [
            Document(
                name=topic,
                meta_data={"topic": topic},
                content=summary,
            )
        ]
@@ -0,0 +1,78 @@
1
+ import asyncio
2
+ from typing import List, Optional
3
+
4
+ from agno.knowledge.chunking.recursive import RecursiveChunking
5
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
6
+ from agno.knowledge.document.base import Document
7
+ from agno.knowledge.reader.base import Reader
8
+ from agno.knowledge.types import ContentType
9
+ from agno.utils.log import log_debug, log_error, log_info
10
+
11
+ try:
12
+ from youtube_transcript_api import YouTubeTranscriptApi
13
+ except ImportError:
14
+ raise ImportError(
15
+ "`youtube_transcript_api` not installed. Please install it via `pip install youtube_transcript_api`."
16
+ )
17
+
18
+
19
class YouTubeReader(Reader):
    """Reader for YouTube video transcripts"""

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RecursiveChunking(), **kwargs):
        """
        :param chunking_strategy: Strategy forwarded to the base Reader.
        :param kwargs: Extra keyword arguments forwarded to the base Reader.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for YouTube readers."""
        return [
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader handles."""
        return [ContentType.YOUTUBE]

    @staticmethod
    def _extract_video_id(url: str) -> str:
        """
        Extract the video id from a YouTube URL.

        Handles `...?v=<id>` watch links, `youtu.be/<id>` short links and
        `/embed/<id>`, `/shorts/<id>`, `/live/<id>`, `/v/<id>` paths on youtube hosts.
        Anything else falls back to the previous string-splitting behaviour, so inputs
        that worked before (including a bare video id) still give the same result.
        """
        # Imported locally so the helper does not depend on module-level imports.
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(url)

        # Standard watch URL: the id is the `v` query parameter (fragment excluded).
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids and query_ids[0]:
            return query_ids[0]

        host = (parsed.hostname or "").lower()
        segments = [segment for segment in parsed.path.split("/") if segment]

        # Short links carry the id as the first path segment.
        if host in ("youtu.be", "www.youtu.be") and segments:
            return segments[0]

        # Embed / shorts / live links carry the id as the second path segment.
        on_youtube = host.endswith("youtube.com") or host.endswith("youtube-nocookie.com")
        if on_youtube and len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

        # Legacy behaviour for everything else.
        return url.split("v=")[-1].split("&")[0]

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Fetch the transcript of a YouTube video and return it as documents.

        :param url: URL of the video.
        :param name: Optional document name; defaults to `youtube_<video_id>`.
        :return: The transcript as one document, or its chunks when `self.chunk` is truthy.
            An empty list is returned (and the error logged) if anything fails.
        """
        try:
            # Extract video ID from URL
            video_id = self._extract_video_id(url)
            log_info(f"Reading transcript for video: {video_id}")

            # Get transcript
            log_debug(f"Fetching transcript for video: {video_id}")
            ytt_api = YouTubeTranscriptApi()
            transcript_data = ytt_api.fetch(video_id)

            # Combine transcript segments into full text
            transcript_text = " ".join(f"{segment.text}" for segment in transcript_data)

            documents = [
                Document(
                    name=name or f"youtube_{video_id}",
                    id=f"youtube_{video_id}",
                    meta_data={"video_url": url, "video_id": video_id},
                    content=transcript_text.strip(),
                )
            ]

            if self.chunk:
                chunked_documents = []
                for document in documents:
                    chunked_documents.extend(self.chunk_document(document))
                return chunked_documents
            return documents

        except Exception as e:
            # Best-effort reader: report the failure and return nothing instead of raising.
            log_error(f"Error reading transcript for {url}: {e}")
            return []

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Run `read` in the default executor so the event loop is not blocked.

        :param url: URL of the video.
        :param name: Optional document name, forwarded to `read`.
        :return: Whatever `read` returns.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.read, url, name)
File without changes
@@ -0,0 +1,88 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional, Union
3
+
4
+ from agno.cloud.aws.s3.bucket import S3Bucket
5
+ from agno.cloud.aws.s3.object import S3Object
6
+
7
+
8
# NOTE(review): @dataclass is applied to a class with no annotated fields, so the generated
# __repr__/__eq__ do not look at the attributes assigned in __init__ - confirm this is intended.
@dataclass
class S3Content:
    """Describes content stored in S3: one bucket reference plus one key or object reference."""

    def __init__(
        self,
        bucket_name: Optional[str] = None,
        bucket: Optional[S3Bucket] = None,
        key: Optional[str] = None,
        object: Optional[S3Object] = None,
        prefix: Optional[str] = None,
    ):
        """
        Exactly one of `bucket_name` / `bucket` and exactly one of `key` / `object` is required.

        :param bucket_name: Name of the bucket; an S3Bucket is created from it.
        :param bucket: An existing S3Bucket.
        :param key: Key of the object inside the bucket.
        :param object: An existing S3Object.
        :param prefix: Stored as given. It is not part of the validation, so a key or an
            object is still required when a prefix is passed.
        :raises ValueError: If a required argument is missing or both alternatives are given.
        """
        self.bucket_name, self.bucket = bucket_name, bucket
        self.key, self.object = key, object
        self.prefix = prefix

        has_bucket_name = bucket_name is not None
        has_bucket = bucket is not None
        has_key = key is not None
        has_object = object is not None

        # "Missing" checks run before "both given" checks; the order decides which message wins.
        if not (has_bucket_name or has_bucket):
            raise ValueError("Either bucket_name or bucket must be provided")
        if not (has_key or has_object):
            raise ValueError("Either key or object must be provided")
        if has_bucket_name and has_bucket:
            raise ValueError("Either bucket_name or bucket must be provided, not both")
        if has_key and has_object:
            raise ValueError("Either key or object must be provided, not both")

        if has_bucket_name:
            self.bucket = S3Bucket(name=bucket_name)

    def get_config(self):
        """Return the stored settings as a plain dictionary."""
        field_names = ("bucket_name", "bucket", "key", "object", "prefix")
        return {field_name: getattr(self, field_name) for field_name in field_names}
+
45
+
46
# NOTE(review): @dataclass is applied to a class with no annotated fields, so the generated
# __repr__/__eq__ do not look at the attributes assigned in __init__ - confirm this is intended.
@dataclass
class GCSContent:
    """Describes content stored in Google Cloud Storage: a bucket plus a blob name and/or prefix."""

    def __init__(
        self,
        bucket=None,  # Type hint removed to avoid import issues
        bucket_name: Optional[str] = None,
        blob_name: Optional[str] = None,
        prefix: Optional[str] = None,
    ):
        """
        Exactly one of `bucket` / `bucket_name` and at least one of `blob_name` / `prefix`
        is required.

        :param bucket: An existing bucket object (presumably a google-cloud-storage bucket;
            its type is not checked here).
        :param bucket_name: Name of the bucket; a bucket is obtained from a new storage client.
        :param blob_name: Name of a single blob.
        :param prefix: Prefix of the blobs.
        :raises ValueError: If the arguments are missing or contradictory.
        :raises ImportError: If a bucket must be created from `bucket_name` and the
            `google-cloud-storage` package is not installed.
        """
        self.bucket = bucket
        self.bucket_name = bucket_name
        self.blob_name = blob_name
        self.prefix = prefix

        # Validate first so argument errors are reported even without the optional package.
        if self.bucket is None and self.bucket_name is None:
            raise ValueError("No bucket or bucket_name provided")
        if self.bucket is not None and self.bucket_name is not None:
            raise ValueError("Provide either bucket or bucket_name")
        if self.blob_name is None and self.prefix is None:
            raise ValueError("Either blob_name or prefix must be provided")

        if self.bucket is None:
            # Import Google Cloud Storage only when actually needed, i.e. when the bucket
            # has to be created from its name.
            try:
                from google.cloud import storage  # type: ignore
            except ImportError:
                raise ImportError(
                    "The `google-cloud-storage` package is not installed. Please install it via `pip install google-cloud-storage`."
                )

            client = storage.Client()
            self.bucket = client.bucket(self.bucket_name)

    def get_config(self):
        """Return the stored settings as a plain dictionary."""
        return {
            "bucket": self.bucket,
            "bucket_name": self.bucket_name,
            "blob_name": self.blob_name,
            "prefix": self.prefix,
        }
+
87
+
88
+ RemoteContent = Union[S3Content, GCSContent]
@@ -0,0 +1,3 @@
1
+ from agno.knowledge.reranker.base import Reranker
2
+
3
+ __all__ = ["Reranker"]