agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,455 @@
1
+ import asyncio
2
+ import random
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Optional, Set, Tuple
6
+ from urllib.parse import urljoin, urlparse
7
+
8
+ import httpx
9
+
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.types import ContentType
15
+ from agno.utils.log import log_debug, log_error, log_warning
16
+
17
+ try:
18
+ from bs4 import BeautifulSoup, Tag # noqa: F401
19
+ except ImportError:
20
+ raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
21
+
22
+
23
@dataclass
class WebsiteReader(Reader):
    """Reader that crawls a website and turns every crawled page into documents.

    Starting from a URL, links on the same primary domain are followed in
    first-in/first-out order (breadth-first), bounded by ``max_depth`` and
    ``max_links``. The main textual content of each fetched page is extracted
    and, when chunking is enabled on the reader, split into chunks.
    """

    # Deepest link level the crawler is allowed to follow.
    max_depth: int = 3
    # Maximum number of pages whose content is collected in one crawl.
    max_links: int = 10

    # Per-crawl bookkeeping: URLs already fetched, and the FIFO queue of (url, depth) pairs.
    _visited: Set[str] = field(default_factory=set)
    _urls_to_crawl: List[Tuple[str, int]] = field(default_factory=list)

    def __init__(
        self,
        chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
        max_depth: int = 3,
        max_links: int = 10,
        timeout: int = 10,
        proxy: Optional[str] = None,
        **kwargs,
    ):
        """
        Create a website reader.

        :param chunking_strategy: Strategy forwarded to the base reader.
            NOTE: the default instance is created once, when this method is defined,
            and is therefore shared by every reader that relies on the default.
        :param max_depth: Deepest link level to follow. Default is 3.
        :param max_links: Maximum number of pages to collect. Default is 10.
        :param timeout: Timeout passed to every HTTP request. Default is 10.
        :param proxy: Optional proxy URL passed to httpx.
        :param kwargs: Extra keyword arguments forwarded to the base reader.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.max_depth = max_depth
        self.max_links = max_links
        self.proxy = proxy
        self.timeout = timeout

        self._visited = set()
        self._urls_to_crawl = []

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Website readers."""
        return [
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts (URLs only)."""
        return [ContentType.URL]

    def delay(self, min_seconds: float = 1, max_seconds: float = 3) -> None:
        """
        Introduce a random delay (blocking).

        :param min_seconds: Minimum number of seconds to delay. Default is 1.
        :param max_seconds: Maximum number of seconds to delay. Default is 3.
        """
        time.sleep(random.uniform(min_seconds, max_seconds))

    async def async_delay(self, min_seconds: float = 1, max_seconds: float = 3) -> None:
        """
        Introduce a random delay without blocking the event loop.

        :param min_seconds: Minimum number of seconds to delay. Default is 1.
        :param max_seconds: Maximum number of seconds to delay. Default is 3.
        """
        await asyncio.sleep(random.uniform(min_seconds, max_seconds))

    def _get_primary_domain(self, url: str) -> str:
        """
        Extract the primary domain from the given URL.

        The primary domain is the last two dot-separated labels of the URL's
        network location, so ``https://docs.example.com/x`` gives ``example.com``.
        Multi-label public suffixes are not recognised: ``example.co.uk`` gives
        ``co.uk``.

        :param url: The URL to extract the primary domain from.
        :return: The primary domain.
        """
        labels = urlparse(url).netloc.split(".")
        return ".".join(labels[-2:])

    @staticmethod
    def _is_within_domain(netloc: str, primary_domain: str) -> bool:
        """Return True when ``netloc`` is the primary domain itself or one of its subdomains."""
        # A bare ``endswith`` would also accept look-alike hosts such as
        # "evilexample.com" for "example.com"; require a label boundary instead.
        return netloc == primary_domain or netloc.endswith("." + primary_domain)

    def _extract_main_content(self, soup: BeautifulSoup) -> str:
        """
        Extract the main textual content from a parsed page.

        The first element that looks like a content container (an ``article``,
        ``main`` or ``section`` tag, or any tag with a well-known content class
        or id) is used. If none is found the whole page is used. In both cases
        script, style, nav, header and footer elements are removed first.

        :param soup: The BeautifulSoup object to extract the main content from.
        :return: The main content as whitespace-separated text.
        """
        unwanted_tags = ["script", "style", "nav", "header", "footer"]
        content_classes = ["content", "main-content", "post-content", "entry-content", "article-body"]

        def match(tag: Tag) -> bool:
            """Check whether the tag is a likely container of the page's main content."""
            if not isinstance(tag, Tag):
                return False

            if tag.name in ["article", "main", "section"]:
                return True

            classes_attr = tag.get("class")
            classes: List[str] = classes_attr if isinstance(classes_attr, list) else []
            if any(cls in content_classes for cls in classes):
                return True

            # Check for common content IDs
            return tag.get("id", "") in ["content", "main", "article"]

        # Prefer a dedicated content element; otherwise fall back to the full page.
        element = soup.find(match)
        root = element if element and hasattr(element, "find_all") else soup
        for unwanted in root.find_all(unwanted_tags):
            unwanted.decompose()
        return root.get_text(strip=True, separator=" ")

    def _enqueue_links(
        self, soup: BeautifulSoup, current_url: str, current_depth: int, primary_domain: str
    ) -> None:
        """Queue every crawlable same-domain link found on a page, one level deeper than the page."""
        skipped_extensions = (".pdf", ".jpg", ".png")
        next_depth = current_depth + 1

        for link in soup.find_all("a", href=True):
            if not isinstance(link, Tag):
                continue

            # Resolve relative links against the page they were found on.
            full_url = urljoin(current_url, str(link["href"]))
            parsed_url = urlparse(full_url)

            if not self._is_within_domain(parsed_url.netloc, primary_domain):
                continue
            # Skip common binary assets regardless of the extension's letter case.
            if parsed_url.path.lower().endswith(skipped_extensions):
                continue
            if full_url in self._visited or (full_url, next_depth) in self._urls_to_crawl:
                continue

            self._urls_to_crawl.append((full_url, next_depth))

    def _build_documents(self, crawler_result: Dict[str, str], url: str, name: Optional[str]) -> List[Document]:
        """Turn crawled ``{page_url: content}`` pairs into documents, chunked when chunking is enabled."""
        documents: List[Document] = []
        for crawled_url, crawled_content in crawler_result.items():
            document = Document(
                name=name or url,
                id=str(crawled_url),
                meta_data={"url": str(crawled_url)},
                content=crawled_content,
            )
            if self.chunk:
                documents.extend(self.chunk_document(document))
            else:
                documents.append(document)
        return documents

    def crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
        """
        Crawls a website and returns a dictionary of URLs and their corresponding content.

        Parameters:
        - url (str): The starting URL to begin the crawl.
        - starting_depth (int, optional): The starting depth level for the crawl. Defaults to 1.

        Returns:
        - Dict[str, str]: A dictionary where each key is a URL and the corresponding value is the main
        content extracted from that URL.

        Raises:
        - httpx.HTTPStatusError: If the starting URL answers with a non-redirect HTTP error status.
        - httpx.RequestError: If the starting URL cannot be fetched or parsed, or if no content could be
        extracted from any page.

        Note:
        Failures on pages other than the starting URL are logged and skipped. The starting URL is crawled
        even when `starting_depth` exceeds `max_depth`; every other URL deeper than `max_depth` is skipped.
        Crawl state is reset on every call, so the same reader instance can be reused.
        """
        num_links = 0
        crawler_result: Dict[str, str] = {}
        primary_domain = self._get_primary_domain(url)

        # Start from a clean slate so a previous crawl on this instance cannot
        # cause the starting URL to be skipped as "already visited".
        self._visited = set()
        self._urls_to_crawl = [(url, starting_depth)]

        while self._urls_to_crawl and num_links < self.max_links:
            current_url, current_depth = self._urls_to_crawl.pop(0)

            # Skip URLs that were already visited, are off-domain, or are too deep.
            if (
                current_url in self._visited
                or not self._is_within_domain(urlparse(current_url).netloc, primary_domain)
                or (current_depth > self.max_depth and current_url != url)
            ):
                continue

            self._visited.add(current_url)
            self.delay()

            try:
                log_debug(f"Crawling: {current_url}")

                response = (
                    httpx.get(current_url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True)
                    if self.proxy
                    else httpx.get(current_url, timeout=self.timeout, follow_redirects=True)
                )
                response.raise_for_status()

                soup = BeautifulSoup(response.content, "html.parser")

                # Only pages that yield content count towards max_links.
                main_content = self._extract_main_content(soup)
                if main_content:
                    crawler_result[current_url] = main_content
                    num_links += 1

                self._enqueue_links(soup, current_url, current_depth, primary_domain)

            except httpx.HTTPStatusError as e:
                # Redirect statuses (3xx) should normally be handled by follow_redirects,
                # so they are only logged at debug level and never re-raised.
                is_redirect = 300 <= e.response.status_code < 400
                if is_redirect:
                    log_debug(f"Redirect encountered for {current_url}, skipping: {e}")
                else:
                    log_warning(f"HTTP status error while crawling {current_url}: {e}")
                # A failing starting URL is fatal; other pages are skipped.
                if current_url == url and not crawler_result and not is_redirect:
                    raise
            except httpx.RequestError as e:
                log_warning(f"Request error while crawling {current_url}: {e}")
                # A failing starting URL is fatal; other pages are skipped.
                if current_url == url and not crawler_result:
                    raise
            except Exception as e:
                log_warning(f"Failed to crawl {current_url}: {e}")
                if current_url == url and not crawler_result:
                    # Wrap non-HTTP exceptions in a RequestError
                    raise httpx.RequestError(f"Failed to crawl starting URL {url}: {str(e)}", request=None) from e

        # If we couldn't crawl any pages, raise an error
        if not crawler_result:
            raise httpx.RequestError(f"Failed to extract any content from {url}", request=None)

        return crawler_result

    async def async_crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
        """
        Asynchronously crawls a website and returns a dictionary of URLs and their corresponding content.

        Parameters:
        - url (str): The starting URL to begin the crawl.
        - starting_depth (int, optional): The starting depth level for the crawl. Defaults to 1.

        Returns:
        - Dict[str, str]: A dictionary where each key is a URL and the corresponding value is the main
        content extracted from that URL.

        Raises:
        - httpx.HTTPStatusError: If the starting URL answers with an HTTP error status.
        - httpx.RequestError: If the starting URL cannot be fetched or parsed, or if no content could be
        extracted from any page.

        Note:
        Failures on pages other than the starting URL are logged and skipped. Unlike `crawl`, every URL
        deeper than `max_depth` is skipped, including the starting URL.
        """
        num_links = 0
        crawler_result: Dict[str, str] = {}
        primary_domain = self._get_primary_domain(url)

        # Clear previously visited URLs and URLs to crawl
        self._visited = set()
        self._urls_to_crawl = [(url, starting_depth)]

        client_args = {"proxy": self.proxy} if self.proxy else {}
        async with httpx.AsyncClient(**client_args) as client:  # type: ignore
            while self._urls_to_crawl and num_links < self.max_links:
                current_url, current_depth = self._urls_to_crawl.pop(0)

                # Skip URLs that were already visited, are off-domain, or are too deep.
                if (
                    current_url in self._visited
                    or not self._is_within_domain(urlparse(current_url).netloc, primary_domain)
                    or current_depth > self.max_depth
                ):
                    continue

                self._visited.add(current_url)
                await self.async_delay()

                try:
                    log_debug(f"Crawling asynchronously: {current_url}")
                    response = await client.get(current_url, timeout=self.timeout, follow_redirects=True)
                    response.raise_for_status()

                    soup = BeautifulSoup(response.content, "html.parser")

                    # Only pages that yield content count towards max_links.
                    main_content = self._extract_main_content(soup)
                    if main_content:
                        crawler_result[current_url] = main_content
                        num_links += 1

                    self._enqueue_links(soup, current_url, current_depth, primary_domain)

                except httpx.HTTPStatusError as e:
                    log_warning(f"HTTP status error while crawling asynchronously {current_url}: {e}")
                    # A failing starting URL is fatal; other pages are skipped.
                    if current_url == url and not crawler_result:
                        raise
                except httpx.RequestError as e:
                    log_warning(f"Request error while crawling asynchronously {current_url}: {e}")
                    # A failing starting URL is fatal; other pages are skipped.
                    if current_url == url and not crawler_result:
                        raise
                except Exception as e:
                    log_warning(f"Failed to crawl asynchronously {current_url}: {e}")
                    if current_url == url and not crawler_result:
                        # Wrap non-HTTP exceptions in a RequestError
                        raise httpx.RequestError(
                            f"Failed to crawl starting URL {url} asynchronously: {str(e)}", request=None
                        ) from e

        # If we couldn't crawl any pages, raise an error
        if not crawler_result:
            raise httpx.RequestError(f"Failed to extract any content from {url} asynchronously", request=None)

        return crawler_result

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Reads a website and returns a list of documents.

        The website is first crawled into a dictionary of URLs and their content; each entry is then
        converted into a document, or into chunks of it when chunking is enabled.

        :param url: The URL of the website to read.
        :param name: Optional document name; defaults to ``url``.
        :return: A list of documents.
        :raises httpx.HTTPStatusError: If there's an HTTP status error.
        :raises httpx.RequestError: If there's a request-related error.
        """
        log_debug(f"Reading: {url}")
        try:
            return self._build_documents(self.crawl(url), url, name)
        except (httpx.HTTPStatusError, httpx.RequestError) as e:
            log_error(f"Error reading website {url}: {e}")
            raise

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Asynchronously reads a website and returns a list of documents.

        The website is first crawled asynchronously into a dictionary of URLs and their content; each
        entry is then converted into a document, or into chunks of it when chunking is enabled.

        :param url: The URL of the website to read.
        :param name: Optional document name; defaults to ``url``.
        :return: A list of documents.
        :raises httpx.HTTPStatusError: If there's an HTTP status error.
        :raises httpx.RequestError: If there's a request-related error.
        """
        log_debug(f"Reading asynchronously: {url}")
        try:
            crawler_result = await self.async_crawl(url)
            # Document building is synchronous work, so it is done in a plain loop.
            return self._build_documents(crawler_result, url, name)
        except (httpx.HTTPStatusError, httpx.RequestError) as e:
            log_error(f"Error reading website asynchronously {url}: {e}")
            raise
@@ -0,0 +1,91 @@
1
+ import asyncio
2
+ from typing import List, Optional
3
+
4
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
5
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
6
+ from agno.knowledge.document import Document
7
+ from agno.knowledge.reader.base import Reader
8
+ from agno.knowledge.types import ContentType
9
+ from agno.utils.log import log_debug, log_info
10
+
11
+ try:
12
+ import wikipedia # noqa: F401
13
+ except ImportError:
14
+ raise ImportError("The `wikipedia` package is not installed. Please install it via `pip install wikipedia`.")
15
+
16
+
17
class WikipediaReader(Reader):
    """Reader that turns the Wikipedia summary of a topic into a document."""

    # Forwarded to ``wikipedia.summary`` as its ``auto_suggest`` argument.
    auto_suggest: bool = True

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), auto_suggest: bool = True, **kwargs
    ):
        """
        Create a Wikipedia reader.

        Args:
            chunking_strategy: Strategy forwarded to the base reader.
            auto_suggest: Forwarded to ``wikipedia.summary`` on every lookup.
            **kwargs: Extra keyword arguments forwarded to the base reader.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.auto_suggest = auto_suggest

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Wikipedia readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts (topics only)."""
        return [ContentType.TOPIC]

    def _fetch_summary(self, topic: str) -> Optional[str]:
        """Return the Wikipedia summary for ``topic``, or None when no single page matches it."""
        try:
            return wikipedia.summary(topic, auto_suggest=self.auto_suggest)
        except wikipedia.exceptions.PageError:
            log_info("Wikipedia Error: Page not found.")
        except wikipedia.exceptions.DisambiguationError as e:
            # An ambiguous topic has no single summary; treat it like a missing
            # page instead of letting the lookup crash the read.
            log_info(f"Wikipedia Error: Topic '{topic}' is ambiguous. Candidates: {e.options}")
        return None

    @staticmethod
    def _to_documents(topic: str, summary: Optional[str]) -> List[Document]:
        """Wrap a summary in a single-document list; an empty or missing summary gives an empty list."""
        if not summary:
            return []
        return [
            Document(
                name=topic,
                meta_data={"topic": topic},
                content=summary,
            )
        ]

    def read(self, topic: str) -> List[Document]:
        """
        Read content from Wikipedia.

        Args:
            topic: The Wikipedia topic to read

        Returns:
            A list with one document containing the Wikipedia summary, or an empty
            list when the page is not found or the topic is ambiguous
        """
        log_debug(f"Reading Wikipedia topic: {topic}")
        return self._to_documents(topic, self._fetch_summary(topic))

    async def async_read(self, topic: str) -> List[Document]:
        """
        Asynchronously read content from Wikipedia.

        Args:
            topic: The Wikipedia topic to read

        Returns:
            A list with one document containing the Wikipedia summary, or an empty
            list when the page is not found or the topic is ambiguous
        """
        log_debug(f"Async reading Wikipedia topic: {topic}")
        # Run the synchronous wikipedia API call in a worker thread
        summary = await asyncio.to_thread(self._fetch_summary, topic)
        return self._to_documents(topic, summary)
@@ -0,0 +1,78 @@
1
+ import asyncio
2
+ from typing import List, Optional
3
+
4
+ from agno.knowledge.chunking.recursive import RecursiveChunking
5
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
6
+ from agno.knowledge.document.base import Document
7
+ from agno.knowledge.reader.base import Reader
8
+ from agno.knowledge.types import ContentType
9
+ from agno.utils.log import log_debug, log_error, log_info
10
+
11
+ try:
12
+ from youtube_transcript_api import YouTubeTranscriptApi
13
+ except ImportError:
14
+ raise ImportError(
15
+ "`youtube_transcript_api` not installed. Please install it via `pip install youtube_transcript_api`."
16
+ )
17
+
18
+
19
class YouTubeReader(Reader):
    """Reader for YouTube video transcripts"""

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RecursiveChunking(), **kwargs):
        """
        Create a YouTube reader.

        Args:
            chunking_strategy: Strategy forwarded to the base reader.
            **kwargs: Extra keyword arguments forwarded to the base reader.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for YouTube readers."""
        return [
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts (YouTube only)."""
        return [ContentType.YOUTUBE]

    @staticmethod
    def _extract_video_id(url: str) -> str:
        """
        Extract the video id from a YouTube URL.

        Handles ``watch?v=<id>`` URLs, ``youtu.be/<id>`` short links and
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.
        Anything else falls back to splitting on ``v=``, which leaves a bare
        video id unchanged.
        """
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(url)

        # Standard watch URL: the id is the "v" query parameter.
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids and query_ids[0]:
            return query_ids[0]

        host = (parsed.hostname or "").lower()
        segments = [segment for segment in parsed.path.split("/") if segment]

        # Short link: the id is the first path segment.
        if segments and (host == "youtu.be" or host.endswith(".youtu.be")):
            return segments[0]

        # Embed-style paths: the id follows a well-known prefix segment.
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

        return url.split("v=")[-1].split("&")[0]

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Read the transcript of a YouTube video.

        Args:
            url: URL of the video
            name: Optional document name; defaults to ``youtube_<video_id>``

        Returns:
            The transcript as a single document, or as chunks of it when chunking
            is enabled. Any failure is logged and results in an empty list.
        """
        try:
            video_id = self._extract_video_id(url)
            log_info(f"Reading transcript for video: {video_id}")

            log_debug(f"Fetching transcript for video: {video_id}")
            transcript_data = YouTubeTranscriptApi().fetch(video_id)

            # Combine transcript segments into full text
            transcript_text = " ".join(segment.text for segment in transcript_data)

            documents = [
                Document(
                    name=name or f"youtube_{video_id}",
                    id=f"youtube_{video_id}",
                    meta_data={"video_url": url, "video_id": video_id},
                    content=transcript_text.strip(),
                )
            ]

            if not self.chunk:
                return documents

            chunked_documents: List[Document] = []
            for document in documents:
                chunked_documents.extend(self.chunk_document(document))
            return chunked_documents

        except Exception as e:
            # Best effort: a transcript that cannot be fetched yields no documents.
            log_error(f"Error reading transcript for {url}: {e}")
            return []

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Asynchronously read the transcript of a YouTube video.

        The blocking ``read`` call is run in the event loop's default executor.

        Args:
            url: URL of the video
            name: Optional document name; defaults to ``youtube_<video_id>``

        Returns:
            The same list of documents ``read`` returns
        """
        return await asyncio.get_running_loop().run_in_executor(None, self.read, url, name)