agno 1.8.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (589)
  1. agno/agent/__init__.py +19 -27
  2. agno/agent/agent.py +3143 -4170
  3. agno/api/agent.py +11 -67
  4. agno/api/api.py +5 -46
  5. agno/api/evals.py +8 -19
  6. agno/api/os.py +17 -0
  7. agno/api/routes.py +6 -41
  8. agno/api/schemas/__init__.py +9 -0
  9. agno/api/schemas/agent.py +5 -21
  10. agno/api/schemas/evals.py +7 -16
  11. agno/api/schemas/os.py +14 -0
  12. agno/api/schemas/team.py +5 -21
  13. agno/api/schemas/utils.py +21 -0
  14. agno/api/schemas/workflows.py +11 -7
  15. agno/api/settings.py +53 -0
  16. agno/api/team.py +11 -66
  17. agno/api/workflow.py +28 -0
  18. agno/cloud/aws/base.py +214 -0
  19. agno/cloud/aws/s3/__init__.py +2 -0
  20. agno/cloud/aws/s3/api_client.py +43 -0
  21. agno/cloud/aws/s3/bucket.py +195 -0
  22. agno/cloud/aws/s3/object.py +57 -0
  23. agno/db/__init__.py +24 -0
  24. agno/db/base.py +245 -0
  25. agno/db/dynamo/__init__.py +3 -0
  26. agno/db/dynamo/dynamo.py +1743 -0
  27. agno/db/dynamo/schemas.py +278 -0
  28. agno/db/dynamo/utils.py +684 -0
  29. agno/db/firestore/__init__.py +3 -0
  30. agno/db/firestore/firestore.py +1432 -0
  31. agno/db/firestore/schemas.py +130 -0
  32. agno/db/firestore/utils.py +278 -0
  33. agno/db/gcs_json/__init__.py +3 -0
  34. agno/db/gcs_json/gcs_json_db.py +1001 -0
  35. agno/db/gcs_json/utils.py +194 -0
  36. agno/db/in_memory/__init__.py +3 -0
  37. agno/db/in_memory/in_memory_db.py +882 -0
  38. agno/db/in_memory/utils.py +172 -0
  39. agno/db/json/__init__.py +3 -0
  40. agno/db/json/json_db.py +1045 -0
  41. agno/db/json/utils.py +196 -0
  42. agno/db/migrations/v1_to_v2.py +162 -0
  43. agno/db/mongo/__init__.py +3 -0
  44. agno/db/mongo/mongo.py +1416 -0
  45. agno/db/mongo/schemas.py +77 -0
  46. agno/db/mongo/utils.py +204 -0
  47. agno/db/mysql/__init__.py +3 -0
  48. agno/db/mysql/mysql.py +1719 -0
  49. agno/db/mysql/schemas.py +124 -0
  50. agno/db/mysql/utils.py +297 -0
  51. agno/db/postgres/__init__.py +3 -0
  52. agno/db/postgres/postgres.py +1710 -0
  53. agno/db/postgres/schemas.py +124 -0
  54. agno/db/postgres/utils.py +280 -0
  55. agno/db/redis/__init__.py +3 -0
  56. agno/db/redis/redis.py +1367 -0
  57. agno/db/redis/schemas.py +109 -0
  58. agno/db/redis/utils.py +288 -0
  59. agno/db/schemas/__init__.py +3 -0
  60. agno/db/schemas/evals.py +33 -0
  61. agno/db/schemas/knowledge.py +40 -0
  62. agno/db/schemas/memory.py +46 -0
  63. agno/db/singlestore/__init__.py +3 -0
  64. agno/db/singlestore/schemas.py +116 -0
  65. agno/db/singlestore/singlestore.py +1712 -0
  66. agno/db/singlestore/utils.py +326 -0
  67. agno/db/sqlite/__init__.py +3 -0
  68. agno/db/sqlite/schemas.py +119 -0
  69. agno/db/sqlite/sqlite.py +1676 -0
  70. agno/db/sqlite/utils.py +268 -0
  71. agno/db/utils.py +88 -0
  72. agno/eval/__init__.py +14 -0
  73. agno/eval/accuracy.py +154 -48
  74. agno/eval/performance.py +88 -23
  75. agno/eval/reliability.py +73 -20
  76. agno/eval/utils.py +23 -13
  77. agno/integrations/discord/__init__.py +3 -0
  78. agno/{app → integrations}/discord/client.py +10 -10
  79. agno/knowledge/__init__.py +2 -2
  80. agno/{document → knowledge}/chunking/agentic.py +2 -2
  81. agno/{document → knowledge}/chunking/document.py +2 -2
  82. agno/{document → knowledge}/chunking/fixed.py +3 -3
  83. agno/{document → knowledge}/chunking/markdown.py +2 -2
  84. agno/{document → knowledge}/chunking/recursive.py +2 -2
  85. agno/{document → knowledge}/chunking/row.py +2 -2
  86. agno/knowledge/chunking/semantic.py +59 -0
  87. agno/knowledge/chunking/strategy.py +121 -0
  88. agno/knowledge/content.py +74 -0
  89. agno/knowledge/document/__init__.py +5 -0
  90. agno/{document → knowledge/document}/base.py +12 -2
  91. agno/knowledge/embedder/__init__.py +5 -0
  92. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  93. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  94. agno/{embedder → knowledge/embedder}/base.py +6 -0
  95. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  96. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  97. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  98. agno/{embedder → knowledge/embedder}/google.py +74 -1
  99. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  100. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  101. agno/knowledge/embedder/langdb.py +22 -0
  102. agno/knowledge/embedder/mistral.py +139 -0
  103. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  104. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  105. agno/knowledge/embedder/openai.py +223 -0
  106. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  107. agno/{embedder → knowledge/embedder}/together.py +1 -1
  108. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  109. agno/knowledge/knowledge.py +1551 -0
  110. agno/knowledge/reader/__init__.py +7 -0
  111. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  112. agno/knowledge/reader/base.py +88 -0
  113. agno/{document → knowledge}/reader/csv_reader.py +47 -65
  114. agno/knowledge/reader/docx_reader.py +83 -0
  115. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  116. agno/{document → knowledge}/reader/json_reader.py +30 -9
  117. agno/{document → knowledge}/reader/markdown_reader.py +58 -9
  118. agno/{document → knowledge}/reader/pdf_reader.py +71 -126
  119. agno/knowledge/reader/reader_factory.py +268 -0
  120. agno/knowledge/reader/s3_reader.py +101 -0
  121. agno/{document → knowledge}/reader/text_reader.py +31 -10
  122. agno/knowledge/reader/url_reader.py +128 -0
  123. agno/knowledge/reader/web_search_reader.py +366 -0
  124. agno/{document → knowledge}/reader/website_reader.py +37 -10
  125. agno/knowledge/reader/wikipedia_reader.py +59 -0
  126. agno/knowledge/reader/youtube_reader.py +78 -0
  127. agno/knowledge/remote_content/remote_content.py +88 -0
  128. agno/{reranker → knowledge/reranker}/base.py +1 -1
  129. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  130. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  131. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  132. agno/knowledge/types.py +30 -0
  133. agno/knowledge/utils.py +169 -0
  134. agno/media.py +269 -268
  135. agno/memory/__init__.py +2 -10
  136. agno/memory/manager.py +1003 -148
  137. agno/models/aimlapi/__init__.py +2 -2
  138. agno/models/aimlapi/aimlapi.py +6 -6
  139. agno/models/anthropic/claude.py +128 -72
  140. agno/models/aws/bedrock.py +107 -175
  141. agno/models/aws/claude.py +64 -18
  142. agno/models/azure/ai_foundry.py +73 -23
  143. agno/models/base.py +346 -290
  144. agno/models/cerebras/cerebras.py +84 -27
  145. agno/models/cohere/chat.py +106 -98
  146. agno/models/google/gemini.py +105 -46
  147. agno/models/groq/groq.py +97 -35
  148. agno/models/huggingface/huggingface.py +92 -27
  149. agno/models/ibm/watsonx.py +72 -13
  150. agno/models/litellm/chat.py +85 -13
  151. agno/models/message.py +46 -151
  152. agno/models/meta/llama.py +85 -49
  153. agno/models/metrics.py +120 -0
  154. agno/models/mistral/mistral.py +90 -21
  155. agno/models/ollama/__init__.py +0 -2
  156. agno/models/ollama/chat.py +85 -47
  157. agno/models/openai/chat.py +154 -37
  158. agno/models/openai/responses.py +178 -105
  159. agno/models/perplexity/perplexity.py +26 -2
  160. agno/models/portkey/portkey.py +0 -7
  161. agno/models/response.py +15 -9
  162. agno/models/utils.py +20 -0
  163. agno/models/vercel/__init__.py +2 -2
  164. agno/models/vercel/v0.py +1 -1
  165. agno/models/vllm/__init__.py +2 -2
  166. agno/models/vllm/vllm.py +3 -3
  167. agno/models/xai/xai.py +10 -10
  168. agno/os/__init__.py +3 -0
  169. agno/os/app.py +497 -0
  170. agno/os/auth.py +47 -0
  171. agno/os/config.py +103 -0
  172. agno/os/interfaces/agui/__init__.py +3 -0
  173. agno/os/interfaces/agui/agui.py +31 -0
  174. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  175. agno/{app → os/interfaces}/agui/utils.py +65 -28
  176. agno/os/interfaces/base.py +21 -0
  177. agno/os/interfaces/slack/__init__.py +3 -0
  178. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  179. agno/os/interfaces/slack/slack.py +32 -0
  180. agno/os/interfaces/whatsapp/__init__.py +3 -0
  181. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  182. agno/os/interfaces/whatsapp/whatsapp.py +29 -0
  183. agno/os/mcp.py +235 -0
  184. agno/os/router.py +1400 -0
  185. agno/os/routers/__init__.py +3 -0
  186. agno/os/routers/evals/__init__.py +3 -0
  187. agno/os/routers/evals/evals.py +393 -0
  188. agno/os/routers/evals/schemas.py +142 -0
  189. agno/os/routers/evals/utils.py +161 -0
  190. agno/os/routers/knowledge/__init__.py +3 -0
  191. agno/os/routers/knowledge/knowledge.py +850 -0
  192. agno/os/routers/knowledge/schemas.py +118 -0
  193. agno/os/routers/memory/__init__.py +3 -0
  194. agno/os/routers/memory/memory.py +410 -0
  195. agno/os/routers/memory/schemas.py +58 -0
  196. agno/os/routers/metrics/__init__.py +3 -0
  197. agno/os/routers/metrics/metrics.py +178 -0
  198. agno/os/routers/metrics/schemas.py +47 -0
  199. agno/os/routers/session/__init__.py +3 -0
  200. agno/os/routers/session/session.py +536 -0
  201. agno/os/schema.py +945 -0
  202. agno/{app/playground → os}/settings.py +7 -15
  203. agno/os/utils.py +270 -0
  204. agno/reasoning/azure_ai_foundry.py +4 -4
  205. agno/reasoning/deepseek.py +4 -4
  206. agno/reasoning/default.py +6 -11
  207. agno/reasoning/groq.py +4 -4
  208. agno/reasoning/helpers.py +4 -6
  209. agno/reasoning/ollama.py +4 -4
  210. agno/reasoning/openai.py +4 -4
  211. agno/run/agent.py +633 -0
  212. agno/run/base.py +53 -77
  213. agno/run/cancel.py +81 -0
  214. agno/run/team.py +243 -96
  215. agno/run/workflow.py +550 -12
  216. agno/session/__init__.py +10 -0
  217. agno/session/agent.py +244 -0
  218. agno/session/summary.py +225 -0
  219. agno/session/team.py +262 -0
  220. agno/{storage/session/v2 → session}/workflow.py +47 -24
  221. agno/team/__init__.py +15 -16
  222. agno/team/team.py +3260 -4824
  223. agno/tools/agentql.py +14 -5
  224. agno/tools/airflow.py +9 -4
  225. agno/tools/api.py +7 -3
  226. agno/tools/apify.py +2 -46
  227. agno/tools/arxiv.py +8 -3
  228. agno/tools/aws_lambda.py +7 -5
  229. agno/tools/aws_ses.py +7 -1
  230. agno/tools/baidusearch.py +4 -1
  231. agno/tools/bitbucket.py +4 -4
  232. agno/tools/brandfetch.py +14 -11
  233. agno/tools/bravesearch.py +4 -1
  234. agno/tools/brightdata.py +43 -23
  235. agno/tools/browserbase.py +13 -4
  236. agno/tools/calcom.py +12 -10
  237. agno/tools/calculator.py +10 -27
  238. agno/tools/cartesia.py +20 -17
  239. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  240. agno/tools/confluence.py +8 -8
  241. agno/tools/crawl4ai.py +7 -1
  242. agno/tools/csv_toolkit.py +9 -8
  243. agno/tools/dalle.py +22 -12
  244. agno/tools/daytona.py +13 -16
  245. agno/tools/decorator.py +6 -3
  246. agno/tools/desi_vocal.py +17 -8
  247. agno/tools/discord.py +11 -8
  248. agno/tools/docker.py +30 -42
  249. agno/tools/duckdb.py +34 -53
  250. agno/tools/duckduckgo.py +8 -7
  251. agno/tools/e2b.py +62 -62
  252. agno/tools/eleven_labs.py +36 -29
  253. agno/tools/email.py +4 -1
  254. agno/tools/evm.py +7 -1
  255. agno/tools/exa.py +19 -14
  256. agno/tools/fal.py +30 -30
  257. agno/tools/file.py +9 -8
  258. agno/tools/financial_datasets.py +25 -44
  259. agno/tools/firecrawl.py +17 -18
  260. agno/tools/function.py +127 -18
  261. agno/tools/giphy.py +23 -11
  262. agno/tools/github.py +48 -126
  263. agno/tools/gmail.py +45 -61
  264. agno/tools/google_bigquery.py +7 -6
  265. agno/tools/google_maps.py +11 -26
  266. agno/tools/googlesearch.py +7 -2
  267. agno/tools/googlesheets.py +21 -17
  268. agno/tools/hackernews.py +9 -5
  269. agno/tools/jina.py +5 -4
  270. agno/tools/jira.py +18 -9
  271. agno/tools/knowledge.py +31 -32
  272. agno/tools/linear.py +18 -33
  273. agno/tools/linkup.py +5 -1
  274. agno/tools/local_file_system.py +8 -5
  275. agno/tools/lumalab.py +32 -20
  276. agno/tools/mcp.py +1 -2
  277. agno/tools/mem0.py +18 -12
  278. agno/tools/memori.py +14 -10
  279. agno/tools/mlx_transcribe.py +3 -2
  280. agno/tools/models/azure_openai.py +33 -15
  281. agno/tools/models/gemini.py +59 -32
  282. agno/tools/models/groq.py +30 -23
  283. agno/tools/models/nebius.py +28 -12
  284. agno/tools/models_labs.py +40 -16
  285. agno/tools/moviepy_video.py +7 -6
  286. agno/tools/neo4j.py +10 -8
  287. agno/tools/newspaper.py +7 -2
  288. agno/tools/newspaper4k.py +8 -3
  289. agno/tools/openai.py +58 -32
  290. agno/tools/openbb.py +12 -11
  291. agno/tools/opencv.py +63 -47
  292. agno/tools/openweather.py +14 -12
  293. agno/tools/pandas.py +11 -3
  294. agno/tools/postgres.py +4 -12
  295. agno/tools/pubmed.py +4 -1
  296. agno/tools/python.py +9 -22
  297. agno/tools/reasoning.py +35 -27
  298. agno/tools/reddit.py +11 -26
  299. agno/tools/replicate.py +55 -42
  300. agno/tools/resend.py +4 -1
  301. agno/tools/scrapegraph.py +15 -14
  302. agno/tools/searxng.py +10 -23
  303. agno/tools/serpapi.py +6 -3
  304. agno/tools/serper.py +13 -4
  305. agno/tools/shell.py +9 -2
  306. agno/tools/slack.py +12 -11
  307. agno/tools/sleep.py +3 -2
  308. agno/tools/spider.py +24 -4
  309. agno/tools/sql.py +7 -6
  310. agno/tools/tavily.py +6 -4
  311. agno/tools/telegram.py +12 -4
  312. agno/tools/todoist.py +11 -31
  313. agno/tools/toolkit.py +1 -1
  314. agno/tools/trafilatura.py +22 -6
  315. agno/tools/trello.py +9 -22
  316. agno/tools/twilio.py +10 -3
  317. agno/tools/user_control_flow.py +6 -1
  318. agno/tools/valyu.py +34 -5
  319. agno/tools/visualization.py +19 -28
  320. agno/tools/webbrowser.py +4 -3
  321. agno/tools/webex.py +11 -7
  322. agno/tools/website.py +15 -46
  323. agno/tools/webtools.py +12 -4
  324. agno/tools/whatsapp.py +5 -9
  325. agno/tools/wikipedia.py +20 -13
  326. agno/tools/x.py +14 -13
  327. agno/tools/yfinance.py +13 -40
  328. agno/tools/youtube.py +26 -20
  329. agno/tools/zendesk.py +7 -2
  330. agno/tools/zep.py +10 -7
  331. agno/tools/zoom.py +10 -9
  332. agno/utils/common.py +1 -19
  333. agno/utils/events.py +100 -123
  334. agno/utils/gemini.py +1 -1
  335. agno/utils/knowledge.py +29 -0
  336. agno/utils/log.py +54 -4
  337. agno/utils/mcp.py +68 -10
  338. agno/utils/media.py +39 -0
  339. agno/utils/message.py +12 -1
  340. agno/utils/models/aws_claude.py +1 -1
  341. agno/utils/models/claude.py +6 -12
  342. agno/utils/models/cohere.py +1 -1
  343. agno/utils/models/mistral.py +8 -7
  344. agno/utils/models/schema_utils.py +3 -3
  345. agno/utils/models/watsonx.py +1 -1
  346. agno/utils/openai.py +1 -1
  347. agno/utils/pprint.py +33 -32
  348. agno/utils/print_response/agent.py +779 -0
  349. agno/utils/print_response/team.py +1669 -0
  350. agno/utils/print_response/workflow.py +1451 -0
  351. agno/utils/prompts.py +14 -14
  352. agno/utils/reasoning.py +87 -0
  353. agno/utils/response.py +42 -42
  354. agno/utils/streamlit.py +481 -0
  355. agno/utils/string.py +8 -22
  356. agno/utils/team.py +50 -0
  357. agno/utils/timer.py +2 -2
  358. agno/vectordb/base.py +33 -21
  359. agno/vectordb/cassandra/cassandra.py +287 -23
  360. agno/vectordb/chroma/chromadb.py +482 -59
  361. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  362. agno/vectordb/couchbase/couchbase.py +309 -29
  363. agno/vectordb/lancedb/lance_db.py +360 -21
  364. agno/vectordb/langchaindb/__init__.py +5 -0
  365. agno/vectordb/langchaindb/langchaindb.py +145 -0
  366. agno/vectordb/lightrag/__init__.py +5 -0
  367. agno/vectordb/lightrag/lightrag.py +374 -0
  368. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  369. agno/vectordb/milvus/milvus.py +242 -32
  370. agno/vectordb/mongodb/mongodb.py +200 -24
  371. agno/vectordb/pgvector/pgvector.py +319 -37
  372. agno/vectordb/pineconedb/pineconedb.py +221 -27
  373. agno/vectordb/qdrant/qdrant.py +334 -14
  374. agno/vectordb/singlestore/singlestore.py +286 -29
  375. agno/vectordb/surrealdb/surrealdb.py +187 -7
  376. agno/vectordb/upstashdb/upstashdb.py +342 -26
  377. agno/vectordb/weaviate/weaviate.py +227 -165
  378. agno/workflow/__init__.py +17 -13
  379. agno/workflow/{v2/condition.py → condition.py} +135 -32
  380. agno/workflow/{v2/loop.py → loop.py} +115 -28
  381. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  382. agno/workflow/{v2/router.py → router.py} +133 -32
  383. agno/workflow/{v2/step.py → step.py} +207 -49
  384. agno/workflow/{v2/steps.py → steps.py} +147 -66
  385. agno/workflow/types.py +482 -0
  386. agno/workflow/workflow.py +2410 -696
  387. agno-2.0.0.dist-info/METADATA +494 -0
  388. agno-2.0.0.dist-info/RECORD +515 -0
  389. agno-2.0.0.dist-info/licenses/LICENSE +201 -0
  390. agno/agent/metrics.py +0 -110
  391. agno/api/app.py +0 -35
  392. agno/api/playground.py +0 -92
  393. agno/api/schemas/app.py +0 -12
  394. agno/api/schemas/playground.py +0 -22
  395. agno/api/schemas/user.py +0 -35
  396. agno/api/schemas/workspace.py +0 -46
  397. agno/api/user.py +0 -160
  398. agno/api/workflows.py +0 -33
  399. agno/api/workspace.py +0 -175
  400. agno/app/agui/__init__.py +0 -3
  401. agno/app/agui/app.py +0 -17
  402. agno/app/agui/sync_router.py +0 -120
  403. agno/app/base.py +0 -186
  404. agno/app/discord/__init__.py +0 -3
  405. agno/app/fastapi/__init__.py +0 -3
  406. agno/app/fastapi/app.py +0 -107
  407. agno/app/fastapi/async_router.py +0 -457
  408. agno/app/fastapi/sync_router.py +0 -448
  409. agno/app/playground/app.py +0 -228
  410. agno/app/playground/async_router.py +0 -1053
  411. agno/app/playground/deploy.py +0 -249
  412. agno/app/playground/operator.py +0 -183
  413. agno/app/playground/schemas.py +0 -223
  414. agno/app/playground/serve.py +0 -55
  415. agno/app/playground/sync_router.py +0 -1045
  416. agno/app/playground/utils.py +0 -46
  417. agno/app/settings.py +0 -15
  418. agno/app/slack/__init__.py +0 -3
  419. agno/app/slack/app.py +0 -19
  420. agno/app/slack/sync_router.py +0 -92
  421. agno/app/utils.py +0 -54
  422. agno/app/whatsapp/__init__.py +0 -3
  423. agno/app/whatsapp/app.py +0 -15
  424. agno/app/whatsapp/sync_router.py +0 -197
  425. agno/cli/auth_server.py +0 -249
  426. agno/cli/config.py +0 -274
  427. agno/cli/console.py +0 -88
  428. agno/cli/credentials.py +0 -23
  429. agno/cli/entrypoint.py +0 -571
  430. agno/cli/operator.py +0 -357
  431. agno/cli/settings.py +0 -96
  432. agno/cli/ws/ws_cli.py +0 -817
  433. agno/constants.py +0 -13
  434. agno/document/__init__.py +0 -5
  435. agno/document/chunking/semantic.py +0 -45
  436. agno/document/chunking/strategy.py +0 -31
  437. agno/document/reader/__init__.py +0 -5
  438. agno/document/reader/base.py +0 -47
  439. agno/document/reader/docx_reader.py +0 -60
  440. agno/document/reader/gcs/pdf_reader.py +0 -44
  441. agno/document/reader/s3/pdf_reader.py +0 -59
  442. agno/document/reader/s3/text_reader.py +0 -63
  443. agno/document/reader/url_reader.py +0 -59
  444. agno/document/reader/youtube_reader.py +0 -58
  445. agno/embedder/__init__.py +0 -5
  446. agno/embedder/langdb.py +0 -80
  447. agno/embedder/mistral.py +0 -82
  448. agno/embedder/openai.py +0 -78
  449. agno/file/__init__.py +0 -5
  450. agno/file/file.py +0 -16
  451. agno/file/local/csv.py +0 -32
  452. agno/file/local/txt.py +0 -19
  453. agno/infra/app.py +0 -240
  454. agno/infra/base.py +0 -144
  455. agno/infra/context.py +0 -20
  456. agno/infra/db_app.py +0 -52
  457. agno/infra/resource.py +0 -205
  458. agno/infra/resources.py +0 -55
  459. agno/knowledge/agent.py +0 -702
  460. agno/knowledge/arxiv.py +0 -33
  461. agno/knowledge/combined.py +0 -36
  462. agno/knowledge/csv.py +0 -144
  463. agno/knowledge/csv_url.py +0 -124
  464. agno/knowledge/document.py +0 -223
  465. agno/knowledge/docx.py +0 -137
  466. agno/knowledge/firecrawl.py +0 -34
  467. agno/knowledge/gcs/__init__.py +0 -0
  468. agno/knowledge/gcs/base.py +0 -39
  469. agno/knowledge/gcs/pdf.py +0 -125
  470. agno/knowledge/json.py +0 -137
  471. agno/knowledge/langchain.py +0 -71
  472. agno/knowledge/light_rag.py +0 -273
  473. agno/knowledge/llamaindex.py +0 -66
  474. agno/knowledge/markdown.py +0 -154
  475. agno/knowledge/pdf.py +0 -164
  476. agno/knowledge/pdf_bytes.py +0 -42
  477. agno/knowledge/pdf_url.py +0 -148
  478. agno/knowledge/s3/__init__.py +0 -0
  479. agno/knowledge/s3/base.py +0 -64
  480. agno/knowledge/s3/pdf.py +0 -33
  481. agno/knowledge/s3/text.py +0 -34
  482. agno/knowledge/text.py +0 -141
  483. agno/knowledge/url.py +0 -46
  484. agno/knowledge/website.py +0 -179
  485. agno/knowledge/wikipedia.py +0 -32
  486. agno/knowledge/youtube.py +0 -35
  487. agno/memory/agent.py +0 -423
  488. agno/memory/classifier.py +0 -104
  489. agno/memory/db/__init__.py +0 -5
  490. agno/memory/db/base.py +0 -42
  491. agno/memory/db/mongodb.py +0 -189
  492. agno/memory/db/postgres.py +0 -203
  493. agno/memory/db/sqlite.py +0 -193
  494. agno/memory/memory.py +0 -22
  495. agno/memory/row.py +0 -36
  496. agno/memory/summarizer.py +0 -201
  497. agno/memory/summary.py +0 -19
  498. agno/memory/team.py +0 -415
  499. agno/memory/v2/__init__.py +0 -2
  500. agno/memory/v2/db/__init__.py +0 -1
  501. agno/memory/v2/db/base.py +0 -42
  502. agno/memory/v2/db/firestore.py +0 -339
  503. agno/memory/v2/db/mongodb.py +0 -196
  504. agno/memory/v2/db/postgres.py +0 -214
  505. agno/memory/v2/db/redis.py +0 -187
  506. agno/memory/v2/db/schema.py +0 -54
  507. agno/memory/v2/db/sqlite.py +0 -209
  508. agno/memory/v2/manager.py +0 -437
  509. agno/memory/v2/memory.py +0 -1097
  510. agno/memory/v2/schema.py +0 -55
  511. agno/memory/v2/summarizer.py +0 -215
  512. agno/memory/workflow.py +0 -38
  513. agno/models/ollama/tools.py +0 -430
  514. agno/models/qwen/__init__.py +0 -5
  515. agno/playground/__init__.py +0 -10
  516. agno/playground/deploy.py +0 -3
  517. agno/playground/playground.py +0 -3
  518. agno/playground/serve.py +0 -3
  519. agno/playground/settings.py +0 -3
  520. agno/reranker/__init__.py +0 -0
  521. agno/run/response.py +0 -467
  522. agno/run/v2/__init__.py +0 -0
  523. agno/run/v2/workflow.py +0 -567
  524. agno/storage/__init__.py +0 -0
  525. agno/storage/agent/__init__.py +0 -0
  526. agno/storage/agent/dynamodb.py +0 -1
  527. agno/storage/agent/json.py +0 -1
  528. agno/storage/agent/mongodb.py +0 -1
  529. agno/storage/agent/postgres.py +0 -1
  530. agno/storage/agent/singlestore.py +0 -1
  531. agno/storage/agent/sqlite.py +0 -1
  532. agno/storage/agent/yaml.py +0 -1
  533. agno/storage/base.py +0 -60
  534. agno/storage/dynamodb.py +0 -673
  535. agno/storage/firestore.py +0 -297
  536. agno/storage/gcs_json.py +0 -261
  537. agno/storage/in_memory.py +0 -234
  538. agno/storage/json.py +0 -237
  539. agno/storage/mongodb.py +0 -328
  540. agno/storage/mysql.py +0 -685
  541. agno/storage/postgres.py +0 -682
  542. agno/storage/redis.py +0 -336
  543. agno/storage/session/__init__.py +0 -16
  544. agno/storage/session/agent.py +0 -64
  545. agno/storage/session/team.py +0 -63
  546. agno/storage/session/v2/__init__.py +0 -5
  547. agno/storage/session/workflow.py +0 -61
  548. agno/storage/singlestore.py +0 -606
  549. agno/storage/sqlite.py +0 -646
  550. agno/storage/workflow/__init__.py +0 -0
  551. agno/storage/workflow/mongodb.py +0 -1
  552. agno/storage/workflow/postgres.py +0 -1
  553. agno/storage/workflow/sqlite.py +0 -1
  554. agno/storage/yaml.py +0 -241
  555. agno/tools/thinking.py +0 -73
  556. agno/utils/defaults.py +0 -57
  557. agno/utils/filesystem.py +0 -39
  558. agno/utils/git.py +0 -52
  559. agno/utils/json_io.py +0 -30
  560. agno/utils/load_env.py +0 -19
  561. agno/utils/py_io.py +0 -19
  562. agno/utils/pyproject.py +0 -18
  563. agno/utils/resource_filter.py +0 -31
  564. agno/workflow/v2/__init__.py +0 -21
  565. agno/workflow/v2/types.py +0 -357
  566. agno/workflow/v2/workflow.py +0 -3313
  567. agno/workspace/__init__.py +0 -0
  568. agno/workspace/config.py +0 -325
  569. agno/workspace/enums.py +0 -6
  570. agno/workspace/helpers.py +0 -52
  571. agno/workspace/operator.py +0 -757
  572. agno/workspace/settings.py +0 -158
  573. agno-1.8.2.dist-info/METADATA +0 -982
  574. agno-1.8.2.dist-info/RECORD +0 -566
  575. agno-1.8.2.dist-info/entry_points.txt +0 -3
  576. agno-1.8.2.dist-info/licenses/LICENSE +0 -375
  577. /agno/{app → db/migrations}/__init__.py +0 -0
  578. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  579. /agno/{cli → integrations}/__init__.py +0 -0
  580. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  581. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  582. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  583. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  584. /agno/{app → os/interfaces}/slack/security.py +0 -0
  585. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  586. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  587. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  588. {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
  589. {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,101 @@
1
+ import asyncio
2
+ from io import BytesIO
3
+ from pathlib import Path
4
+ from typing import List, Optional
5
+
6
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
7
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
+ from agno.knowledge.document.base import Document
9
+ from agno.knowledge.reader.base import Reader
10
+ from agno.knowledge.reader.pdf_reader import PDFReader
11
+ from agno.knowledge.reader.text_reader import TextReader
12
+ from agno.knowledge.types import ContentType
13
+ from agno.utils.log import log_info, logger
14
+
15
+ try:
16
+ from agno.aws.resource.s3.object import S3Object # type: ignore
17
+ except (ModuleNotFoundError, ImportError):
18
+ raise ImportError("`agno-aws` not installed. Please install using `pip install agno-aws`")
19
+
20
+ try:
21
+ import textract # noqa: F401
22
+ except ImportError:
23
+ raise ImportError("`textract` not installed. Please install it via `pip install textract`.")
24
+
25
+ try:
26
+ from pypdf import PdfReader as DocumentReader # noqa: F401
27
+ except ImportError:
28
+ raise ImportError("`pypdf` not installed. Please install it via `pip install pypdf`.")
29
+
30
+
31
class S3Reader(Reader):
    """Reader for files stored in S3.

    Objects whose URI ends in ``.pdf`` (case-insensitive) are streamed into
    memory and parsed with ``PDFReader``. Every other object is downloaded to
    a scratch file and parsed with ``TextReader``.
    """

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
        """Create the reader.

        Args:
            chunking_strategy: Strategy forwarded to the base ``Reader``.
            **kwargs: Extra keyword arguments forwarded to the base ``Reader``.
        """
        # NOTE: the default ``FixedSizeChunking()`` is evaluated once, when the
        # class body is executed, so readers created without an explicit
        # strategy all share that single instance.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for S3 readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader declares support for."""
        return [ContentType.FILE, ContentType.URL, ContentType.TEXT]

    @staticmethod
    def _default_document_name(object_name: str) -> str:
        """Derive a document name from an S3 object name.

        Takes the last ``/``-separated segment, keeps the text before its
        first ``.`` and replaces spaces with underscores.
        """
        return object_name.split("/")[-1].split(".")[0].replace(" ", "_")

    def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
        """Read an S3 object into documents.

        Args:
            name: Document name to use. When ``None`` a name is derived from
                the object's own name.
            s3_object: The S3 object to read.

        Returns:
            The documents produced by the underlying reader, or an empty list
            if anything goes wrong (the error is logged, not raised).
        """
        try:
            log_info(f"Reading S3 file: {s3_object.uri}")

            doc_name = self._default_document_name(s3_object.name) if name is None else name

            # PDF objects are parsed straight from memory; match the extension
            # case-insensitively so "REPORT.PDF" is not decoded as text.
            if s3_object.uri.lower().endswith(".pdf"):
                object_body = s3_object.get_resource().get()["Body"]
                # NOTE(review): a default PDFReader is used here, so this
                # reader's own chunking strategy is not forwarded -- confirm
                # whether that is intended.
                return PDFReader().read(pdf=BytesIO(object_body.read()), name=doc_name)

            # Everything else is downloaded to a scratch file and read as text.
            temporary_file = Path("storage").joinpath(s3_object.name.split("/")[-1])
            # Make sure the scratch directory exists so the download does not
            # depend on it having been created beforehand.
            temporary_file.parent.mkdir(parents=True, exist_ok=True)
            try:
                s3_object.download(temporary_file)
                return TextReader().read(file=temporary_file, name=doc_name)
            finally:
                # Always remove the scratch file, including when the download
                # or the read raised part-way through.
                temporary_file.unlink(missing_ok=True)

        except Exception as e:
            # Best effort by design: report the failure and return no documents.
            logger.error(f"Error reading: {s3_object.uri}: {e}")

        return []

    async def async_read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
        """Asynchronously read S3 files by running the synchronous read operation in a thread."""
        return await asyncio.to_thread(self.read, name, s3_object)
@@ -1,27 +1,48 @@
1
1
  import asyncio
2
2
  import uuid
3
3
  from pathlib import Path
4
- from typing import IO, Any, List, Union
4
+ from typing import IO, Any, List, Optional, Union
5
5
 
6
- from agno.document.base import Document
7
- from agno.document.reader.base import Reader
6
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
7
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
+ from agno.knowledge.document.base import Document
9
+ from agno.knowledge.reader.base import Reader
10
+ from agno.knowledge.types import ContentType
8
11
  from agno.utils.log import log_info, logger
9
12
 
10
13
 
11
14
  class TextReader(Reader):
12
15
  """Reader for Text files"""
13
16
 
14
- def read(self, file: Union[Path, IO[Any]]) -> List[Document]:
17
+ def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
18
+ super().__init__(chunking_strategy=chunking_strategy, **kwargs)
19
+
20
+ @classmethod
21
+ def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
22
+ """Get the list of supported chunking strategies for Text readers."""
23
+ return [
24
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
25
+ ChunkingStrategyType.AGENTIC_CHUNKER,
26
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
27
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
28
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
29
+ ]
30
+
31
+ @classmethod
32
+ def get_supported_content_types(self) -> List[ContentType]:
33
+ return [ContentType.TXT]
34
+
35
+ def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
15
36
  try:
16
37
  if isinstance(file, Path):
17
38
  if not file.exists():
18
39
  raise FileNotFoundError(f"Could not find file: {file}")
19
40
  log_info(f"Reading: {file}")
20
- file_name = file.stem
41
+ file_name = name or file.stem
21
42
  file_contents = file.read_text("utf-8")
22
43
  else:
23
- log_info(f"Reading uploaded file: {file.name}")
24
- file_name = file.name.split(".")[0]
44
+ file_name = name or file.name.split(".")[0]
45
+ log_info(f"Reading uploaded file: {file_name}")
25
46
  file.seek(0)
26
47
  file_contents = file.read().decode("utf-8")
27
48
 
@@ -42,14 +63,14 @@ class TextReader(Reader):
42
63
  logger.error(f"Error reading: {file}: {e}")
43
64
  return []
44
65
 
45
- async def async_read(self, file: Union[Path, IO[Any]]) -> List[Document]:
66
+ async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
46
67
  try:
47
68
  if isinstance(file, Path):
48
69
  if not file.exists():
49
70
  raise FileNotFoundError(f"Could not find file: {file}")
50
71
 
51
72
  log_info(f"Reading asynchronously: {file}")
52
- file_name = file.stem
73
+ file_name = name or file.stem
53
74
 
54
75
  try:
55
76
  import aiofiles
@@ -61,7 +82,7 @@ class TextReader(Reader):
61
82
  file_contents = file.read_text("utf-8")
62
83
  else:
63
84
  log_info(f"Reading uploaded file asynchronously: {file.name}")
64
- file_name = file.name.split(".")[0]
85
+ file_name = name or file.name.split(".")[0]
65
86
  file.seek(0)
66
87
  file_contents = file.read().decode("utf-8")
67
88
 
@@ -0,0 +1,128 @@
1
+ from io import BytesIO
2
+ from os.path import basename
3
+ from pathlib import Path
4
+ from typing import List, Optional
5
+ from urllib.parse import urlparse
6
+
7
+ import httpx
8
+
9
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
10
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
11
+ from agno.knowledge.document.base import Document
12
+ from agno.knowledge.reader.base import Reader
13
+ from agno.knowledge.reader.csv_reader import CSVReader
14
+ from agno.knowledge.reader.pdf_reader import PDFReader
15
+ from agno.knowledge.types import ContentType
16
+ from agno.utils.http import async_fetch_with_retry, fetch_with_retry
17
+ from agno.utils.log import log_debug
18
+
19
+
20
class URLReader(Reader):
    """Reader for general URL content.

    Fetches a URL and turns the response into ``Document`` objects. URLs whose
    path ends in ``.csv`` or ``.pdf`` are delegated to ``CSVReader`` /
    ``PDFReader``; everything else becomes a single text document.
    """

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), proxy: Optional[str] = None, **kwargs
    ):
        """Initialise the reader.

        Args:
            chunking_strategy: Strategy handed to the base reader.
            proxy: Optional proxy URL used for outgoing requests.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.proxy = proxy

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for URL readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types handled by this reader."""
        return [ContentType.URL]

    def read(
        self, url: str, id: Optional[str] = None, name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Fetch ``url`` synchronously and return its documents.

        Args:
            url: Address to fetch.
            id: Optional document id (used for plain-text content).
            name: Optional document name.
            password: Optional password, forwarded to the PDF reader when set.

        Returns:
            The documents, chunked when ``self.chunk`` is truthy.

        Raises:
            ValueError: If ``url`` is empty.
        """
        if not url:
            raise ValueError("No url provided")

        log_debug(f"Reading: {url}")

        # Retrying is delegated to fetch_with_retry.
        response = fetch_with_retry(url, proxy=self.proxy)

        documents = self._create_documents(
            url=url, text=response.text, content=response.content, id=id, name=name, password=password
        )

        if not self.chunk:
            return documents

        # Flatten the per-document chunk lists into a single list.
        return [chunk for document in documents for chunk in self.chunk_document(document)]

    async def async_read(
        self, url: str, id: Optional[str] = None, name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Async version of ``read``.

        Raises:
            ValueError: If ``url`` is empty.
        """
        if not url:
            raise ValueError("No url provided")

        log_debug(f"Reading async: {url}")
        # Only pass ``proxy`` to httpx when one is configured.
        client_args = {"proxy": self.proxy} if self.proxy else {}
        async with httpx.AsyncClient(**client_args) as client:  # type: ignore
            response = await async_fetch_with_retry(url, client=client)

        documents = self._create_documents(
            url=url, text=response.text, content=response.content, id=id, name=name, password=password
        )

        if not self.chunk:
            return documents

        return await self.chunk_documents_async(documents)

    def _create_documents(
        self,
        url: str,
        text: str,
        content: bytes,
        id: Optional[str] = None,
        name: Optional[str] = None,
        password: Optional[str] = None,
    ) -> List[Document]:
        """Helper method to create documents from URL content.

        The reader is picked from the file extension of the URL path:
        ``.csv`` -> ``CSVReader``, ``.pdf`` -> ``PDFReader``, anything else
        becomes one ``Document`` holding the response text.
        """
        # Determine file extension from URL
        parsed_url = urlparse(url)
        url_path = Path(parsed_url.path)  # type: ignore
        file_extension = url_path.suffix.lower()

        if file_extension == ".csv":
            # Honour an explicit ``name`` (as the other branches do) before
            # falling back to the file name taken from the URL.
            filename = name or basename(parsed_url.path) or "data.csv"
            return CSVReader().read(file=BytesIO(content), name=filename)

        if file_extension == ".pdf":
            # Only forward ``password`` when one was actually supplied.
            if password:
                return PDFReader().read(pdf=BytesIO(content), name=name, password=password)
            return PDFReader().read(pdf=BytesIO(content), name=name)

        # Plain content: derive a name from the path, then the host, then the URL.
        doc_name = name or parsed_url.path.strip("/").replace("/", "_").replace(" ", "_")
        if not doc_name:
            doc_name = parsed_url.netloc
        if not doc_name:
            doc_name = url

        return [
            Document(
                name=doc_name,
                id=id or doc_name,
                meta_data={"url": url},
                content=text,
                size=len(text),
            )
        ]
@@ -0,0 +1,366 @@
1
+ import asyncio
2
+ import random
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Literal, Optional, Set
6
+ from urllib.parse import urlparse
7
+
8
+ import httpx
9
+
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.types import ContentType
15
+ from agno.utils.log import log_debug, logger
16
+
17
+ try:
18
+ from bs4 import BeautifulSoup, Tag # noqa: F401
19
+ except ImportError:
20
+ raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
21
+
22
+ try:
23
+ from ddgs import DDGS
24
+ except ImportError:
25
+ raise ImportError("The `ddgs` package is not installed. Please install it via `pip install ddgs`.")
26
+
27
+
28
@dataclass
class WebSearchReader(Reader):
    """Reader that uses web search to find content for a given query.

    A query is sent to the configured search engine, each resulting URL is
    fetched, HTML is reduced to plain text, and the pages are returned as
    ``Document`` objects (chunked when ``self.chunk`` is truthy).

    NOTE(review): ``self.max_results`` and ``self.chunk`` are not declared
    here; presumably they are inherited from ``Reader`` - confirm.
    """

    # Timeout handed to the DDGS client.
    search_timeout: int = 10

    # Timeout handed to httpx when fetching result pages.
    request_timeout: int = 30
    # Seconds slept between consecutive page fetches.
    delay_between_requests: float = 2.0
    # Attempts made per page before giving up.
    max_retries: int = 3
    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

    # Search engine configuration
    search_engine: Literal["duckduckgo", "google"] = "duckduckgo"
    # Minimum seconds between search requests.
    search_delay: float = 3.0
    # Attempts made per search operation.
    max_search_retries: int = 2

    # Rate limiting
    # Base seconds to wait after a rate-limit error.
    rate_limit_delay: float = 5.0
    # Double the rate-limit wait on every further attempt when True.
    exponential_backoff: bool = True

    # Internal state
    _visited_urls: Set[str] = field(default_factory=set)
    _last_search_time: float = field(default=0.0, init=False)

    # Override default chunking strategy
    chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Web Search readers."""
        return [
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types handled by this reader."""
        return [ContentType.TOPIC]

    def _respect_rate_limits(self):
        """Sleep as needed so searches are at least ``search_delay`` seconds apart."""
        current_time = time.time()
        time_since_last_search = current_time - self._last_search_time

        if time_since_last_search < self.search_delay:
            sleep_time = self.search_delay - time_since_last_search
            log_debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
            time.sleep(sleep_time)

        self._last_search_time = time.time()

    def _perform_duckduckgo_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using DuckDuckGo with rate limiting.

        Returns:
            A list of ``{"title", "url", "description"}`` dicts, or ``[]`` when
            every attempt failed.
        """
        log_debug(f"Performing DuckDuckGo search for: {query}")

        for attempt in range(self.max_search_retries):
            try:
                self._respect_rate_limits()

                ddgs = DDGS(timeout=self.search_timeout)
                search_results = ddgs.text(query=query, max_results=self.max_results)

                # ddgs text results carry the URL under "href"; "link" is kept
                # as a fallback for any backend that still uses the old key.
                results = [
                    {
                        "title": result.get("title", ""),
                        "url": result.get("href") or result.get("link", ""),
                        "description": result.get("body", ""),
                    }
                    for result in search_results
                ]

                log_debug(f"Found {len(results)} search results")
                return results

            except Exception as e:
                logger.warning(f"DuckDuckGo search attempt {attempt + 1} failed: {e}")

                # No retry follows the last attempt, so do not sleep before giving up.
                if attempt >= self.max_search_retries - 1:
                    logger.error(f"All DuckDuckGo search attempts failed: {e}")
                    return []

                if "rate limit" in str(e).lower() or "429" in str(e):
                    # Rate limited - wait longer
                    wait_time = (
                        self.rate_limit_delay * (2**attempt) if self.exponential_backoff else self.rate_limit_delay
                    )
                    logger.info(f"Rate limited, waiting {wait_time} seconds before retry")
                    time.sleep(wait_time)
                else:
                    # Other error - shorter wait
                    time.sleep(self.search_delay)
        return []

    def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using Google (requires googlesearch-python).

        Returns:
            A list of ``{"title", "url", "description"}`` dicts, or ``[]`` when
            the package is missing or every attempt failed.
        """
        log_debug(f"Performing Google search for: {query}")

        try:
            from googlesearch import search
        except ImportError:
            logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
            return []

        for attempt in range(self.max_search_retries):
            try:
                self._respect_rate_limits()

                results = []
                # ``advanced=True`` yields result objects with url/title/description;
                # googlesearch-python's search() has no ``stop`` parameter.
                search_results = search(query, num_results=self.max_results, advanced=True)

                for result in search_results:
                    if isinstance(result, str):
                        # Plain-URL results carry no title or description.
                        results.append({"title": "", "url": result, "description": ""})
                    else:
                        results.append(
                            {
                                "title": getattr(result, "title", ""),
                                "url": getattr(result, "url", ""),
                                "description": getattr(result, "description", ""),
                            }
                        )

                log_debug(f"Found {len(results)} Google search results")
                return results

            except Exception as e:
                logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
                if attempt < self.max_search_retries - 1:
                    time.sleep(self.search_delay)
                else:
                    logger.error(f"All Google search attempts failed: {e}")
                    return []

        return []

    def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using the configured search engine."""
        if self.search_engine == "duckduckgo":
            return self._perform_duckduckgo_search(query)
        elif self.search_engine == "google":
            return self._perform_google_search(query)
        else:
            logger.error(f"Unsupported search engine: {self.search_engine}")
            return []

    def _is_valid_url(self, url: str) -> bool:
        """Check if URL is http(s), has a host, and has not been visited yet."""
        try:
            parsed = urlparse(url)
            return bool(parsed.scheme in ["http", "https"] and parsed.netloc and url not in self._visited_urls)
        except Exception:
            return False

    def _extract_text_from_html(self, html_content: str, url: str) -> str:
        """Extract clean text content from HTML.

        Falls back to returning ``html_content`` unchanged if parsing fails;
        ``url`` is only used in the warning message.
        """
        try:
            soup = BeautifulSoup(html_content, "html.parser")

            # Remove script and style elements
            for script in soup(["script", "style"]):
                script.decompose()

            # Get text content
            text = soup.get_text()

            # Clean up whitespace
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            text = " ".join(chunk for chunk in chunks if chunk)

            return text

        except Exception as e:
            logger.warning(f"Error extracting text from {url}: {e}")
            return html_content

    def _fetch_url_content(self, url: str) -> Optional[str]:
        """Fetch content from a URL with retry logic.

        Returns:
            Extracted text for HTML responses, the raw response text otherwise,
            or ``None`` once ``max_retries`` attempts have failed.
        """
        headers = {"User-Agent": self.user_agent}

        for attempt in range(self.max_retries):
            try:
                response = httpx.get(url, headers=headers, timeout=self.request_timeout, follow_redirects=True)
                response.raise_for_status()

                # Check if it's HTML content
                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    return self._extract_text_from_html(response.text, url)
                else:
                    # For non-HTML content, return as-is
                    return response.text

            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
                if attempt < self.max_retries - 1:
                    time.sleep(random.uniform(1, 3))  # Random delay between retries
                    continue

        logger.error(f"Failed to fetch content from {url} after {self.max_retries} attempts")
        return None

    def _create_document_from_url(self, url: str, content: str, search_result: Dict[str, str]) -> Document:
        """Create a Document object from URL content and search result metadata."""
        # Use the URL as the document ID
        doc_id = url

        # Use the search result title as the document name. Search results
        # always carry a "title" key (possibly empty), so fall back to the host
        # on any falsy title rather than only on a missing key.
        doc_name = search_result.get("title") or urlparse(url).netloc

        # Create metadata with search information
        meta_data = {
            "url": url,
            "search_title": search_result.get("title", ""),
            "search_description": search_result.get("description", ""),
            "source": "web_search",
            "search_engine": self.search_engine,
        }

        return Document(id=doc_id, name=doc_name, content=content, meta_data=meta_data)

    def read(self, query: str) -> List[Document]:
        """Read content for a given query by performing web search and fetching content.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting web search reader for query: {query}")

        # Perform web search
        search_results = self._perform_web_search(query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        documents: List[Document] = []

        for result in search_results:
            url = result.get("url", "")

            # Skip if URL is invalid or already visited
            if not self._is_valid_url(url):
                continue

            # Mark URL as visited
            self._visited_urls.add(url)

            # Add delay between requests to be respectful
            if len(documents) > 0:
                time.sleep(self.delay_between_requests)

            # Fetch content from URL
            content = self._fetch_url_content(url)
            if content is None:
                continue

            # Create document
            document = self._create_document_from_url(url, content, result)

            # Apply chunking if enabled
            if self.chunk:
                documents.extend(self.chunk_document(document))
            else:
                documents.append(document)

            # Stop if we've reached max_results
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from web search")
        return documents

    async def async_read(self, query: str) -> List[Document]:
        """Asynchronously read content for a given query.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting async web search reader for query: {query}")

        # The search itself is synchronous and may sleep for rate limiting, so
        # run it in the default executor instead of blocking the event loop.
        loop = asyncio.get_running_loop()
        search_results = await loop.run_in_executor(None, self._perform_web_search, query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        async def fetch_url_async(result: Dict[str, str]) -> Optional[Document]:
            """Fetch one search result and wrap it in a Document (None on skip/failure)."""
            url = result.get("url", "")

            # Skip if URL is invalid or already visited
            if not self._is_valid_url(url):
                return None

            # Mark URL as visited
            self._visited_urls.add(url)

            try:
                headers = {"User-Agent": self.user_agent}
                async with httpx.AsyncClient(timeout=self.request_timeout) as client:
                    response = await client.get(url, headers=headers, follow_redirects=True)
                    response.raise_for_status()

                    content_type = response.headers.get("content-type", "").lower()
                    if "text/html" in content_type:
                        content = self._extract_text_from_html(response.text, url)
                    else:
                        content = response.text

                    return self._create_document_from_url(url, content, result)

            except Exception as e:
                logger.warning(f"Error fetching {url}: {e}")
                return None

        # Fetch the pages one after another. Each coroutine is created only
        # when it is about to be awaited, so breaking out early never leaves
        # un-awaited coroutines behind.
        documents: List[Document] = []
        for i, result in enumerate(search_results):
            if i > 0:  # Add delay between requests (except for the first one)
                await asyncio.sleep(self.delay_between_requests)

            doc = await fetch_url_async(result)
            if doc is None:
                continue

            # Apply chunking if enabled
            if self.chunk:
                documents.extend(await self.chunk_documents_async([doc]))
            else:
                documents.append(doc)

            # Stop if we've reached max_results
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from async web search")
        return documents