agno 1.8.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (590)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +19 -27
  3. agno/agent/agent.py +3143 -4170
  4. agno/api/agent.py +11 -67
  5. agno/api/api.py +5 -46
  6. agno/api/evals.py +8 -19
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -41
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +5 -21
  11. agno/api/schemas/evals.py +7 -16
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +5 -21
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +11 -7
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +11 -66
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/db/__init__.py +24 -0
  25. agno/db/base.py +245 -0
  26. agno/db/dynamo/__init__.py +3 -0
  27. agno/db/dynamo/dynamo.py +1743 -0
  28. agno/db/dynamo/schemas.py +278 -0
  29. agno/db/dynamo/utils.py +684 -0
  30. agno/db/firestore/__init__.py +3 -0
  31. agno/db/firestore/firestore.py +1432 -0
  32. agno/db/firestore/schemas.py +130 -0
  33. agno/db/firestore/utils.py +278 -0
  34. agno/db/gcs_json/__init__.py +3 -0
  35. agno/db/gcs_json/gcs_json_db.py +1001 -0
  36. agno/db/gcs_json/utils.py +194 -0
  37. agno/db/in_memory/__init__.py +3 -0
  38. agno/db/in_memory/in_memory_db.py +882 -0
  39. agno/db/in_memory/utils.py +172 -0
  40. agno/db/json/__init__.py +3 -0
  41. agno/db/json/json_db.py +1045 -0
  42. agno/db/json/utils.py +196 -0
  43. agno/db/migrations/v1_to_v2.py +162 -0
  44. agno/db/mongo/__init__.py +3 -0
  45. agno/db/mongo/mongo.py +1416 -0
  46. agno/db/mongo/schemas.py +77 -0
  47. agno/db/mongo/utils.py +204 -0
  48. agno/db/mysql/__init__.py +3 -0
  49. agno/db/mysql/mysql.py +1719 -0
  50. agno/db/mysql/schemas.py +124 -0
  51. agno/db/mysql/utils.py +297 -0
  52. agno/db/postgres/__init__.py +3 -0
  53. agno/db/postgres/postgres.py +1710 -0
  54. agno/db/postgres/schemas.py +124 -0
  55. agno/db/postgres/utils.py +280 -0
  56. agno/db/redis/__init__.py +3 -0
  57. agno/db/redis/redis.py +1367 -0
  58. agno/db/redis/schemas.py +109 -0
  59. agno/db/redis/utils.py +288 -0
  60. agno/db/schemas/__init__.py +3 -0
  61. agno/db/schemas/evals.py +33 -0
  62. agno/db/schemas/knowledge.py +40 -0
  63. agno/db/schemas/memory.py +46 -0
  64. agno/db/singlestore/__init__.py +3 -0
  65. agno/db/singlestore/schemas.py +116 -0
  66. agno/db/singlestore/singlestore.py +1712 -0
  67. agno/db/singlestore/utils.py +326 -0
  68. agno/db/sqlite/__init__.py +3 -0
  69. agno/db/sqlite/schemas.py +119 -0
  70. agno/db/sqlite/sqlite.py +1676 -0
  71. agno/db/sqlite/utils.py +268 -0
  72. agno/db/utils.py +88 -0
  73. agno/eval/__init__.py +14 -0
  74. agno/eval/accuracy.py +154 -48
  75. agno/eval/performance.py +88 -23
  76. agno/eval/reliability.py +73 -20
  77. agno/eval/utils.py +23 -13
  78. agno/integrations/discord/__init__.py +3 -0
  79. agno/{app → integrations}/discord/client.py +15 -11
  80. agno/knowledge/__init__.py +2 -2
  81. agno/{document → knowledge}/chunking/agentic.py +2 -2
  82. agno/{document → knowledge}/chunking/document.py +2 -2
  83. agno/{document → knowledge}/chunking/fixed.py +3 -3
  84. agno/{document → knowledge}/chunking/markdown.py +2 -2
  85. agno/{document → knowledge}/chunking/recursive.py +2 -2
  86. agno/{document → knowledge}/chunking/row.py +2 -2
  87. agno/knowledge/chunking/semantic.py +59 -0
  88. agno/knowledge/chunking/strategy.py +121 -0
  89. agno/knowledge/content.py +74 -0
  90. agno/knowledge/document/__init__.py +5 -0
  91. agno/{document → knowledge/document}/base.py +12 -2
  92. agno/knowledge/embedder/__init__.py +5 -0
  93. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  94. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  95. agno/{embedder → knowledge/embedder}/base.py +6 -0
  96. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  97. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  98. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  99. agno/{embedder → knowledge/embedder}/google.py +74 -1
  100. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  101. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  102. agno/knowledge/embedder/langdb.py +22 -0
  103. agno/knowledge/embedder/mistral.py +139 -0
  104. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  105. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  106. agno/knowledge/embedder/openai.py +223 -0
  107. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  108. agno/{embedder → knowledge/embedder}/together.py +1 -1
  109. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  110. agno/knowledge/knowledge.py +1551 -0
  111. agno/knowledge/reader/__init__.py +7 -0
  112. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  113. agno/knowledge/reader/base.py +88 -0
  114. agno/{document → knowledge}/reader/csv_reader.py +47 -65
  115. agno/knowledge/reader/docx_reader.py +83 -0
  116. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  117. agno/{document → knowledge}/reader/json_reader.py +30 -9
  118. agno/{document → knowledge}/reader/markdown_reader.py +58 -9
  119. agno/{document → knowledge}/reader/pdf_reader.py +71 -126
  120. agno/knowledge/reader/reader_factory.py +268 -0
  121. agno/knowledge/reader/s3_reader.py +101 -0
  122. agno/{document → knowledge}/reader/text_reader.py +31 -10
  123. agno/knowledge/reader/url_reader.py +128 -0
  124. agno/knowledge/reader/web_search_reader.py +366 -0
  125. agno/{document → knowledge}/reader/website_reader.py +37 -10
  126. agno/knowledge/reader/wikipedia_reader.py +59 -0
  127. agno/knowledge/reader/youtube_reader.py +78 -0
  128. agno/knowledge/remote_content/remote_content.py +88 -0
  129. agno/{reranker → knowledge/reranker}/base.py +1 -1
  130. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  131. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  132. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  133. agno/knowledge/types.py +30 -0
  134. agno/knowledge/utils.py +169 -0
  135. agno/media.py +269 -268
  136. agno/memory/__init__.py +2 -10
  137. agno/memory/manager.py +1003 -148
  138. agno/models/aimlapi/__init__.py +2 -2
  139. agno/models/aimlapi/aimlapi.py +6 -6
  140. agno/models/anthropic/claude.py +131 -131
  141. agno/models/aws/bedrock.py +110 -182
  142. agno/models/aws/claude.py +64 -18
  143. agno/models/azure/ai_foundry.py +73 -23
  144. agno/models/base.py +346 -290
  145. agno/models/cerebras/cerebras.py +84 -27
  146. agno/models/cohere/chat.py +106 -98
  147. agno/models/google/gemini.py +105 -46
  148. agno/models/groq/groq.py +97 -35
  149. agno/models/huggingface/huggingface.py +92 -27
  150. agno/models/ibm/watsonx.py +72 -13
  151. agno/models/litellm/chat.py +85 -13
  152. agno/models/message.py +46 -151
  153. agno/models/meta/llama.py +85 -49
  154. agno/models/metrics.py +120 -0
  155. agno/models/mistral/mistral.py +90 -21
  156. agno/models/ollama/__init__.py +0 -2
  157. agno/models/ollama/chat.py +85 -47
  158. agno/models/openai/chat.py +154 -37
  159. agno/models/openai/responses.py +178 -105
  160. agno/models/perplexity/perplexity.py +26 -2
  161. agno/models/portkey/portkey.py +0 -7
  162. agno/models/response.py +15 -9
  163. agno/models/utils.py +20 -0
  164. agno/models/vercel/__init__.py +2 -2
  165. agno/models/vercel/v0.py +1 -1
  166. agno/models/vllm/__init__.py +2 -2
  167. agno/models/vllm/vllm.py +3 -3
  168. agno/models/xai/xai.py +10 -10
  169. agno/os/__init__.py +3 -0
  170. agno/os/app.py +497 -0
  171. agno/os/auth.py +47 -0
  172. agno/os/config.py +103 -0
  173. agno/os/interfaces/agui/__init__.py +3 -0
  174. agno/os/interfaces/agui/agui.py +31 -0
  175. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  176. agno/{app → os/interfaces}/agui/utils.py +77 -33
  177. agno/os/interfaces/base.py +21 -0
  178. agno/os/interfaces/slack/__init__.py +3 -0
  179. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  180. agno/os/interfaces/slack/slack.py +32 -0
  181. agno/os/interfaces/whatsapp/__init__.py +3 -0
  182. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  183. agno/os/interfaces/whatsapp/whatsapp.py +29 -0
  184. agno/os/mcp.py +235 -0
  185. agno/os/router.py +1400 -0
  186. agno/os/routers/__init__.py +3 -0
  187. agno/os/routers/evals/__init__.py +3 -0
  188. agno/os/routers/evals/evals.py +393 -0
  189. agno/os/routers/evals/schemas.py +142 -0
  190. agno/os/routers/evals/utils.py +161 -0
  191. agno/os/routers/knowledge/__init__.py +3 -0
  192. agno/os/routers/knowledge/knowledge.py +850 -0
  193. agno/os/routers/knowledge/schemas.py +118 -0
  194. agno/os/routers/memory/__init__.py +3 -0
  195. agno/os/routers/memory/memory.py +410 -0
  196. agno/os/routers/memory/schemas.py +58 -0
  197. agno/os/routers/metrics/__init__.py +3 -0
  198. agno/os/routers/metrics/metrics.py +178 -0
  199. agno/os/routers/metrics/schemas.py +47 -0
  200. agno/os/routers/session/__init__.py +3 -0
  201. agno/os/routers/session/session.py +536 -0
  202. agno/os/schema.py +945 -0
  203. agno/{app/playground → os}/settings.py +7 -15
  204. agno/os/utils.py +270 -0
  205. agno/reasoning/azure_ai_foundry.py +4 -4
  206. agno/reasoning/deepseek.py +4 -4
  207. agno/reasoning/default.py +6 -11
  208. agno/reasoning/groq.py +4 -4
  209. agno/reasoning/helpers.py +4 -6
  210. agno/reasoning/ollama.py +4 -4
  211. agno/reasoning/openai.py +4 -4
  212. agno/run/agent.py +633 -0
  213. agno/run/base.py +53 -77
  214. agno/run/cancel.py +81 -0
  215. agno/run/team.py +243 -96
  216. agno/run/workflow.py +550 -12
  217. agno/session/__init__.py +10 -0
  218. agno/session/agent.py +244 -0
  219. agno/session/summary.py +225 -0
  220. agno/session/team.py +262 -0
  221. agno/{storage/session/v2 → session}/workflow.py +47 -24
  222. agno/team/__init__.py +15 -16
  223. agno/team/team.py +3260 -4824
  224. agno/tools/agentql.py +14 -5
  225. agno/tools/airflow.py +9 -4
  226. agno/tools/api.py +7 -3
  227. agno/tools/apify.py +2 -46
  228. agno/tools/arxiv.py +8 -3
  229. agno/tools/aws_lambda.py +7 -5
  230. agno/tools/aws_ses.py +7 -1
  231. agno/tools/baidusearch.py +4 -1
  232. agno/tools/bitbucket.py +4 -4
  233. agno/tools/brandfetch.py +14 -11
  234. agno/tools/bravesearch.py +4 -1
  235. agno/tools/brightdata.py +43 -23
  236. agno/tools/browserbase.py +13 -4
  237. agno/tools/calcom.py +12 -10
  238. agno/tools/calculator.py +10 -27
  239. agno/tools/cartesia.py +20 -17
  240. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  241. agno/tools/confluence.py +8 -8
  242. agno/tools/crawl4ai.py +7 -1
  243. agno/tools/csv_toolkit.py +9 -8
  244. agno/tools/dalle.py +22 -12
  245. agno/tools/daytona.py +13 -16
  246. agno/tools/decorator.py +6 -3
  247. agno/tools/desi_vocal.py +17 -8
  248. agno/tools/discord.py +11 -8
  249. agno/tools/docker.py +30 -42
  250. agno/tools/duckdb.py +34 -53
  251. agno/tools/duckduckgo.py +8 -7
  252. agno/tools/e2b.py +62 -62
  253. agno/tools/eleven_labs.py +36 -29
  254. agno/tools/email.py +4 -1
  255. agno/tools/evm.py +7 -1
  256. agno/tools/exa.py +19 -14
  257. agno/tools/fal.py +30 -30
  258. agno/tools/file.py +9 -8
  259. agno/tools/financial_datasets.py +25 -44
  260. agno/tools/firecrawl.py +22 -22
  261. agno/tools/function.py +127 -18
  262. agno/tools/giphy.py +23 -11
  263. agno/tools/github.py +48 -126
  264. agno/tools/gmail.py +45 -61
  265. agno/tools/google_bigquery.py +7 -6
  266. agno/tools/google_maps.py +11 -26
  267. agno/tools/googlesearch.py +7 -2
  268. agno/tools/googlesheets.py +21 -17
  269. agno/tools/hackernews.py +9 -5
  270. agno/tools/jina.py +5 -4
  271. agno/tools/jira.py +18 -9
  272. agno/tools/knowledge.py +31 -32
  273. agno/tools/linear.py +19 -34
  274. agno/tools/linkup.py +5 -1
  275. agno/tools/local_file_system.py +8 -5
  276. agno/tools/lumalab.py +32 -20
  277. agno/tools/mcp.py +1 -2
  278. agno/tools/mem0.py +18 -12
  279. agno/tools/memori.py +14 -10
  280. agno/tools/mlx_transcribe.py +3 -2
  281. agno/tools/models/azure_openai.py +33 -15
  282. agno/tools/models/gemini.py +59 -32
  283. agno/tools/models/groq.py +30 -23
  284. agno/tools/models/nebius.py +28 -12
  285. agno/tools/models_labs.py +40 -16
  286. agno/tools/moviepy_video.py +7 -6
  287. agno/tools/neo4j.py +10 -8
  288. agno/tools/newspaper.py +7 -2
  289. agno/tools/newspaper4k.py +8 -3
  290. agno/tools/openai.py +58 -32
  291. agno/tools/openbb.py +12 -11
  292. agno/tools/opencv.py +63 -47
  293. agno/tools/openweather.py +14 -12
  294. agno/tools/pandas.py +11 -3
  295. agno/tools/postgres.py +4 -12
  296. agno/tools/pubmed.py +4 -1
  297. agno/tools/python.py +9 -22
  298. agno/tools/reasoning.py +35 -27
  299. agno/tools/reddit.py +11 -26
  300. agno/tools/replicate.py +55 -42
  301. agno/tools/resend.py +4 -1
  302. agno/tools/scrapegraph.py +15 -14
  303. agno/tools/searxng.py +10 -23
  304. agno/tools/serpapi.py +6 -3
  305. agno/tools/serper.py +13 -4
  306. agno/tools/shell.py +9 -2
  307. agno/tools/slack.py +12 -11
  308. agno/tools/sleep.py +3 -2
  309. agno/tools/spider.py +24 -4
  310. agno/tools/sql.py +7 -6
  311. agno/tools/tavily.py +6 -4
  312. agno/tools/telegram.py +12 -4
  313. agno/tools/todoist.py +11 -31
  314. agno/tools/toolkit.py +1 -1
  315. agno/tools/trafilatura.py +22 -6
  316. agno/tools/trello.py +9 -22
  317. agno/tools/twilio.py +10 -3
  318. agno/tools/user_control_flow.py +6 -1
  319. agno/tools/valyu.py +34 -5
  320. agno/tools/visualization.py +19 -28
  321. agno/tools/webbrowser.py +4 -3
  322. agno/tools/webex.py +11 -7
  323. agno/tools/website.py +15 -46
  324. agno/tools/webtools.py +12 -4
  325. agno/tools/whatsapp.py +5 -9
  326. agno/tools/wikipedia.py +20 -13
  327. agno/tools/x.py +14 -13
  328. agno/tools/yfinance.py +13 -40
  329. agno/tools/youtube.py +26 -20
  330. agno/tools/zendesk.py +7 -2
  331. agno/tools/zep.py +10 -7
  332. agno/tools/zoom.py +10 -9
  333. agno/utils/common.py +1 -19
  334. agno/utils/events.py +100 -123
  335. agno/utils/gemini.py +32 -2
  336. agno/utils/knowledge.py +29 -0
  337. agno/utils/log.py +54 -4
  338. agno/utils/mcp.py +68 -10
  339. agno/utils/media.py +39 -0
  340. agno/utils/message.py +12 -1
  341. agno/utils/models/aws_claude.py +1 -1
  342. agno/utils/models/claude.py +47 -4
  343. agno/utils/models/cohere.py +1 -1
  344. agno/utils/models/mistral.py +8 -7
  345. agno/utils/models/schema_utils.py +3 -3
  346. agno/utils/models/watsonx.py +1 -1
  347. agno/utils/openai.py +1 -1
  348. agno/utils/pprint.py +33 -32
  349. agno/utils/print_response/agent.py +779 -0
  350. agno/utils/print_response/team.py +1669 -0
  351. agno/utils/print_response/workflow.py +1451 -0
  352. agno/utils/prompts.py +14 -14
  353. agno/utils/reasoning.py +87 -0
  354. agno/utils/response.py +42 -42
  355. agno/utils/streamlit.py +481 -0
  356. agno/utils/string.py +8 -22
  357. agno/utils/team.py +50 -0
  358. agno/utils/timer.py +2 -2
  359. agno/vectordb/base.py +33 -21
  360. agno/vectordb/cassandra/cassandra.py +287 -23
  361. agno/vectordb/chroma/chromadb.py +482 -59
  362. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  363. agno/vectordb/couchbase/couchbase.py +309 -29
  364. agno/vectordb/lancedb/lance_db.py +360 -21
  365. agno/vectordb/langchaindb/__init__.py +5 -0
  366. agno/vectordb/langchaindb/langchaindb.py +145 -0
  367. agno/vectordb/lightrag/__init__.py +5 -0
  368. agno/vectordb/lightrag/lightrag.py +374 -0
  369. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  370. agno/vectordb/milvus/milvus.py +242 -32
  371. agno/vectordb/mongodb/mongodb.py +200 -24
  372. agno/vectordb/pgvector/pgvector.py +319 -37
  373. agno/vectordb/pineconedb/pineconedb.py +221 -27
  374. agno/vectordb/qdrant/qdrant.py +334 -14
  375. agno/vectordb/singlestore/singlestore.py +286 -29
  376. agno/vectordb/surrealdb/surrealdb.py +187 -7
  377. agno/vectordb/upstashdb/upstashdb.py +342 -26
  378. agno/vectordb/weaviate/weaviate.py +227 -165
  379. agno/workflow/__init__.py +17 -13
  380. agno/workflow/{v2/condition.py → condition.py} +135 -32
  381. agno/workflow/{v2/loop.py → loop.py} +115 -28
  382. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  383. agno/workflow/{v2/router.py → router.py} +133 -32
  384. agno/workflow/{v2/step.py → step.py} +207 -49
  385. agno/workflow/{v2/steps.py → steps.py} +147 -66
  386. agno/workflow/types.py +482 -0
  387. agno/workflow/workflow.py +2410 -696
  388. agno-2.0.0.dist-info/METADATA +494 -0
  389. agno-2.0.0.dist-info/RECORD +515 -0
  390. agno-2.0.0.dist-info/licenses/LICENSE +201 -0
  391. agno/agent/metrics.py +0 -107
  392. agno/api/app.py +0 -35
  393. agno/api/playground.py +0 -92
  394. agno/api/schemas/app.py +0 -12
  395. agno/api/schemas/playground.py +0 -22
  396. agno/api/schemas/user.py +0 -35
  397. agno/api/schemas/workspace.py +0 -46
  398. agno/api/user.py +0 -160
  399. agno/api/workflows.py +0 -33
  400. agno/api/workspace.py +0 -175
  401. agno/app/agui/__init__.py +0 -3
  402. agno/app/agui/app.py +0 -17
  403. agno/app/agui/sync_router.py +0 -120
  404. agno/app/base.py +0 -186
  405. agno/app/discord/__init__.py +0 -3
  406. agno/app/fastapi/__init__.py +0 -3
  407. agno/app/fastapi/app.py +0 -107
  408. agno/app/fastapi/async_router.py +0 -457
  409. agno/app/fastapi/sync_router.py +0 -448
  410. agno/app/playground/app.py +0 -228
  411. agno/app/playground/async_router.py +0 -1050
  412. agno/app/playground/deploy.py +0 -249
  413. agno/app/playground/operator.py +0 -183
  414. agno/app/playground/schemas.py +0 -220
  415. agno/app/playground/serve.py +0 -55
  416. agno/app/playground/sync_router.py +0 -1042
  417. agno/app/playground/utils.py +0 -46
  418. agno/app/settings.py +0 -15
  419. agno/app/slack/__init__.py +0 -3
  420. agno/app/slack/app.py +0 -19
  421. agno/app/slack/sync_router.py +0 -92
  422. agno/app/utils.py +0 -54
  423. agno/app/whatsapp/__init__.py +0 -3
  424. agno/app/whatsapp/app.py +0 -15
  425. agno/app/whatsapp/sync_router.py +0 -197
  426. agno/cli/auth_server.py +0 -249
  427. agno/cli/config.py +0 -274
  428. agno/cli/console.py +0 -88
  429. agno/cli/credentials.py +0 -23
  430. agno/cli/entrypoint.py +0 -571
  431. agno/cli/operator.py +0 -357
  432. agno/cli/settings.py +0 -96
  433. agno/cli/ws/ws_cli.py +0 -817
  434. agno/constants.py +0 -13
  435. agno/document/__init__.py +0 -5
  436. agno/document/chunking/semantic.py +0 -45
  437. agno/document/chunking/strategy.py +0 -31
  438. agno/document/reader/__init__.py +0 -5
  439. agno/document/reader/base.py +0 -47
  440. agno/document/reader/docx_reader.py +0 -60
  441. agno/document/reader/gcs/pdf_reader.py +0 -44
  442. agno/document/reader/s3/pdf_reader.py +0 -59
  443. agno/document/reader/s3/text_reader.py +0 -63
  444. agno/document/reader/url_reader.py +0 -59
  445. agno/document/reader/youtube_reader.py +0 -58
  446. agno/embedder/__init__.py +0 -5
  447. agno/embedder/langdb.py +0 -80
  448. agno/embedder/mistral.py +0 -82
  449. agno/embedder/openai.py +0 -78
  450. agno/file/__init__.py +0 -5
  451. agno/file/file.py +0 -16
  452. agno/file/local/csv.py +0 -32
  453. agno/file/local/txt.py +0 -19
  454. agno/infra/app.py +0 -240
  455. agno/infra/base.py +0 -144
  456. agno/infra/context.py +0 -20
  457. agno/infra/db_app.py +0 -52
  458. agno/infra/resource.py +0 -205
  459. agno/infra/resources.py +0 -55
  460. agno/knowledge/agent.py +0 -702
  461. agno/knowledge/arxiv.py +0 -33
  462. agno/knowledge/combined.py +0 -36
  463. agno/knowledge/csv.py +0 -144
  464. agno/knowledge/csv_url.py +0 -124
  465. agno/knowledge/document.py +0 -223
  466. agno/knowledge/docx.py +0 -137
  467. agno/knowledge/firecrawl.py +0 -34
  468. agno/knowledge/gcs/__init__.py +0 -0
  469. agno/knowledge/gcs/base.py +0 -39
  470. agno/knowledge/gcs/pdf.py +0 -125
  471. agno/knowledge/json.py +0 -137
  472. agno/knowledge/langchain.py +0 -71
  473. agno/knowledge/light_rag.py +0 -273
  474. agno/knowledge/llamaindex.py +0 -66
  475. agno/knowledge/markdown.py +0 -154
  476. agno/knowledge/pdf.py +0 -164
  477. agno/knowledge/pdf_bytes.py +0 -42
  478. agno/knowledge/pdf_url.py +0 -148
  479. agno/knowledge/s3/__init__.py +0 -0
  480. agno/knowledge/s3/base.py +0 -64
  481. agno/knowledge/s3/pdf.py +0 -33
  482. agno/knowledge/s3/text.py +0 -34
  483. agno/knowledge/text.py +0 -141
  484. agno/knowledge/url.py +0 -46
  485. agno/knowledge/website.py +0 -179
  486. agno/knowledge/wikipedia.py +0 -32
  487. agno/knowledge/youtube.py +0 -35
  488. agno/memory/agent.py +0 -423
  489. agno/memory/classifier.py +0 -104
  490. agno/memory/db/__init__.py +0 -5
  491. agno/memory/db/base.py +0 -42
  492. agno/memory/db/mongodb.py +0 -189
  493. agno/memory/db/postgres.py +0 -203
  494. agno/memory/db/sqlite.py +0 -193
  495. agno/memory/memory.py +0 -22
  496. agno/memory/row.py +0 -36
  497. agno/memory/summarizer.py +0 -201
  498. agno/memory/summary.py +0 -19
  499. agno/memory/team.py +0 -415
  500. agno/memory/v2/__init__.py +0 -2
  501. agno/memory/v2/db/__init__.py +0 -1
  502. agno/memory/v2/db/base.py +0 -42
  503. agno/memory/v2/db/firestore.py +0 -339
  504. agno/memory/v2/db/mongodb.py +0 -196
  505. agno/memory/v2/db/postgres.py +0 -214
  506. agno/memory/v2/db/redis.py +0 -187
  507. agno/memory/v2/db/schema.py +0 -54
  508. agno/memory/v2/db/sqlite.py +0 -209
  509. agno/memory/v2/manager.py +0 -437
  510. agno/memory/v2/memory.py +0 -1097
  511. agno/memory/v2/schema.py +0 -55
  512. agno/memory/v2/summarizer.py +0 -215
  513. agno/memory/workflow.py +0 -38
  514. agno/models/ollama/tools.py +0 -430
  515. agno/models/qwen/__init__.py +0 -5
  516. agno/playground/__init__.py +0 -10
  517. agno/playground/deploy.py +0 -3
  518. agno/playground/playground.py +0 -3
  519. agno/playground/serve.py +0 -3
  520. agno/playground/settings.py +0 -3
  521. agno/reranker/__init__.py +0 -0
  522. agno/run/response.py +0 -467
  523. agno/run/v2/__init__.py +0 -0
  524. agno/run/v2/workflow.py +0 -567
  525. agno/storage/__init__.py +0 -0
  526. agno/storage/agent/__init__.py +0 -0
  527. agno/storage/agent/dynamodb.py +0 -1
  528. agno/storage/agent/json.py +0 -1
  529. agno/storage/agent/mongodb.py +0 -1
  530. agno/storage/agent/postgres.py +0 -1
  531. agno/storage/agent/singlestore.py +0 -1
  532. agno/storage/agent/sqlite.py +0 -1
  533. agno/storage/agent/yaml.py +0 -1
  534. agno/storage/base.py +0 -60
  535. agno/storage/dynamodb.py +0 -673
  536. agno/storage/firestore.py +0 -297
  537. agno/storage/gcs_json.py +0 -261
  538. agno/storage/in_memory.py +0 -234
  539. agno/storage/json.py +0 -237
  540. agno/storage/mongodb.py +0 -328
  541. agno/storage/mysql.py +0 -685
  542. agno/storage/postgres.py +0 -682
  543. agno/storage/redis.py +0 -336
  544. agno/storage/session/__init__.py +0 -16
  545. agno/storage/session/agent.py +0 -64
  546. agno/storage/session/team.py +0 -63
  547. agno/storage/session/v2/__init__.py +0 -5
  548. agno/storage/session/workflow.py +0 -61
  549. agno/storage/singlestore.py +0 -606
  550. agno/storage/sqlite.py +0 -646
  551. agno/storage/workflow/__init__.py +0 -0
  552. agno/storage/workflow/mongodb.py +0 -1
  553. agno/storage/workflow/postgres.py +0 -1
  554. agno/storage/workflow/sqlite.py +0 -1
  555. agno/storage/yaml.py +0 -241
  556. agno/tools/thinking.py +0 -73
  557. agno/utils/defaults.py +0 -57
  558. agno/utils/filesystem.py +0 -39
  559. agno/utils/git.py +0 -52
  560. agno/utils/json_io.py +0 -30
  561. agno/utils/load_env.py +0 -19
  562. agno/utils/py_io.py +0 -19
  563. agno/utils/pyproject.py +0 -18
  564. agno/utils/resource_filter.py +0 -31
  565. agno/workflow/v2/__init__.py +0 -21
  566. agno/workflow/v2/types.py +0 -357
  567. agno/workflow/v2/workflow.py +0 -3312
  568. agno/workspace/__init__.py +0 -0
  569. agno/workspace/config.py +0 -325
  570. agno/workspace/enums.py +0 -6
  571. agno/workspace/helpers.py +0 -52
  572. agno/workspace/operator.py +0 -757
  573. agno/workspace/settings.py +0 -158
  574. agno-1.8.1.dist-info/METADATA +0 -982
  575. agno-1.8.1.dist-info/RECORD +0 -566
  576. agno-1.8.1.dist-info/entry_points.txt +0 -3
  577. agno-1.8.1.dist-info/licenses/LICENSE +0 -375
  578. /agno/{app → db/migrations}/__init__.py +0 -0
  579. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  580. /agno/{cli → integrations}/__init__.py +0 -0
  581. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  582. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  583. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  584. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  585. /agno/{app → os/interfaces}/slack/security.py +0 -0
  586. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  587. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  588. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  589. {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
  590. {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,101 @@
1
+ import asyncio
2
+ from io import BytesIO
3
+ from pathlib import Path
4
+ from typing import List, Optional
5
+
6
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
7
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
+ from agno.knowledge.document.base import Document
9
+ from agno.knowledge.reader.base import Reader
10
+ from agno.knowledge.reader.pdf_reader import PDFReader
11
+ from agno.knowledge.reader.text_reader import TextReader
12
+ from agno.knowledge.types import ContentType
13
+ from agno.utils.log import log_info, logger
14
+
15
+ try:
16
+ from agno.aws.resource.s3.object import S3Object # type: ignore
17
+ except (ModuleNotFoundError, ImportError):
18
+ raise ImportError("`agno-aws` not installed. Please install using `pip install agno-aws`")
19
+
20
+ try:
21
+ import textract # noqa: F401
22
+ except ImportError:
23
+ raise ImportError("`textract` not installed. Please install it via `pip install textract`.")
24
+
25
+ try:
26
+ from pypdf import PdfReader as DocumentReader # noqa: F401
27
+ except ImportError:
28
+ raise ImportError("`pypdf` not installed. Please install it via `pip install pypdf`.")
29
+
30
+
31
class S3Reader(Reader):
    """Reader that turns a single S3 object into a list of ``Document`` objects.

    Objects whose URI ends in ``.pdf`` (case-insensitive) are read into memory
    and handed to ``PDFReader``. Every other object is downloaded to a
    temporary local file under ``storage/`` and handed to ``TextReader``.
    """

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
        # NOTE(review): the default ``FixedSizeChunking()`` is built once, when
        # the class body is evaluated, so readers created without an explicit
        # strategy share that instance. Left unchanged to keep the constructor
        # signature and default identical for existing callers.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for S3 readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader declares support for."""
        return [ContentType.FILE, ContentType.URL, ContentType.TEXT]

    @staticmethod
    def _default_doc_name(s3_object: S3Object) -> str:
        """Derive a document name from the object's key.

        Takes the last path segment of ``s3_object.name``, keeps the text
        before its first ``.``, and replaces spaces with underscores.
        """
        # The last "/"-separated segment cannot contain "/", so only spaces
        # need replacing.
        return s3_object.name.split("/")[-1].split(".")[0].replace(" ", "_")

    def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
        """Read an S3 object and return its contents as documents.

        Args:
            name: Name to give the resulting documents. When ``None`` a name
                is derived from the object's key.
            s3_object: The S3 object to read.

        Returns:
            The documents produced by the delegated reader, or an empty list
            if anything goes wrong (the error is logged, not raised).
        """
        try:
            log_info(f"Reading S3 file: {s3_object.uri}")
            doc_name = self._default_doc_name(s3_object) if name is None else name

            # PDF files are parsed straight from memory; no temporary file needed.
            if s3_object.uri.lower().endswith(".pdf"):
                object_resource = s3_object.get_resource()
                object_body = object_resource.get()["Body"]
                return PDFReader().read(pdf=BytesIO(object_body.read()), name=doc_name)

            # Everything else is downloaded and read as text.
            # TODO: textract was previously used for this branch; decide whether it is still needed.
            obj_name = s3_object.name.split("/")[-1]
            temporary_file = Path("storage").joinpath(obj_name)
            try:
                s3_object.download(temporary_file)
                return TextReader().read(file=temporary_file, name=doc_name)
            finally:
                # Always remove the temporary copy, including when the download
                # or the read fails; tolerate the file never having been created.
                temporary_file.unlink(missing_ok=True)

        except Exception as e:
            logger.error(f"Error reading: {s3_object.uri}: {e}")

        return []

    async def async_read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
        """Asynchronously read S3 files by running the synchronous read operation in a thread."""
        return await asyncio.to_thread(self.read, name, s3_object)
@@ -1,27 +1,48 @@
1
1
  import asyncio
2
2
  import uuid
3
3
  from pathlib import Path
4
- from typing import IO, Any, List, Union
4
+ from typing import IO, Any, List, Optional, Union
5
5
 
6
- from agno.document.base import Document
7
- from agno.document.reader.base import Reader
6
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
7
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
+ from agno.knowledge.document.base import Document
9
+ from agno.knowledge.reader.base import Reader
10
+ from agno.knowledge.types import ContentType
8
11
  from agno.utils.log import log_info, logger
9
12
 
10
13
 
11
14
  class TextReader(Reader):
12
15
  """Reader for Text files"""
13
16
 
14
- def read(self, file: Union[Path, IO[Any]]) -> List[Document]:
17
+ def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
18
+ super().__init__(chunking_strategy=chunking_strategy, **kwargs)
19
+
20
+ @classmethod
21
+ def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
22
+ """Get the list of supported chunking strategies for Text readers."""
23
+ return [
24
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
25
+ ChunkingStrategyType.AGENTIC_CHUNKER,
26
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
27
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
28
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
29
+ ]
30
+
31
+ @classmethod
32
+ def get_supported_content_types(self) -> List[ContentType]:
33
+ return [ContentType.TXT]
34
+
35
+ def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
15
36
  try:
16
37
  if isinstance(file, Path):
17
38
  if not file.exists():
18
39
  raise FileNotFoundError(f"Could not find file: {file}")
19
40
  log_info(f"Reading: {file}")
20
- file_name = file.stem
41
+ file_name = name or file.stem
21
42
  file_contents = file.read_text("utf-8")
22
43
  else:
23
- log_info(f"Reading uploaded file: {file.name}")
24
- file_name = file.name.split(".")[0]
44
+ file_name = name or file.name.split(".")[0]
45
+ log_info(f"Reading uploaded file: {file_name}")
25
46
  file.seek(0)
26
47
  file_contents = file.read().decode("utf-8")
27
48
 
@@ -42,14 +63,14 @@ class TextReader(Reader):
42
63
  logger.error(f"Error reading: {file}: {e}")
43
64
  return []
44
65
 
45
- async def async_read(self, file: Union[Path, IO[Any]]) -> List[Document]:
66
+ async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
46
67
  try:
47
68
  if isinstance(file, Path):
48
69
  if not file.exists():
49
70
  raise FileNotFoundError(f"Could not find file: {file}")
50
71
 
51
72
  log_info(f"Reading asynchronously: {file}")
52
- file_name = file.stem
73
+ file_name = name or file.stem
53
74
 
54
75
  try:
55
76
  import aiofiles
@@ -61,7 +82,7 @@ class TextReader(Reader):
61
82
  file_contents = file.read_text("utf-8")
62
83
  else:
63
84
  log_info(f"Reading uploaded file asynchronously: {file.name}")
64
- file_name = file.name.split(".")[0]
85
+ file_name = name or file.name.split(".")[0]
65
86
  file.seek(0)
66
87
  file_contents = file.read().decode("utf-8")
67
88
 
@@ -0,0 +1,128 @@
1
+ from io import BytesIO
2
+ from os.path import basename
3
+ from pathlib import Path
4
+ from typing import List, Optional
5
+ from urllib.parse import urlparse
6
+
7
+ import httpx
8
+
9
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
10
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
11
+ from agno.knowledge.document.base import Document
12
+ from agno.knowledge.reader.base import Reader
13
+ from agno.knowledge.reader.csv_reader import CSVReader
14
+ from agno.knowledge.reader.pdf_reader import PDFReader
15
+ from agno.knowledge.types import ContentType
16
+ from agno.utils.http import async_fetch_with_retry, fetch_with_retry
17
+ from agno.utils.log import log_debug
18
+
19
+
20
class URLReader(Reader):
    """Reader that downloads a URL and turns the response into documents.

    The URL path's extension selects the conversion: ``.csv`` is delegated to
    ``CSVReader``, ``.pdf`` to ``PDFReader``, and anything else becomes a single
    text ``Document``.
    """

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), proxy: Optional[str] = None, **kwargs
    ):
        """Create the reader.

        Args:
            chunking_strategy: Strategy forwarded to the base reader. The
                default instance is created once with this signature and is
                shared by every reader relying on the default.
            proxy: Optional proxy URL used for both sync and async downloads.
            **kwargs: Forwarded unchanged to the base class constructor.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.proxy = proxy

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Return the chunking strategies that URL readers can be configured with."""
        supported = [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]
        return supported

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Return the content types this reader handles (URLs only)."""
        return [ContentType.URL]

    def read(
        self, url: str, id: Optional[str] = None, name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Download ``url`` and return its documents, chunked when chunking is enabled.

        Args:
            url: Address to download.
            id: Optional document id (used for plain-text responses).
            name: Optional document name.
            password: Optional password, used for PDF responses only.

        Raises:
            ValueError: If ``url`` is empty.
        """
        if not url:
            raise ValueError("No url provided")

        log_debug(f"Reading: {url}")

        # Retrying is delegated to the fetch_with_retry helper.
        response = fetch_with_retry(url, proxy=self.proxy)

        documents = self._create_documents(
            url=url, text=response.text, content=response.content, id=id, name=name, password=password
        )

        if self.chunk:
            # Chunk every document and flatten the per-document lists.
            return [piece for document in documents for piece in self.chunk_document(document)]
        return documents

    async def async_read(
        self, url: str, id: Optional[str] = None, name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Asynchronous counterpart of ``read``; same arguments and errors."""
        if not url:
            raise ValueError("No url provided")

        log_debug(f"Reading async: {url}")

        # Only pass ``proxy`` to httpx when one is actually configured.
        client_args = {}
        if self.proxy:
            client_args["proxy"] = self.proxy

        async with httpx.AsyncClient(**client_args) as client:  # type: ignore
            response = await async_fetch_with_retry(url, client=client)

        documents = self._create_documents(
            url=url, text=response.text, content=response.content, id=id, name=name, password=password
        )

        if self.chunk:
            return await self.chunk_documents_async(documents)
        return documents

    def _create_documents(
        self,
        url: str,
        text: str,
        content: bytes,
        id: Optional[str] = None,
        name: Optional[str] = None,
        password: Optional[str] = None,
    ) -> List[Document]:
        """Build documents from an already-downloaded response.

        Args:
            url: Source URL; its path extension selects the conversion.
            text: Decoded response body (used for non-CSV/PDF content).
            content: Raw response bytes (used for CSV and PDF content).
            id: Optional id for the plain-text document.
            name: Optional document name.
            password: Optional PDF password.

        NOTE(review): the CSV branch names documents after the URL's file name
        and does not forward ``name``; neither the CSV nor the PDF branch
        forwards ``id``. Confirm whether that is intended.
        """
        parsed_url = urlparse(url)
        extension = Path(parsed_url.path).suffix.lower()  # type: ignore

        if extension == ".csv":
            csv_name = basename(parsed_url.path) or "data.csv"
            return CSVReader().read(file=BytesIO(content), name=csv_name)

        if extension == ".pdf":
            # Pass ``password`` only when one was supplied.
            pdf_kwargs = {"pdf": BytesIO(content), "name": name}
            if password:
                pdf_kwargs["password"] = password
            return PDFReader().read(**pdf_kwargs)

        # Plain content: explicit name, then flattened URL path, then host, then the raw URL.
        path_based_name = parsed_url.path.strip("/").replace("/", "_").replace(" ", "_")
        doc_name = name or path_based_name or parsed_url.netloc or url

        document = Document(
            name=doc_name,
            id=id or doc_name,
            meta_data={"url": url},
            content=text,
            size=len(text),
        )
        return [document]
@@ -0,0 +1,366 @@
1
+ import asyncio
2
+ import random
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Literal, Optional, Set
6
+ from urllib.parse import urlparse
7
+
8
+ import httpx
9
+
10
+ from agno.knowledge.chunking.semantic import SemanticChunking
11
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
12
+ from agno.knowledge.document.base import Document
13
+ from agno.knowledge.reader.base import Reader
14
+ from agno.knowledge.types import ContentType
15
+ from agno.utils.log import log_debug, logger
16
+
17
+ try:
18
+ from bs4 import BeautifulSoup, Tag # noqa: F401
19
+ except ImportError:
20
+ raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
21
+
22
+ try:
23
+ from ddgs import DDGS
24
+ except ImportError:
25
+ raise ImportError("The `ddgs` package is not installed. Please install it via `pip install ddgs`.")
26
+
27
+
28
@dataclass
class WebSearchReader(Reader):
    """Reader that runs a web search for a query and turns the result pages into documents.

    ``self.chunk``, ``self.max_results``, ``chunk_document`` and
    ``chunk_documents_async`` are not declared here; they are presumably
    inherited from ``Reader`` -- confirm against the base class.
    """

    # Timeout passed to the DuckDuckGo client
    search_timeout: int = 10

    # Timeout for each page download
    request_timeout: int = 30
    delay_between_requests: float = 2.0  # Pause between page downloads
    max_retries: int = 3  # Download attempts per page (sync path)
    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

    # Search engine configuration
    search_engine: Literal["duckduckgo", "google"] = "duckduckgo"
    search_delay: float = 3.0  # Minimum spacing between search requests
    max_search_retries: int = 2  # Attempts for search operations

    # Rate limiting
    rate_limit_delay: float = 5.0  # Base wait when the search engine rate-limits us
    exponential_backoff: bool = True  # Double the rate-limit wait on each attempt

    # Internal state
    _visited_urls: Set[str] = field(default_factory=set)
    _last_search_time: float = field(default=0.0, init=False)

    # Override default chunking strategy
    chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Web Search readers."""
        return [
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the content types handled by this reader (search topics)."""
        return [ContentType.TOPIC]

    def _respect_rate_limits(self):
        """Block until at least ``search_delay`` seconds have passed since the last search."""
        elapsed = time.time() - self._last_search_time

        if elapsed < self.search_delay:
            sleep_time = self.search_delay - elapsed
            log_debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
            time.sleep(sleep_time)

        self._last_search_time = time.time()

    def _perform_duckduckgo_search(self, query: str) -> List[Dict[str, str]]:
        """Search DuckDuckGo with rate limiting and retries.

        Returns:
            A list of ``{"title", "url", "description"}`` dicts, or ``[]`` when
            every attempt fails.
        """
        log_debug(f"Performing DuckDuckGo search for: {query}")

        for attempt in range(self.max_search_retries):
            try:
                self._respect_rate_limits()

                ddgs = DDGS(timeout=self.search_timeout)
                search_results = ddgs.text(query=query, max_results=self.max_results)

                results = [
                    {
                        "title": result.get("title", ""),
                        # ddgs text results carry the address under "href";
                        # "link" is kept as a fallback for other result shapes.
                        "url": result.get("href") or result.get("link", ""),
                        "description": result.get("body", ""),
                    }
                    for result in (search_results or [])
                ]

                log_debug(f"Found {len(results)} search results")
                return results

            except Exception as e:
                logger.warning(f"DuckDuckGo search attempt {attempt + 1} failed: {e}")

                # No retry left: report and stop without sleeping.
                if attempt >= self.max_search_retries - 1:
                    logger.error(f"All DuckDuckGo search attempts failed: {e}")
                    return []

                if "rate limit" in str(e).lower() or "429" in str(e):
                    # Rate limited - wait longer
                    wait_time = (
                        self.rate_limit_delay * (2**attempt) if self.exponential_backoff else self.rate_limit_delay
                    )
                    logger.info(f"Rate limited, waiting {wait_time} seconds before retry")
                    time.sleep(wait_time)
                else:
                    # Other error - shorter wait
                    time.sleep(self.search_delay)
        return []

    def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
        """Search Google via the optional ``googlesearch-python`` package.

        Returns:
            A list of ``{"title", "url", "description"}`` dicts, or ``[]`` when
            the package is missing or every attempt fails.
        """
        log_debug(f"Performing Google search for: {query}")

        try:
            from googlesearch import search
        except ImportError:
            logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
            return []

        for attempt in range(self.max_search_retries):
            try:
                self._respect_rate_limits()

                results = []
                # ``advanced=True`` yields result objects with title/url/description;
                # without it the package yields bare URL strings.
                search_results = search(query, num_results=self.max_results, advanced=True)

                for result in search_results:
                    if isinstance(result, str):
                        # Defensive: plain URL string instead of a result object.
                        results.append({"title": "", "url": result, "description": ""})
                        continue
                    results.append(
                        {
                            "title": getattr(result, "title", "") or "",
                            "url": getattr(result, "url", "") or "",
                            "description": getattr(result, "description", "") or "",
                        }
                    )

                log_debug(f"Found {len(results)} Google search results")
                return results

            except Exception as e:
                logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
                if attempt < self.max_search_retries - 1:
                    time.sleep(self.search_delay)
                else:
                    logger.error(f"All Google search attempts failed: {e}")
                    return []

        return []

    def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using the configured search engine."""
        if self.search_engine == "duckduckgo":
            return self._perform_duckduckgo_search(query)
        elif self.search_engine == "google":
            return self._perform_google_search(query)
        else:
            logger.error(f"Unsupported search engine: {self.search_engine}")
            return []

    def _is_valid_url(self, url: str) -> bool:
        """Return True for an http(s) URL with a host that has not been visited yet."""
        try:
            parsed = urlparse(url)
            return bool(parsed.scheme in ["http", "https"] and parsed.netloc and url not in self._visited_urls)
        except Exception:
            return False

    def _extract_text_from_html(self, html_content: str, url: str) -> str:
        """Strip markup from ``html_content`` and collapse whitespace.

        Falls back to returning the raw HTML if parsing fails; ``url`` is only
        used in the warning message.
        """
        try:
            soup = BeautifulSoup(html_content, "html.parser")

            # Remove script and style elements
            for script in soup(["script", "style"]):
                script.decompose()

            # Get text content
            text = soup.get_text()

            # Clean up whitespace
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            text = " ".join(chunk for chunk in chunks if chunk)

            return text

        except Exception as e:
            logger.warning(f"Error extracting text from {url}: {e}")
            return html_content

    def _fetch_url_content(self, url: str) -> Optional[str]:
        """Download ``url`` (up to ``max_retries`` attempts) and return its text, or None on failure."""
        headers = {"User-Agent": self.user_agent}

        for attempt in range(self.max_retries):
            try:
                response = httpx.get(url, headers=headers, timeout=self.request_timeout, follow_redirects=True)
                response.raise_for_status()

                # Only HTML is cleaned; any other content type is returned as-is.
                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    return self._extract_text_from_html(response.text, url)
                return response.text

            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
                if attempt < self.max_retries - 1:
                    time.sleep(random.uniform(1, 3))  # Random delay between retries
                    continue

        logger.error(f"Failed to fetch content from {url} after {self.max_retries} attempts")
        return None

    def _create_document_from_url(self, url: str, content: str, search_result: Dict[str, str]) -> Document:
        """Create a Document from page content plus the search result's metadata.

        The URL is the document id. The name is the search result title, or the
        URL's host when the title is missing or empty.
        """
        doc_id = url

        # ``or`` (not a .get default) so an empty title also falls back to the host.
        doc_name = search_result.get("title") or urlparse(url).netloc

        meta_data = {
            "url": url,
            "search_title": search_result.get("title", ""),
            "search_description": search_result.get("description", ""),
            "source": "web_search",
            "search_engine": self.search_engine,
        }

        return Document(id=doc_id, name=doc_name, content=content, meta_data=meta_data)

    def read(self, query: str) -> List[Document]:
        """Search for ``query``, download the result pages and return them as documents.

        Pages are fetched one at a time. Collection stops once
        ``self.max_results`` documents (chunks, when chunking is enabled) have
        been gathered.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting web search reader for query: {query}")

        search_results = self._perform_web_search(query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        documents: List[Document] = []

        for result in search_results:
            url = result.get("url", "")

            # Skip if URL is invalid or already visited
            if not self._is_valid_url(url):
                continue

            # Mark URL as visited
            self._visited_urls.add(url)

            # Add delay between requests to be respectful
            if documents:
                time.sleep(self.delay_between_requests)

            content = self._fetch_url_content(url)
            if content is None:
                continue

            document = self._create_document_from_url(url, content, result)

            # Apply chunking if enabled
            if self.chunk:
                documents.extend(self.chunk_document(document))
            else:
                documents.append(document)

            # Stop if we've reached max_results
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from web search")
        return documents

    async def _fetch_document_async(self, result: Dict[str, str]) -> Optional[Document]:
        """Download one search result without blocking and wrap it in a Document.

        Returns None when the URL is invalid, already visited, or the download fails.
        """
        url = result.get("url", "")

        # Skip if URL is invalid or already visited
        if not self._is_valid_url(url):
            return None

        # Mark URL as visited
        self._visited_urls.add(url)

        try:
            headers = {"User-Agent": self.user_agent}
            async with httpx.AsyncClient(timeout=self.request_timeout) as client:
                response = await client.get(url, headers=headers, follow_redirects=True)
                response.raise_for_status()

                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    content = self._extract_text_from_html(response.text, url)
                else:
                    content = response.text

                return self._create_document_from_url(url, content, result)

        except Exception as e:
            logger.warning(f"Error fetching {url}: {e}")
            return None

    async def async_read(self, query: str) -> List[Document]:
        """Asynchronous counterpart of ``read``.

        The search itself is still synchronous. Pages are then downloaded one
        after another, with ``delay_between_requests`` between them, until
        ``self.max_results`` documents have been collected.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting async web search reader for query: {query}")

        # Perform web search (synchronous operation)
        search_results = self._perform_web_search(query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        documents: List[Document] = []
        for index, result in enumerate(search_results):
            if index > 0:  # Add delay between requests (except for the first one)
                await asyncio.sleep(self.delay_between_requests)

            # Each coroutine is created only when it is awaited, so breaking out
            # early leaves no never-awaited coroutine behind.
            doc = await self._fetch_document_async(result)
            if doc is None:
                continue

            # Apply chunking if enabled
            if self.chunk:
                documents.extend(await self.chunk_documents_async([doc]))
            else:
                documents.append(doc)

            # Stop if we've reached max_results
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from async web search")
        return documents