agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,137 @@
1
+ import asyncio
2
+ import uuid
3
+ from pathlib import Path
4
+ from typing import IO, Any, List, Optional, Union
5
+
6
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
7
+ from agno.knowledge.document.base import Document
8
+ from agno.knowledge.reader.base import Reader
9
+ from agno.knowledge.types import ContentType
10
+ from agno.utils.log import log_debug, log_error, log_warning
11
+
12
# Module-wide default chunking strategy handed to MarkdownReader when the caller
# does not supply one. Declared here so both branches below bind the same name.
DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy

# Try to import MarkdownChunking, fallback to FixedSizeChunking if not available
try:
    from agno.knowledge.chunking.markdown import MarkdownChunking

    DEFAULT_CHUNKER_STRATEGY = MarkdownChunking()
    # Read by MarkdownReader.get_supported_chunking_strategies() to decide
    # whether MARKDOWN_CHUNKER is advertised.
    MARKDOWN_CHUNKER_AVAILABLE = True
except ImportError:
    from agno.knowledge.chunking.fixed import FixedSizeChunking

    DEFAULT_CHUNKER_STRATEGY = FixedSizeChunking()
    MARKDOWN_CHUNKER_AVAILABLE = False
25
+
26
+
27
class MarkdownReader(Reader):
    """Reader for Markdown files.

    Accepts either a filesystem ``Path`` or an uploaded file-like object and
    returns the content as ``Document`` objects, chunked when ``self.chunk`` is set.
    Any failure while reading is logged and reported as an empty list.
    """

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Markdown readers."""
        strategies = [
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

        # Only include MarkdownChunking if it's available
        if MARKDOWN_CHUNKER_AVAILABLE:
            strategies.insert(0, ChunkingStrategyType.MARKDOWN_CHUNKER)

        return strategies

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the content types this reader can handle."""
        return [ContentType.MARKDOWN]

    def __init__(
        self,
        chunking_strategy: Optional[ChunkingStrategy] = None,
        name: Optional[str] = None,
        description: Optional[str] = None,
    ) -> None:
        """Create the reader, falling back to the module default chunking strategy.

        Args:
            chunking_strategy: Strategy used to chunk documents; the module-level
                default is used when None.
            name: Optional reader name, forwarded to the base class.
            description: Optional reader description, forwarded to the base class.
        """
        # Use the default chunking strategy if none provided
        if chunking_strategy is None:
            chunking_strategy = DEFAULT_CHUNKER_STRATEGY

        super().__init__(chunking_strategy=chunking_strategy, name=name, description=description)

    def _read_stream(self, file: IO[Any]) -> str:
        """Return the full text of an uploaded file-like object, read from its start."""
        file.seek(0)
        raw = file.read()
        # Text-mode streams (e.g. io.StringIO) already yield str; calling
        # .decode() on them raised and the document was silently dropped.
        if isinstance(raw, str):
            return raw
        return raw.decode(self.encoding or "utf-8")

    def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Read a Markdown file and return its documents.

        Args:
            file: Path to the file, or a file-like object exposing ``name``, ``seek`` and ``read``.
            name: Optional document name; derived from the file name when omitted.

        Returns:
            The chunked documents when chunking is enabled, otherwise a single
            document. An empty list when reading fails (the error is logged).
        """
        try:
            if isinstance(file, Path):
                if not file.exists():
                    raise FileNotFoundError(f"Could not find file: {file}")
                log_debug(f"Reading: {file}")
                file_name = name or file.stem
                file_contents = file.read_text(encoding=self.encoding or "utf-8")
            else:
                log_debug(f"Reading uploaded file: {file.name}")
                file_name = name or file.name.split(".")[0]
                file_contents = self._read_stream(file)

            document = Document(name=file_name, id=str(uuid.uuid4()), content=file_contents)
            if self.chunk:
                return list(self.chunk_document(document))
            return [document]
        except Exception as e:
            log_error(f"Error reading: {file}: {e}")
            return []

    async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Asynchronous counterpart of :meth:`read`.

        Paths are read through ``aiofiles`` when it is installed; otherwise a
        warning is logged and the file is read synchronously. File-like objects
        are always read synchronously.

        Args:
            file: Path to the file, or a file-like object exposing ``name``, ``seek`` and ``read``.
            name: Optional document name; derived from the file name when omitted.

        Returns:
            The chunked documents when chunking is enabled, otherwise a single
            document. An empty list when reading fails (the error is logged).
        """
        try:
            if isinstance(file, Path):
                if not file.exists():
                    raise FileNotFoundError(f"Could not find file: {file}")

                log_debug(f"Reading asynchronously: {file}")
                file_name = name or file.stem

                try:
                    import aiofiles

                    async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
                        file_contents = await f.read()
                except ImportError:
                    log_warning("aiofiles not installed, using synchronous file I/O")
                    file_contents = file.read_text(encoding=self.encoding or "utf-8")
            else:
                log_debug(f"Reading uploaded file asynchronously: {file.name}")
                file_name = name or file.name.split(".")[0]
                file_contents = self._read_stream(file)

            document = Document(
                name=file_name,
                id=str(uuid.uuid4()),
                content=file_contents,
            )

            if self.chunk:
                return await self._async_chunk_document(document)
            return [document]
        except Exception as e:
            log_error(f"Error reading asynchronously: {file}: {e}")
            return []

    async def _async_chunk_document(self, document: Document) -> List[Document]:
        """Chunk ``document``, returning it unchanged when chunking is off or yields nothing."""
        if not self.chunk or not document:
            return [document]

        # Chunking itself is synchronous; the previous per-chunk coroutines
        # only returned their argument, so the result is returned directly.
        chunked_documents = self.chunk_document(document)
        if not chunked_documents:
            return [document]
        return list(chunked_documents)
@@ -0,0 +1,431 @@
1
+ import asyncio
2
+ import re
3
+ from pathlib import Path
4
+ from typing import IO, Any, List, Optional, Tuple, Union
5
+ from uuid import uuid4
6
+
7
+ from agno.knowledge.chunking.document import DocumentChunking
8
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
9
+ from agno.knowledge.document.base import Document
10
+ from agno.knowledge.reader.base import Reader
11
+ from agno.knowledge.types import ContentType
12
+ from agno.utils.log import log_debug, log_error
13
+
14
+ try:
15
+ from pypdf import PdfReader as DocumentReader # noqa: F401
16
+ from pypdf.errors import PdfStreamError
17
+ except ImportError:
18
+ raise ImportError("`pypdf` not installed. Please install it via `pip install pypdf`.")
19
+
20
+
21
# Default markers wrapped around each page's text once its page number has been
# detected; `{page_nr}` is filled in via str.format().
PAGE_START_NUMBERING_FORMAT_DEFAULT = "<start page {page_nr}>"
PAGE_END_NUMBERING_FORMAT_DEFAULT = "<end page {page_nr}>"
# Minimum fraction of pages whose detected number must fit the best sequence
# before page numbers are stripped and re-formatted.
PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL = 0.4
24
+
25
+
26
def _ocr_reader(page: Any) -> str:
    """Run OCR over every image embedded in a PDF page and return the recognized text.

    Args:
        page: A single PDF page object exposing an `images` iterable whose
            items carry the raw image bytes in `.data`.

    Returns:
        The recognized text fragments of all images joined by newlines
        (an empty string when nothing was recognized).

    Raises:
        ImportError: If `rapidocr_onnxruntime` is not installed.
    """
    try:
        import rapidocr_onnxruntime as rapidocr
    except ImportError:
        raise ImportError(
            "`rapidocr_onnxruntime` not installed. Please install it via `pip install rapidocr_onnxruntime`."
        )
    ocr = rapidocr.RapidOCR()
    images_text_list: List[str] = []

    # Extract and process images
    for image_object in page.images:
        # The engine returns a (result, elapsed) pair; only the result is used.
        ocr_result, _ = ocr(image_object.data)

        # Extract text from OCR result (second field of every entry)
        if ocr_result:
            images_text_list.extend(item[1] for item in ocr_result)

    return "\n".join(images_text_list)
48
+
49
+
50
async def _async_ocr_reader(page: Any) -> str:
    """Coroutine variant of the page OCR helper.

    Args:
        page: A single PDF page object whose `images` items expose raw bytes in `.data`.

    Returns:
        All text recognized in the page's images, one fragment per line.

    Raises:
        ImportError: If `rapidocr_onnxruntime` is not installed.
    """
    try:
        import rapidocr_onnxruntime as rapidocr
    except ImportError:
        raise ImportError(
            "`rapidocr_onnxruntime` not installed. Please install it via `pip install rapidocr_onnxruntime`."
        )
    engine = rapidocr.RapidOCR()

    async def _texts_for(raw_image: bytes) -> List[str]:
        # Only the first element of the engine's return pair is needed.
        recognized, _ = engine(raw_image)
        if not recognized:
            return []
        return [entry[1] for entry in recognized]

    # One coroutine per image; gather returns the results in image order.
    per_image_texts = await asyncio.gather(*[_texts_for(image.data) for image in page.images])

    return "\n".join(text for texts in per_image_texts for text in texts)
74
+
75
+
76
def _clean_page_numbers(
    page_content_list: List[str],
    extra_content: Optional[List[str]] = None,
    page_start_numbering_format: str = PAGE_START_NUMBERING_FORMAT_DEFAULT,
    page_end_numbering_format: str = PAGE_END_NUMBERING_FORMAT_DEFAULT,
) -> Tuple[List[str], Optional[int]]:
    """
    Identifies and removes or reformats page numbers from a list of PDF page contents, based on the most consistent sequential numbering.

    Args:
        page_content_list (List[str]): A list of strings where each string represents the content of a PDF page.
            The list itself is not modified; a new list is returned.
        extra_content (Optional[List[str]]): A list of strings where each string will be appended after the main
            content of the page at the same index. Can be used for appending image information. Must be empty/None
            or have the same length as `page_content_list`.
        page_start_numbering_format (str): A format string to prepend to the page content, with `{page_nr}` as a placeholder for the page number.
            Defaults to `<start page {page_nr}>`. Make it an empty string to remove the page number.
        page_end_numbering_format (str): A format string to append to the page content, with `{page_nr}` as a placeholder for the page number.
            Defaults to `<end page {page_nr}>`. Make it an empty string to remove the page number.

    Returns:
        List[str]: The list of page contents with page numbers removed or reformatted based on the detected sequence.
        Optional[int]: The shift for the page numbering. Can be (-2, -1, 0, 1, 2), or None when no reliable
            numbering was detected.

    Notes:
        - The function scans for page numbers using a regular expression that matches digits at the start or end of a string.
        - It evaluates several potential starting points for numbering (-2, -1, 0, 1, 2 shifts) to determine the most consistent sequence.
        - If at least a specified ratio of pages (defined by `PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL`) has correct sequential numbering,
          the page numbers are processed.
        - If page numbers are found, the function will add formatted page numbers to each page's content if `page_start_numbering_format` or
          `page_end_numbering_format` is provided.
        - When no reliable numbering is found the page text is left untouched, but `extra_content` is still appended.
    """
    extras: List[str] = extra_content or []
    assert len(extras) == 0 or len(extras) == len(page_content_list), (
        "Please provide an equally sized list of extra content if provided."
    )

    def with_extra_content(contents: List[str]) -> List[str]:
        # Fallback formatting used whenever page numbers are not rewritten.
        if not extras:
            return list(contents)
        return [f"\n{content}\n{extra}" for content, extra in zip(contents, extras)]

    # Regex to match potential page numbers at the start or end of a string
    page_number_regex = re.compile(r"^\s*(\d+)\s*|\s*(\d+)\s*$")

    def find_page_number(content):
        match = page_number_regex.search(content)
        if match:
            return int(match.group(1) or match.group(2))
        return None

    page_numbers = [find_page_number(content) for content in page_content_list]
    if all(x is None or x > 5 for x in page_numbers):
        # This approach won't work reliably for higher page numbers.
        return with_extra_content(page_content_list), None

    # Possible range shifts to detect page numbering
    range_shifts = [-2, -1, 0, 1, 2]
    best_match, best_correct_count, best_shift = _identify_best_page_sequence(page_numbers, range_shifts)

    # Check if at least ..% of the pages have correct sequential numbering
    if not best_match or best_correct_count / len(page_numbers) < PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL:
        # Previously the extra content was silently dropped on this path.
        return with_extra_content(page_content_list), None

    cleaned_pages: List[str] = []
    for i, expected_number in enumerate(best_match):
        # Remove the page number from the content
        content = re.sub(rf"^\s*{expected_number}\s*|\s*{expected_number}\s*$", "", page_content_list[i])

        page_start = (
            page_start_numbering_format.format(page_nr=expected_number) + "\n" if page_start_numbering_format else ""
        )
        page_end = (
            "\n" + page_end_numbering_format.format(page_nr=expected_number) if page_end_numbering_format else ""
        )
        extra_info = "\n" + extras[i] if extras else ""

        # Add formatted page numbering if configured.
        cleaned_pages.append(page_start + content + extra_info + page_end)

    return cleaned_pages, best_shift
155
+
156
+
157
def _identify_best_page_sequence(page_numbers, range_shifts):
    """Pick the shift whose consecutive numbering best fits the detected page numbers.

    For every shift the expected sequence is `index + shift`. A page counts as
    correct when its detected number equals the expected one, or when the
    expected number is a prefix/suffix of it (e.g. page number "2" merged with
    content into "25"). Ties keep the earliest shift in `range_shifts`.

    Returns:
        A `(expected_numbers, correct_count, shift)` tuple for the best shift,
        or `(None, 0, None)` when no shift matches any page.
    """

    def _fits(actual, expected) -> bool:
        if actual == expected:
            return True
        actual_text, expected_text = str(actual), str(expected)
        return actual_text.startswith(expected_text) or actual_text.endswith(expected_text)

    winner = (None, 0, None)
    page_count = len(page_numbers)

    for shift in range_shifts:
        candidate = [index + shift for index in range(page_count)]
        hits = sum(1 for actual, expected in zip(page_numbers, candidate) if _fits(actual, expected))

        # Strictly greater, so an earlier shift wins a tie.
        if hits > winner[1]:
            winner = (candidate, hits, shift)

    return winner
178
+
179
+
180
class BasePDFReader(Reader):
    """Shared machinery for PDF readers: naming, decryption, page clean-up and document creation."""

    def __init__(
        self,
        split_on_pages: bool = True,
        page_start_numbering_format: Optional[str] = None,
        page_end_numbering_format: Optional[str] = None,
        password: Optional[str] = None,
        chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
        **kwargs,
    ):
        """Configure the reader.

        Args:
            split_on_pages: Emit one document per page when True, otherwise a single document.
            page_start_numbering_format: Marker prepended to each page; the module default is used when None.
            page_end_numbering_format: Marker appended to each page; the module default is used when None.
            password: Default password tried on encrypted PDFs.
            chunking_strategy: Strategy forwarded to the base reader.
                NOTE(review): the default instance is created once at definition time and is
                therefore shared by every reader that relies on it.
            **kwargs: Forwarded to the base reader.
        """
        if page_start_numbering_format is None:
            page_start_numbering_format = PAGE_START_NUMBERING_FORMAT_DEFAULT
        if page_end_numbering_format is None:
            page_end_numbering_format = PAGE_END_NUMBERING_FORMAT_DEFAULT

        self.split_on_pages = split_on_pages
        self.page_start_numbering_format = page_start_numbering_format
        self.page_end_numbering_format = page_end_numbering_format
        self.password = password

        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for PDF readers."""
        return [
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
        ]

    def _build_chunked_documents(self, documents: List[Document]) -> List[Document]:
        """Chunk every document and return the chunks as one flat list."""
        chunked_documents: List[Document] = []
        for document in documents:
            chunked_documents.extend(self.chunk_document(document))
        return chunked_documents

    def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
        """Determines the document name from the source or a provided name.

        An explicit `name` wins. String sources use the last `/`-separated
        component up to its first dot, with spaces replaced by underscores.
        Other sources use their `.name` attribute up to the first dot.
        Falls back to "pdf" when no name can be derived.
        """
        try:
            if name:
                return name
            if isinstance(pdf_source, str):
                return pdf_source.split("/")[-1].split(".")[0].replace(" ", "_")
            # Assumes an object with a .name attribute (file-like object or Path)
            return pdf_source.name.split(".")[0]
        except Exception:
            # No usable name on the source (e.g. an in-memory stream without .name).
            return name or "pdf"

    def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
        """Decrypt `doc_reader` if needed.

        Args:
            doc_reader: The opened PDF.
            doc_name: Name used in log messages.
            password: Password to try; falls back to the instance password.

        Returns:
            True when the PDF is readable (not encrypted, or successfully decrypted),
            False otherwise (the reason is logged).
        """
        if not doc_reader.is_encrypted:
            return True

        # Use provided password or fall back to instance password
        pdf_password = password or self.password
        if not pdf_password:
            log_error(f'PDF file "{doc_name}" is password protected but no password provided')
            return False

        try:
            decrypted_pdf = doc_reader.decrypt(pdf_password)
            if decrypted_pdf:
                log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
                return True
            else:
                log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
                return False
        except Exception as e:
            log_error(f'Error decrypting PDF file "{doc_name}": {e}')
            return False

    def _create_documents(
        self,
        pdf_content: List[str],
        doc_name: str,
        use_uuid_for_id: bool,
        page_number_shift: Optional[int],
    ) -> List[Document]:
        """Turn cleaned page texts into documents, chunking them when enabled.

        Args:
            pdf_content: One string per page.
            doc_name: Name given to every document.
            use_uuid_for_id: Use random UUIDs as ids instead of name-derived ids.
            page_number_shift: Number assigned to the first page; 1 is used when None.
        """
        if self.split_on_pages:
            shift = page_number_shift if page_number_shift is not None else 1
            documents: List[Document] = [
                Document(
                    name=doc_name,
                    id=(str(uuid4()) if use_uuid_for_id else f"{doc_name}_{page_number}"),
                    meta_data={"page": page_number},
                    content=page_content,
                )
                for page_number, page_content in enumerate(pdf_content, start=shift)
            ]
        else:
            documents = [
                Document(
                    name=doc_name,
                    id=str(uuid4()) if use_uuid_for_id else doc_name,
                    meta_data={},
                    content="\n".join(pdf_content),
                )
            ]

        if self.chunk:
            return self._build_chunked_documents(documents)
        return documents

    def _pdf_reader_to_documents(
        self,
        doc_reader: DocumentReader,
        doc_name,
        read_images=False,
        use_uuid_for_id=False,
    ):
        """Extract the text (and optionally OCR'd image text) of every page and build documents."""
        pdf_content = []
        pdf_images_text = []
        for page in doc_reader.pages:
            pdf_content.append(page.extract_text())
            if read_images:
                pdf_images_text.append(_ocr_reader(page))

        pdf_content, shift = _clean_page_numbers(
            page_content_list=pdf_content,
            extra_content=pdf_images_text,
            page_start_numbering_format=self.page_start_numbering_format,
            page_end_numbering_format=self.page_end_numbering_format,
        )
        return self._create_documents(pdf_content, doc_name, use_uuid_for_id, shift)

    async def _async_pdf_reader_to_documents(
        self,
        doc_reader: DocumentReader,
        doc_name: str,
        read_images=False,
        use_uuid_for_id=False,
    ):
        """Asynchronous counterpart of `_pdf_reader_to_documents`; produces the same page text."""

        async def _read_pdf_page(page) -> Tuple[str, str]:
            # We tried "asyncio.to_thread(page.extract_text)", but it maintains state internally, which leads to issues.
            page_text = page.extract_text()
            images_text = await _async_ocr_reader(page) if read_images else ""
            return page_text, images_text

        # One coroutine per page; gather keeps the results in page order.
        pages: List[Tuple[str, str]] = await asyncio.gather(*[_read_pdf_page(page) for page in doc_reader.pages])

        # Only pass image text when images were actually read. A list of empty
        # strings would still be treated as extra content and change the page
        # text compared to the synchronous path.
        images_text_list = [images_text for _, images_text in pages] if read_images else []

        pdf_content_clean, shift = _clean_page_numbers(
            page_content_list=[page_text for page_text, _ in pages],
            extra_content=images_text_list,
            page_start_numbering_format=self.page_start_numbering_format,
            page_end_numbering_format=self.page_end_numbering_format,
        )

        return self._create_documents(pdf_content_clean, doc_name, use_uuid_for_id, shift)
334
+
335
+
336
class PDFReader(BasePDFReader):
    """Reader for PDF files"""

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the content types this reader can handle."""
        return [ContentType.PDF]

    def _open_document_reader(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str], password: Optional[str]
    ) -> Optional[Tuple[DocumentReader, str]]:
        """Open and, if needed, decrypt `pdf`.

        Returns:
            The opened reader and the resolved document name, or None when the
            PDF could not be parsed or decrypted (the reason is logged).
        """
        doc_name = self._get_doc_name(pdf, name)
        log_debug(f"Reading: {doc_name}")

        try:
            pdf_reader = DocumentReader(pdf)
        except PdfStreamError as e:
            log_error(f"Error reading PDF: {e}")
            return None

        # Handle PDF decryption
        if not self._decrypt_pdf(pdf_reader, doc_name, password):
            return None
        return pdf_reader, doc_name

    def read(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Read a PDF and return its documents.

        Args:
            pdf: Path, path string or file-like object of the PDF.
            name: Optional document name; derived from the source when omitted.
            password: Password for encrypted PDFs; falls back to the instance password.

        Returns:
            The extracted documents, or an empty list when no PDF was given or it
            could not be parsed or decrypted.
        """
        # Same guard as async_read, so both entry points treat a missing PDF alike.
        if pdf is None:
            log_error("No pdf provided")
            return []

        opened = self._open_document_reader(pdf, name, password)
        if opened is None:
            return []
        pdf_reader, doc_name = opened

        # Read and chunk
        return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)

    async def async_read(
        self,
        pdf: Optional[Union[str, Path, IO[Any]]] = None,
        name: Optional[str] = None,
        password: Optional[str] = None,
    ) -> List[Document]:
        """Asynchronous counterpart of :meth:`read`, with the same arguments and return value."""
        if pdf is None:
            log_error("No pdf provided")
            return []

        opened = self._open_document_reader(pdf, name, password)
        if opened is None:
            return []
        pdf_reader, doc_name = opened

        # Read and chunk.
        return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
385
+
386
+
387
class PDFImageReader(BasePDFReader):
    """Reader for PDF files with text and images extraction"""

    def _prepare(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str], password: Optional[str]
    ) -> Optional[Tuple[DocumentReader, str]]:
        """Validate, open and decrypt `pdf`.

        Returns:
            The opened reader with the resolved document name, or None when the
            PDF could not be parsed or decrypted.

        Raises:
            ValueError: If `pdf` is falsy.
        """
        if not pdf:
            raise ValueError("No pdf provided")

        doc_name = self._get_doc_name(pdf, name)
        log_debug(f"Reading: {doc_name}")

        try:
            opened_reader = DocumentReader(pdf)
        except PdfStreamError as e:
            log_error(f"Error reading PDF: {e}")
            return None

        # Handle PDF decryption
        if self._decrypt_pdf(opened_reader, doc_name, password):
            return opened_reader, doc_name
        return None

    def read(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Read a PDF, including OCR of its embedded images, and return its documents.

        Returns an empty list when the PDF cannot be parsed or decrypted.
        Raises ValueError when no PDF is given.
        """
        prepared = self._prepare(pdf, name, password)
        if prepared is None:
            return []

        # Read and chunk.
        opened_reader, doc_name = prepared
        return self._pdf_reader_to_documents(opened_reader, doc_name, read_images=True, use_uuid_for_id=True)

    async def async_read(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Asynchronous counterpart of :meth:`read`, with the same arguments, result and errors."""
        prepared = self._prepare(pdf, name, password)
        if prepared is None:
            return []

        # Read and chunk.
        opened_reader, doc_name = prepared
        return await self._async_pdf_reader_to_documents(
            opened_reader, doc_name, read_images=True, use_uuid_for_id=True
        )
@@ -0,0 +1,101 @@
1
+ import asyncio
2
+ from pathlib import Path
3
+ from typing import IO, Any, List, Optional, Union
4
+ from uuid import uuid4
5
+
6
+ from agno.knowledge.chunking.document import DocumentChunking
7
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
+ from agno.knowledge.document.base import Document
9
+ from agno.knowledge.reader.base import Reader
10
+ from agno.knowledge.types import ContentType
11
+ from agno.utils.log import log_debug, log_error
12
+
13
+ try:
14
+ from pptx import Presentation # type: ignore
15
+ except ImportError:
16
+ raise ImportError("The `python-pptx` package is not installed. Please install it via `pip install python-pptx`.")
17
+
18
+
19
class PPTXReader(Reader):
    """Reader for PPTX files"""

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(), **kwargs):
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for PPTX readers."""
        return [
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        return [ContentType.PPTX]

    def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Read a pptx file and return a list of documents

        The text of all slides is combined into a single document (one
        "Slide N:" section per slide), which is chunked when chunking is
        enabled. Any failure is logged and reported as an empty list.
        """

        def _shape_texts(slide):
            # Yield the stripped, non-empty text of every shape that exposes `.text`.
            for shape in slide.shapes:
                if not hasattr(shape, "text"):
                    continue
                stripped = shape.text.strip()
                if stripped:
                    yield stripped

        try:
            if isinstance(file, Path):
                if not file.exists():
                    raise FileNotFoundError(f"Could not find file: {file}")
                log_debug(f"Reading: {file}")
                presentation = Presentation(str(file))
                doc_name = name or file.stem
            else:
                # Streams without a `.name` fall back to a generic label.
                upload_name = getattr(file, "name", "pptx_file")
                log_debug(f"Reading uploaded file: {upload_name}")
                presentation = Presentation(file)
                doc_name = name or upload_name.split(".")[0]

            # Extract text from all slides
            sections = []
            for slide_number, slide in enumerate(presentation.slides, 1):
                body = "\n".join(_shape_texts(slide)) or "(No text content)"
                sections.append(f"Slide {slide_number}:\n" + body)

            document = Document(
                name=doc_name,
                id=str(uuid4()),
                content="\n\n".join(sections),
            )

            if not self.chunk:
                return [document]
            return list(self.chunk_document(document))

        except Exception as e:
            log_error(f"Error reading file: {e}")
            return []

    async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Asynchronously read a pptx file and return a list of documents"""
        try:
            # The synchronous reader runs in a worker thread.
            documents = await asyncio.to_thread(self.read, file, name)
        except Exception as e:
            log_error(f"Error reading file asynchronously: {e}")
            return []
        return documents