agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331)
  1. agno/agent/agent.py +6009 -2874
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +595 -187
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +3 -0
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +339 -266
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +1011 -566
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +110 -37
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +143 -4
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +60 -6
  142. agno/models/openai/chat.py +102 -43
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +81 -5
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -175
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +266 -112
  205. agno/run/base.py +53 -24
  206. agno/run/team.py +252 -111
  207. agno/run/workflow.py +156 -45
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1692
  213. agno/tools/brightdata.py +3 -3
  214. agno/tools/cartesia.py +3 -5
  215. agno/tools/dalle.py +9 -8
  216. agno/tools/decorator.py +4 -2
  217. agno/tools/desi_vocal.py +2 -2
  218. agno/tools/duckduckgo.py +15 -11
  219. agno/tools/e2b.py +20 -13
  220. agno/tools/eleven_labs.py +26 -28
  221. agno/tools/exa.py +21 -16
  222. agno/tools/fal.py +4 -4
  223. agno/tools/file.py +153 -23
  224. agno/tools/file_generation.py +350 -0
  225. agno/tools/firecrawl.py +4 -4
  226. agno/tools/function.py +257 -37
  227. agno/tools/giphy.py +2 -2
  228. agno/tools/gmail.py +238 -14
  229. agno/tools/google_drive.py +270 -0
  230. agno/tools/googlecalendar.py +36 -8
  231. agno/tools/googlesheets.py +20 -5
  232. agno/tools/jira.py +20 -0
  233. agno/tools/knowledge.py +3 -3
  234. agno/tools/lumalab.py +3 -3
  235. agno/tools/mcp/__init__.py +10 -0
  236. agno/tools/mcp/mcp.py +331 -0
  237. agno/tools/mcp/multi_mcp.py +347 -0
  238. agno/tools/mcp/params.py +24 -0
  239. agno/tools/mcp_toolbox.py +284 -0
  240. agno/tools/mem0.py +11 -17
  241. agno/tools/memori.py +1 -53
  242. agno/tools/memory.py +419 -0
  243. agno/tools/models/azure_openai.py +2 -2
  244. agno/tools/models/gemini.py +3 -3
  245. agno/tools/models/groq.py +3 -5
  246. agno/tools/models/nebius.py +7 -7
  247. agno/tools/models_labs.py +25 -15
  248. agno/tools/notion.py +204 -0
  249. agno/tools/openai.py +4 -9
  250. agno/tools/opencv.py +3 -3
  251. agno/tools/parallel.py +314 -0
  252. agno/tools/replicate.py +7 -7
  253. agno/tools/scrapegraph.py +58 -31
  254. agno/tools/searxng.py +2 -2
  255. agno/tools/serper.py +2 -2
  256. agno/tools/slack.py +18 -3
  257. agno/tools/spider.py +2 -2
  258. agno/tools/tavily.py +146 -0
  259. agno/tools/whatsapp.py +1 -1
  260. agno/tools/workflow.py +278 -0
  261. agno/tools/yfinance.py +12 -11
  262. agno/utils/agent.py +820 -0
  263. agno/utils/audio.py +27 -0
  264. agno/utils/common.py +90 -1
  265. agno/utils/events.py +222 -7
  266. agno/utils/gemini.py +181 -23
  267. agno/utils/hooks.py +57 -0
  268. agno/utils/http.py +111 -0
  269. agno/utils/knowledge.py +12 -5
  270. agno/utils/log.py +1 -0
  271. agno/utils/mcp.py +95 -5
  272. agno/utils/media.py +188 -10
  273. agno/utils/merge_dict.py +22 -1
  274. agno/utils/message.py +60 -0
  275. agno/utils/models/claude.py +40 -11
  276. agno/utils/models/cohere.py +1 -1
  277. agno/utils/models/watsonx.py +1 -1
  278. agno/utils/openai.py +1 -1
  279. agno/utils/print_response/agent.py +105 -21
  280. agno/utils/print_response/team.py +103 -38
  281. agno/utils/print_response/workflow.py +251 -34
  282. agno/utils/reasoning.py +22 -1
  283. agno/utils/serialize.py +32 -0
  284. agno/utils/streamlit.py +16 -10
  285. agno/utils/string.py +41 -0
  286. agno/utils/team.py +98 -9
  287. agno/utils/tools.py +1 -1
  288. agno/vectordb/base.py +23 -4
  289. agno/vectordb/cassandra/cassandra.py +65 -9
  290. agno/vectordb/chroma/chromadb.py +182 -38
  291. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  292. agno/vectordb/couchbase/couchbase.py +105 -10
  293. agno/vectordb/lancedb/lance_db.py +183 -135
  294. agno/vectordb/langchaindb/langchaindb.py +25 -7
  295. agno/vectordb/lightrag/lightrag.py +17 -3
  296. agno/vectordb/llamaindex/__init__.py +3 -0
  297. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  298. agno/vectordb/milvus/milvus.py +126 -9
  299. agno/vectordb/mongodb/__init__.py +7 -1
  300. agno/vectordb/mongodb/mongodb.py +112 -7
  301. agno/vectordb/pgvector/pgvector.py +142 -21
  302. agno/vectordb/pineconedb/pineconedb.py +80 -8
  303. agno/vectordb/qdrant/qdrant.py +125 -39
  304. agno/vectordb/redis/__init__.py +9 -0
  305. agno/vectordb/redis/redisdb.py +694 -0
  306. agno/vectordb/singlestore/singlestore.py +111 -25
  307. agno/vectordb/surrealdb/surrealdb.py +31 -5
  308. agno/vectordb/upstashdb/upstashdb.py +76 -8
  309. agno/vectordb/weaviate/weaviate.py +86 -15
  310. agno/workflow/__init__.py +2 -0
  311. agno/workflow/agent.py +299 -0
  312. agno/workflow/condition.py +112 -18
  313. agno/workflow/loop.py +69 -10
  314. agno/workflow/parallel.py +266 -118
  315. agno/workflow/router.py +110 -17
  316. agno/workflow/step.py +645 -136
  317. agno/workflow/steps.py +65 -6
  318. agno/workflow/types.py +71 -33
  319. agno/workflow/workflow.py +2113 -300
  320. agno-2.3.0.dist-info/METADATA +618 -0
  321. agno-2.3.0.dist-info/RECORD +577 -0
  322. agno-2.3.0.dist-info/licenses/LICENSE +201 -0
  323. agno/knowledge/reader/url_reader.py +0 -128
  324. agno/tools/googlesearch.py +0 -98
  325. agno/tools/mcp.py +0 -610
  326. agno/utils/models/aws_claude.py +0 -170
  327. agno-2.0.0rc2.dist-info/METADATA +0 -355
  328. agno-2.0.0rc2.dist-info/RECORD +0 -515
  329. agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
  330. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  331. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
@@ -4,11 +4,12 @@ from pathlib import Path
4
4
  from typing import IO, Any, List, Optional, Tuple, Union
5
5
  from uuid import uuid4
6
6
 
7
- from agno.knowledge.chunking.strategy import ChunkingStrategyType
7
+ from agno.knowledge.chunking.document import DocumentChunking
8
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
9
  from agno.knowledge.document.base import Document
9
10
  from agno.knowledge.reader.base import Reader
10
11
  from agno.knowledge.types import ContentType
11
- from agno.utils.log import log_error, log_info, logger
12
+ from agno.utils.log import log_debug, log_error
12
13
 
13
14
  try:
14
15
  from pypdf import PdfReader as DocumentReader # noqa: F401
@@ -117,6 +118,10 @@ def _clean_page_numbers(
117
118
  page_numbers = [find_page_number(content) for content in page_content_list]
118
119
  if all(x is None or x > 5 for x in page_numbers):
119
120
  # This approach won't work reliably for higher page numbers.
121
+ page_content_list = [
122
+ f"\n{page_content_list[i]}\n{extra_content[i]}" if extra_content else page_content_list[i]
123
+ for i in range(len(page_content_list))
124
+ ]
120
125
  return page_content_list, None
121
126
 
122
127
  # Possible range shifts to detect page numbering
@@ -179,6 +184,7 @@ class BasePDFReader(Reader):
179
184
  page_start_numbering_format: Optional[str] = None,
180
185
  page_end_numbering_format: Optional[str] = None,
181
186
  password: Optional[str] = None,
187
+ chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
182
188
  **kwargs,
183
189
  ):
184
190
  if page_start_numbering_format is None:
@@ -191,11 +197,7 @@ class BasePDFReader(Reader):
191
197
  self.page_end_numbering_format = page_end_numbering_format
192
198
  self.password = password
193
199
 
194
- if self.chunking_strategy is None:
195
- from agno.knowledge.chunking.document import DocumentChunking
196
-
197
- self.chunking_strategy = DocumentChunking(chunk_size=5000)
198
- super().__init__(**kwargs)
200
+ super().__init__(chunking_strategy=chunking_strategy, **kwargs)
199
201
 
200
202
  @classmethod
201
203
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
@@ -214,6 +216,19 @@ class BasePDFReader(Reader):
214
216
  chunked_documents.extend(self.chunk_document(document))
215
217
  return chunked_documents
216
218
 
219
+ def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
220
+ """Determines the document name from the source or a provided name."""
221
+ try:
222
+ if name:
223
+ return name
224
+ if isinstance(pdf_source, str):
225
+ return pdf_source.split("/")[-1].split(".")[0].replace(" ", "_")
226
+ # Assumes a file-like object with a .name attribute
227
+ return pdf_source.name.split(".")[0]
228
+ except Exception:
229
+ # The original code had a bug here, it should check `name` first.
230
+ return name or "pdf"
231
+
217
232
  def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
218
233
  if not doc_reader.is_encrypted:
219
234
  return True
@@ -221,13 +236,13 @@ class BasePDFReader(Reader):
221
236
  # Use provided password or fall back to instance password
222
237
  pdf_password = password or self.password
223
238
  if not pdf_password:
224
- logger.error(f'PDF file "{doc_name}" is password protected but no password provided')
239
+ log_error(f'PDF file "{doc_name}" is password protected but no password provided')
225
240
  return False
226
241
 
227
242
  try:
228
243
  decrypted_pdf = doc_reader.decrypt(pdf_password)
229
244
  if decrypted_pdf:
230
- log_info(f'Successfully decrypted PDF file "{doc_name}" with user password')
245
+ log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
231
246
  return True
232
247
  else:
233
248
  log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
@@ -261,7 +276,6 @@ class BasePDFReader(Reader):
261
276
 
262
277
  if self.chunk:
263
278
  return self._build_chunked_documents(documents)
264
-
265
279
  return documents
266
280
 
267
281
  def _pdf_reader_to_documents(
@@ -329,40 +343,14 @@ class PDFReader(BasePDFReader):
329
343
  def read(
330
344
  self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
331
345
  ) -> List[Document]:
332
- try:
333
- if name:
334
- doc_name = name
335
- elif isinstance(pdf, str):
336
- doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
337
- else:
338
- doc_name = pdf.name.split(".")[0]
339
- except Exception:
340
- doc_name = "pdf"
341
-
342
- log_info(f"Reading: {doc_name}")
343
-
344
- try:
345
- DocumentReader(pdf)
346
- except PdfStreamError as e:
347
- logger.error(f"Error reading PDF: {e}")
348
- return []
349
-
350
- try:
351
- if isinstance(pdf, str):
352
- doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
353
- else:
354
- doc_name = name or pdf.name.split(".")[0]
355
- except Exception:
356
- doc_name = name or "pdf"
357
-
358
- log_info(f"Reading: {doc_name}")
346
+ doc_name = self._get_doc_name(pdf, name)
347
+ log_debug(f"Reading: {doc_name}")
359
348
 
360
349
  try:
361
350
  pdf_reader = DocumentReader(pdf)
362
351
  except PdfStreamError as e:
363
- logger.error(f"Error reading PDF: {e}")
352
+ log_error(f"Error reading PDF: {e}")
364
353
  return []
365
-
366
354
  # Handle PDF decryption
367
355
  if not self._decrypt_pdf(pdf_reader, doc_name, password):
368
356
  return []
@@ -379,21 +367,13 @@ class PDFReader(BasePDFReader):
379
367
  if pdf is None:
380
368
  log_error("No pdf provided")
381
369
  return []
382
-
383
- try:
384
- if isinstance(pdf, str):
385
- doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
386
- else:
387
- doc_name = pdf.name.split(".")[0]
388
- except Exception:
389
- doc_name = name or "pdf"
390
-
391
- log_info(f"Reading: {doc_name}")
370
+ doc_name = self._get_doc_name(pdf, name)
371
+ log_debug(f"Reading: {doc_name}")
392
372
 
393
373
  try:
394
374
  pdf_reader = DocumentReader(pdf)
395
375
  except PdfStreamError as e:
396
- logger.error(f"Error reading PDF: {e}")
376
+ log_error(f"Error reading PDF: {e}")
397
377
  return []
398
378
 
399
379
  # Handle PDF decryption
@@ -413,16 +393,13 @@ class PDFImageReader(BasePDFReader):
413
393
  if not pdf:
414
394
  raise ValueError("No pdf provided")
415
395
 
396
+ doc_name = self._get_doc_name(pdf, name)
397
+ log_debug(f"Reading: {doc_name}")
416
398
  try:
417
- if isinstance(pdf, str):
418
- doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
419
- else:
420
- doc_name = pdf.name.split(".")[0]
421
- except Exception:
422
- doc_name = "pdf"
423
-
424
- log_info(f"Reading: {doc_name}")
425
- pdf_reader = DocumentReader(pdf)
399
+ pdf_reader = DocumentReader(pdf)
400
+ except PdfStreamError as e:
401
+ log_error(f"Error reading PDF: {e}")
402
+ return []
426
403
 
427
404
  # Handle PDF decryption
428
405
  if not self._decrypt_pdf(pdf_reader, doc_name, password):
@@ -437,16 +414,14 @@ class PDFImageReader(BasePDFReader):
437
414
  if not pdf:
438
415
  raise ValueError("No pdf provided")
439
416
 
440
- try:
441
- if isinstance(pdf, str):
442
- doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
443
- else:
444
- doc_name = pdf.name.split(".")[0]
445
- except Exception:
446
- doc_name = "pdf"
417
+ doc_name = self._get_doc_name(pdf, name)
418
+ log_debug(f"Reading: {doc_name}")
447
419
 
448
- log_info(f"Reading: {doc_name}")
449
- pdf_reader = DocumentReader(pdf)
420
+ try:
421
+ pdf_reader = DocumentReader(pdf)
422
+ except PdfStreamError as e:
423
+ log_error(f"Error reading PDF: {e}")
424
+ return []
450
425
 
451
426
  # Handle PDF decryption
452
427
  if not self._decrypt_pdf(pdf_reader, doc_name, password):
@@ -0,0 +1,101 @@
1
+ import asyncio
2
+ from pathlib import Path
3
+ from typing import IO, Any, List, Optional, Union
4
+ from uuid import uuid4
5
+
6
+ from agno.knowledge.chunking.document import DocumentChunking
7
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
+ from agno.knowledge.document.base import Document
9
+ from agno.knowledge.reader.base import Reader
10
+ from agno.knowledge.types import ContentType
11
+ from agno.utils.log import log_debug, log_error
12
+
13
+ try:
14
+ from pptx import Presentation # type: ignore
15
+ except ImportError:
16
+ raise ImportError("The `python-pptx` package is not installed. Please install it via `pip install python-pptx`.")
17
+
18
+
19
+ class PPTXReader(Reader):
20
+ """Reader for PPTX files"""
21
+
22
+ def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(), **kwargs):
23
+ super().__init__(chunking_strategy=chunking_strategy, **kwargs)
24
+
25
+ @classmethod
26
+ def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
27
+ """Get the list of supported chunking strategies for PPTX readers."""
28
+ return [
29
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
30
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
31
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
32
+ ChunkingStrategyType.AGENTIC_CHUNKER,
33
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
34
+ ]
35
+
36
+ @classmethod
37
+ def get_supported_content_types(self) -> List[ContentType]:
38
+ return [ContentType.PPTX]
39
+
40
+ def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
41
+ """Read a pptx file and return a list of documents"""
42
+ try:
43
+ if isinstance(file, Path):
44
+ if not file.exists():
45
+ raise FileNotFoundError(f"Could not find file: {file}")
46
+ log_debug(f"Reading: {file}")
47
+ presentation = Presentation(str(file))
48
+ doc_name = name or file.stem
49
+ else:
50
+ log_debug(f"Reading uploaded file: {getattr(file, 'name', 'pptx_file')}")
51
+ presentation = Presentation(file)
52
+ doc_name = name or (
53
+ getattr(file, "name", "pptx_file").split(".")[0] if hasattr(file, "name") else "pptx_file"
54
+ )
55
+
56
+ # Extract text from all slides
57
+ slide_texts = []
58
+ for slide_number, slide in enumerate(presentation.slides, 1):
59
+ slide_text = f"Slide {slide_number}:\n"
60
+
61
+ # Extract text from shapes that contain text
62
+ text_content = []
63
+ for shape in slide.shapes:
64
+ if hasattr(shape, "text") and shape.text.strip():
65
+ text_content.append(shape.text.strip())
66
+
67
+ if text_content:
68
+ slide_text += "\n".join(text_content)
69
+ else:
70
+ slide_text += "(No text content)"
71
+
72
+ slide_texts.append(slide_text)
73
+
74
+ doc_content = "\n\n".join(slide_texts)
75
+
76
+ documents = [
77
+ Document(
78
+ name=doc_name,
79
+ id=str(uuid4()),
80
+ content=doc_content,
81
+ )
82
+ ]
83
+
84
+ if self.chunk:
85
+ chunked_documents = []
86
+ for document in documents:
87
+ chunked_documents.extend(self.chunk_document(document))
88
+ return chunked_documents
89
+ return documents
90
+
91
+ except Exception as e:
92
+ log_error(f"Error reading file: {e}")
93
+ return []
94
+
95
+ async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
96
+ """Asynchronously read a pptx file and return a list of documents"""
97
+ try:
98
+ return await asyncio.to_thread(self.read, file, name)
99
+ except Exception as e:
100
+ log_error(f"Error reading file asynchronously: {e}")
101
+ return []
@@ -16,8 +16,7 @@ class ReaderFactory:
16
16
  from agno.knowledge.reader.pdf_reader import PDFReader
17
17
 
18
18
  config: Dict[str, Any] = {
19
- "chunk": True,
20
- "chunk_size": 100,
19
+ "name": "PDF Reader",
21
20
  "description": "Processes PDF documents with OCR support for images and text extraction",
22
21
  }
23
22
  config.update(kwargs)
@@ -35,6 +34,18 @@ class ReaderFactory:
35
34
  config.update(kwargs)
36
35
  return CSVReader(**config)
37
36
 
37
+ @classmethod
38
+ def _get_field_labeled_csv_reader(cls, **kwargs) -> Reader:
39
+ """Get Field Labeled CSV reader instance."""
40
+ from agno.knowledge.reader.field_labeled_csv_reader import FieldLabeledCSVReader
41
+
42
+ config: Dict[str, Any] = {
43
+ "name": "Field Labeled CSV Reader",
44
+ "description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
45
+ }
46
+ config.update(kwargs)
47
+ return FieldLabeledCSVReader(**config)
48
+
38
49
  @classmethod
39
50
  def _get_docx_reader(cls, **kwargs) -> Reader:
40
51
  """Get Docx reader instance."""
@@ -47,6 +58,18 @@ class ReaderFactory:
47
58
  config.update(kwargs)
48
59
  return DocxReader(**config)
49
60
 
61
+ @classmethod
62
+ def _get_pptx_reader(cls, **kwargs) -> Reader:
63
+ """Get PPTX reader instance."""
64
+ from agno.knowledge.reader.pptx_reader import PPTXReader
65
+
66
+ config: Dict[str, Any] = {
67
+ "name": "PPTX Reader",
68
+ "description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
69
+ }
70
+ config.update(kwargs)
71
+ return PPTXReader(**config)
72
+
50
73
  @classmethod
51
74
  def _get_json_reader(cls, **kwargs) -> Reader:
52
75
  """Get JSON reader instance."""
@@ -109,6 +132,21 @@ class ReaderFactory:
109
132
  config.update(kwargs)
110
133
  return FirecrawlReader(**config)
111
134
 
135
+ @classmethod
136
+ def _get_tavily_reader(cls, **kwargs) -> Reader:
137
+ """Get Tavily reader instance."""
138
+ from agno.knowledge.reader.tavily_reader import TavilyReader
139
+
140
+ config: Dict[str, Any] = {
141
+ "api_key": kwargs.get("api_key") or os.getenv("TAVILY_API_KEY"),
142
+ "extract_format": "markdown",
143
+ "extract_depth": "basic",
144
+ "name": "Tavily Reader",
145
+ "description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
146
+ }
147
+ config.update(kwargs)
148
+ return TavilyReader(**config)
149
+
112
150
  @classmethod
113
151
  def _get_youtube_reader(cls, **kwargs) -> Reader:
114
152
  """Get YouTube reader instance."""
@@ -189,8 +227,10 @@ class ReaderFactory:
189
227
  return cls.create_reader("pdf")
190
228
  elif extension in [".csv", "text/csv"]:
191
229
  return cls.create_reader("csv")
192
- elif extension in [".docx", ".doc"]:
230
+ elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
193
231
  return cls.create_reader("docx")
232
+ elif extension == ".pptx":
233
+ return cls.create_reader("pptx")
194
234
  elif extension == ".json":
195
235
  return cls.create_reader("json")
196
236
  elif extension in [".md", ".markdown"]:
@@ -210,8 +250,8 @@ class ReaderFactory:
210
250
  if any(domain in url_lower for domain in ["youtube.com", "youtu.be"]):
211
251
  return cls.create_reader("youtube")
212
252
 
213
- # Default to URL reader
214
- return cls.create_reader("url")
253
+ # Default to website reader
254
+ return cls.create_reader("website")
215
255
 
216
256
  @classmethod
217
257
  def get_all_reader_keys(cls) -> List[str]:
@@ -228,7 +268,12 @@ class ReaderFactory:
228
268
  reader_keys.append(reader_key)
229
269
 
230
270
  # Define priority order for URL readers
231
- url_reader_priority = ["url", "website", "firecrawl", "pdf_url", "csv_url", "youtube", "web_search"]
271
+ url_reader_priority = [
272
+ "website",
273
+ "firecrawl",
274
+ "tavily",
275
+ "youtube",
276
+ ]
232
277
 
233
278
  # Sort with URL readers in priority order, others alphabetically
234
279
  def sort_key(reader_key):
@@ -10,7 +10,7 @@ from agno.knowledge.reader.base import Reader
10
10
  from agno.knowledge.reader.pdf_reader import PDFReader
11
11
  from agno.knowledge.reader.text_reader import TextReader
12
12
  from agno.knowledge.types import ContentType
13
- from agno.utils.log import log_info, logger
13
+ from agno.utils.log import log_debug, log_error
14
14
 
15
15
  try:
16
16
  from agno.aws.resource.s3.object import S3Object # type: ignore
@@ -51,7 +51,7 @@ class S3Reader(Reader):
51
51
 
52
52
  def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
53
53
  try:
54
- log_info(f"Reading S3 file: {s3_object.uri}")
54
+ log_debug(f"Reading S3 file: {s3_object.uri}")
55
55
 
56
56
  # Read PDF files
57
57
  if s3_object.uri.endswith(".pdf"):
@@ -74,25 +74,13 @@ class S3Reader(Reader):
74
74
  obj_name = s3_object.name.split("/")[-1]
75
75
  temporary_file = Path("storage").joinpath(obj_name)
76
76
  s3_object.download(temporary_file)
77
-
78
- # TODO: Before we were using textract here. Needed?
79
- # s3_object.download(temporary_file)
80
- # doc_content = textract.process(temporary_file)
81
- # documents = [
82
- # Document(
83
- # name=doc_name,
84
- # id=doc_name,
85
- # content=doc_content.decode("utf-8"),
86
- # )
87
- # ]
88
-
89
77
  documents = TextReader().read(file=temporary_file, name=doc_name)
90
78
 
91
79
  temporary_file.unlink()
92
80
  return documents
93
81
 
94
82
  except Exception as e:
95
- logger.error(f"Error reading: {s3_object.uri}: {e}")
83
+ log_error(f"Error reading: {s3_object.uri}: {e}")
96
84
 
97
85
  return []
98
86
 
@@ -0,0 +1,194 @@
1
+ import asyncio
2
+ from dataclasses import dataclass
3
+ from typing import Dict, List, Literal, Optional
4
+
5
+ from agno.knowledge.chunking.semantic import SemanticChunking
6
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
7
+ from agno.knowledge.document.base import Document
8
+ from agno.knowledge.reader.base import Reader
9
+ from agno.knowledge.types import ContentType
10
+ from agno.utils.log import log_debug, logger
11
+
12
+ try:
13
+ from tavily import TavilyClient # type: ignore[attr-defined]
14
+ except ImportError:
15
+ raise ImportError(
16
+ "The `tavily-python` package is not installed. Please install it via `pip install tavily-python`."
17
+ )
18
+
19
+
20
@dataclass
class TavilyReader(Reader):
    """Reader that extracts the content of a URL through Tavily's Extract API.

    The extracted raw content is wrapped in ``Document`` objects and, when
    chunking is enabled and content was returned, split via ``chunk_document``.
    Extraction problems (no results, a reported failure, or an exception raised
    by the API call) are logged and reported to the caller as a single
    ``Document`` with empty content rather than as an exception.
    """

    api_key: Optional[str] = None
    params: Optional[Dict] = None
    extract_format: Literal["markdown", "text"] = "markdown"
    extract_depth: Literal["basic", "advanced"] = "basic"

    def __init__(
        self,
        api_key: Optional[str] = None,
        params: Optional[Dict] = None,
        extract_format: Literal["markdown", "text"] = "markdown",
        extract_depth: Literal["basic", "advanced"] = "basic",
        chunk: bool = True,
        chunk_size: int = 5000,
        chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
        name: Optional[str] = None,
        description: Optional[str] = None,
    ) -> None:
        """
        Initialize TavilyReader for extracting content from URLs using Tavily's Extract API.

        Args:
            api_key: Tavily API key (or use TAVILY_API_KEY env var)
            params: Additional parameters to pass to the extract API; these
                override the reader's own ``extract_depth`` / ``format`` values
            extract_format: Output format - "markdown" or "text"
            extract_depth: Extraction depth - "basic" (1 credit/5 URLs) or "advanced" (2 credits/5 URLs)
            chunk: Whether to chunk the extracted content
            chunk_size: Size of chunks when chunking is enabled
            chunking_strategy: Strategy to use for chunking
            name: Name of the reader
            description: Description of the reader
        """
        # NOTE(review): the `SemanticChunking()` default is evaluated once, when this
        # function is defined, so every reader built without an explicit strategy
        # shares that single instance. Left unchanged to keep the signature stable.

        # Initialize base Reader (handles chunk_size / strategy)
        super().__init__(
            chunk=chunk, chunk_size=chunk_size, chunking_strategy=chunking_strategy, name=name, description=description
        )

        # Tavily-specific attributes
        self.api_key = api_key
        self.params = params or {}
        self.extract_format = extract_format
        self.extract_depth = extract_depth

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Tavily readers."""
        return [
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts (URLs only)."""
        return [ContentType.URL]

    @staticmethod
    def _empty_documents(url: str, name: Optional[str] = None) -> List[Document]:
        """Build the single empty-content placeholder returned when extraction yields nothing."""
        return [Document(name=name or url, id=url, content="")]

    def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Internal method to extract content from a URL using Tavily's Extract API.

        Args:
            url: The URL to extract content from
            name: Optional name for the document (defaults to URL)

        Returns:
            A list of documents containing the extracted content. When nothing
            could be extracted, a single document with empty content is returned.
        """
        log_debug(f"Extracting content from: {url}")

        # Built outside the try block below, so errors raised while constructing
        # the client propagate to the caller instead of being swallowed.
        client = TavilyClient(api_key=self.api_key)

        # Prepare extract parameters. The Extract API names these options
        # `extract_depth` and `format`; both configured values are forwarded.
        extract_params: Dict = {
            "urls": [url],
            "extract_depth": self.extract_depth,
            "format": self.extract_format,
        }

        # User-supplied params are applied last so they can override the defaults above
        if self.params:
            extract_params.update(self.params)

        try:
            # Call Tavily Extract API
            response = client.extract(**extract_params)

            # Extract content from response
            if not response or "results" not in response:
                logger.warning(f"No results received for URL: {url}")
                return self._empty_documents(url, name)

            results = response.get("results", [])
            if not results:
                logger.warning(f"Empty results for URL: {url}")
                return self._empty_documents(url, name)

            # Get the first result (since we're extracting a single URL)
            result = results[0]

            # Check if extraction failed
            if "failed_reason" in result:
                logger.warning(f"Extraction failed for {url}: {result['failed_reason']}")
                return self._empty_documents(url, name)

            # Get raw content; an explicit None is normalised to an empty string
            content = result.get("raw_content", "")
            if content is None:
                content = ""
                logger.warning(f"No content received for URL: {url}")

            # Debug logging
            log_debug(f"Received content type: {type(content)}")
            log_debug(f"Content length: {len(content) if content else 0}")

            # Chunk only when enabled and there is something to chunk;
            # otherwise return the content as one document.
            document = Document(name=name or url, id=url, content=content)
            if self.chunk and content:
                return list(self.chunk_document(document))
            return [document]

        except Exception as e:
            # Best-effort by design: log and hand back an empty placeholder
            logger.error(f"Error extracting content from {url}: {e}")
            return self._empty_documents(url, name)

    async def _async_extract(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Internal async method to extract content from a URL.

        Args:
            url: The URL to extract content from
            name: Optional name for the document

        Returns:
            A list of documents containing the extracted content
        """
        log_debug(f"Async extracting content from: {url}")

        # Use asyncio.to_thread to run the synchronous extract in a thread
        return await asyncio.to_thread(self._extract, url, name)

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Reads content from a URL using Tavily Extract API.

        This is the public API method that users should call.

        Args:
            url: The URL to extract content from
            name: Optional name for the document

        Returns:
            A list of documents containing the extracted content
        """
        return self._extract(url, name)

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """
        Asynchronously reads content from a URL using Tavily Extract API.

        This is the public API method that users should call for async operations.

        Args:
            url: The URL to extract content from
            name: Optional name for the document

        Returns:
            A list of documents containing the extracted content
        """
        return await self._async_extract(url, name)