PyPI - agno - Versions diffs - 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (331) hide show

agno/agent/agent.py +6009 -2874
agno/api/api.py +2 -0
agno/api/os.py +1 -1
agno/culture/__init__.py +3 -0
agno/culture/manager.py +956 -0
agno/db/async_postgres/__init__.py +3 -0
agno/db/base.py +385 -6
agno/db/dynamo/dynamo.py +388 -81
agno/db/dynamo/schemas.py +47 -10
agno/db/dynamo/utils.py +63 -4
agno/db/firestore/firestore.py +435 -64
agno/db/firestore/schemas.py +11 -0
agno/db/firestore/utils.py +102 -4
agno/db/gcs_json/gcs_json_db.py +384 -42
agno/db/gcs_json/utils.py +60 -26
agno/db/in_memory/in_memory_db.py +351 -66
agno/db/in_memory/utils.py +60 -2
agno/db/json/json_db.py +339 -48
agno/db/json/utils.py +60 -26
agno/db/migrations/manager.py +199 -0
agno/db/migrations/v1_to_v2.py +510 -37
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +938 -0
agno/db/mongo/__init__.py +15 -1
agno/db/mongo/async_mongo.py +2036 -0
agno/db/mongo/mongo.py +653 -76
agno/db/mongo/schemas.py +13 -0
agno/db/mongo/utils.py +80 -8
agno/db/mysql/mysql.py +687 -25
agno/db/mysql/schemas.py +61 -37
agno/db/mysql/utils.py +60 -2
agno/db/postgres/__init__.py +2 -1
agno/db/postgres/async_postgres.py +2001 -0
agno/db/postgres/postgres.py +676 -57
agno/db/postgres/schemas.py +43 -18
agno/db/postgres/utils.py +164 -2
agno/db/redis/redis.py +344 -38
agno/db/redis/schemas.py +18 -0
agno/db/redis/utils.py +60 -2
agno/db/schemas/__init__.py +2 -1
agno/db/schemas/culture.py +120 -0
agno/db/schemas/memory.py +13 -0
agno/db/singlestore/schemas.py +26 -1
agno/db/singlestore/singlestore.py +687 -53
agno/db/singlestore/utils.py +60 -2
agno/db/sqlite/__init__.py +2 -1
agno/db/sqlite/async_sqlite.py +2371 -0
agno/db/sqlite/schemas.py +24 -0
agno/db/sqlite/sqlite.py +774 -85
agno/db/sqlite/utils.py +168 -5
agno/db/surrealdb/__init__.py +3 -0
agno/db/surrealdb/metrics.py +292 -0
agno/db/surrealdb/models.py +309 -0
agno/db/surrealdb/queries.py +71 -0
agno/db/surrealdb/surrealdb.py +1361 -0
agno/db/surrealdb/utils.py +147 -0
agno/db/utils.py +50 -22
agno/eval/accuracy.py +50 -43
agno/eval/performance.py +6 -3
agno/eval/reliability.py +6 -3
agno/eval/utils.py +33 -16
agno/exceptions.py +68 -1
agno/filters.py +354 -0
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +52 -0
agno/integrations/discord/client.py +1 -0
agno/knowledge/chunking/agentic.py +13 -10
agno/knowledge/chunking/fixed.py +1 -1
agno/knowledge/chunking/semantic.py +40 -8
agno/knowledge/chunking/strategy.py +59 -15
agno/knowledge/embedder/aws_bedrock.py +9 -4
agno/knowledge/embedder/azure_openai.py +54 -0
agno/knowledge/embedder/base.py +2 -0
agno/knowledge/embedder/cohere.py +184 -5
agno/knowledge/embedder/fastembed.py +1 -1
agno/knowledge/embedder/google.py +79 -1
agno/knowledge/embedder/huggingface.py +9 -4
agno/knowledge/embedder/jina.py +63 -0
agno/knowledge/embedder/mistral.py +78 -11
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/ollama.py +13 -0
agno/knowledge/embedder/openai.py +37 -65
agno/knowledge/embedder/sentence_transformer.py +8 -4
agno/knowledge/embedder/vllm.py +262 -0
agno/knowledge/embedder/voyageai.py +69 -16
agno/knowledge/knowledge.py +595 -187
agno/knowledge/reader/base.py +9 -2
agno/knowledge/reader/csv_reader.py +8 -10
agno/knowledge/reader/docx_reader.py +5 -6
agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
agno/knowledge/reader/json_reader.py +6 -5
agno/knowledge/reader/markdown_reader.py +13 -13
agno/knowledge/reader/pdf_reader.py +43 -68
agno/knowledge/reader/pptx_reader.py +101 -0
agno/knowledge/reader/reader_factory.py +51 -6
agno/knowledge/reader/s3_reader.py +3 -15
agno/knowledge/reader/tavily_reader.py +194 -0
agno/knowledge/reader/text_reader.py +13 -13
agno/knowledge/reader/web_search_reader.py +2 -43
agno/knowledge/reader/website_reader.py +43 -25
agno/knowledge/reranker/__init__.py +3 -0
agno/knowledge/types.py +9 -0
agno/knowledge/utils.py +20 -0
agno/media.py +339 -266
agno/memory/manager.py +336 -82
agno/models/aimlapi/aimlapi.py +2 -2
agno/models/anthropic/claude.py +183 -37
agno/models/aws/bedrock.py +52 -112
agno/models/aws/claude.py +33 -1
agno/models/azure/ai_foundry.py +33 -15
agno/models/azure/openai_chat.py +25 -8
agno/models/base.py +1011 -566
agno/models/cerebras/cerebras.py +19 -13
agno/models/cerebras/cerebras_openai.py +8 -5
agno/models/cohere/chat.py +27 -1
agno/models/cometapi/__init__.py +5 -0
agno/models/cometapi/cometapi.py +57 -0
agno/models/dashscope/dashscope.py +1 -0
agno/models/deepinfra/deepinfra.py +2 -2
agno/models/deepseek/deepseek.py +2 -2
agno/models/fireworks/fireworks.py +2 -2
agno/models/google/gemini.py +110 -37
agno/models/groq/groq.py +28 -11
agno/models/huggingface/huggingface.py +2 -1
agno/models/internlm/internlm.py +2 -2
agno/models/langdb/langdb.py +4 -4
agno/models/litellm/chat.py +18 -1
agno/models/litellm/litellm_openai.py +2 -2
agno/models/llama_cpp/__init__.py +5 -0
agno/models/llama_cpp/llama_cpp.py +22 -0
agno/models/message.py +143 -4
agno/models/meta/llama.py +27 -10
agno/models/meta/llama_openai.py +5 -17
agno/models/nebius/nebius.py +6 -6
agno/models/nexus/__init__.py +3 -0
agno/models/nexus/nexus.py +22 -0
agno/models/nvidia/nvidia.py +2 -2
agno/models/ollama/chat.py +60 -6
agno/models/openai/chat.py +102 -43
agno/models/openai/responses.py +103 -106
agno/models/openrouter/openrouter.py +41 -3
agno/models/perplexity/perplexity.py +4 -5
agno/models/portkey/portkey.py +3 -3
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +52 -0
agno/models/response.py +81 -5
agno/models/sambanova/sambanova.py +2 -2
agno/models/siliconflow/__init__.py +5 -0
agno/models/siliconflow/siliconflow.py +25 -0
agno/models/together/together.py +2 -2
agno/models/utils.py +254 -8
agno/models/vercel/v0.py +2 -2
agno/models/vertexai/__init__.py +0 -0
agno/models/vertexai/claude.py +96 -0
agno/models/vllm/vllm.py +1 -0
agno/models/xai/xai.py +3 -2
agno/os/app.py +543 -175
agno/os/auth.py +24 -14
agno/os/config.py +1 -0
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/a2a/__init__.py +3 -0
agno/os/interfaces/a2a/a2a.py +42 -0
agno/os/interfaces/a2a/router.py +250 -0
agno/os/interfaces/a2a/utils.py +924 -0
agno/os/interfaces/agui/agui.py +23 -7
agno/os/interfaces/agui/router.py +27 -3
agno/os/interfaces/agui/utils.py +242 -142
agno/os/interfaces/base.py +6 -2
agno/os/interfaces/slack/router.py +81 -23
agno/os/interfaces/slack/slack.py +29 -14
agno/os/interfaces/whatsapp/router.py +11 -4
agno/os/interfaces/whatsapp/whatsapp.py +14 -7
agno/os/mcp.py +111 -54
agno/os/middleware/__init__.py +7 -0
agno/os/middleware/jwt.py +233 -0
agno/os/router.py +556 -139
agno/os/routers/evals/evals.py +71 -34
agno/os/routers/evals/schemas.py +31 -31
agno/os/routers/evals/utils.py +6 -5
agno/os/routers/health.py +31 -0
agno/os/routers/home.py +52 -0
agno/os/routers/knowledge/knowledge.py +185 -38
agno/os/routers/knowledge/schemas.py +82 -22
agno/os/routers/memory/memory.py +158 -53
agno/os/routers/memory/schemas.py +20 -16
agno/os/routers/metrics/metrics.py +20 -8
agno/os/routers/metrics/schemas.py +16 -16
agno/os/routers/session/session.py +499 -38
agno/os/schema.py +308 -198
agno/os/utils.py +401 -41
agno/reasoning/anthropic.py +80 -0
agno/reasoning/azure_ai_foundry.py +2 -2
agno/reasoning/deepseek.py +2 -2
agno/reasoning/default.py +3 -1
agno/reasoning/gemini.py +73 -0
agno/reasoning/groq.py +2 -2
agno/reasoning/ollama.py +2 -2
agno/reasoning/openai.py +7 -2
agno/reasoning/vertexai.py +76 -0
agno/run/__init__.py +6 -0
agno/run/agent.py +266 -112
agno/run/base.py +53 -24
agno/run/team.py +252 -111
agno/run/workflow.py +156 -45
agno/session/agent.py +105 -89
agno/session/summary.py +65 -25
agno/session/team.py +176 -96
agno/session/workflow.py +406 -40
agno/team/team.py +3854 -1692
agno/tools/brightdata.py +3 -3
agno/tools/cartesia.py +3 -5
agno/tools/dalle.py +9 -8
agno/tools/decorator.py +4 -2
agno/tools/desi_vocal.py +2 -2
agno/tools/duckduckgo.py +15 -11
agno/tools/e2b.py +20 -13
agno/tools/eleven_labs.py +26 -28
agno/tools/exa.py +21 -16
agno/tools/fal.py +4 -4
agno/tools/file.py +153 -23
agno/tools/file_generation.py +350 -0
agno/tools/firecrawl.py +4 -4
agno/tools/function.py +257 -37
agno/tools/giphy.py +2 -2
agno/tools/gmail.py +238 -14
agno/tools/google_drive.py +270 -0
agno/tools/googlecalendar.py +36 -8
agno/tools/googlesheets.py +20 -5
agno/tools/jira.py +20 -0
agno/tools/knowledge.py +3 -3
agno/tools/lumalab.py +3 -3
agno/tools/mcp/__init__.py +10 -0
agno/tools/mcp/mcp.py +331 -0
agno/tools/mcp/multi_mcp.py +347 -0
agno/tools/mcp/params.py +24 -0
agno/tools/mcp_toolbox.py +284 -0
agno/tools/mem0.py +11 -17
agno/tools/memori.py +1 -53
agno/tools/memory.py +419 -0
agno/tools/models/azure_openai.py +2 -2
agno/tools/models/gemini.py +3 -3
agno/tools/models/groq.py +3 -5
agno/tools/models/nebius.py +7 -7
agno/tools/models_labs.py +25 -15
agno/tools/notion.py +204 -0
agno/tools/openai.py +4 -9
agno/tools/opencv.py +3 -3
agno/tools/parallel.py +314 -0
agno/tools/replicate.py +7 -7
agno/tools/scrapegraph.py +58 -31
agno/tools/searxng.py +2 -2
agno/tools/serper.py +2 -2
agno/tools/slack.py +18 -3
agno/tools/spider.py +2 -2
agno/tools/tavily.py +146 -0
agno/tools/whatsapp.py +1 -1
agno/tools/workflow.py +278 -0
agno/tools/yfinance.py +12 -11
agno/utils/agent.py +820 -0
agno/utils/audio.py +27 -0
agno/utils/common.py +90 -1
agno/utils/events.py +222 -7
agno/utils/gemini.py +181 -23
agno/utils/hooks.py +57 -0
agno/utils/http.py +111 -0
agno/utils/knowledge.py +12 -5
agno/utils/log.py +1 -0
agno/utils/mcp.py +95 -5
agno/utils/media.py +188 -10
agno/utils/merge_dict.py +22 -1
agno/utils/message.py +60 -0
agno/utils/models/claude.py +40 -11
agno/utils/models/cohere.py +1 -1
agno/utils/models/watsonx.py +1 -1
agno/utils/openai.py +1 -1
agno/utils/print_response/agent.py +105 -21
agno/utils/print_response/team.py +103 -38
agno/utils/print_response/workflow.py +251 -34
agno/utils/reasoning.py +22 -1
agno/utils/serialize.py +32 -0
agno/utils/streamlit.py +16 -10
agno/utils/string.py +41 -0
agno/utils/team.py +98 -9
agno/utils/tools.py +1 -1
agno/vectordb/base.py +23 -4
agno/vectordb/cassandra/cassandra.py +65 -9
agno/vectordb/chroma/chromadb.py +182 -38
agno/vectordb/clickhouse/clickhousedb.py +64 -11
agno/vectordb/couchbase/couchbase.py +105 -10
agno/vectordb/lancedb/lance_db.py +183 -135
agno/vectordb/langchaindb/langchaindb.py +25 -7
agno/vectordb/lightrag/lightrag.py +17 -3
agno/vectordb/llamaindex/__init__.py +3 -0
agno/vectordb/llamaindex/llamaindexdb.py +46 -7
agno/vectordb/milvus/milvus.py +126 -9
agno/vectordb/mongodb/__init__.py +7 -1
agno/vectordb/mongodb/mongodb.py +112 -7
agno/vectordb/pgvector/pgvector.py +142 -21
agno/vectordb/pineconedb/pineconedb.py +80 -8
agno/vectordb/qdrant/qdrant.py +125 -39
agno/vectordb/redis/__init__.py +9 -0
agno/vectordb/redis/redisdb.py +694 -0
agno/vectordb/singlestore/singlestore.py +111 -25
agno/vectordb/surrealdb/surrealdb.py +31 -5
agno/vectordb/upstashdb/upstashdb.py +76 -8
agno/vectordb/weaviate/weaviate.py +86 -15
agno/workflow/__init__.py +2 -0
agno/workflow/agent.py +299 -0
agno/workflow/condition.py +112 -18
agno/workflow/loop.py +69 -10
agno/workflow/parallel.py +266 -118
agno/workflow/router.py +110 -17
agno/workflow/step.py +645 -136
agno/workflow/steps.py +65 -6
agno/workflow/types.py +71 -33
agno/workflow/workflow.py +2113 -300
agno-2.3.0.dist-info/METADATA +618 -0
agno-2.3.0.dist-info/RECORD +577 -0
agno-2.3.0.dist-info/licenses/LICENSE +201 -0
agno/knowledge/reader/url_reader.py +0 -128
agno/tools/googlesearch.py +0 -98
agno/tools/mcp.py +0 -610
agno/utils/models/aws_claude.py +0 -170
agno-2.0.0rc2.dist-info/METADATA +0 -355
agno-2.0.0rc2.dist-info/RECORD +0 -515
agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0

agno/knowledge/reader/pdf_reader.py CHANGED Viewed

@@ -4,11 +4,12 @@ from pathlib import Path
 from typing import IO, Any, List, Optional, Tuple, Union
 from uuid import uuid4
-from agno.knowledge.chunking.strategy import ChunkingStrategyType
+from agno.knowledge.chunking.document import DocumentChunking
+from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_error, log_info, logger
+from agno.utils.log import log_debug, log_error
 try:
     from pypdf import PdfReader as DocumentReader  # noqa: F401
@@ -117,6 +118,10 @@ def _clean_page_numbers(
     page_numbers = [find_page_number(content) for content in page_content_list]
     if all(x is None or x > 5 for x in page_numbers):
         # This approach won't work reliably for higher page numbers.
+        page_content_list = [
+            f"\n{page_content_list[i]}\n{extra_content[i]}" if extra_content else page_content_list[i]
+            for i in range(len(page_content_list))
+        ]
         return page_content_list, None
     # Possible range shifts to detect page numbering
@@ -179,6 +184,7 @@ class BasePDFReader(Reader):
         page_start_numbering_format: Optional[str] = None,
         page_end_numbering_format: Optional[str] = None,
         password: Optional[str] = None,
+        chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
         **kwargs,
     ):
         if page_start_numbering_format is None:
@@ -191,11 +197,7 @@ class BasePDFReader(Reader):
         self.page_end_numbering_format = page_end_numbering_format
         self.password = password
-        if self.chunking_strategy is None:
-            from agno.knowledge.chunking.document import DocumentChunking
-            self.chunking_strategy = DocumentChunking(chunk_size=5000)
-        super().__init__(**kwargs)
+        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
     @classmethod
     def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
@@ -214,6 +216,19 @@ class BasePDFReader(Reader):
             chunked_documents.extend(self.chunk_document(document))
         return chunked_documents
+    def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
+        """Determines the document name from the source or a provided name."""
+        try:
+            if name:
+                return name
+            if isinstance(pdf_source, str):
+                return pdf_source.split("/")[-1].split(".")[0].replace(" ", "_")
+            # Assumes a file-like object with a .name attribute
+            return pdf_source.name.split(".")[0]
+        except Exception:
+            # The original code had a bug here, it should check `name` first.
+            return name or "pdf"
     def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
         if not doc_reader.is_encrypted:
             return True
@@ -221,13 +236,13 @@ class BasePDFReader(Reader):
         # Use provided password or fall back to instance password
         pdf_password = password or self.password
         if not pdf_password:
-            logger.error(f'PDF file "{doc_name}" is password protected but no password provided')
+            log_error(f'PDF file "{doc_name}" is password protected but no password provided')
             return False
         try:
             decrypted_pdf = doc_reader.decrypt(pdf_password)
             if decrypted_pdf:
-                log_info(f'Successfully decrypted PDF file "{doc_name}" with user password')
+                log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
                 return True
             else:
                 log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
@@ -261,7 +276,6 @@ class BasePDFReader(Reader):
         if self.chunk:
             return self._build_chunked_documents(documents)
         return documents
     def _pdf_reader_to_documents(
@@ -329,40 +343,14 @@ class PDFReader(BasePDFReader):
     def read(
         self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
     ) -> List[Document]:
-        try:
-            if name:
-                doc_name = name
-            elif isinstance(pdf, str):
-                doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = "pdf"
-        log_info(f"Reading: {doc_name}")
-        try:
-            DocumentReader(pdf)
-        except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
-            return []
-        try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = name or pdf.name.split(".")[0]
-        except Exception:
-            doc_name = name or "pdf"
-        log_info(f"Reading: {doc_name}")
+        doc_name = self._get_doc_name(pdf, name)
+        log_debug(f"Reading: {doc_name}")
         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
+            log_error(f"Error reading PDF: {e}")
             return []
         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
             return []
@@ -379,21 +367,13 @@ class PDFReader(BasePDFReader):
         if pdf is None:
             log_error("No pdf provided")
             return []
-        try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = name or "pdf"
-        log_info(f"Reading: {doc_name}")
+        doc_name = self._get_doc_name(pdf, name)
+        log_debug(f"Reading: {doc_name}")
         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
+            log_error(f"Error reading PDF: {e}")
             return []
         # Handle PDF decryption
@@ -413,16 +393,13 @@ class PDFImageReader(BasePDFReader):
         if not pdf:
             raise ValueError("No pdf provided")
+        doc_name = self._get_doc_name(pdf, name)
+        log_debug(f"Reading: {doc_name}")
         try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = "pdf"
-        log_info(f"Reading: {doc_name}")
-        pdf_reader = DocumentReader(pdf)
+            pdf_reader = DocumentReader(pdf)
+        except PdfStreamError as e:
+            log_error(f"Error reading PDF: {e}")
+            return []
         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
@@ -437,16 +414,14 @@ class PDFImageReader(BasePDFReader):
         if not pdf:
             raise ValueError("No pdf provided")
-        try:
-            if isinstance(pdf, str):
-                doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
-            else:
-                doc_name = pdf.name.split(".")[0]
-        except Exception:
-            doc_name = "pdf"
+        doc_name = self._get_doc_name(pdf, name)
+        log_debug(f"Reading: {doc_name}")
-        log_info(f"Reading: {doc_name}")
-        pdf_reader = DocumentReader(pdf)
+        try:
+            pdf_reader = DocumentReader(pdf)
+        except PdfStreamError as e:
+            log_error(f"Error reading PDF: {e}")
+            return []
         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):

agno/knowledge/reader/pptx_reader.py ADDED Viewed

@@ -0,0 +1,101 @@
+import asyncio
+from pathlib import Path
+from typing import IO, Any, List, Optional, Union
+from uuid import uuid4
+from agno.knowledge.chunking.document import DocumentChunking
+from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
+from agno.knowledge.document.base import Document
+from agno.knowledge.reader.base import Reader
+from agno.knowledge.types import ContentType
+from agno.utils.log import log_debug, log_error
+try:
+    from pptx import Presentation  # type: ignore
+except ImportError:
+    raise ImportError("The `python-pptx` package is not installed. Please install it via `pip install python-pptx`.")
+class PPTXReader(Reader):
+    """Reader for PPTX files"""
+    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(), **kwargs):
+        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
+    @classmethod
+    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+        """Get the list of supported chunking strategies for PPTX readers."""
+        return [
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+        ]
+    @classmethod
+    def get_supported_content_types(self) -> List[ContentType]:
+        return [ContentType.PPTX]
+    def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
+        """Read a pptx file and return a list of documents"""
+        try:
+            if isinstance(file, Path):
+                if not file.exists():
+                    raise FileNotFoundError(f"Could not find file: {file}")
+                log_debug(f"Reading: {file}")
+                presentation = Presentation(str(file))
+                doc_name = name or file.stem
+            else:
+                log_debug(f"Reading uploaded file: {getattr(file, 'name', 'pptx_file')}")
+                presentation = Presentation(file)
+                doc_name = name or (
+                    getattr(file, "name", "pptx_file").split(".")[0] if hasattr(file, "name") else "pptx_file"
+                )
+            # Extract text from all slides
+            slide_texts = []
+            for slide_number, slide in enumerate(presentation.slides, 1):
+                slide_text = f"Slide {slide_number}:\n"
+                # Extract text from shapes that contain text
+                text_content = []
+                for shape in slide.shapes:
+                    if hasattr(shape, "text") and shape.text.strip():
+                        text_content.append(shape.text.strip())
+                if text_content:
+                    slide_text += "\n".join(text_content)
+                else:
+                    slide_text += "(No text content)"
+                slide_texts.append(slide_text)
+            doc_content = "\n\n".join(slide_texts)
+            documents = [
+                Document(
+                    name=doc_name,
+                    id=str(uuid4()),
+                    content=doc_content,
+                )
+            ]
+            if self.chunk:
+                chunked_documents = []
+                for document in documents:
+                    chunked_documents.extend(self.chunk_document(document))
+                return chunked_documents
+            return documents
+        except Exception as e:
+            log_error(f"Error reading file: {e}")
+            return []
+    async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
+        """Asynchronously read a pptx file and return a list of documents"""
+        try:
+            return await asyncio.to_thread(self.read, file, name)
+        except Exception as e:
+            log_error(f"Error reading file asynchronously: {e}")
+            return []

agno/knowledge/reader/reader_factory.py CHANGED Viewed

@@ -16,8 +16,7 @@ class ReaderFactory:
         from agno.knowledge.reader.pdf_reader import PDFReader
         config: Dict[str, Any] = {
-            "chunk": True,
-            "chunk_size": 100,
+            "name": "PDF Reader",
             "description": "Processes PDF documents with OCR support for images and text extraction",
         }
         config.update(kwargs)
@@ -35,6 +34,18 @@ class ReaderFactory:
         config.update(kwargs)
         return CSVReader(**config)
+    @classmethod
+    def _get_field_labeled_csv_reader(cls, **kwargs) -> Reader:
+        """Get Field Labeled CSV reader instance."""
+        from agno.knowledge.reader.field_labeled_csv_reader import FieldLabeledCSVReader
+        config: Dict[str, Any] = {
+            "name": "Field Labeled CSV Reader",
+            "description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
+        }
+        config.update(kwargs)
+        return FieldLabeledCSVReader(**config)
     @classmethod
     def _get_docx_reader(cls, **kwargs) -> Reader:
         """Get Docx reader instance."""
@@ -47,6 +58,18 @@ class ReaderFactory:
         config.update(kwargs)
         return DocxReader(**config)
+    @classmethod
+    def _get_pptx_reader(cls, **kwargs) -> Reader:
+        """Get PPTX reader instance."""
+        from agno.knowledge.reader.pptx_reader import PPTXReader
+        config: Dict[str, Any] = {
+            "name": "PPTX Reader",
+            "description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
+        }
+        config.update(kwargs)
+        return PPTXReader(**config)
     @classmethod
     def _get_json_reader(cls, **kwargs) -> Reader:
         """Get JSON reader instance."""
@@ -109,6 +132,21 @@ class ReaderFactory:
         config.update(kwargs)
         return FirecrawlReader(**config)
+    @classmethod
+    def _get_tavily_reader(cls, **kwargs) -> Reader:
+        """Get Tavily reader instance."""
+        from agno.knowledge.reader.tavily_reader import TavilyReader
+        config: Dict[str, Any] = {
+            "api_key": kwargs.get("api_key") or os.getenv("TAVILY_API_KEY"),
+            "extract_format": "markdown",
+            "extract_depth": "basic",
+            "name": "Tavily Reader",
+            "description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
+        }
+        config.update(kwargs)
+        return TavilyReader(**config)
     @classmethod
     def _get_youtube_reader(cls, **kwargs) -> Reader:
         """Get YouTube reader instance."""
@@ -189,8 +227,10 @@ class ReaderFactory:
             return cls.create_reader("pdf")
         elif extension in [".csv", "text/csv"]:
             return cls.create_reader("csv")
-        elif extension in [".docx", ".doc"]:
+        elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
             return cls.create_reader("docx")
+        elif extension == ".pptx":
+            return cls.create_reader("pptx")
         elif extension == ".json":
             return cls.create_reader("json")
         elif extension in [".md", ".markdown"]:
@@ -210,8 +250,8 @@ class ReaderFactory:
         if any(domain in url_lower for domain in ["youtube.com", "youtu.be"]):
             return cls.create_reader("youtube")
-        # Default to URL reader
-        return cls.create_reader("url")
+        # Default to website reader
+        return cls.create_reader("website")
     @classmethod
     def get_all_reader_keys(cls) -> List[str]:
@@ -228,7 +268,12 @@ class ReaderFactory:
                 reader_keys.append(reader_key)
         # Define priority order for URL readers
-        url_reader_priority = ["url", "website", "firecrawl", "pdf_url", "csv_url", "youtube", "web_search"]
+        url_reader_priority = [
+            "website",
+            "firecrawl",
+            "tavily",
+            "youtube",
+        ]
         # Sort with URL readers in priority order, others alphabetically
         def sort_key(reader_key):

agno/knowledge/reader/s3_reader.py CHANGED Viewed

@@ -10,7 +10,7 @@ from agno.knowledge.reader.base import Reader
 from agno.knowledge.reader.pdf_reader import PDFReader
 from agno.knowledge.reader.text_reader import TextReader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_info, logger
+from agno.utils.log import log_debug, log_error
 try:
     from agno.aws.resource.s3.object import S3Object  # type: ignore
@@ -51,7 +51,7 @@ class S3Reader(Reader):
     def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
         try:
-            log_info(f"Reading S3 file: {s3_object.uri}")
+            log_debug(f"Reading S3 file: {s3_object.uri}")
             # Read PDF files
             if s3_object.uri.endswith(".pdf"):
@@ -74,25 +74,13 @@ class S3Reader(Reader):
                 obj_name = s3_object.name.split("/")[-1]
                 temporary_file = Path("storage").joinpath(obj_name)
                 s3_object.download(temporary_file)
-                # TODO: Before we were using textract here. Needed?
-                # s3_object.download(temporary_file)
-                # doc_content = textract.process(temporary_file)
-                # documents = [
-                #     Document(
-                #         name=doc_name,
-                #         id=doc_name,
-                #         content=doc_content.decode("utf-8"),
-                #     )
-                # ]
                 documents = TextReader().read(file=temporary_file, name=doc_name)
                 temporary_file.unlink()
                 return documents
         except Exception as e:
-            logger.error(f"Error reading: {s3_object.uri}: {e}")
+            log_error(f"Error reading: {s3_object.uri}: {e}")
         return []

agno/knowledge/reader/tavily_reader.py ADDED Viewed

@@ -0,0 +1,194 @@
+import asyncio
+from dataclasses import dataclass
+from typing import Dict, List, Literal, Optional
+from agno.knowledge.chunking.semantic import SemanticChunking
+from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
+from agno.knowledge.document.base import Document
+from agno.knowledge.reader.base import Reader
+from agno.knowledge.types import ContentType
+from agno.utils.log import log_debug, logger
+try:
+    from tavily import TavilyClient  # type: ignore[attr-defined]
+except ImportError:
+    raise ImportError(
+        "The `tavily-python` package is not installed. Please install it via `pip install tavily-python`."
+    )
+@dataclass
+class TavilyReader(Reader):
+    """Reader that turns a single URL into Documents using Tavily's Extract API.
+    The extracted page text is optionally split with the configured chunking
+    strategy. Extraction failures are logged and reported as a single Document
+    with empty content rather than raised.
+    """
+    # Tavily API key handed to TavilyClient for every extraction
+    api_key: Optional[str] = None
+    # Extra keyword arguments merged over the default extract request
+    params: Optional[Dict] = None
+    # Output format requested from Tavily
+    extract_format: Literal["markdown", "text"] = "markdown"
+    # Extraction depth requested from Tavily
+    extract_depth: Literal["basic", "advanced"] = "basic"
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        params: Optional[Dict] = None,
+        extract_format: Literal["markdown", "text"] = "markdown",
+        extract_depth: Literal["basic", "advanced"] = "basic",
+        chunk: bool = True,
+        chunk_size: int = 5000,
+        # NOTE(review): this default is evaluated once at definition time, so every
+        # reader built without an explicit strategy shares the same SemanticChunking
+        # instance. Kept as-is because passing None explicitly has its own meaning.
+        chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+    ) -> None:
+        """
+        Initialize TavilyReader for extracting content from URLs using Tavily's Extract API.
+        Args:
+            api_key: Tavily API key (or use TAVILY_API_KEY env var)
+            params: Additional parameters to pass to the extract API; these override
+                the defaults built from extract_format / extract_depth
+            extract_format: Output format - "markdown" or "text"
+            extract_depth: Extraction depth - "basic" (1 credit/5 URLs) or "advanced" (2 credits/5 URLs)
+            chunk: Whether to chunk the extracted content
+            chunk_size: Size of chunks when chunking is enabled
+            chunking_strategy: Strategy to use for chunking
+            name: Name of the reader
+            description: Description of the reader
+        """
+        # Initialize base Reader (handles chunk_size / strategy)
+        super().__init__(
+            chunk=chunk, chunk_size=chunk_size, chunking_strategy=chunking_strategy, name=name, description=description
+        )
+        # Tavily-specific attributes
+        self.api_key = api_key
+        self.params = params or {}
+        self.extract_format = extract_format
+        self.extract_depth = extract_depth
+    @classmethod
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
+        """Get the list of supported chunking strategies for Tavily readers."""
+        return [
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+        ]
+    @classmethod
+    def get_supported_content_types(cls) -> List[ContentType]:
+        """Get the content types this reader accepts (URLs only)."""
+        return [ContentType.URL]
+    @staticmethod
+    def _empty_document(url: str, name: Optional[str] = None) -> List[Document]:
+        """Build the single empty-content Document returned when extraction yields nothing."""
+        return [Document(name=name or url, id=url, content="")]
+    def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Internal method to extract content from a URL using Tavily's Extract API.
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document (defaults to URL)
+        Returns:
+            A list of documents containing the extracted content. On any failure
+            (missing/empty results, a reported failed_reason, or an exception) the
+            list holds one Document with empty content.
+        """
+        log_debug(f"Extracting content from: {url}")
+        client = TavilyClient(api_key=self.api_key)
+        # Tavily's Extract endpoint names these options `extract_depth` and `format`.
+        # The previous `depth` key did not match the documented option name, and
+        # `extract_format` was never forwarded at all.
+        extract_params: Dict = {
+            "urls": [url],
+            "extract_depth": self.extract_depth,
+            "format": self.extract_format,
+        }
+        # Caller-supplied params win over the defaults above
+        if self.params:
+            extract_params.update(self.params)
+        try:
+            # Call Tavily Extract API
+            response = client.extract(**extract_params)
+            # Extract content from response
+            if not response or "results" not in response:
+                logger.warning(f"No results received for URL: {url}")
+                return self._empty_document(url, name)
+            results = response.get("results", [])
+            if not results:
+                logger.warning(f"Empty results for URL: {url}")
+                return self._empty_document(url, name)
+            # Get the first result (since we're extracting a single URL)
+            result = results[0]
+            # Check if extraction failed
+            if "failed_reason" in result:
+                logger.warning(f"Extraction failed for {url}: {result['failed_reason']}")
+                return self._empty_document(url, name)
+            # Get raw content; an explicit None is normalised to an empty string
+            content = result.get("raw_content", "")
+            if content is None:
+                content = ""
+                logger.warning(f"No content received for URL: {url}")
+            # Debug logging
+            log_debug(f"Received content type: {type(content)}")
+            log_debug(f"Content length: {len(content) if content else 0}")
+            # Create documents: chunk only when enabled and there is something to chunk
+            document = Document(name=name or url, id=url, content=content)
+            if self.chunk and content:
+                return list(self.chunk_document(document))
+            return [document]
+        except Exception as e:
+            # Best-effort reader: report the failure and hand back an empty document
+            logger.error(f"Error extracting content from {url}: {e}")
+            return self._empty_document(url, name)
+    async def _async_extract(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Internal async method to extract content from a URL.
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document
+        Returns:
+            A list of documents containing the extracted content
+        """
+        log_debug(f"Async extracting content from: {url}")
+        # Use asyncio.to_thread to run the synchronous extract in a thread
+        return await asyncio.to_thread(self._extract, url, name)
+    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Reads content from a URL using Tavily Extract API.
+        This is the public API method that users should call.
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document
+        Returns:
+            A list of documents containing the extracted content
+        """
+        return self._extract(url, name)
+    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Asynchronously reads content from a URL using Tavily Extract API.
+        This is the public API method that users should call for async operations.
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document
+        Returns:
+            A list of documents containing the extracted content
+        """
+        return await self._async_extract(url, name)

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

agno 2.0.0rc2py3-none-any.whl → 2.3.0py3-none-any.whl