PyPI - agno - Versions diffs - 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl - Mend

agno 2.2.13 (py3-none-any.whl) → 2.4.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (383) hide show

agno/agent/__init__.py +6 -0
agno/agent/agent.py +5252 -3145
agno/agent/remote.py +525 -0
agno/api/api.py +2 -0
agno/client/__init__.py +3 -0
agno/client/a2a/__init__.py +10 -0
agno/client/a2a/client.py +554 -0
agno/client/a2a/schemas.py +112 -0
agno/client/a2a/utils.py +369 -0
agno/client/os.py +2669 -0
agno/compression/__init__.py +3 -0
agno/compression/manager.py +247 -0
agno/culture/manager.py +2 -2
agno/db/base.py +927 -6
agno/db/dynamo/dynamo.py +788 -2
agno/db/dynamo/schemas.py +128 -0
agno/db/dynamo/utils.py +26 -3
agno/db/firestore/firestore.py +674 -50
agno/db/firestore/schemas.py +41 -0
agno/db/firestore/utils.py +25 -10
agno/db/gcs_json/gcs_json_db.py +506 -3
agno/db/gcs_json/utils.py +14 -2
agno/db/in_memory/in_memory_db.py +203 -4
agno/db/in_memory/utils.py +14 -2
agno/db/json/json_db.py +498 -2
agno/db/json/utils.py +14 -2
agno/db/migrations/manager.py +199 -0
agno/db/migrations/utils.py +19 -0
agno/db/migrations/v1_to_v2.py +54 -16
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +977 -0
agno/db/mongo/async_mongo.py +1013 -39
agno/db/mongo/mongo.py +684 -4
agno/db/mongo/schemas.py +48 -0
agno/db/mongo/utils.py +17 -0
agno/db/mysql/__init__.py +2 -1
agno/db/mysql/async_mysql.py +2958 -0
agno/db/mysql/mysql.py +722 -53
agno/db/mysql/schemas.py +77 -11
agno/db/mysql/utils.py +151 -8
agno/db/postgres/async_postgres.py +1254 -137
agno/db/postgres/postgres.py +2316 -93
agno/db/postgres/schemas.py +153 -21
agno/db/postgres/utils.py +22 -7
agno/db/redis/redis.py +531 -3
agno/db/redis/schemas.py +36 -0
agno/db/redis/utils.py +31 -15
agno/db/schemas/evals.py +1 -0
agno/db/schemas/memory.py +20 -9
agno/db/singlestore/schemas.py +70 -1
agno/db/singlestore/singlestore.py +737 -74
agno/db/singlestore/utils.py +13 -3
agno/db/sqlite/async_sqlite.py +1069 -89
agno/db/sqlite/schemas.py +133 -1
agno/db/sqlite/sqlite.py +2203 -165
agno/db/sqlite/utils.py +21 -11
agno/db/surrealdb/models.py +25 -0
agno/db/surrealdb/surrealdb.py +603 -1
agno/db/utils.py +60 -0
agno/eval/__init__.py +26 -3
agno/eval/accuracy.py +25 -12
agno/eval/agent_as_judge.py +871 -0
agno/eval/base.py +29 -0
agno/eval/performance.py +10 -4
agno/eval/reliability.py +22 -13
agno/eval/utils.py +2 -1
agno/exceptions.py +42 -0
agno/hooks/__init__.py +3 -0
agno/hooks/decorator.py +164 -0
agno/integrations/discord/client.py +13 -2
agno/knowledge/__init__.py +4 -0
agno/knowledge/chunking/code.py +90 -0
agno/knowledge/chunking/document.py +65 -4
agno/knowledge/chunking/fixed.py +4 -1
agno/knowledge/chunking/markdown.py +102 -11
agno/knowledge/chunking/recursive.py +2 -2
agno/knowledge/chunking/semantic.py +130 -48
agno/knowledge/chunking/strategy.py +18 -0
agno/knowledge/embedder/azure_openai.py +0 -1
agno/knowledge/embedder/google.py +1 -1
agno/knowledge/embedder/mistral.py +1 -1
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/openai.py +16 -12
agno/knowledge/filesystem.py +412 -0
agno/knowledge/knowledge.py +4261 -1199
agno/knowledge/protocol.py +134 -0
agno/knowledge/reader/arxiv_reader.py +3 -2
agno/knowledge/reader/base.py +9 -7
agno/knowledge/reader/csv_reader.py +91 -42
agno/knowledge/reader/docx_reader.py +9 -10
agno/knowledge/reader/excel_reader.py +225 -0
agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
agno/knowledge/reader/firecrawl_reader.py +3 -2
agno/knowledge/reader/json_reader.py +16 -22
agno/knowledge/reader/markdown_reader.py +15 -14
agno/knowledge/reader/pdf_reader.py +33 -28
agno/knowledge/reader/pptx_reader.py +9 -10
agno/knowledge/reader/reader_factory.py +135 -1
agno/knowledge/reader/s3_reader.py +8 -16
agno/knowledge/reader/tavily_reader.py +3 -3
agno/knowledge/reader/text_reader.py +15 -14
agno/knowledge/reader/utils/__init__.py +17 -0
agno/knowledge/reader/utils/spreadsheet.py +114 -0
agno/knowledge/reader/web_search_reader.py +8 -65
agno/knowledge/reader/website_reader.py +16 -13
agno/knowledge/reader/wikipedia_reader.py +36 -3
agno/knowledge/reader/youtube_reader.py +3 -2
agno/knowledge/remote_content/__init__.py +33 -0
agno/knowledge/remote_content/config.py +266 -0
agno/knowledge/remote_content/remote_content.py +105 -17
agno/knowledge/utils.py +76 -22
agno/learn/__init__.py +71 -0
agno/learn/config.py +463 -0
agno/learn/curate.py +185 -0
agno/learn/machine.py +725 -0
agno/learn/schemas.py +1114 -0
agno/learn/stores/__init__.py +38 -0
agno/learn/stores/decision_log.py +1156 -0
agno/learn/stores/entity_memory.py +3275 -0
agno/learn/stores/learned_knowledge.py +1583 -0
agno/learn/stores/protocol.py +117 -0
agno/learn/stores/session_context.py +1217 -0
agno/learn/stores/user_memory.py +1495 -0
agno/learn/stores/user_profile.py +1220 -0
agno/learn/utils.py +209 -0
agno/media.py +22 -6
agno/memory/__init__.py +14 -1
agno/memory/manager.py +223 -8
agno/memory/strategies/__init__.py +15 -0
agno/memory/strategies/base.py +66 -0
agno/memory/strategies/summarize.py +196 -0
agno/memory/strategies/types.py +37 -0
agno/models/aimlapi/aimlapi.py +17 -0
agno/models/anthropic/claude.py +434 -59
agno/models/aws/bedrock.py +121 -20
agno/models/aws/claude.py +131 -274
agno/models/azure/ai_foundry.py +10 -6
agno/models/azure/openai_chat.py +33 -10
agno/models/base.py +1162 -561
agno/models/cerebras/cerebras.py +120 -24
agno/models/cerebras/cerebras_openai.py +21 -2
agno/models/cohere/chat.py +65 -6
agno/models/cometapi/cometapi.py +18 -1
agno/models/dashscope/dashscope.py +2 -3
agno/models/deepinfra/deepinfra.py +18 -1
agno/models/deepseek/deepseek.py +69 -3
agno/models/fireworks/fireworks.py +18 -1
agno/models/google/gemini.py +959 -89
agno/models/google/utils.py +22 -0
agno/models/groq/groq.py +48 -18
agno/models/huggingface/huggingface.py +17 -6
agno/models/ibm/watsonx.py +16 -6
agno/models/internlm/internlm.py +18 -1
agno/models/langdb/langdb.py +13 -1
agno/models/litellm/chat.py +88 -9
agno/models/litellm/litellm_openai.py +18 -1
agno/models/message.py +24 -5
agno/models/meta/llama.py +40 -13
agno/models/meta/llama_openai.py +22 -21
agno/models/metrics.py +12 -0
agno/models/mistral/mistral.py +8 -4
agno/models/n1n/__init__.py +3 -0
agno/models/n1n/n1n.py +57 -0
agno/models/nebius/nebius.py +6 -7
agno/models/nvidia/nvidia.py +20 -3
agno/models/ollama/__init__.py +2 -0
agno/models/ollama/chat.py +17 -6
agno/models/ollama/responses.py +100 -0
agno/models/openai/__init__.py +2 -0
agno/models/openai/chat.py +117 -26
agno/models/openai/open_responses.py +46 -0
agno/models/openai/responses.py +110 -32
agno/models/openrouter/__init__.py +2 -0
agno/models/openrouter/openrouter.py +67 -2
agno/models/openrouter/responses.py +146 -0
agno/models/perplexity/perplexity.py +19 -1
agno/models/portkey/portkey.py +7 -6
agno/models/requesty/requesty.py +19 -2
agno/models/response.py +20 -2
agno/models/sambanova/sambanova.py +20 -3
agno/models/siliconflow/siliconflow.py +19 -2
agno/models/together/together.py +20 -3
agno/models/vercel/v0.py +20 -3
agno/models/vertexai/claude.py +124 -4
agno/models/vllm/vllm.py +19 -14
agno/models/xai/xai.py +19 -2
agno/os/app.py +467 -137
agno/os/auth.py +253 -5
agno/os/config.py +22 -0
agno/os/interfaces/a2a/a2a.py +7 -6
agno/os/interfaces/a2a/router.py +635 -26
agno/os/interfaces/a2a/utils.py +32 -33
agno/os/interfaces/agui/agui.py +5 -3
agno/os/interfaces/agui/router.py +26 -16
agno/os/interfaces/agui/utils.py +97 -57
agno/os/interfaces/base.py +7 -7
agno/os/interfaces/slack/router.py +16 -7
agno/os/interfaces/slack/slack.py +7 -7
agno/os/interfaces/whatsapp/router.py +35 -7
agno/os/interfaces/whatsapp/security.py +3 -1
agno/os/interfaces/whatsapp/whatsapp.py +11 -8
agno/os/managers.py +326 -0
agno/os/mcp.py +652 -79
agno/os/middleware/__init__.py +4 -0
agno/os/middleware/jwt.py +718 -115
agno/os/middleware/trailing_slash.py +27 -0
agno/os/router.py +105 -1558
agno/os/routers/agents/__init__.py +3 -0
agno/os/routers/agents/router.py +655 -0
agno/os/routers/agents/schema.py +288 -0
agno/os/routers/components/__init__.py +3 -0
agno/os/routers/components/components.py +475 -0
agno/os/routers/database.py +155 -0
agno/os/routers/evals/evals.py +111 -18
agno/os/routers/evals/schemas.py +38 -5
agno/os/routers/evals/utils.py +80 -11
agno/os/routers/health.py +3 -3
agno/os/routers/knowledge/knowledge.py +284 -35
agno/os/routers/knowledge/schemas.py +14 -2
agno/os/routers/memory/memory.py +274 -11
agno/os/routers/memory/schemas.py +44 -3
agno/os/routers/metrics/metrics.py +30 -15
agno/os/routers/metrics/schemas.py +10 -6
agno/os/routers/registry/__init__.py +3 -0
agno/os/routers/registry/registry.py +337 -0
agno/os/routers/session/session.py +143 -14
agno/os/routers/teams/__init__.py +3 -0
agno/os/routers/teams/router.py +550 -0
agno/os/routers/teams/schema.py +280 -0
agno/os/routers/traces/__init__.py +3 -0
agno/os/routers/traces/schemas.py +414 -0
agno/os/routers/traces/traces.py +549 -0
agno/os/routers/workflows/__init__.py +3 -0
agno/os/routers/workflows/router.py +757 -0
agno/os/routers/workflows/schema.py +139 -0
agno/os/schema.py +157 -584
agno/os/scopes.py +469 -0
agno/os/settings.py +3 -0
agno/os/utils.py +574 -185
agno/reasoning/anthropic.py +85 -1
agno/reasoning/azure_ai_foundry.py +93 -1
agno/reasoning/deepseek.py +102 -2
agno/reasoning/default.py +6 -7
agno/reasoning/gemini.py +87 -3
agno/reasoning/groq.py +109 -2
agno/reasoning/helpers.py +6 -7
agno/reasoning/manager.py +1238 -0
agno/reasoning/ollama.py +93 -1
agno/reasoning/openai.py +115 -1
agno/reasoning/vertexai.py +85 -1
agno/registry/__init__.py +3 -0
agno/registry/registry.py +68 -0
agno/remote/__init__.py +3 -0
agno/remote/base.py +581 -0
agno/run/__init__.py +2 -4
agno/run/agent.py +134 -19
agno/run/base.py +49 -1
agno/run/cancel.py +65 -52
agno/run/cancellation_management/__init__.py +9 -0
agno/run/cancellation_management/base.py +78 -0
agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
agno/run/requirement.py +181 -0
agno/run/team.py +111 -19
agno/run/workflow.py +2 -1
agno/session/agent.py +57 -92
agno/session/summary.py +1 -1
agno/session/team.py +62 -115
agno/session/workflow.py +353 -57
agno/skills/__init__.py +17 -0
agno/skills/agent_skills.py +377 -0
agno/skills/errors.py +32 -0
agno/skills/loaders/__init__.py +4 -0
agno/skills/loaders/base.py +27 -0
agno/skills/loaders/local.py +216 -0
agno/skills/skill.py +65 -0
agno/skills/utils.py +107 -0
agno/skills/validator.py +277 -0
agno/table.py +10 -0
agno/team/__init__.py +5 -1
agno/team/remote.py +447 -0
agno/team/team.py +3769 -2202
agno/tools/brandfetch.py +27 -18
agno/tools/browserbase.py +225 -16
agno/tools/crawl4ai.py +3 -0
agno/tools/duckduckgo.py +25 -71
agno/tools/exa.py +0 -21
agno/tools/file.py +14 -13
agno/tools/file_generation.py +12 -6
agno/tools/firecrawl.py +15 -7
agno/tools/function.py +94 -113
agno/tools/google_bigquery.py +11 -2
agno/tools/google_drive.py +4 -3
agno/tools/knowledge.py +9 -4
agno/tools/mcp/mcp.py +301 -18
agno/tools/mcp/multi_mcp.py +269 -14
agno/tools/mem0.py +11 -10
agno/tools/memory.py +47 -46
agno/tools/mlx_transcribe.py +10 -7
agno/tools/models/nebius.py +5 -5
agno/tools/models_labs.py +20 -10
agno/tools/nano_banana.py +151 -0
agno/tools/parallel.py +0 -7
agno/tools/postgres.py +76 -36
agno/tools/python.py +14 -6
agno/tools/reasoning.py +30 -23
agno/tools/redshift.py +406 -0
agno/tools/shopify.py +1519 -0
agno/tools/spotify.py +919 -0
agno/tools/tavily.py +4 -1
agno/tools/toolkit.py +253 -18
agno/tools/websearch.py +93 -0
agno/tools/website.py +1 -1
agno/tools/wikipedia.py +1 -1
agno/tools/workflow.py +56 -48
agno/tools/yfinance.py +12 -11
agno/tracing/__init__.py +12 -0
agno/tracing/exporter.py +161 -0
agno/tracing/schemas.py +276 -0
agno/tracing/setup.py +112 -0
agno/utils/agent.py +251 -10
agno/utils/cryptography.py +22 -0
agno/utils/dttm.py +33 -0
agno/utils/events.py +264 -7
agno/utils/hooks.py +111 -3
agno/utils/http.py +161 -2
agno/utils/mcp.py +49 -8
agno/utils/media.py +22 -1
agno/utils/models/ai_foundry.py +9 -2
agno/utils/models/claude.py +20 -5
agno/utils/models/cohere.py +9 -2
agno/utils/models/llama.py +9 -2
agno/utils/models/mistral.py +4 -2
agno/utils/os.py +0 -0
agno/utils/print_response/agent.py +99 -16
agno/utils/print_response/team.py +223 -24
agno/utils/print_response/workflow.py +0 -2
agno/utils/prompts.py +8 -6
agno/utils/remote.py +23 -0
agno/utils/response.py +1 -13
agno/utils/string.py +91 -2
agno/utils/team.py +62 -12
agno/utils/tokens.py +657 -0
agno/vectordb/base.py +15 -2
agno/vectordb/cassandra/cassandra.py +1 -1
agno/vectordb/chroma/__init__.py +2 -1
agno/vectordb/chroma/chromadb.py +468 -23
agno/vectordb/clickhouse/clickhousedb.py +1 -1
agno/vectordb/couchbase/couchbase.py +6 -2
agno/vectordb/lancedb/lance_db.py +7 -38
agno/vectordb/lightrag/lightrag.py +7 -6
agno/vectordb/milvus/milvus.py +118 -84
agno/vectordb/mongodb/__init__.py +2 -1
agno/vectordb/mongodb/mongodb.py +14 -31
agno/vectordb/pgvector/pgvector.py +120 -66
agno/vectordb/pineconedb/pineconedb.py +2 -19
agno/vectordb/qdrant/__init__.py +2 -1
agno/vectordb/qdrant/qdrant.py +33 -56
agno/vectordb/redis/__init__.py +2 -1
agno/vectordb/redis/redisdb.py +19 -31
agno/vectordb/singlestore/singlestore.py +17 -9
agno/vectordb/surrealdb/surrealdb.py +2 -38
agno/vectordb/weaviate/__init__.py +2 -1
agno/vectordb/weaviate/weaviate.py +7 -3
agno/workflow/__init__.py +5 -1
agno/workflow/agent.py +2 -2
agno/workflow/condition.py +12 -10
agno/workflow/loop.py +28 -9
agno/workflow/parallel.py +21 -13
agno/workflow/remote.py +362 -0
agno/workflow/router.py +12 -9
agno/workflow/step.py +261 -36
agno/workflow/steps.py +12 -8
agno/workflow/types.py +40 -77
agno/workflow/workflow.py +939 -213
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
agno-2.4.3.dist-info/RECORD +677 -0
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
agno/tools/googlesearch.py +0 -98
agno/tools/memori.py +0 -339
agno-2.2.13.dist-info/RECORD +0 -575
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0

agno/knowledge/reader/pdf_reader.py CHANGED Viewed

@@ -9,7 +9,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_error, log_info, logger
+from agno.utils.log import log_debug, log_error
 try:
     from pypdf import PdfReader as DocumentReader  # noqa: F401
@@ -200,10 +200,11 @@ class BasePDFReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for PDF readers."""
         return [
             ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.CODE_CHUNKER,
             ChunkingStrategyType.FIXED_SIZE_CHUNKER,
             ChunkingStrategyType.AGENTIC_CHUNKER,
             ChunkingStrategyType.SEMANTIC_CHUNKER,
@@ -218,31 +219,29 @@ class BasePDFReader(Reader):
     def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
         """Determines the document name from the source or a provided name."""
-        try:
-            if name:
-                return name
-            if isinstance(pdf_source, str):
-                return pdf_source.split("/")[-1].split(".")[0].replace(" ", "_")
-            # Assumes a file-like object with a .name attribute
-            return pdf_source.name.split(".")[0]
-        except Exception:
-            # The original code had a bug here, it should check `name` first.
-            return name or "pdf"
+        if name:
+            return name
+        if isinstance(pdf_source, str):
+            return Path(pdf_source).stem.replace(" ", "_")
+        if isinstance(pdf_source, Path):
+            return pdf_source.stem.replace(" ", "_")
+        return getattr(pdf_source, "name", "pdf_file").split(".")[0].replace(" ", "_")
     def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
         if not doc_reader.is_encrypted:
             return True
         # Use provided password or fall back to instance password
-        pdf_password = password or self.password
-        if not pdf_password:
-            logger.error(f'PDF file "{doc_name}" is password protected but no password provided')
+        # Note: Empty string "" is a valid password for PDFs with blank user password
+        pdf_password = self.password if password is None else password
+        if pdf_password is None:
+            log_error(f'PDF file "{doc_name}" is password protected but no password provided')
             return False
         try:
             decrypted_pdf = doc_reader.decrypt(pdf_password)
             if decrypted_pdf:
-                log_info(f'Successfully decrypted PDF file "{doc_name}" with user password')
+                log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
                 return True
             else:
                 log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
@@ -337,19 +336,25 @@ class PDFReader(BasePDFReader):
     """Reader for PDF files"""
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.PDF]
     def read(
-        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
+        self,
+        pdf: Optional[Union[str, Path, IO[Any]]] = None,
+        name: Optional[str] = None,
+        password: Optional[str] = None,
     ) -> List[Document]:
+        if pdf is None:
+            log_error("No pdf provided")
+            return []
         doc_name = self._get_doc_name(pdf, name)
-        log_info(f"Reading: {doc_name}")
+        log_debug(f"Reading: {doc_name}")
         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
+            log_error(f"Error reading PDF: {e}")
             return []
         # Handle PDF decryption
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
@@ -368,12 +373,12 @@ class PDFReader(BasePDFReader):
             log_error("No pdf provided")
             return []
         doc_name = self._get_doc_name(pdf, name)
-        log_info(f"Reading: {doc_name}")
+        log_debug(f"Reading: {doc_name}")
         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
+            log_error(f"Error reading PDF: {e}")
             return []
         # Handle PDF decryption
@@ -394,11 +399,11 @@ class PDFImageReader(BasePDFReader):
             raise ValueError("No pdf provided")
         doc_name = self._get_doc_name(pdf, name)
-        log_info(f"Reading: {doc_name}")
+        log_debug(f"Reading: {doc_name}")
         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
+            log_error(f"Error reading PDF: {e}")
             return []
         # Handle PDF decryption
@@ -406,7 +411,7 @@ class PDFImageReader(BasePDFReader):
             return []
         # Read and chunk.
-        return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
+        return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=True)
     async def async_read(
         self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
@@ -415,12 +420,12 @@ class PDFImageReader(BasePDFReader):
             raise ValueError("No pdf provided")
         doc_name = self._get_doc_name(pdf, name)
-        log_info(f"Reading: {doc_name}")
+        log_debug(f"Reading: {doc_name}")
         try:
             pdf_reader = DocumentReader(pdf)
         except PdfStreamError as e:
-            logger.error(f"Error reading PDF: {e}")
+            log_error(f"Error reading PDF: {e}")
             return []
         # Handle PDF decryption
@@ -428,4 +433,4 @@ class PDFImageReader(BasePDFReader):
             return []
         # Read and chunk.
-        return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
+        return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=True)

agno/knowledge/reader/pptx_reader.py CHANGED Viewed

@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_info, logger
+from agno.utils.log import log_debug, log_error
 try:
     from pptx import Presentation  # type: ignore
@@ -23,10 +23,11 @@ class PPTXReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for PPTX readers."""
         return [
             ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.CODE_CHUNKER,
             ChunkingStrategyType.FIXED_SIZE_CHUNKER,
             ChunkingStrategyType.SEMANTIC_CHUNKER,
             ChunkingStrategyType.AGENTIC_CHUNKER,
@@ -34,7 +35,7 @@ class PPTXReader(Reader):
         ]
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.PPTX]
     def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -43,15 +44,13 @@ class PPTXReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-                log_info(f"Reading: {file}")
+                log_debug(f"Reading: {file}")
                 presentation = Presentation(str(file))
                 doc_name = name or file.stem
             else:
-                log_info(f"Reading uploaded file: {getattr(file, 'name', 'pptx_file')}")
+                log_debug(f"Reading uploaded file: {getattr(file, 'name', 'BytesIO')}")
                 presentation = Presentation(file)
-                doc_name = name or (
-                    getattr(file, "name", "pptx_file").split(".")[0] if hasattr(file, "name") else "pptx_file"
-                )
+                doc_name = name or getattr(file, "name", "pptx_file").split(".")[0]
             # Extract text from all slides
             slide_texts = []
@@ -89,7 +88,7 @@ class PPTXReader(Reader):
             return documents
         except Exception as e:
-            logger.error(f"Error reading file: {e}")
+            log_error(f"Error reading file: {e}")
             return []
     async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -97,5 +96,5 @@ class PPTXReader(Reader):
         try:
             return await asyncio.to_thread(self.read, file, name)
         except Exception as e:
-            logger.error(f"Error reading file asynchronously: {e}")
+            log_error(f"Error reading file asynchronously: {e}")
             return []

agno/knowledge/reader/reader_factory.py CHANGED Viewed

@@ -10,6 +10,74 @@ class ReaderFactory:
     # Cache for instantiated readers
     _reader_cache: Dict[str, Reader] = {}
+    # Static metadata for readers - avoids instantiation just to get metadata
+    READER_METADATA: Dict[str, Dict[str, str]] = {
+        "pdf": {
+            "name": "PdfReader",
+            "description": "Processes PDF documents with OCR support for images and text extraction",
+        },
+        "csv": {
+            "name": "CsvReader",
+            "description": "Parses CSV files with custom delimiter support",
+        },
+        "excel": {
+            "name": "ExcelReader",
+            "description": "Processes Excel workbooks (.xlsx and .xls) with sheet filtering and row-based chunking",
+        },
+        "field_labeled_csv": {
+            "name": "FieldLabeledCsvReader",
+            "description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
+        },
+        "docx": {
+            "name": "DocxReader",
+            "description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
+        },
+        "pptx": {
+            "name": "PptxReader",
+            "description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
+        },
+        "json": {
+            "name": "JsonReader",
+            "description": "Processes JSON data structures and API responses with nested object handling",
+        },
+        "markdown": {
+            "name": "MarkdownReader",
+            "description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
+        },
+        "text": {
+            "name": "TextReader",
+            "description": "Handles plain text files with customizable chunking strategies and encoding detection",
+        },
+        "website": {
+            "name": "WebsiteReader",
+            "description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
+        },
+        "firecrawl": {
+            "name": "FirecrawlReader",
+            "description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
+        },
+        "tavily": {
+            "name": "TavilyReader",
+            "description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
+        },
+        "youtube": {
+            "name": "YouTubeReader",
+            "description": "Extracts transcripts and metadata from YouTube videos and playlists",
+        },
+        "arxiv": {
+            "name": "ArxivReader",
+            "description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
+        },
+        "wikipedia": {
+            "name": "WikipediaReader",
+            "description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
+        },
+        "web_search": {
+            "name": "WebSearchReader",
+            "description": "Executes web searches and processes results with relevance ranking and content extraction",
+        },
+    }
     @classmethod
     def _get_pdf_reader(cls, **kwargs) -> Reader:
         """Get PDF reader instance."""
@@ -29,11 +97,23 @@ class ReaderFactory:
         config: Dict[str, Any] = {
             "name": "CSV Reader",
-            "description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
+            "description": "Parses CSV files with custom delimiter support",
         }
         config.update(kwargs)
         return CSVReader(**config)
+    @classmethod
+    def _get_excel_reader(cls, **kwargs) -> Reader:
+        """Get Excel reader instance."""
+        from agno.knowledge.reader.excel_reader import ExcelReader
+        config: Dict[str, Any] = {
+            "name": "Excel Reader",
+            "description": "Processes Excel workbooks (.xlsx and .xls) with sheet filtering and row-based chunking",
+        }
+        config.update(kwargs)
+        return ExcelReader(**config)
     @classmethod
     def _get_field_labeled_csv_reader(cls, **kwargs) -> Reader:
         """Get Field Labeled CSV reader instance."""
@@ -203,6 +283,53 @@ class ReaderFactory:
             raise ValueError(f"Unknown reader: {reader_key}")
         return getattr(cls, method_name)
+    @classmethod
+    def get_reader_class(cls, reader_key: str) -> type:
+        """Get the reader CLASS without instantiation.
+        This is useful for accessing class methods like get_supported_chunking_strategies()
+        without the overhead of creating an instance.
+        Args:
+            reader_key: The reader key (e.g., 'pdf', 'csv', 'markdown')
+        Returns:
+            The reader class (not an instance)
+        Raises:
+            ValueError: If the reader key is unknown
+            ImportError: If the reader's dependencies are not installed
+        """
+        # Map reader keys to their import paths
+        reader_class_map: Dict[str, tuple] = {
+            "pdf": ("agno.knowledge.reader.pdf_reader", "PDFReader"),
+            "csv": ("agno.knowledge.reader.csv_reader", "CSVReader"),
+            "excel": ("agno.knowledge.reader.excel_reader", "ExcelReader"),
+            "field_labeled_csv": ("agno.knowledge.reader.field_labeled_csv_reader", "FieldLabeledCSVReader"),
+            "docx": ("agno.knowledge.reader.docx_reader", "DocxReader"),
+            "pptx": ("agno.knowledge.reader.pptx_reader", "PPTXReader"),
+            "json": ("agno.knowledge.reader.json_reader", "JSONReader"),
+            "markdown": ("agno.knowledge.reader.markdown_reader", "MarkdownReader"),
+            "text": ("agno.knowledge.reader.text_reader", "TextReader"),
+            "website": ("agno.knowledge.reader.website_reader", "WebsiteReader"),
+            "firecrawl": ("agno.knowledge.reader.firecrawl_reader", "FirecrawlReader"),
+            "tavily": ("agno.knowledge.reader.tavily_reader", "TavilyReader"),
+            "youtube": ("agno.knowledge.reader.youtube_reader", "YouTubeReader"),
+            "arxiv": ("agno.knowledge.reader.arxiv_reader", "ArxivReader"),
+            "wikipedia": ("agno.knowledge.reader.wikipedia_reader", "WikipediaReader"),
+            "web_search": ("agno.knowledge.reader.web_search_reader", "WebSearchReader"),
+        }
+        if reader_key not in reader_class_map:
+            raise ValueError(f"Unknown reader: {reader_key}")
+        module_path, class_name = reader_class_map[reader_key]
+        import importlib
+        module = importlib.import_module(module_path)
+        return getattr(module, class_name)
     @classmethod
     def create_reader(cls, reader_key: str, **kwargs) -> Reader:
         """Create a reader instance with the given key and optional overrides."""
@@ -227,6 +354,13 @@ class ReaderFactory:
             return cls.create_reader("pdf")
         elif extension in [".csv", "text/csv"]:
             return cls.create_reader("csv")
+        elif extension in [
+            ".xlsx",
+            ".xls",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            "application/vnd.ms-excel",
+        ]:
+            return cls.create_reader("excel")
         elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
             return cls.create_reader("docx")
         elif extension == ".pptx":

agno/knowledge/reader/s3_reader.py CHANGED Viewed

@@ -10,7 +10,7 @@ from agno.knowledge.reader.base import Reader
 from agno.knowledge.reader.pdf_reader import PDFReader
 from agno.knowledge.reader.text_reader import TextReader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_info, logger
+from agno.utils.log import log_debug, log_error
 try:
     from agno.aws.resource.s3.object import S3Object  # type: ignore
@@ -35,9 +35,10 @@ class S3Reader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for S3 readers."""
         return [
+            ChunkingStrategyType.CODE_CHUNKER,
             ChunkingStrategyType.FIXED_SIZE_CHUNKER,
             ChunkingStrategyType.AGENTIC_CHUNKER,
             ChunkingStrategyType.DOCUMENT_CHUNKER,
@@ -46,41 +47,32 @@ class S3Reader(Reader):
         ]
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.FILE, ContentType.URL, ContentType.TEXT]
     def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
         try:
-            log_info(f"Reading S3 file: {s3_object.uri}")
+            log_debug(f"Reading S3 file: {s3_object.uri}")
+            doc_name = name or s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
             # Read PDF files
             if s3_object.uri.endswith(".pdf"):
                 object_resource = s3_object.get_resource()
                 object_body = object_resource.get()["Body"]
-                doc_name = (
-                    s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
-                    if name is None
-                    else name
-                )
                 return PDFReader().read(pdf=BytesIO(object_body.read()), name=doc_name)
             # Read text files
             else:
-                doc_name = (
-                    s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
-                    if name is None
-                    else name
-                )
                 obj_name = s3_object.name.split("/")[-1]
                 temporary_file = Path("storage").joinpath(obj_name)
                 s3_object.download(temporary_file)
                 documents = TextReader().read(file=temporary_file, name=doc_name)
                 temporary_file.unlink()
                 return documents
         except Exception as e:
-            logger.error(f"Error reading: {s3_object.uri}: {e}")
+            log_error(f"Error reading: {s3_object.uri}: {e}")
         return []

agno/knowledge/reader/tavily_reader.py CHANGED Viewed

@@ -62,9 +62,10 @@ class TavilyReader(Reader):
         self.extract_depth = extract_depth
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Tavily readers."""
         return [
+            ChunkingStrategyType.CODE_CHUNKER,
             ChunkingStrategyType.SEMANTIC_CHUNKER,
             ChunkingStrategyType.FIXED_SIZE_CHUNKER,
             ChunkingStrategyType.AGENTIC_CHUNKER,
@@ -73,7 +74,7 @@ class TavilyReader(Reader):
         ]
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.URL]
     def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
@@ -140,7 +141,6 @@ class TavilyReader(Reader):
                 documents.extend(self.chunk_document(Document(name=name or url, id=url, content=content)))
             else:
                 documents.append(Document(name=name or url, id=url, content=content))
             return documents
         except Exception as e:

agno/knowledge/reader/text_reader.py CHANGED Viewed

@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_info, logger
+from agno.utils.log import log_debug, log_error, log_warning
 class TextReader(Reader):
@@ -18,9 +18,10 @@ class TextReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
     @classmethod
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Text readers."""
         return [
+            ChunkingStrategyType.CODE_CHUNKER,
             ChunkingStrategyType.FIXED_SIZE_CHUNKER,
             ChunkingStrategyType.AGENTIC_CHUNKER,
             ChunkingStrategyType.DOCUMENT_CHUNKER,
@@ -29,7 +30,7 @@ class TextReader(Reader):
         ]
     @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.TXT]
     def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -37,12 +38,12 @@ class TextReader(Reader):
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-                log_info(f"Reading: {file}")
+                log_debug(f"Reading: {file}")
                 file_name = name or file.stem
-                file_contents = file.read_text(self.encoding or "utf-8")
+                file_contents = file.read_text(encoding=self.encoding or "utf-8")
             else:
-                file_name = name or file.name.split(".")[0]
-                log_info(f"Reading uploaded file: {file_name}")
+                log_debug(f"Reading uploaded file: {getattr(file, 'name', 'BytesIO')}")
+                file_name = name or getattr(file, "name", "text_file").split(".")[0]
                 file.seek(0)
                 file_contents = file.read().decode(self.encoding or "utf-8")
@@ -60,7 +61,7 @@ class TextReader(Reader):
                 return chunked_documents
             return documents
         except Exception as e:
-            logger.error(f"Error reading: {file}: {e}")
+            log_error(f"Error reading: {file}: {e}")
             return []
     async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -69,7 +70,7 @@ class TextReader(Reader):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
-                log_info(f"Reading asynchronously: {file}")
+                log_debug(f"Reading asynchronously: {file}")
                 file_name = name or file.stem
                 try:
@@ -78,11 +79,11 @@ class TextReader(Reader):
                     async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
                         file_contents = await f.read()
                 except ImportError:
-                    logger.warning("aiofiles not installed, using synchronous file I/O")
-                    file_contents = file.read_text(self.encoding or "utf-8")
+                    log_warning("aiofiles not installed, using synchronous file I/O")
+                    file_contents = file.read_text(encoding=self.encoding or "utf-8")
             else:
-                log_info(f"Reading uploaded file asynchronously: {file.name}")
-                file_name = name or file.name.split(".")[0]
+                log_debug(f"Reading uploaded file asynchronously: {getattr(file, 'name', 'BytesIO')}")
+                file_name = name or getattr(file, "name", "text_file").split(".")[0]
                 file.seek(0)
                 file_contents = file.read().decode(self.encoding or "utf-8")
@@ -96,7 +97,7 @@ class TextReader(Reader):
                 return await self._async_chunk_document(document)
             return [document]
         except Exception as e:
-            logger.error(f"Error reading asynchronously: {file}: {e}")
+            log_error(f"Error reading asynchronously: {file}: {e}")
             return []
     async def _async_chunk_document(self, document: Document) -> List[Document]:

agno/knowledge/reader/utils/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+from agno.knowledge.reader.utils.spreadsheet import (
+    convert_xls_cell_value,
+    excel_rows_to_documents,
+    get_workbook_name,
+    infer_file_extension,
+    row_to_csv_line,
+    stringify_cell_value,
+)
+__all__ = [
+    "convert_xls_cell_value",
+    "excel_rows_to_documents",
+    "get_workbook_name",
+    "infer_file_extension",
+    "row_to_csv_line",
+    "stringify_cell_value",
+]

agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl

agno 2.2.13py3-none-any.whl → 2.4.3py3-none-any.whl