PyPI - agno - Versions diffs - 2.3.23__py3-none-any.whl → 2.3.24__py3-none-any.whl - Mend

agno 2.3.23__py3-none-any.whl → 2.3.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

agno/agent/agent.py +6 -0
agno/db/mongo/mongo.py +9 -1
agno/knowledge/chunking/markdown.py +94 -8
agno/knowledge/chunking/semantic.py +2 -2
agno/knowledge/knowledge.py +215 -207
agno/models/base.py +28 -8
agno/os/routers/knowledge/knowledge.py +19 -3
agno/os/utils.py +1 -1
agno/team/team.py +5 -3
agno/tools/crawl4ai.py +3 -0
agno/tools/file.py +14 -13
agno/tools/function.py +9 -1
agno/tools/mlx_transcribe.py +10 -7
agno/tools/python.py +14 -6
agno/tools/toolkit.py +33 -2
agno/vectordb/cassandra/cassandra.py +1 -1
agno/vectordb/chroma/chromadb.py +1 -1
agno/vectordb/clickhouse/clickhousedb.py +1 -1
agno/vectordb/couchbase/couchbase.py +1 -1
agno/vectordb/milvus/milvus.py +1 -1
agno/vectordb/mongodb/mongodb.py +13 -3
agno/vectordb/pgvector/pgvector.py +1 -1
agno/vectordb/pineconedb/pineconedb.py +2 -2
agno/vectordb/qdrant/qdrant.py +1 -1
agno/vectordb/redis/redisdb.py +2 -2
agno/vectordb/singlestore/singlestore.py +1 -1
agno/vectordb/surrealdb/surrealdb.py +2 -2
agno/vectordb/weaviate/weaviate.py +1 -1
{agno-2.3.23.dist-info → agno-2.3.24.dist-info}/METADATA +1 -1
{agno-2.3.23.dist-info → agno-2.3.24.dist-info}/RECORD +33 -33
{agno-2.3.23.dist-info → agno-2.3.24.dist-info}/WHEEL +0 -0
{agno-2.3.23.dist-info → agno-2.3.24.dist-info}/licenses/LICENSE +0 -0
{agno-2.3.23.dist-info → agno-2.3.24.dist-info}/top_level.txt +0 -0

agno/agent/agent.py CHANGED Viewed

@@ -3302,6 +3302,9 @@ class Agent:
                         tools=tools,
                         tool_choice=self.tool_choice,
                         tool_call_limit=self.tool_call_limit,
+                        run_response=run_response,
+                        send_media_to_model=self.send_media_to_model,
+                        compression_manager=self.compression_manager if self.compress_tool_results else None,
                     )
                     # Check for cancellation after model processing
@@ -4015,6 +4018,9 @@ class Agent:
                         tools=_tools,
                         tool_choice=self.tool_choice,
                         tool_call_limit=self.tool_call_limit,
+                        run_response=run_response,
+                        send_media_to_model=self.send_media_to_model,
+                        compression_manager=self.compression_manager if self.compress_tool_results else None,
                     )
                     # Check for cancellation after model call
                     await araise_if_cancelled(run_response.run_id)  # type: ignore

agno/db/mongo/mongo.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import time
 from datetime import date, datetime, timedelta, timezone
+from importlib import metadata
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 from uuid import uuid4
@@ -31,10 +32,13 @@ try:
     from pymongo import MongoClient, ReturnDocument
     from pymongo.collection import Collection
     from pymongo.database import Database
+    from pymongo.driver_info import DriverInfo
     from pymongo.errors import OperationFailure
 except ImportError:
     raise ImportError("`pymongo` not installed. Please install it using `pip install pymongo`")
+DRIVER_METADATA = DriverInfo(name="Agno", version=metadata.version("agno"))
 class MongoDb(BaseDb):
     def __init__(
@@ -92,10 +96,14 @@ class MongoDb(BaseDb):
         _client: Optional[MongoClient] = db_client
         if _client is None and db_url is not None:
-            _client = MongoClient(db_url)
+            _client = MongoClient(db_url, driver=DRIVER_METADATA)
         if _client is None:
             raise ValueError("One of db_url or db_client must be provided")
+        # append_metadata was added in PyMongo 4.14.0, but is a valid database name on earlier versions
+        if callable(_client.append_metadata):
+            _client.append_metadata(DRIVER_METADATA)
         self.db_url: Optional[str] = db_url
         self.db_client: MongoClient = _client

agno/knowledge/chunking/markdown.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
+import re
 import tempfile
-from typing import List
+from typing import List, Union
 try:
     from unstructured.chunking.title import chunk_by_title  # type: ignore
@@ -13,17 +14,83 @@ from agno.knowledge.document.base import Document
 class MarkdownChunking(ChunkingStrategy):
-    """A chunking strategy that splits markdown based on structure like headers, paragraphs and sections"""
-    def __init__(self, chunk_size: int = 5000, overlap: int = 0):
+    """A chunking strategy that splits markdown based on structure like headers, paragraphs and sections
+    Args:
+        chunk_size: Maximum size of each chunk in characters
+        overlap: Number of characters to overlap between chunks
+        split_on_headings: Controls heading-based splitting behavior:
+            - False: Use size-based chunking (default)
+            - True: Split on all headings (H1-H6)
+            - int: Split on headings at or above this level (1-6)
+                  e.g., 2 splits on H1 and H2, keeping H3-H6 content together
+    """
+    def __init__(self, chunk_size: int = 5000, overlap: int = 0, split_on_headings: Union[bool, int] = False):
         self.chunk_size = chunk_size
         self.overlap = overlap
+        self.split_on_headings = split_on_headings
+        # Validate split_on_headings parameter
+        # Note: In Python, isinstance(False, int) is True, so we exclude booleans explicitly
+        if isinstance(split_on_headings, int) and not isinstance(split_on_headings, bool):
+            if not (1 <= split_on_headings <= 6):
+                raise ValueError("split_on_headings must be between 1 and 6 when using integer value")
+    def _split_by_headings(self, content: str) -> List[str]:
+        """
+        Split markdown content by headings, keeping each heading with its content.
+        Returns a list of sections where each section starts with a heading.
+        When split_on_headings is an int, only splits on headings at or above that level.
+        For example, split_on_headings=2 splits on H1 and H2, keeping H3-H6 content together.
+        """
+        # Determine which heading levels to split on
+        if isinstance(self.split_on_headings, int) and not isinstance(self.split_on_headings, bool):
+            # Split on headings at or above this level (1 to split_on_headings)
+            max_heading_level = self.split_on_headings
+            heading_pattern = rf"^#{{{1},{max_heading_level}}}\s+.+$"
+        else:
+            # split_on_headings is True: split on all headings (# to ######)
+            heading_pattern = r"^#{1,6}\s+.+$"
+        # Split content while keeping the delimiter (heading)
+        # Use non-capturing group for the pattern to avoid extra capture groups
+        parts = re.split(f"({heading_pattern})", content, flags=re.MULTILINE)
+        sections = []
+        current_section = ""
+        for part in parts:
+            if not part or not part.strip():
+                continue
+            # Check if this part is a heading
+            if re.match(heading_pattern, part.strip(), re.MULTILINE):
+                # Save previous section if exists
+                if current_section.strip():
+                    sections.append(current_section.strip())
+                # Start new section with this heading
+                current_section = part
+            else:
+                # Add content to current section
+                current_section += "\n\n" + part if current_section else part
+        # Don't forget the last section
+        if current_section.strip():
+            sections.append(current_section.strip())
+        return sections if sections else [content]
     def _partition_markdown_content(self, content: str) -> List[str]:
         """
         Partition markdown content and return a list of text chunks.
         Falls back to paragraph splitting if the markdown chunking fails.
         """
+        # When split_on_headings is True or an int, use regex-based splitting to preserve headings
+        if self.split_on_headings:
+            return self._split_by_headings(content)
         try:
             # Create a temporary file with the markdown content.
             # This is the recommended usage of the unstructured library.
@@ -38,7 +105,6 @@ class MarkdownChunking(ChunkingStrategy):
                     raw_paragraphs = content.split("\n\n")
                     return [self.clean_text(para) for para in raw_paragraphs]
-                # Chunk by title with some default values
                 chunked_elements = chunk_by_title(
                     elements=elements,
                     max_characters=self.chunk_size,
@@ -74,7 +140,13 @@ class MarkdownChunking(ChunkingStrategy):
     def chunk(self, document: Document) -> List[Document]:
         """Split markdown document into chunks based on markdown structure"""
-        if not document.content or len(document.content) <= self.chunk_size:
+        # If content is empty, return as-is
+        if not document.content:
+            return [document]
+        # When split_on_headings is enabled, always split by headings regardless of size
+        # Only skip chunking for small content when using size-based chunking
+        if not self.split_on_headings and len(document.content) <= self.chunk_size:
             return [document]
         # Split using markdown chunking logic, or fallback to paragraphs
@@ -90,7 +162,20 @@ class MarkdownChunking(ChunkingStrategy):
             section = section.strip()
             section_size = len(section)
-            if current_size + section_size <= self.chunk_size:
+            # When split_on_headings is True or an int, each section becomes its own chunk
+            if self.split_on_headings:
+                meta_data = chunk_meta_data.copy()
+                meta_data["chunk"] = chunk_number
+                chunk_id = None
+                if document.id:
+                    chunk_id = f"{document.id}_{chunk_number}"
+                elif document.name:
+                    chunk_id = f"{document.name}_{chunk_number}"
+                meta_data["chunk_size"] = section_size
+                chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=section))
+                chunk_number += 1
+            elif current_size + section_size <= self.chunk_size:
                 current_chunk.append(section)
                 current_size += section_size
             else:
@@ -114,7 +199,8 @@ class MarkdownChunking(ChunkingStrategy):
                 current_chunk = [section]
                 current_size = section_size
-        if current_chunk:
+        # Handle remaining content (only when not split_on_headings)
+        if current_chunk and not self.split_on_headings:
             meta_data = chunk_meta_data.copy()
             meta_data["chunk"] = chunk_number
             chunk_id = None

agno/knowledge/chunking/semantic.py CHANGED Viewed

@@ -17,7 +17,7 @@ except ImportError:
 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
 from agno.knowledge.embedder.base import Embedder
-from agno.utils.log import log_info
+from agno.utils.log import log_debug
 def _get_chonkie_embedder_wrapper(embedder: Embedder):
@@ -87,7 +87,7 @@ class SemanticChunking(ChunkingStrategy):
             from agno.knowledge.embedder.openai import OpenAIEmbedder
             embedder = OpenAIEmbedder()  # type: ignore
-            log_info("Embedder not provided, using OpenAIEmbedder as default.")
+            log_debug("Embedder not provided, using OpenAIEmbedder as default.")
         self.embedder = embedder
         self.chunk_size = chunk_size
         self.similarity_threshold = similarity_threshold

agno 2.3.23__py3-none-any.whl → 2.3.24__py3-none-any.whl

agno 2.3.23__py3-none-any.whl → 2.3.24__py3-none-any.whl