agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0

agno/knowledge/chunking/markdown.py:

```diff
@@ -1,6 +1,7 @@
 import os
+import re
 import tempfile
-from typing import List
+from typing import List, Union
 
 try:
     from unstructured.chunking.title import chunk_by_title  # type: ignore
@@ -13,17 +14,83 @@ from agno.knowledge.document.base import Document
 
 
 class MarkdownChunking(ChunkingStrategy):
-    """A chunking strategy that splits markdown based on structure like headers, paragraphs and sections
-
-
+    """A chunking strategy that splits markdown based on structure like headers, paragraphs and sections
+
+    Args:
+        chunk_size: Maximum size of each chunk in characters
+        overlap: Number of characters to overlap between chunks
+        split_on_headings: Controls heading-based splitting behavior:
+            - False: Use size-based chunking (default)
+            - True: Split on all headings (H1-H6)
+            - int: Split on headings at or above this level (1-6)
+                e.g., 2 splits on H1 and H2, keeping H3-H6 content together
+    """
+
+    def __init__(self, chunk_size: int = 5000, overlap: int = 0, split_on_headings: Union[bool, int] = False):
         self.chunk_size = chunk_size
         self.overlap = overlap
+        self.split_on_headings = split_on_headings
+
+        # Validate split_on_headings parameter
+        # Note: In Python, isinstance(False, int) is True, so we exclude booleans explicitly
+        if isinstance(split_on_headings, int) and not isinstance(split_on_headings, bool):
+            if not (1 <= split_on_headings <= 6):
+                raise ValueError("split_on_headings must be between 1 and 6 when using integer value")
+
+    def _split_by_headings(self, content: str) -> List[str]:
+        """
+        Split markdown content by headings, keeping each heading with its content.
+        Returns a list of sections where each section starts with a heading.
+
+        When split_on_headings is an int, only splits on headings at or above that level.
+        For example, split_on_headings=2 splits on H1 and H2, keeping H3-H6 content together.
+        """
+        # Determine which heading levels to split on
+        if isinstance(self.split_on_headings, int) and not isinstance(self.split_on_headings, bool):
+            # Split on headings at or above this level (1 to split_on_headings)
+            max_heading_level = self.split_on_headings
+            heading_pattern = rf"^#{{{1},{max_heading_level}}}\s+.+$"
+        else:
+            # split_on_headings is True: split on all headings (# to ######)
+            heading_pattern = r"^#{1,6}\s+.+$"
+
+        # Split content while keeping the delimiter (heading)
+        # Use non-capturing group for the pattern to avoid extra capture groups
+        parts = re.split(f"({heading_pattern})", content, flags=re.MULTILINE)
+
+        sections = []
+        current_section = ""
+
+        for part in parts:
+            if not part or not part.strip():
+                continue
+
+            # Check if this part is a heading
+            if re.match(heading_pattern, part.strip(), re.MULTILINE):
+                # Save previous section if exists
+                if current_section.strip():
+                    sections.append(current_section.strip())
+                # Start new section with this heading
+                current_section = part
+            else:
+                # Add content to current section
+                current_section += "\n\n" + part if current_section else part
+
+        # Don't forget the last section
+        if current_section.strip():
+            sections.append(current_section.strip())
+
+        return sections if sections else [content]
 
     def _partition_markdown_content(self, content: str) -> List[str]:
         """
         Partition markdown content and return a list of text chunks.
         Falls back to paragraph splitting if the markdown chunking fails.
         """
+        # When split_on_headings is True or an int, use regex-based splitting to preserve headings
+        if self.split_on_headings:
+            return self._split_by_headings(content)
+
         try:
             # Create a temporary file with the markdown content.
             # This is the recommended usage of the unstructured library.
@@ -35,9 +102,9 @@ class MarkdownChunking(ChunkingStrategy):
                 elements = partition_md(filename=temp_file_path)
 
                 if not elements:
-
+                    raw_paragraphs = content.split("\n\n")
+                    return [self.clean_text(para) for para in raw_paragraphs]
 
-                # Chunk by title with some default values
                 chunked_elements = chunk_by_title(
                     elements=elements,
                     max_characters=self.chunk_size,
@@ -57,7 +124,10 @@ class MarkdownChunking(ChunkingStrategy):
                     if chunk_text.strip():
                         text_chunks.append(chunk_text.strip())
 
-
+                if text_chunks:
+                    return text_chunks
+                raw_paragraphs = content.split("\n\n")
+                return [self.clean_text(para) for para in raw_paragraphs]
 
             # Always clean up the temporary file
             finally:
@@ -65,11 +135,18 @@ class MarkdownChunking(ChunkingStrategy):
 
         # Fallback to simple paragraph splitting if the markdown chunking fails
         except Exception:
-
+            raw_paragraphs = content.split("\n\n")
+            return [self.clean_text(para) for para in raw_paragraphs]
 
     def chunk(self, document: Document) -> List[Document]:
         """Split markdown document into chunks based on markdown structure"""
-
+        # If content is empty, return as-is
+        if not document.content:
+            return [document]
+
+        # When split_on_headings is enabled, always split by headings regardless of size
+        # Only skip chunking for small content when using size-based chunking
+        if not self.split_on_headings and len(document.content) <= self.chunk_size:
            return [document]
 
        # Split using markdown chunking logic, or fallback to paragraphs
@@ -85,7 +162,20 @@ class MarkdownChunking(ChunkingStrategy):
            section = section.strip()
            section_size = len(section)
 
-
+            # When split_on_headings is True or an int, each section becomes its own chunk
+            if self.split_on_headings:
+                meta_data = chunk_meta_data.copy()
+                meta_data["chunk"] = chunk_number
+                chunk_id = None
+                if document.id:
+                    chunk_id = f"{document.id}_{chunk_number}"
+                elif document.name:
+                    chunk_id = f"{document.name}_{chunk_number}"
+                meta_data["chunk_size"] = section_size
+
+                chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=section))
+                chunk_number += 1
+            elif current_size + section_size <= self.chunk_size:
                current_chunk.append(section)
                current_size += section_size
            else:
@@ -109,7 +199,8 @@ class MarkdownChunking(ChunkingStrategy):
                current_chunk = [section]
                current_size = section_size
 
-
+        # Handle remaining content (only when not split_on_headings)
+        if current_chunk and not self.split_on_headings:
            meta_data = chunk_meta_data.copy()
            meta_data["chunk"] = chunk_number
            chunk_id = None
```
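
For orientation, a minimal usage sketch of the new `split_on_headings` option added above. The sample document, its id, and the chosen values are illustrative, and `Document` is assumed to accept the keyword arguments used elsewhere in this diff:

```python
from agno.knowledge.chunking.markdown import MarkdownChunking
from agno.knowledge.document.base import Document

# Illustrative markdown document; not taken from the package.
doc = Document(
    id="guide",
    content="# Intro\nOverview.\n\n## Setup\nInstall steps.\n\n### Details\nExtra notes.",
)

size_based = MarkdownChunking(chunk_size=5000)          # default: size-based chunking
per_heading = MarkdownChunking(split_on_headings=True)  # split on every heading (H1-H6)
top_level = MarkdownChunking(split_on_headings=2)       # split on H1/H2 only; H3-H6 stay attached

# Each top-level section becomes its own chunk, with ids like "guide_1", "guide_2"
for chunk in top_level.chunk(doc):
    print(chunk.id, repr(chunk.content[:40]))
```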
agno/knowledge/chunking/recursive.py:

```diff
@@ -31,7 +31,7 @@ class RecursiveChunking(ChunkingStrategy):
         start = 0
         chunk_meta_data = document.meta_data
         chunk_number = 1
-        content =
+        content = document.content
 
         while start < len(content):
             end = min(start + self.chunk_size, len(content))
@@ -43,7 +43,7 @@ class RecursiveChunking(ChunkingStrategy):
                     end = start + last_sep + 1
                     break
 
-            chunk = content[start:end]
+            chunk = self.clean_text(content[start:end])
             meta_data = chunk_meta_data.copy()
             meta_data["chunk"] = chunk_number
             chunk_id = None
```
agno/knowledge/chunking/semantic.py:

```diff
@@ -1,63 +1,145 @@
-import
-
+from typing import Any, Dict, List, Literal, Optional, Union
+
+try:
+    import numpy as np
+except ImportError:
+    raise ImportError("`numpy` not installed. Please install using `pip install numpy`")
+
+try:
+    from chonkie import SemanticChunker
+    from chonkie.embeddings.base import BaseEmbeddings
+except ImportError:
+    raise ImportError(
+        "`chonkie` is required for semantic chunking. "
+        'Please install it using `pip install "chonkie[semantic]"` to use SemanticChunking.'
+    )
 
 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
 from agno.knowledge.embedder.base import Embedder
-from agno.
+from agno.utils.log import log_debug
 
 
-
-"""
+def _get_chonkie_embedder_wrapper(embedder: Embedder):
+    """Create a wrapper that adapts Agno Embedder to chonkie's BaseEmbeddings interface."""
+
+    class _ChonkieEmbedderWrapper(BaseEmbeddings):
+        """Wrapper to make Agno Embedders compatible with chonkie."""
+
+        def __init__(self, agno_embedder: Embedder):
+            super().__init__()
+            self._embedder = agno_embedder
+
+        def embed(self, text: str):
+            embedding = self._embedder.get_embedding(text)  # type: ignore[attr-defined]
+            return np.array(embedding, dtype=np.float32)
+
+        def get_tokenizer(self):
+            """Return a simple token counter function."""
+            return lambda text: len(text.split())
 
-
-
+        @property
+        def dimension(self) -> int:
+            return getattr(self._embedder, "dimensions")
+
+    return _ChonkieEmbedderWrapper(embedder)
+
+
+class SemanticChunking(ChunkingStrategy):
+    """Chunking strategy that splits text into semantic chunks using chonkie.
+
+    Args:
+        embedder: The embedder to use for generating embeddings. Can be:
+            - A string model identifier (e.g., "minishlab/potion-base-32M") for chonkie's built-in models
+            - A chonkie BaseEmbeddings instance (used directly)
+            - An Agno Embedder (wrapped for chonkie compatibility)
+        chunk_size: Maximum tokens allowed per chunk.
+        similarity_threshold: Threshold for semantic similarity (0-1).
+        similarity_window: Number of sentences to consider for similarity calculation.
+        min_sentences_per_chunk: Minimum number of sentences per chunk.
+        min_characters_per_sentence: Minimum number of characters per sentence.
+        delimiters: Delimiters to use for sentence splitting.
+        include_delimiters: Whether to include delimiter in prev/next sentence or None.
+        skip_window: Number of groups to skip when merging (0=disabled).
+        filter_window: Window length for the Savitzky-Golay filter.
+        filter_polyorder: Polynomial order for the Savitzky-Golay filter.
+        filter_tolerance: Tolerance for the Savitzky-Golay filter.
+        chunker_params: Additional parameters to pass to chonkie's SemanticChunker.
+    """
+
+    def __init__(
+        self,
+        embedder: Optional[Union[str, Embedder, BaseEmbeddings]] = None,
+        chunk_size: int = 5000,
+        similarity_threshold: float = 0.5,
+        similarity_window: int = 3,
+        min_sentences_per_chunk: int = 1,
+        min_characters_per_sentence: int = 24,
+        delimiters: Optional[List[str]] = None,
+        include_delimiters: Literal["prev", "next", None] = "prev",
+        skip_window: int = 0,
+        filter_window: int = 5,
+        filter_polyorder: int = 3,
+        filter_tolerance: float = 0.2,
+        chunker_params: Optional[Dict[str, Any]] = None,
+    ):
+        if embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+
+            embedder = OpenAIEmbedder()  # type: ignore
+            log_debug("Embedder not provided, using OpenAIEmbedder as default.")
+        self.embedder = embedder
         self.chunk_size = chunk_size
         self.similarity_threshold = similarity_threshold
-        self.
+        self.similarity_window = similarity_window
+        self.min_sentences_per_chunk = min_sentences_per_chunk
+        self.min_characters_per_sentence = min_characters_per_sentence
+        self.delimiters = delimiters if delimiters is not None else [". ", "! ", "? ", "\n"]
+        self.include_delimiters = include_delimiters
+        self.skip_window = skip_window
+        self.filter_window = filter_window
+        self.filter_polyorder = filter_polyorder
+        self.filter_tolerance = filter_tolerance
+        self.chunker_params = chunker_params
+        self.chunker: Optional[SemanticChunker] = None
 
     def _initialize_chunker(self):
         """Lazily initialize the chunker with chonkie dependency."""
-        if self.chunker is None:
-            ...
-            self.chunker = SemanticChunker(
-                embedding_model=getattr(self.embedder, "id", None) or "text-embedding-3-small",
-                chunk_size=self.chunk_size,
-                threshold=self.similarity_threshold,
-            )
+        if self.chunker is not None:
+            return
+
+        # Determine embedding model based on type:
+        # - str: pass directly to chonkie (uses chonkie's built-in models)
+        # - BaseEmbeddings: pass directly to chonkie
+        # - Agno Embedder: wrap for chonkie compatibility
+        embedding_model: Union[str, BaseEmbeddings]
+        if isinstance(self.embedder, str):
+            embedding_model = self.embedder
+        elif isinstance(self.embedder, BaseEmbeddings):
+            embedding_model = self.embedder
+        elif isinstance(self.embedder, Embedder):
+            embedding_model = _get_chonkie_embedder_wrapper(self.embedder)
+        else:
+            raise ValueError("Invalid embedder type. Must be a string, BaseEmbeddings, or Embedder instance.")
+
+        _chunker_params: Dict[str, Any] = {
+            "embedding_model": embedding_model,
+            "chunk_size": self.chunk_size,
+            "threshold": self.similarity_threshold,
+            "similarity_window": self.similarity_window,
+            "min_sentences_per_chunk": self.min_sentences_per_chunk,
+            "min_characters_per_sentence": self.min_characters_per_sentence,
+            "delim": self.delimiters,
+            "include_delim": self.include_delimiters,
+            "skip_window": self.skip_window,
+            "filter_window": self.filter_window,
+            "filter_polyorder": self.filter_polyorder,
+            "filter_tolerance": self.filter_tolerance,
+        }
+        if self.chunker_params:
+            _chunker_params.update(self.chunker_params)
+
+        self.chunker = SemanticChunker(**_chunker_params)
 
     def chunk(self, document: Document) -> List[Document]:
         """Split document into semantic chunks using chonkie"""
```
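
A hedged sketch of how the reworked `SemanticChunking` constructor can be driven, following the three embedder forms documented above. The model id, threshold, and other values are illustrative, and the default path assumes an OpenAI API key is configured:

```python
from agno.knowledge.chunking.semantic import SemanticChunking
from agno.knowledge.embedder.openai import OpenAIEmbedder

# 1) A chonkie model identifier string: passed straight through to chonkie's built-in embeddings.
from_string = SemanticChunking(embedder="minishlab/potion-base-32M", chunk_size=512)

# 2) An Agno Embedder: wrapped internally so chonkie can call it via BaseEmbeddings.
from_agno = SemanticChunking(
    embedder=OpenAIEmbedder(),
    similarity_threshold=0.6,
    skip_window=1,
)

# 3) No embedder: falls back to OpenAIEmbedder and logs a debug message.
default = SemanticChunking()
```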
agno/knowledge/chunking/strategy.py:

```diff
@@ -12,6 +12,10 @@ class ChunkingStrategy(ABC):
     def chunk(self, document: Document) -> List[Document]:
         raise NotImplementedError
 
+    async def achunk(self, document: Document) -> List[Document]:
+        """Async version of chunk. Override for truly async implementations."""
+        return self.chunk(document)
+
     def clean_text(self, text: str) -> str:
         """Clean the text by replacing multiple newlines with a single newline"""
         import re
@@ -36,6 +40,7 @@ class ChunkingStrategyType(str, Enum):
     """Enumeration of available chunking strategies."""
 
     AGENTIC_CHUNKER = "AgenticChunker"
+    CODE_CHUNKER = "CodeChunker"
     DOCUMENT_CHUNKER = "DocumentChunker"
     RECURSIVE_CHUNKER = "RecursiveChunker"
     SEMANTIC_CHUNKER = "SemanticChunker"
@@ -70,6 +75,7 @@ class ChunkingStrategyFactory:
         """Create an instance of the chunking strategy with the given parameters."""
         strategy_map = {
             ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
+            ChunkingStrategyType.CODE_CHUNKER: cls._create_code_chunking,
             ChunkingStrategyType.DOCUMENT_CHUNKER: cls._create_document_chunking,
             ChunkingStrategyType.RECURSIVE_CHUNKER: cls._create_recursive_chunking,
             ChunkingStrategyType.SEMANTIC_CHUNKER: cls._create_semantic_chunking,
@@ -91,6 +97,18 @@ class ChunkingStrategyFactory:
         # Remove overlap since AgenticChunking doesn't support it
         return AgenticChunking(**kwargs)
 
+    @classmethod
+    def _create_code_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
+        from agno.knowledge.chunking.code import CodeChunking
+
+        # CodeChunking accepts chunk_size but not overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        # Remove overlap since CodeChunking doesn't support it
+        return CodeChunking(**kwargs)
+
     @classmethod
     def _create_document_chunking(
         cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
```
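
The `achunk` default added to the base class simply delegates to the synchronous `chunk`, so existing strategies keep working in async code paths without changes. A small sketch under that assumption; the toy strategy and document below are illustrative only, and the factory's public entry point is not shown in this hunk, so it is not exercised here:

```python
import asyncio
from typing import List

from agno.knowledge.chunking.strategy import ChunkingStrategy
from agno.knowledge.document.base import Document


class UpperCaseChunking(ChunkingStrategy):
    """Toy strategy used only to demonstrate the inherited achunk() default."""

    def chunk(self, document: Document) -> List[Document]:
        return [Document(name=document.name, content=document.content.upper())]


# achunk() is inherited and runs chunk() synchronously unless a subclass overrides it.
doc = Document(name="note", content="hello world")
chunks = asyncio.run(UpperCaseChunking().achunk(doc))
print(chunks[0].content)  # HELLO WORLD
```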
agno/knowledge/embedder/azure_openai.py:

```diff
@@ -18,7 +18,6 @@ except ImportError:
 @dataclass
 class AzureOpenAIEmbedder(Embedder):
     id: str = "text-embedding-3-small"  # This has to match the model that you deployed at the provided URL
-
     dimensions: int = 1536
     encoding_format: Literal["float", "base64"] = "float"
     user: Optional[str] = None
```
agno/knowledge/embedder/mistral.py:

```diff
@@ -37,7 +37,7 @@ class MistralEmbedder(Embedder):
             "api_key": self.api_key,
             "endpoint": self.endpoint,
             "max_retries": self.max_retries,
-            "
+            "timeout_ms": self.timeout * 1000 if self.timeout else None,
         }
         _client_params = {k: v for k, v in _client_params.items() if v is not None}
 
```
agno/knowledge/embedder/openai.py:

```diff
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple
 from typing_extensions import Literal
 
 from agno.knowledge.embedder.base import Embedder
-from agno.utils.log import
+from agno.utils.log import log_info, log_warning
 
 try:
     from openai import AsyncOpenAI
@@ -71,7 +71,8 @@ class OpenAIEmbedder(Embedder):
         }
         if self.user is not None:
             _request_params["user"] = self.user
-
+        # Pass dimensions for text-embedding-3 models or when using custom base_url (third-party APIs)
+        if self.id.startswith("text-embedding-3") or self.base_url is not None:
             _request_params["dimensions"] = self.dimensions
         if self.request_params:
             _request_params.update(self.request_params)
@@ -82,7 +83,7 @@ class OpenAIEmbedder(Embedder):
             response: CreateEmbeddingResponse = self.response(text=text)
             return response.data[0].embedding
         except Exception as e:
-
+            log_warning(e)
             return []
 
     def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
@@ -95,7 +96,7 @@ class OpenAIEmbedder(Embedder):
                 return embedding, usage.model_dump()
             return embedding, None
         except Exception as e:
-
+            log_warning(e)
             return [], None
 
     async def async_get_embedding(self, text: str) -> List[float]:
@@ -106,7 +107,8 @@ class OpenAIEmbedder(Embedder):
         }
         if self.user is not None:
             req["user"] = self.user
-
+        # Pass dimensions for text-embedding-3 models or when using custom base_url (third-party APIs)
+        if self.id.startswith("text-embedding-3") or self.base_url is not None:
             req["dimensions"] = self.dimensions
         if self.request_params:
             req.update(self.request_params)
@@ -115,7 +117,7 @@ class OpenAIEmbedder(Embedder):
             response: CreateEmbeddingResponse = await self.aclient.embeddings.create(**req)
             return response.data[0].embedding
         except Exception as e:
-
+            log_warning(e)
             return []
 
     async def async_get_embedding_and_usage(self, text: str):
@@ -126,7 +128,8 @@ class OpenAIEmbedder(Embedder):
         }
         if self.user is not None:
             req["user"] = self.user
-
+        # Pass dimensions for text-embedding-3 models or when using custom base_url (third-party APIs)
+        if self.id.startswith("text-embedding-3") or self.base_url is not None:
             req["dimensions"] = self.dimensions
         if self.request_params:
             req.update(self.request_params)
@@ -137,7 +140,7 @@ class OpenAIEmbedder(Embedder):
             usage = response.usage
             return embedding, usage.model_dump() if usage else None
         except Exception as e:
-
+            log_warning(f"Error getting embedding: {e}")
             return [], None
 
     async def async_get_embeddings_batch_and_usage(
@@ -154,7 +157,7 @@ class OpenAIEmbedder(Embedder):
         """
         all_embeddings = []
         all_usage = []
-
+        log_info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size} (async)")
 
         for i in range(0, len(texts), self.batch_size):
             batch_texts = texts[i : i + self.batch_size]
@@ -166,7 +169,8 @@ class OpenAIEmbedder(Embedder):
             }
             if self.user is not None:
                 req["user"] = self.user
-
+            # Pass dimensions for text-embedding-3 models or when using custom base_url (third-party APIs)
+            if self.id.startswith("text-embedding-3") or self.base_url is not None:
                 req["dimensions"] = self.dimensions
             if self.request_params:
                 req.update(self.request_params)
@@ -180,7 +184,7 @@ class OpenAIEmbedder(Embedder):
                 usage_dict = response.usage.model_dump() if response.usage else None
                 all_usage.extend([usage_dict] * len(batch_embeddings))
             except Exception as e:
-
+                log_warning(f"Error in async batch embedding: {e}")
                 # Fallback to individual calls for this batch
                 for text in batch_texts:
                     try:
@@ -188,7 +192,7 @@ class OpenAIEmbedder(Embedder):
                         all_embeddings.append(embedding)
                         all_usage.append(usage)
                     except Exception as e2:
-
+                        log_warning(f"Error in individual async embedding fallback: {e2}")
                         all_embeddings.append([])
                         all_usage.append(None)
 
```
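
Finally, a sketch of when `OpenAIEmbedder` now sends the `dimensions` request parameter, per the condition added above. It assumes the dataclass fields referenced in the diff (`id`, `dimensions`, `base_url`) are constructor arguments, and the provider/model names and endpoint are made up:

```python
from agno.knowledge.embedder.openai import OpenAIEmbedder

# text-embedding-3-* models: "dimensions" is included in the request.
small = OpenAIEmbedder(id="text-embedding-3-small", dimensions=1536)

# Custom base_url (an OpenAI-compatible third-party API): "dimensions" is also sent,
# even though the model id does not start with "text-embedding-3".
compat = OpenAIEmbedder(
    id="example-provider/embedding-model",   # illustrative model id
    base_url="https://api.example.com/v1",   # illustrative endpoint
    dimensions=1024,
)

# Older OpenAI models with no base_url (e.g. text-embedding-ada-002): parameter omitted.
legacy = OpenAIEmbedder(id="text-embedding-ada-002")

# On failure, the embedding helpers now log a warning and return an empty result.
vector = small.get_embedding("hello world")
```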