agno 2.3.23__py3-none-any.whl → 2.3.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +6 -0
- agno/db/mongo/mongo.py +9 -1
- agno/knowledge/chunking/markdown.py +94 -8
- agno/knowledge/chunking/semantic.py +2 -2
- agno/knowledge/knowledge.py +215 -207
- agno/models/base.py +28 -8
- agno/os/routers/knowledge/knowledge.py +19 -3
- agno/os/utils.py +1 -1
- agno/team/team.py +5 -3
- agno/tools/crawl4ai.py +3 -0
- agno/tools/file.py +14 -13
- agno/tools/function.py +9 -1
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/python.py +14 -6
- agno/tools/toolkit.py +33 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/chromadb.py +1 -1
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +1 -1
- agno/vectordb/milvus/milvus.py +1 -1
- agno/vectordb/mongodb/mongodb.py +13 -3
- agno/vectordb/pgvector/pgvector.py +1 -1
- agno/vectordb/pineconedb/pineconedb.py +2 -2
- agno/vectordb/qdrant/qdrant.py +1 -1
- agno/vectordb/redis/redisdb.py +2 -2
- agno/vectordb/singlestore/singlestore.py +1 -1
- agno/vectordb/surrealdb/surrealdb.py +2 -2
- agno/vectordb/weaviate/weaviate.py +1 -1
- {agno-2.3.23.dist-info → agno-2.3.24.dist-info}/METADATA +1 -1
- {agno-2.3.23.dist-info → agno-2.3.24.dist-info}/RECORD +33 -33
- {agno-2.3.23.dist-info → agno-2.3.24.dist-info}/WHEEL +0 -0
- {agno-2.3.23.dist-info → agno-2.3.24.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.23.dist-info → agno-2.3.24.dist-info}/top_level.txt +0 -0
agno/agent/agent.py
CHANGED
|
@@ -3302,6 +3302,9 @@ class Agent:
|
|
|
3302
3302
|
tools=tools,
|
|
3303
3303
|
tool_choice=self.tool_choice,
|
|
3304
3304
|
tool_call_limit=self.tool_call_limit,
|
|
3305
|
+
run_response=run_response,
|
|
3306
|
+
send_media_to_model=self.send_media_to_model,
|
|
3307
|
+
compression_manager=self.compression_manager if self.compress_tool_results else None,
|
|
3305
3308
|
)
|
|
3306
3309
|
|
|
3307
3310
|
# Check for cancellation after model processing
|
|
@@ -4015,6 +4018,9 @@ class Agent:
|
|
|
4015
4018
|
tools=_tools,
|
|
4016
4019
|
tool_choice=self.tool_choice,
|
|
4017
4020
|
tool_call_limit=self.tool_call_limit,
|
|
4021
|
+
run_response=run_response,
|
|
4022
|
+
send_media_to_model=self.send_media_to_model,
|
|
4023
|
+
compression_manager=self.compression_manager if self.compress_tool_results else None,
|
|
4018
4024
|
)
|
|
4019
4025
|
# Check for cancellation after model call
|
|
4020
4026
|
await araise_if_cancelled(run_response.run_id) # type: ignore
|
agno/db/mongo/mongo.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from datetime import date, datetime, timedelta, timezone
|
|
3
|
+
from importlib import metadata
|
|
3
4
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
|
4
5
|
from uuid import uuid4
|
|
5
6
|
|
|
@@ -31,10 +32,13 @@ try:
|
|
|
31
32
|
from pymongo import MongoClient, ReturnDocument
|
|
32
33
|
from pymongo.collection import Collection
|
|
33
34
|
from pymongo.database import Database
|
|
35
|
+
from pymongo.driver_info import DriverInfo
|
|
34
36
|
from pymongo.errors import OperationFailure
|
|
35
37
|
except ImportError:
|
|
36
38
|
raise ImportError("`pymongo` not installed. Please install it using `pip install pymongo`")
|
|
37
39
|
|
|
40
|
+
DRIVER_METADATA = DriverInfo(name="Agno", version=metadata.version("agno"))
|
|
41
|
+
|
|
38
42
|
|
|
39
43
|
class MongoDb(BaseDb):
|
|
40
44
|
def __init__(
|
|
@@ -92,10 +96,14 @@ class MongoDb(BaseDb):
|
|
|
92
96
|
|
|
93
97
|
_client: Optional[MongoClient] = db_client
|
|
94
98
|
if _client is None and db_url is not None:
|
|
95
|
-
_client = MongoClient(db_url)
|
|
99
|
+
_client = MongoClient(db_url, driver=DRIVER_METADATA)
|
|
96
100
|
if _client is None:
|
|
97
101
|
raise ValueError("One of db_url or db_client must be provided")
|
|
98
102
|
|
|
103
|
+
# append_metadata was added in PyMongo 4.14.0, but is a valid database name on earlier versions
|
|
104
|
+
if callable(_client.append_metadata):
|
|
105
|
+
_client.append_metadata(DRIVER_METADATA)
|
|
106
|
+
|
|
99
107
|
self.db_url: Optional[str] = db_url
|
|
100
108
|
self.db_client: MongoClient = _client
|
|
101
109
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import re
|
|
2
3
|
import tempfile
|
|
3
|
-
from typing import List
|
|
4
|
+
from typing import List, Union
|
|
4
5
|
|
|
5
6
|
try:
|
|
6
7
|
from unstructured.chunking.title import chunk_by_title # type: ignore
|
|
@@ -13,17 +14,83 @@ from agno.knowledge.document.base import Document
|
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class MarkdownChunking(ChunkingStrategy):
|
|
16
|
-
"""A chunking strategy that splits markdown based on structure like headers, paragraphs and sections
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
"""A chunking strategy that splits markdown based on structure like headers, paragraphs and sections
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
chunk_size: Maximum size of each chunk in characters
|
|
21
|
+
overlap: Number of characters to overlap between chunks
|
|
22
|
+
split_on_headings: Controls heading-based splitting behavior:
|
|
23
|
+
- False: Use size-based chunking (default)
|
|
24
|
+
- True: Split on all headings (H1-H6)
|
|
25
|
+
- int: Split on headings at or above this level (1-6)
|
|
26
|
+
e.g., 2 splits on H1 and H2, keeping H3-H6 content together
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(self, chunk_size: int = 5000, overlap: int = 0, split_on_headings: Union[bool, int] = False):
|
|
19
30
|
self.chunk_size = chunk_size
|
|
20
31
|
self.overlap = overlap
|
|
32
|
+
self.split_on_headings = split_on_headings
|
|
33
|
+
|
|
34
|
+
# Validate split_on_headings parameter
|
|
35
|
+
# Note: In Python, isinstance(False, int) is True, so we exclude booleans explicitly
|
|
36
|
+
if isinstance(split_on_headings, int) and not isinstance(split_on_headings, bool):
|
|
37
|
+
if not (1 <= split_on_headings <= 6):
|
|
38
|
+
raise ValueError("split_on_headings must be between 1 and 6 when using integer value")
|
|
39
|
+
|
|
40
|
+
def _split_by_headings(self, content: str) -> List[str]:
|
|
41
|
+
"""
|
|
42
|
+
Split markdown content by headings, keeping each heading with its content.
|
|
43
|
+
Returns a list of sections where each section starts with a heading.
|
|
44
|
+
|
|
45
|
+
When split_on_headings is an int, only splits on headings at or above that level.
|
|
46
|
+
For example, split_on_headings=2 splits on H1 and H2, keeping H3-H6 content together.
|
|
47
|
+
"""
|
|
48
|
+
# Determine which heading levels to split on
|
|
49
|
+
if isinstance(self.split_on_headings, int) and not isinstance(self.split_on_headings, bool):
|
|
50
|
+
# Split on headings at or above this level (1 to split_on_headings)
|
|
51
|
+
max_heading_level = self.split_on_headings
|
|
52
|
+
heading_pattern = rf"^#{{{1},{max_heading_level}}}\s+.+$"
|
|
53
|
+
else:
|
|
54
|
+
# split_on_headings is True: split on all headings (# to ######)
|
|
55
|
+
heading_pattern = r"^#{1,6}\s+.+$"
|
|
56
|
+
|
|
57
|
+
# Split content while keeping the delimiter (heading)
|
|
58
|
+
# Use non-capturing group for the pattern to avoid extra capture groups
|
|
59
|
+
parts = re.split(f"({heading_pattern})", content, flags=re.MULTILINE)
|
|
60
|
+
|
|
61
|
+
sections = []
|
|
62
|
+
current_section = ""
|
|
63
|
+
|
|
64
|
+
for part in parts:
|
|
65
|
+
if not part or not part.strip():
|
|
66
|
+
continue
|
|
67
|
+
|
|
68
|
+
# Check if this part is a heading
|
|
69
|
+
if re.match(heading_pattern, part.strip(), re.MULTILINE):
|
|
70
|
+
# Save previous section if exists
|
|
71
|
+
if current_section.strip():
|
|
72
|
+
sections.append(current_section.strip())
|
|
73
|
+
# Start new section with this heading
|
|
74
|
+
current_section = part
|
|
75
|
+
else:
|
|
76
|
+
# Add content to current section
|
|
77
|
+
current_section += "\n\n" + part if current_section else part
|
|
78
|
+
|
|
79
|
+
# Don't forget the last section
|
|
80
|
+
if current_section.strip():
|
|
81
|
+
sections.append(current_section.strip())
|
|
82
|
+
|
|
83
|
+
return sections if sections else [content]
|
|
21
84
|
|
|
22
85
|
def _partition_markdown_content(self, content: str) -> List[str]:
|
|
23
86
|
"""
|
|
24
87
|
Partition markdown content and return a list of text chunks.
|
|
25
88
|
Falls back to paragraph splitting if the markdown chunking fails.
|
|
26
89
|
"""
|
|
90
|
+
# When split_on_headings is True or an int, use regex-based splitting to preserve headings
|
|
91
|
+
if self.split_on_headings:
|
|
92
|
+
return self._split_by_headings(content)
|
|
93
|
+
|
|
27
94
|
try:
|
|
28
95
|
# Create a temporary file with the markdown content.
|
|
29
96
|
# This is the recommended usage of the unstructured library.
|
|
@@ -38,7 +105,6 @@ class MarkdownChunking(ChunkingStrategy):
|
|
|
38
105
|
raw_paragraphs = content.split("\n\n")
|
|
39
106
|
return [self.clean_text(para) for para in raw_paragraphs]
|
|
40
107
|
|
|
41
|
-
# Chunk by title with some default values
|
|
42
108
|
chunked_elements = chunk_by_title(
|
|
43
109
|
elements=elements,
|
|
44
110
|
max_characters=self.chunk_size,
|
|
@@ -74,7 +140,13 @@ class MarkdownChunking(ChunkingStrategy):
|
|
|
74
140
|
|
|
75
141
|
def chunk(self, document: Document) -> List[Document]:
|
|
76
142
|
"""Split markdown document into chunks based on markdown structure"""
|
|
77
|
-
|
|
143
|
+
# If content is empty, return as-is
|
|
144
|
+
if not document.content:
|
|
145
|
+
return [document]
|
|
146
|
+
|
|
147
|
+
# When split_on_headings is enabled, always split by headings regardless of size
|
|
148
|
+
# Only skip chunking for small content when using size-based chunking
|
|
149
|
+
if not self.split_on_headings and len(document.content) <= self.chunk_size:
|
|
78
150
|
return [document]
|
|
79
151
|
|
|
80
152
|
# Split using markdown chunking logic, or fallback to paragraphs
|
|
@@ -90,7 +162,20 @@ class MarkdownChunking(ChunkingStrategy):
|
|
|
90
162
|
section = section.strip()
|
|
91
163
|
section_size = len(section)
|
|
92
164
|
|
|
93
|
-
|
|
165
|
+
# When split_on_headings is True or an int, each section becomes its own chunk
|
|
166
|
+
if self.split_on_headings:
|
|
167
|
+
meta_data = chunk_meta_data.copy()
|
|
168
|
+
meta_data["chunk"] = chunk_number
|
|
169
|
+
chunk_id = None
|
|
170
|
+
if document.id:
|
|
171
|
+
chunk_id = f"{document.id}_{chunk_number}"
|
|
172
|
+
elif document.name:
|
|
173
|
+
chunk_id = f"{document.name}_{chunk_number}"
|
|
174
|
+
meta_data["chunk_size"] = section_size
|
|
175
|
+
|
|
176
|
+
chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=section))
|
|
177
|
+
chunk_number += 1
|
|
178
|
+
elif current_size + section_size <= self.chunk_size:
|
|
94
179
|
current_chunk.append(section)
|
|
95
180
|
current_size += section_size
|
|
96
181
|
else:
|
|
@@ -114,7 +199,8 @@ class MarkdownChunking(ChunkingStrategy):
|
|
|
114
199
|
current_chunk = [section]
|
|
115
200
|
current_size = section_size
|
|
116
201
|
|
|
117
|
-
|
|
202
|
+
# Handle remaining content (only when not split_on_headings)
|
|
203
|
+
if current_chunk and not self.split_on_headings:
|
|
118
204
|
meta_data = chunk_meta_data.copy()
|
|
119
205
|
meta_data["chunk"] = chunk_number
|
|
120
206
|
chunk_id = None
|
|
@@ -17,7 +17,7 @@ except ImportError:
|
|
|
17
17
|
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
18
18
|
from agno.knowledge.document.base import Document
|
|
19
19
|
from agno.knowledge.embedder.base import Embedder
|
|
20
|
-
from agno.utils.log import
|
|
20
|
+
from agno.utils.log import log_debug
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def _get_chonkie_embedder_wrapper(embedder: Embedder):
|
|
@@ -87,7 +87,7 @@ class SemanticChunking(ChunkingStrategy):
|
|
|
87
87
|
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
88
88
|
|
|
89
89
|
embedder = OpenAIEmbedder() # type: ignore
|
|
90
|
-
|
|
90
|
+
log_debug("Embedder not provided, using OpenAIEmbedder as default.")
|
|
91
91
|
self.embedder = embedder
|
|
92
92
|
self.chunk_size = chunk_size
|
|
93
93
|
self.similarity_threshold = similarity_threshold
|