agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
agno/eval/base.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
from agno.run.agent import RunInput, RunOutput
|
|
5
|
+
from agno.run.team import TeamRunInput, TeamRunOutput
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseEval(ABC):
|
|
9
|
+
"""Abstract base class for all evaluations."""
|
|
10
|
+
|
|
11
|
+
@abstractmethod
|
|
12
|
+
def pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
|
|
13
|
+
"""Perform sync pre-evals."""
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
@abstractmethod
|
|
17
|
+
async def async_pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
|
|
18
|
+
"""Perform async pre-evals."""
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
@abstractmethod
|
|
22
|
+
def post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
|
|
23
|
+
"""Perform sync post-evals."""
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
@abstractmethod
|
|
27
|
+
async def async_post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
|
|
28
|
+
"""Perform async post-evals."""
|
|
29
|
+
pass
|
agno/eval/performance.py
CHANGED
|
@@ -498,13 +498,16 @@ class PerformanceEval:
|
|
|
498
498
|
from rich.live import Live
|
|
499
499
|
from rich.status import Status
|
|
500
500
|
|
|
501
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
502
|
+
run_id = str(uuid4())
|
|
503
|
+
|
|
501
504
|
run_times = []
|
|
502
505
|
memory_usages = []
|
|
503
506
|
previous_snapshot = None
|
|
504
507
|
|
|
505
508
|
self._set_log_level()
|
|
506
509
|
|
|
507
|
-
log_debug(f"************ Evaluation Start: {
|
|
510
|
+
log_debug(f"************ Evaluation Start: {run_id} ************")
|
|
508
511
|
|
|
509
512
|
# Add a spinner while running the evaluations
|
|
510
513
|
console = Console()
|
|
@@ -615,7 +618,7 @@ class PerformanceEval:
|
|
|
615
618
|
),
|
|
616
619
|
)
|
|
617
620
|
|
|
618
|
-
log_debug(f"*********** Evaluation End: {
|
|
621
|
+
log_debug(f"*********** Evaluation End: {run_id} ***********")
|
|
619
622
|
return self.result
|
|
620
623
|
|
|
621
624
|
async def arun(
|
|
@@ -641,13 +644,16 @@ class PerformanceEval:
|
|
|
641
644
|
from rich.live import Live
|
|
642
645
|
from rich.status import Status
|
|
643
646
|
|
|
647
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
648
|
+
run_id = str(uuid4())
|
|
649
|
+
|
|
644
650
|
run_times = []
|
|
645
651
|
memory_usages = []
|
|
646
652
|
previous_snapshot = None
|
|
647
653
|
|
|
648
654
|
self._set_log_level()
|
|
649
655
|
|
|
650
|
-
log_debug(f"************ Evaluation Start: {
|
|
656
|
+
log_debug(f"************ Evaluation Start: {run_id} ************")
|
|
651
657
|
|
|
652
658
|
# Add a spinner while running the evaluations
|
|
653
659
|
console = Console()
|
|
@@ -758,7 +764,7 @@ class PerformanceEval:
|
|
|
758
764
|
),
|
|
759
765
|
)
|
|
760
766
|
|
|
761
|
-
log_debug(f"*********** Evaluation End: {
|
|
767
|
+
log_debug(f"*********** Evaluation End: {run_id} ***********")
|
|
762
768
|
return self.result
|
|
763
769
|
|
|
764
770
|
def _get_telemetry_data(self) -> Dict[str, Any]:
|
agno/eval/reliability.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
|
|
4
4
|
from uuid import uuid4
|
|
5
5
|
|
|
6
6
|
from agno.db.base import AsyncBaseDb, BaseDb
|
|
7
|
+
from agno.run.team import TeamRunOutput
|
|
7
8
|
|
|
8
9
|
if TYPE_CHECKING:
|
|
9
10
|
from rich.console import Console
|
|
@@ -11,7 +12,6 @@ if TYPE_CHECKING:
|
|
|
11
12
|
from agno.agent import RunOutput
|
|
12
13
|
from agno.db.schemas.evals import EvalType
|
|
13
14
|
from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
|
|
14
|
-
from agno.run.team import TeamRunOutput
|
|
15
15
|
from agno.utils.log import logger
|
|
16
16
|
|
|
17
17
|
|
|
@@ -86,6 +86,9 @@ class ReliabilityEval:
|
|
|
86
86
|
from rich.live import Live
|
|
87
87
|
from rich.status import Status
|
|
88
88
|
|
|
89
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
90
|
+
run_id = str(uuid4())
|
|
91
|
+
|
|
89
92
|
# Add a spinner while running the evaluations
|
|
90
93
|
console = Console()
|
|
91
94
|
with Live(console=console, transient=True) as live_log:
|
|
@@ -118,7 +121,7 @@ class ReliabilityEval:
|
|
|
118
121
|
if not tool_name:
|
|
119
122
|
continue
|
|
120
123
|
else:
|
|
121
|
-
if tool_name not in self.expected_tool_calls:
|
|
124
|
+
if self.expected_tool_calls is not None and tool_name not in self.expected_tool_calls:
|
|
122
125
|
failed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
123
126
|
else:
|
|
124
127
|
passed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
@@ -183,7 +186,7 @@ class ReliabilityEval:
|
|
|
183
186
|
),
|
|
184
187
|
)
|
|
185
188
|
|
|
186
|
-
logger.debug(f"*********** Evaluation End: {
|
|
189
|
+
logger.debug(f"*********** Evaluation End: {run_id} ***********")
|
|
187
190
|
return self.result
|
|
188
191
|
|
|
189
192
|
async def arun(self, *, print_results: bool = False) -> Optional[ReliabilityResult]:
|
|
@@ -199,6 +202,9 @@ class ReliabilityEval:
|
|
|
199
202
|
from rich.live import Live
|
|
200
203
|
from rich.status import Status
|
|
201
204
|
|
|
205
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
206
|
+
run_id = str(uuid4())
|
|
207
|
+
|
|
202
208
|
# Add a spinner while running the evaluations
|
|
203
209
|
console = Console()
|
|
204
210
|
with Live(console=console, transient=True) as live_log:
|
|
@@ -223,15 +229,18 @@ class ReliabilityEval:
|
|
|
223
229
|
|
|
224
230
|
failed_tool_calls = []
|
|
225
231
|
passed_tool_calls = []
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
if
|
|
232
|
-
|
|
232
|
+
if not actual_tool_calls:
|
|
233
|
+
failed_tool_calls = self.expected_tool_calls or []
|
|
234
|
+
else:
|
|
235
|
+
for tool_call in actual_tool_calls: # type: ignore
|
|
236
|
+
tool_name = tool_call.get("function", {}).get("name")
|
|
237
|
+
if not tool_name:
|
|
238
|
+
continue
|
|
233
239
|
else:
|
|
234
|
-
|
|
240
|
+
if self.expected_tool_calls is not None and tool_name not in self.expected_tool_calls:
|
|
241
|
+
failed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
242
|
+
else:
|
|
243
|
+
passed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
235
244
|
|
|
236
245
|
self.result = ReliabilityResult(
|
|
237
246
|
eval_status="PASSED" if len(failed_tool_calls) == 0 else "FAILED",
|
|
@@ -244,7 +253,7 @@ class ReliabilityEval:
|
|
|
244
253
|
store_result_in_file(
|
|
245
254
|
file_path=self.file_path_to_save_results,
|
|
246
255
|
name=self.name,
|
|
247
|
-
eval_id=
|
|
256
|
+
eval_id=run_id,
|
|
248
257
|
result=self.result,
|
|
249
258
|
)
|
|
250
259
|
|
|
@@ -293,7 +302,7 @@ class ReliabilityEval:
|
|
|
293
302
|
),
|
|
294
303
|
)
|
|
295
304
|
|
|
296
|
-
logger.debug(f"*********** Evaluation End: {
|
|
305
|
+
logger.debug(f"*********** Evaluation End: {run_id} ***********")
|
|
297
306
|
return self.result
|
|
298
307
|
|
|
299
308
|
def _get_telemetry_data(self) -> Dict[str, Any]:
|
agno/eval/utils.py
CHANGED
|
@@ -8,6 +8,7 @@ from agno.utils.log import log_debug, logger
|
|
|
8
8
|
|
|
9
9
|
if TYPE_CHECKING:
|
|
10
10
|
from agno.eval.accuracy import AccuracyResult
|
|
11
|
+
from agno.eval.agent_as_judge import AgentAsJudgeResult
|
|
11
12
|
from agno.eval.performance import PerformanceResult
|
|
12
13
|
from agno.eval.reliability import ReliabilityResult
|
|
13
14
|
|
|
@@ -103,7 +104,7 @@ async def async_log_eval(
|
|
|
103
104
|
|
|
104
105
|
def store_result_in_file(
|
|
105
106
|
file_path: str,
|
|
106
|
-
result: Union["AccuracyResult", "PerformanceResult", "ReliabilityResult"],
|
|
107
|
+
result: Union["AccuracyResult", "AgentAsJudgeResult", "PerformanceResult", "ReliabilityResult"],
|
|
107
108
|
eval_id: Optional[str] = None,
|
|
108
109
|
name: Optional[str] = None,
|
|
109
110
|
):
|
agno/exceptions.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
1
2
|
from enum import Enum
|
|
2
3
|
from typing import Any, Dict, List, Optional, Union
|
|
3
4
|
|
|
@@ -77,6 +78,17 @@ class AgnoError(Exception):
|
|
|
77
78
|
return str(self.message)
|
|
78
79
|
|
|
79
80
|
|
|
81
|
+
class ModelAuthenticationError(AgnoError):
|
|
82
|
+
"""Raised when model authentication fails."""
|
|
83
|
+
|
|
84
|
+
def __init__(self, message: str, status_code: int = 401, model_name: Optional[str] = None):
|
|
85
|
+
super().__init__(message, status_code)
|
|
86
|
+
self.model_name = model_name
|
|
87
|
+
|
|
88
|
+
self.type = "model_authentication_error"
|
|
89
|
+
self.error_id = "model_authentication_error"
|
|
90
|
+
|
|
91
|
+
|
|
80
92
|
class ModelProviderError(AgnoError):
|
|
81
93
|
"""Exception raised when a model provider returns an error."""
|
|
82
94
|
|
|
@@ -159,3 +171,33 @@ class OutputCheckError(Exception):
|
|
|
159
171
|
self.message = message
|
|
160
172
|
self.check_trigger = check_trigger
|
|
161
173
|
self.additional_data = additional_data
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@dataclass
|
|
177
|
+
class RetryableModelProviderError(Exception):
|
|
178
|
+
original_error: Optional[str] = None
|
|
179
|
+
# Guidance message to retry a model invocation after an error
|
|
180
|
+
retry_guidance_message: Optional[str] = None
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class RemoteServerUnavailableError(AgnoError):
|
|
184
|
+
"""Exception raised when a remote server is unavailable.
|
|
185
|
+
|
|
186
|
+
This can happen due to:
|
|
187
|
+
- Connection refused (server not running)
|
|
188
|
+
- Connection timeout
|
|
189
|
+
- Network errors
|
|
190
|
+
- DNS resolution failures
|
|
191
|
+
"""
|
|
192
|
+
|
|
193
|
+
def __init__(
|
|
194
|
+
self,
|
|
195
|
+
message: str,
|
|
196
|
+
base_url: Optional[str] = None,
|
|
197
|
+
original_error: Optional[Exception] = None,
|
|
198
|
+
):
|
|
199
|
+
super().__init__(message, status_code=503)
|
|
200
|
+
self.base_url = base_url
|
|
201
|
+
self.original_error = original_error
|
|
202
|
+
self.type = "remote_server_unavailable_error"
|
|
203
|
+
self.error_id = "remote_server_unavailable_error"
|
agno/hooks/__init__.py
ADDED
agno/hooks/decorator.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
from functools import wraps
|
|
2
|
+
from typing import Any, Callable, TypeVar, Union, overload
|
|
3
|
+
|
|
4
|
+
# Type variable for better type hints
|
|
5
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
6
|
+
|
|
7
|
+
# Attribute name used to mark hooks for background execution
|
|
8
|
+
HOOK_RUN_IN_BACKGROUND_ATTR = "_agno_run_in_background"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _is_async_function(func: Callable) -> bool:
|
|
12
|
+
"""
|
|
13
|
+
Check if a function is async, even when wrapped by decorators like @staticmethod.
|
|
14
|
+
Traverses the full wrapper chain to find the original function.
|
|
15
|
+
"""
|
|
16
|
+
from inspect import iscoroutinefunction, unwrap
|
|
17
|
+
|
|
18
|
+
# First, try the standard inspect function on the wrapper
|
|
19
|
+
if iscoroutinefunction(func):
|
|
20
|
+
return True
|
|
21
|
+
|
|
22
|
+
# Use unwrap to traverse the full __wrapped__ chain to the original function
|
|
23
|
+
try:
|
|
24
|
+
original_func = unwrap(func)
|
|
25
|
+
if original_func is not func and iscoroutinefunction(original_func):
|
|
26
|
+
return True
|
|
27
|
+
except ValueError:
|
|
28
|
+
# unwrap raises ValueError if it hits a cycle
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
# Check if the function has CO_COROUTINE flag in its code object
|
|
32
|
+
try:
|
|
33
|
+
if hasattr(func, "__code__") and func.__code__.co_flags & 0x80: # CO_COROUTINE flag
|
|
34
|
+
return True
|
|
35
|
+
except (AttributeError, TypeError):
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
return False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@overload
|
|
42
|
+
def hook() -> Callable[[F], F]: ...
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@overload
|
|
46
|
+
def hook(
|
|
47
|
+
*,
|
|
48
|
+
run_in_background: bool = False,
|
|
49
|
+
) -> Callable[[F], F]: ...
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@overload
|
|
53
|
+
def hook(func: F) -> F: ...
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def hook(*args, **kwargs) -> Union[F, Callable[[F], F]]:
|
|
57
|
+
"""Decorator to configure hook behavior.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
run_in_background: If True, this hook will be scheduled as a FastAPI background task
|
|
61
|
+
when background_tasks is available, regardless of the agent/team's
|
|
62
|
+
run_hooks_in_background setting. This allows per-hook control over
|
|
63
|
+
background execution. This is only use-able when running with AgentOS.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
Union[F, Callable[[F], F]]: Decorated function or decorator
|
|
67
|
+
|
|
68
|
+
Examples:
|
|
69
|
+
@hook
|
|
70
|
+
def my_hook(run_output, agent):
|
|
71
|
+
# This runs normally (blocking)
|
|
72
|
+
process_output(run_output.content)
|
|
73
|
+
|
|
74
|
+
@hook()
|
|
75
|
+
def another_hook(run_output, agent):
|
|
76
|
+
# Same as above - runs normally
|
|
77
|
+
process_output(run_output.content)
|
|
78
|
+
|
|
79
|
+
@hook(run_in_background=True)
|
|
80
|
+
def my_background_hook(run_output, agent):
|
|
81
|
+
# This will run in the background when background_tasks is available
|
|
82
|
+
send_notification(run_output.content)
|
|
83
|
+
|
|
84
|
+
@hook(run_in_background=True)
|
|
85
|
+
async def my_async_background_hook(run_output, agent):
|
|
86
|
+
# Async hooks also supported
|
|
87
|
+
await send_async_notification(run_output.content)
|
|
88
|
+
|
|
89
|
+
agent = Agent(
|
|
90
|
+
model=OpenAIChat(id="gpt-4o"),
|
|
91
|
+
post_hooks=[my_hook, my_background_hook],
|
|
92
|
+
)
|
|
93
|
+
"""
|
|
94
|
+
# Valid kwargs for the hook decorator
|
|
95
|
+
VALID_KWARGS = frozenset({"run_in_background"})
|
|
96
|
+
|
|
97
|
+
# Validate kwargs
|
|
98
|
+
invalid_kwargs = set(kwargs.keys()) - VALID_KWARGS
|
|
99
|
+
if invalid_kwargs:
|
|
100
|
+
raise ValueError(
|
|
101
|
+
f"Invalid hook configuration arguments: {invalid_kwargs}. Valid arguments are: {sorted(VALID_KWARGS)}"
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
def decorator(func: F) -> F:
|
|
105
|
+
run_in_background = kwargs.get("run_in_background", False)
|
|
106
|
+
|
|
107
|
+
# Preserve existing hook attributes from previously applied decorators
|
|
108
|
+
# Use OR logic: if any decorator sets run_in_background=True, it stays True
|
|
109
|
+
existing_run_in_background = should_run_in_background(func)
|
|
110
|
+
final_run_in_background = run_in_background or existing_run_in_background
|
|
111
|
+
|
|
112
|
+
@wraps(func)
|
|
113
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
114
|
+
return func(*args, **kwargs)
|
|
115
|
+
|
|
116
|
+
@wraps(func)
|
|
117
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
118
|
+
return await func(*args, **kwargs)
|
|
119
|
+
|
|
120
|
+
# Choose appropriate wrapper based on function type
|
|
121
|
+
if _is_async_function(func):
|
|
122
|
+
wrapper = async_wrapper
|
|
123
|
+
else:
|
|
124
|
+
wrapper = sync_wrapper
|
|
125
|
+
|
|
126
|
+
# Set the background execution attribute (combined from all decorators)
|
|
127
|
+
setattr(wrapper, HOOK_RUN_IN_BACKGROUND_ATTR, final_run_in_background)
|
|
128
|
+
|
|
129
|
+
return wrapper # type: ignore
|
|
130
|
+
|
|
131
|
+
# Handle both @hook and @hook() cases
|
|
132
|
+
if len(args) == 1 and callable(args[0]) and not kwargs:
|
|
133
|
+
return decorator(args[0])
|
|
134
|
+
|
|
135
|
+
return decorator
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def should_run_in_background(hook_func: Callable) -> bool:
|
|
139
|
+
"""
|
|
140
|
+
Check if a hook function is marked to run in background.
|
|
141
|
+
Traverses the wrapper chain to find the attribute when multiple decorators are stacked.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
hook_func: The hook function to check
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
True if the hook is decorated with @hook(run_in_background=True)
|
|
148
|
+
"""
|
|
149
|
+
# Check the function directly first
|
|
150
|
+
if hasattr(hook_func, HOOK_RUN_IN_BACKGROUND_ATTR):
|
|
151
|
+
return getattr(hook_func, HOOK_RUN_IN_BACKGROUND_ATTR)
|
|
152
|
+
|
|
153
|
+
# Traverse the wrapper chain to find the attribute
|
|
154
|
+
current = hook_func
|
|
155
|
+
seen: set[int] = set()
|
|
156
|
+
while hasattr(current, "__wrapped__"):
|
|
157
|
+
if id(current) in seen:
|
|
158
|
+
break
|
|
159
|
+
seen.add(id(current))
|
|
160
|
+
current = current.__wrapped__
|
|
161
|
+
if hasattr(current, HOOK_RUN_IN_BACKGROUND_ATTR):
|
|
162
|
+
return getattr(current, HOOK_RUN_IN_BACKGROUND_ATTR)
|
|
163
|
+
|
|
164
|
+
return False
|
|
@@ -7,14 +7,14 @@ import requests
|
|
|
7
7
|
from agno.agent.agent import Agent, RunOutput
|
|
8
8
|
from agno.media import Audio, File, Image, Video
|
|
9
9
|
from agno.team.team import Team, TeamRunOutput
|
|
10
|
-
from agno.utils.log import log_info, log_warning
|
|
10
|
+
from agno.utils.log import log_error, log_info, log_warning
|
|
11
11
|
from agno.utils.message import get_text_from_message
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
14
|
import discord
|
|
15
15
|
|
|
16
16
|
except (ImportError, ModuleNotFoundError):
|
|
17
|
-
|
|
17
|
+
raise ImportError("`discord.py` not installed. Please install using `pip install discord.py`")
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class RequiresConfirmationView(discord.ui.View):
|
|
@@ -126,6 +126,11 @@ class DiscordClient:
|
|
|
126
126
|
audio=[Audio(url=message_audio)] if message_audio else None,
|
|
127
127
|
files=[File(content=message_file)] if message_file else None,
|
|
128
128
|
)
|
|
129
|
+
if agent_response.status == "ERROR":
|
|
130
|
+
log_error(agent_response.content)
|
|
131
|
+
agent_response.content = (
|
|
132
|
+
"Sorry, there was an error processing your message. Please try again later."
|
|
133
|
+
)
|
|
129
134
|
await self._handle_response_in_thread(agent_response, thread)
|
|
130
135
|
elif self.team:
|
|
131
136
|
self.team.additional_context = additional_context
|
|
@@ -138,6 +143,12 @@ class DiscordClient:
|
|
|
138
143
|
audio=[Audio(url=message_audio)] if message_audio else None,
|
|
139
144
|
files=[File(content=message_file)] if message_file else None,
|
|
140
145
|
)
|
|
146
|
+
if team_response.status == "ERROR":
|
|
147
|
+
log_error(team_response.content)
|
|
148
|
+
team_response.content = (
|
|
149
|
+
"Sorry, there was an error processing your message. Please try again later."
|
|
150
|
+
)
|
|
151
|
+
|
|
141
152
|
await self._handle_response_in_thread(team_response, thread)
|
|
142
153
|
|
|
143
154
|
async def handle_hitl(
|
agno/knowledge/__init__.py
CHANGED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from chonkie import CodeChunker
|
|
5
|
+
from chonkie.tokenizer import TokenizerProtocol
|
|
6
|
+
except ImportError:
|
|
7
|
+
raise ImportError(
|
|
8
|
+
"`chonkie` is required for code chunking. "
|
|
9
|
+
'Please install it using `pip install "chonkie[code]"` to use CodeChunking.'
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
13
|
+
from agno.knowledge.document.base import Document
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CodeChunking(ChunkingStrategy):
    """Splits code into chunks based on its structure, leveraging Abstract Syntax Trees (ASTs) to create contextually relevant segments using Chonkie.

    The underlying Chonkie ``CodeChunker`` is created lazily on the first call to ``chunk``.

    Args:
        tokenizer: The tokenizer to use. Can be a string name or a TokenizerProtocol instance.
        chunk_size: The size of the chunks to create.
        language: The language to parse. Use "auto" for detection or specify a tree-sitter-language-pack language.
        include_nodes: Whether to include AST nodes (Note: Chonkie's base Chunk type does not store node information).
        chunker_params: Additional parameters to pass to Chonkie's CodeChunker. Keys given here
            override the dedicated arguments above.
    """

    def __init__(
        self,
        tokenizer: Union[str, TokenizerProtocol] = "character",
        chunk_size: int = 2048,
        language: Union[Literal["auto"], Any] = "auto",
        include_nodes: bool = False,
        chunker_params: Optional[Dict[str, Any]] = None,
    ):
        self.tokenizer = tokenizer
        self.chunk_size = chunk_size
        self.language = language
        self.include_nodes = include_nodes
        self.chunker_params = chunker_params
        # Built on first use by _initialize_chunker().
        self.chunker: Optional[CodeChunker] = None

    def _initialize_chunker(self):
        """Create the Chonkie ``CodeChunker`` on first use; later calls are no-ops.

        Raises:
            ImportError: If ``CodeChunker`` raises a ValueError whose message contains
                "Tokenizer not found" for the configured tokenizer.
            ValueError: Any other ValueError raised by ``CodeChunker`` is re-raised unchanged.
        """
        if self.chunker is not None:
            return

        _chunker_params: Dict[str, Any] = {
            "tokenizer": self.tokenizer,
            "chunk_size": self.chunk_size,
            "language": self.language,
            "include_nodes": self.include_nodes,
        }
        # User-supplied parameters take precedence over the dedicated constructor arguments.
        if self.chunker_params:
            _chunker_params.update(self.chunker_params)

        try:
            self.chunker = CodeChunker(**_chunker_params)
        except ValueError as e:
            if "Tokenizer not found" in str(e):
                raise ImportError(
                    f"Missing dependencies for tokenizer `{self.tokenizer}`. "
                    "Please install using `pip install tiktoken`, `pip install transformers`, or `pip install tokenizers`"
                ) from e
            raise

    def chunk(self, document: Document) -> List[Document]:
        """Split document into code chunks using Chonkie.

        Args:
            document: The document whose ``content`` should be split.

        Returns:
            ``[document]`` unchanged when the document has no content; otherwise one
            Document per chunk, each carrying a copy of the source meta_data extended
            with the 1-based ``chunk`` number and the ``chunk_size`` in characters.

        Raises:
            ImportError: If the configured tokenizer's dependencies are missing.
            RuntimeError: If the chunker is still unset after initialization.
        """
        if not document.content:
            return [document]

        # Build the chunker on first use (raises ImportError when the tokenizer's
        # dependencies are not installed).
        self._initialize_chunker()
        if self.chunker is None:
            raise RuntimeError("Chunker failed to initialize")

        chunked_documents: List[Document] = []
        for chunk_number, code_chunk in enumerate(self.chunker.chunk(document.content), 1):
            meta_data = document.meta_data.copy()
            meta_data["chunk"] = chunk_number
            meta_data["chunk_size"] = len(code_chunk.text)

            # Derive the chunk id from the document id, falling back to its name.
            chunk_id = None
            if document.id:
                chunk_id = f"{document.id}_{chunk_number}"
            elif document.name:
                chunk_id = f"{document.name}_{chunk_number}"

            chunked_documents.append(
                Document(id=chunk_id, name=document.name, meta_data=meta_data, content=code_chunk.text)
            )

        return chunked_documents
|
|
@@ -16,19 +16,79 @@ class DocumentChunking(ChunkingStrategy):
|
|
|
16
16
|
if len(document.content) <= self.chunk_size:
|
|
17
17
|
return [document]
|
|
18
18
|
|
|
19
|
-
# Split on double newlines first (paragraphs)
|
|
20
|
-
|
|
19
|
+
# Split on double newlines first (paragraphs), then clean each paragraph
|
|
20
|
+
raw_paragraphs = document.content.split("\n\n")
|
|
21
|
+
paragraphs = [self.clean_text(para) for para in raw_paragraphs]
|
|
21
22
|
chunks: List[Document] = []
|
|
22
|
-
current_chunk = []
|
|
23
|
+
current_chunk: List[str] = []
|
|
23
24
|
current_size = 0
|
|
24
25
|
chunk_meta_data = document.meta_data
|
|
25
26
|
chunk_number = 1
|
|
26
27
|
|
|
27
28
|
for para in paragraphs:
|
|
28
29
|
para = para.strip()
|
|
30
|
+
if not para:
|
|
31
|
+
continue
|
|
32
|
+
|
|
29
33
|
para_size = len(para)
|
|
30
34
|
|
|
31
|
-
|
|
35
|
+
# If paragraph itself is larger than chunk_size, split it by sentences
|
|
36
|
+
if para_size > self.chunk_size:
|
|
37
|
+
# Save current chunk first
|
|
38
|
+
if current_chunk:
|
|
39
|
+
meta_data = chunk_meta_data.copy()
|
|
40
|
+
meta_data["chunk"] = chunk_number
|
|
41
|
+
chunk_id = None
|
|
42
|
+
if document.id:
|
|
43
|
+
chunk_id = f"{document.id}_{chunk_number}"
|
|
44
|
+
elif document.name:
|
|
45
|
+
chunk_id = f"{document.name}_{chunk_number}"
|
|
46
|
+
meta_data["chunk_size"] = len("\n\n".join(current_chunk))
|
|
47
|
+
chunks.append(
|
|
48
|
+
Document(
|
|
49
|
+
id=chunk_id, name=document.name, meta_data=meta_data, content="\n\n".join(current_chunk)
|
|
50
|
+
)
|
|
51
|
+
)
|
|
52
|
+
chunk_number += 1
|
|
53
|
+
current_chunk = []
|
|
54
|
+
current_size = 0
|
|
55
|
+
|
|
56
|
+
# Split oversized paragraph by sentences
|
|
57
|
+
import re
|
|
58
|
+
|
|
59
|
+
sentences = re.split(r"(?<=[.!?])\s+", para)
|
|
60
|
+
for sentence in sentences:
|
|
61
|
+
sentence = sentence.strip()
|
|
62
|
+
if not sentence:
|
|
63
|
+
continue
|
|
64
|
+
sentence_size = len(sentence)
|
|
65
|
+
|
|
66
|
+
if current_size + sentence_size <= self.chunk_size:
|
|
67
|
+
current_chunk.append(sentence)
|
|
68
|
+
current_size += sentence_size
|
|
69
|
+
else:
|
|
70
|
+
if current_chunk:
|
|
71
|
+
meta_data = chunk_meta_data.copy()
|
|
72
|
+
meta_data["chunk"] = chunk_number
|
|
73
|
+
chunk_id = None
|
|
74
|
+
if document.id:
|
|
75
|
+
chunk_id = f"{document.id}_{chunk_number}"
|
|
76
|
+
elif document.name:
|
|
77
|
+
chunk_id = f"{document.name}_{chunk_number}"
|
|
78
|
+
meta_data["chunk_size"] = len(" ".join(current_chunk))
|
|
79
|
+
chunks.append(
|
|
80
|
+
Document(
|
|
81
|
+
id=chunk_id,
|
|
82
|
+
name=document.name,
|
|
83
|
+
meta_data=meta_data,
|
|
84
|
+
content=" ".join(current_chunk),
|
|
85
|
+
)
|
|
86
|
+
)
|
|
87
|
+
chunk_number += 1
|
|
88
|
+
current_chunk = [sentence]
|
|
89
|
+
current_size = sentence_size
|
|
90
|
+
|
|
91
|
+
elif current_size + para_size <= self.chunk_size:
|
|
32
92
|
current_chunk.append(para)
|
|
33
93
|
current_size += para_size
|
|
34
94
|
else:
|
|
@@ -46,6 +106,7 @@ class DocumentChunking(ChunkingStrategy):
|
|
|
46
106
|
id=chunk_id, name=document.name, meta_data=meta_data, content="\n\n".join(current_chunk)
|
|
47
107
|
)
|
|
48
108
|
)
|
|
109
|
+
chunk_number += 1
|
|
49
110
|
current_chunk = [para]
|
|
50
111
|
current_size = para_size
|
|
51
112
|
|
agno/knowledge/chunking/fixed.py
CHANGED
|
@@ -53,5 +53,8 @@ class FixedSizeChunking(ChunkingStrategy):
|
|
|
53
53
|
)
|
|
54
54
|
)
|
|
55
55
|
chunk_number += 1
|
|
56
|
-
start
|
|
56
|
+
# Ensure start always advances by at least 1 to prevent infinite loops
|
|
57
|
+
# when overlap is large relative to chunk_size
|
|
58
|
+
new_start = max(start + 1, end - self.overlap)
|
|
59
|
+
start = new_start
|
|
57
60
|
return chunked_documents
|