agno 2.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
from os import getenv
|
|
2
|
+
from textwrap import dedent
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from agno.agent.agent import Agent, RunOutput
|
|
8
|
+
from agno.media import Audio, File, Image, Video
|
|
9
|
+
from agno.team.team import Team, TeamRunOutput
|
|
10
|
+
from agno.utils.log import log_info, log_warning
|
|
11
|
+
from agno.utils.message import get_text_from_message
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
import discord
|
|
15
|
+
|
|
16
|
+
except (ImportError, ModuleNotFoundError):
|
|
17
|
+
print("`discord.py` not installed. Please install using `pip install discord.py`")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RequiresConfirmationView(discord.ui.View):
    """Discord UI view offering a "Confirm" and a "Cancel" button.

    ``value`` starts as ``None`` and becomes ``True`` (Confirm pressed) or
    ``False`` (Cancel pressed). It stays ``None`` if the view times out
    before either button is used.
    """

    def __init__(self):
        super().__init__()
        self.value = None

    async def _record_choice(self, interaction, pressed, decision):
        """Store the decision, update the message, then shut the view down."""
        self.value = decision
        pressed.disabled = True
        # The message is edited while the (now disabled) button is still part
        # of the view; items are only cleared afterwards.
        await interaction.response.edit_message(view=self)
        self.clear_items()
        self.stop()

    @discord.ui.button(label="Confirm", style=discord.ButtonStyle.primary)
    async def confirm(
        self,
        interaction: discord.Interaction,
        button: discord.ui.Button,
    ):
        """Button callback: record a positive answer."""
        await self._record_choice(interaction, button, True)

    @discord.ui.button(label="Cancel", style=discord.ButtonStyle.secondary)
    async def cancel(
        self,
        interaction: discord.Interaction,
        button: discord.ui.Button,
    ):
        """Button callback: record a negative answer."""
        await self._record_choice(interaction, button, False)

    async def on_timeout(self):
        """Log a warning when nobody answered before the view expired."""
        log_warning("Agent Timeout Error")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class DiscordClient:
    """Connects a Discord bot to an agno ``Agent`` or ``Team``.

    Every incoming message (except the bot's own) is forwarded to the agent,
    or to the team when no agent is configured, and the run output is posted
    back into a thread (or the DM channel) in batches that fit into a single
    Discord message.

    Args:
        agent: Agent that answers incoming messages. Takes precedence over ``team``.
        team: Team that answers incoming messages when ``agent`` is not set.
        client: Pre-configured ``discord.Client``. When omitted, a client with
            all intents enabled is created and the intents are kept on
            ``self.intents``.
    """

    # Payload size per outgoing message. Batches are additionally prefixed
    # with an "[i/n] " counter.
    _BATCH_SIZE = 1500

    def __init__(
        self, agent: Optional[Agent] = None, team: Optional[Team] = None, client: Optional[discord.Client] = None
    ):
        self.agent = agent
        self.team = team
        if client is None:
            self.intents = discord.Intents.all()
            self.client = discord.Client(intents=self.intents)
        else:
            self.client = client
        self._setup_events()

    @staticmethod
    async def _download_attachment(attachment) -> Optional[bytes]:
        """Return the attachment's bytes, or None when the download fails.

        Uses the attachment's own coroutine so the event loop is not blocked
        by a synchronous HTTP request.
        """
        try:
            return await attachment.read()
        except discord.HTTPException as e:
            log_warning(f"Failed to download attachment {attachment.url}: {e}")
            return None

    def _setup_events(self):
        """Register the ``on_message`` handler on the Discord client."""

        @self.client.event
        async def on_message(message):
            # Never react to the bot's own messages.
            if message.author == self.client.user:
                log_info(f"sent {message.content}")
                return

            message_image = None
            message_video = None
            message_audio = None
            message_file = None
            media_url = None
            message_text = message.content
            message_url = message.jump_url
            message_user = message.author.name
            message_user_id = message.author.id

            if message.attachments:
                # Only the first attachment is forwarded.
                media = message.attachments[0]
                # content_type is optional in discord.py; fall back to "" so an
                # attachment without one is ignored instead of raising.
                media_type = media.content_type or ""
                media_url = media.url
                if media_type.startswith("image/"):
                    message_image = media_url
                elif media_type.startswith("video/"):
                    message_video = await self._download_attachment(media)
                elif media_type.startswith("application/"):
                    message_file = await self._download_attachment(media)
                elif media_type.startswith("audio/"):
                    message_audio = media_url

            log_info(f"processing message:{message_text} \n with media: {media_url} \n url:{message_url}")
            if isinstance(message.channel, discord.Thread):
                thread = message.channel
            elif isinstance(message.channel, discord.channel.DMChannel):
                thread = message.channel  # type: ignore
            elif isinstance(message.channel, discord.TextChannel):
                thread = await message.create_thread(name=f"{message_user}'s thread")
            else:
                log_info(f"received {message.content} but not in a supported channel")
                return

            async with thread.typing():
                additional_context = dedent(f"""
                    Discord username: {message_user}
                    Discord userid: {message_user_id}
                    Discord url: {message_url}
                    """)
                # Build the media arguments once; they are identical for the
                # agent and the team branch.
                images = [Image(url=message_image)] if message_image else None
                videos = [Video(content=message_video)] if message_video else None
                audio = [Audio(url=message_audio)] if message_audio else None
                files = [File(content=message_file)] if message_file else None

                # NOTE(review): additional_context is written onto the shared
                # agent/team object; overlapping messages may overwrite each
                # other's context - confirm whether that matters here.
                # NOTE(review): user_id is passed as the raw Discord id while
                # session_id is stringified - confirm the type arun expects.
                # The two branches stay separate (rather than merged) to keep
                # the response variables precisely typed.
                if self.agent:
                    self.agent.additional_context = additional_context
                    agent_response: RunOutput = await self.agent.arun(
                        input=message_text,
                        user_id=message_user_id,
                        session_id=str(thread.id),
                        images=images,
                        videos=videos,
                        audio=audio,
                        files=files,
                    )
                    await self._handle_response_in_thread(agent_response, thread)
                elif self.team:
                    self.team.additional_context = additional_context
                    team_response: TeamRunOutput = await self.team.arun(
                        input=message_text,
                        user_id=message_user_id,
                        session_id=str(thread.id),
                        images=images,
                        videos=videos,
                        audio=audio,
                        files=files,
                    )
                    await self._handle_response_in_thread(team_response, thread)

    async def handle_hitl(
        self, run_response: RunOutput, thread: Union[discord.Thread, discord.TextChannel]
    ) -> RunOutput:
        """Handles optional Human-In-The-Loop interaction.

        When the run is paused, each tool requiring confirmation is presented
        in ``thread`` with Confirm/Cancel buttons. An unanswered prompt counts
        as "not confirmed". The run is then continued through the agent, if
        one is configured.

        Returns:
            The continued run output, or ``run_response`` unchanged when the
            run was not paused.
        """
        if run_response.is_paused:
            for tool in run_response.tools_requiring_confirmation:
                view = RequiresConfirmationView()
                await thread.send(f"Tool requiring confirmation: {tool.tool_name}", view=view)
                await view.wait()
                # view.value is None when no button was pressed.
                tool.confirmed = view.value if view.value is not None else False

            if self.agent:
                run_response = await self.agent.acontinue_run(
                    run_response=run_response,
                )

        return run_response

    async def _handle_response_in_thread(
        self, response: Union[RunOutput, TeamRunOutput], thread: Union[discord.TextChannel, discord.Thread]
    ):
        """Post reasoning (in italics) and then the content of a run to ``thread``."""
        # Only agent runs go through the confirmation flow.
        if isinstance(response, RunOutput):
            response = await self.handle_hitl(response, thread)

        if response.reasoning_content:
            await self._send_discord_messages(
                thread=thread, message=f"Reasoning: \n{response.reasoning_content}", italics=True
            )

        # Handle structured outputs properly
        content_message = get_text_from_message(response.content) if response.content is not None else ""

        await self._send_discord_messages(thread=thread, message=content_message)

    async def _send_discord_messages(self, thread: discord.channel, message: str, italics: bool = False):  # type: ignore
        """Send ``message`` to ``thread``, split into numbered batches when long.

        Args:
            thread: Channel-like object exposing an awaitable ``send``.
            message: Text to send. Empty or whitespace-only text is skipped,
                because Discord rejects messages without content.
            italics: Wrap every non-blank line in underscores.
        """
        if not message or not message.strip():
            return

        size = self._BATCH_SIZE
        if len(message) < size:
            outgoing = [message]
        else:
            pieces = [message[i : i + size] for i in range(0, len(message), size)]
            outgoing = [f"[{i}/{len(pieces)}] {piece}" for i, piece in enumerate(pieces, 1)]

        for text in outgoing:
            if italics:
                # Blank lines are left untouched; wrapping them would emit a
                # literal "__".
                text = "\n".join(f"_{line}_" if line.strip() else line for line in text.split("\n"))
            await thread.send(text)  # type: ignore

    def serve(self):
        """Run the Discord client using the ``DISCORD_BOT_TOKEN`` environment variable.

        Raises:
            ValueError: If the token is not set or the client fails to run.
                The underlying exception is attached as ``__cause__``.
        """
        try:
            token = getenv("DISCORD_BOT_TOKEN")
            if not token:
                raise ValueError("DISCORD_BOT_TOKEN NOT SET")
            return self.client.run(token)
        except Exception as e:
            raise ValueError(f"Failed to run Discord client: {str(e)}") from e
|
|
File without changes
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
4
|
+
from agno.knowledge.document.base import Document
|
|
5
|
+
from agno.models.base import Model
|
|
6
|
+
from agno.models.defaults import DEFAULT_OPENAI_MODEL_ID
|
|
7
|
+
from agno.models.message import Message
|
|
8
|
+
from agno.models.utils import get_model
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AgenticChunking(ChunkingStrategy):
|
|
12
|
+
"""Chunking strategy that uses an LLM to determine natural breakpoints in the text"""
|
|
13
|
+
|
|
14
|
+
def __init__(self, model: Optional[Union[Model, str]] = None, max_chunk_size: int = 5000):
    """Set up the chunker.

    Args:
        model: Model instance, or a model string resolved through ``get_model``.
            When this resolves to ``None``, ``OpenAIChat`` with
            ``DEFAULT_OPENAI_MODEL_ID`` is used.
        max_chunk_size: Stored as ``self.chunk_size``.

    Raises:
        ValueError: If no model was given and the OpenAI model class cannot
            be imported.
    """
    # Convert model string to Model instance
    model = get_model(model)
    if model is None:
        # Imported lazily so OpenAI support is only needed for the default model.
        try:
            from agno.models.openai import OpenAIChat
        except ImportError as e:
            # Only a failed import means the dependency is missing; the cause
            # is chained so the real import error stays visible.
            raise ValueError("`openai` isn't installed. Please install it with `pip install openai`") from e
        model = OpenAIChat(DEFAULT_OPENAI_MODEL_ID)
    self.chunk_size = max_chunk_size
    self.model = model
|
|
25
|
+
|
|
26
|
+
def chunk(self, document: Document) -> List[Document]:
|
|
27
|
+
"""Split text into chunks using LLM to determine natural breakpoints based on context"""
|
|
28
|
+
if len(document.content) <= self.chunk_size:
|
|
29
|
+
return [document]
|
|
30
|
+
|
|
31
|
+
chunks: List[Document] = []
|
|
32
|
+
remaining_text = self.clean_text(document.content)
|
|
33
|
+
chunk_meta_data = document.meta_data
|
|
34
|
+
chunk_number = 1
|
|
35
|
+
|
|
36
|
+
while remaining_text:
|
|
37
|
+
# Ask model to find a good breakpoint within chunk_size
|
|
38
|
+
prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.chunk_size} characters.
|
|
39
|
+
Consider semantic completeness, paragraph boundaries, and topic transitions.
|
|
40
|
+
Return only the character position number of where to break the text:
|
|
41
|
+
|
|
42
|
+
{remaining_text[: self.chunk_size]}"""
|
|
43
|
+
|
|
44
|
+
try:
|
|
45
|
+
response = self.model.response([Message(role="user", content=prompt)])
|
|
46
|
+
if response and response.content:
|
|
47
|
+
break_point = min(int(response.content.strip()), self.chunk_size)
|
|
48
|
+
else:
|
|
49
|
+
break_point = self.chunk_size
|
|
50
|
+
except Exception:
|
|
51
|
+
# Fallback to max size if model fails
|
|
52
|
+
break_point = self.chunk_size
|
|
53
|
+
|
|
54
|
+
# Extract chunk and update remaining text
|
|
55
|
+
chunk = remaining_text[:break_point].strip()
|
|
56
|
+
meta_data = chunk_meta_data.copy()
|
|
57
|
+
meta_data["chunk"] = chunk_number
|
|
58
|
+
chunk_id = None
|
|
59
|
+
if document.id:
|
|
60
|
+
chunk_id = f"{document.id}_{chunk_number}"
|
|
61
|
+
elif document.name:
|
|
62
|
+
chunk_id = f"{document.name}_{chunk_number}"
|
|
63
|
+
meta_data["chunk_size"] = len(chunk)
|
|
64
|
+
chunks.append(
|
|
65
|
+
Document(
|
|
66
|
+
id=chunk_id,
|
|
67
|
+
name=document.name,
|
|
68
|
+
meta_data=meta_data,
|
|
69
|
+
content=chunk,
|
|
70
|
+
)
|
|
71
|
+
)
|
|
72
|
+
chunk_number += 1
|
|
73
|
+
|
|
74
|
+
remaining_text = remaining_text[break_point:].strip()
|
|
75
|
+
|
|
76
|
+
if not remaining_text:
|
|
77
|
+
break
|
|
78
|
+
|
|
79
|
+
return chunks
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
4
|
+
from agno.knowledge.document.base import Document
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DocumentChunking(ChunkingStrategy):
    """A chunking strategy that splits text based on document structure like paragraphs and sections"""

    def __init__(self, chunk_size: int = 5000, overlap: int = 0):
        """Store the target chunk size and the overlap, both in characters."""
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk(self, document: Document) -> List[Document]:
        """Split document into chunks based on document structure.

        The cleaned content is split on blank lines and paragraphs are packed
        greedily into chunks joined by ``"\\n\\n"``. When ``overlap`` is positive,
        every chunk after the first is prefixed with the last ``overlap``
        characters of the previous chunk.

        Args:
            document: The document to split. Returned unchanged, as a single
                element list, when its content already fits in ``chunk_size``.

        Returns:
            Documents carrying ``chunk`` (1-based index) and ``chunk_size``
            (length of the chunk text) in their metadata. Ids are
            ``"<id>_<n>"``, else ``"<name>_<n>"``, else ``None``.
        """
        if len(document.content) <= self.chunk_size:
            return [document]

        # Split on double newlines first (paragraphs)
        paragraphs = self.clean_text(document.content).split("\n\n")
        chunks: List[Document] = []
        current_chunk: List[str] = []
        current_size = 0
        chunk_meta_data = document.meta_data
        chunk_number = 1

        for para in paragraphs:
            para = para.strip()
            para_size = len(para)

            # NOTE: current_size counts paragraph text only, not the "\n\n"
            # separators added when joining, so a chunk can slightly exceed
            # chunk_size.
            if current_size + para_size <= self.chunk_size:
                current_chunk.append(para)
                current_size += para_size
            else:
                if current_chunk:
                    content = "\n\n".join(current_chunk)
                    meta_data = chunk_meta_data.copy()
                    meta_data["chunk"] = chunk_number
                    chunk_id = None
                    if document.id:
                        chunk_id = f"{document.id}_{chunk_number}"
                    elif document.name:
                        chunk_id = f"{document.name}_{chunk_number}"
                    meta_data["chunk_size"] = len(content)
                    chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=content))
                    # Advance the counter so later chunks get distinct
                    # numbers and ids.
                    chunk_number += 1
                current_chunk = [para]
                current_size = para_size

        if current_chunk:
            content = "\n\n".join(current_chunk)
            meta_data = chunk_meta_data.copy()
            meta_data["chunk"] = chunk_number
            chunk_id = None
            if document.id:
                chunk_id = f"{document.id}_{chunk_number}"
            elif document.name:
                chunk_id = f"{document.name}_{chunk_number}"
            meta_data["chunk_size"] = len(content)
            chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=content))

        # Handle overlap if specified
        if self.overlap > 0:
            overlapped_chunks: List[Document] = []
            for i, current in enumerate(chunks):
                # Add overlap from previous chunk
                prev_text = chunks[i - 1].content[-self.overlap :] if i > 0 else ""
                if not prev_text:
                    # First chunk, or nothing to borrow: keep the chunk as is
                    # rather than dropping it.
                    overlapped_chunks.append(current)
                    continue

                content = prev_text + current.content
                meta_data = chunk_meta_data.copy()
                # Reuse the chunk's own number and id; the loop counter above
                # only reflects the last chunk.
                meta_data["chunk"] = current.meta_data["chunk"]
                meta_data["chunk_size"] = len(content)
                overlapped_chunks.append(
                    Document(
                        id=current.id,
                        name=document.name,
                        meta_data=meta_data,
                        content=content,
                    )
                )
            chunks = overlapped_chunks

        return chunks
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
4
|
+
from agno.knowledge.document.base import Document
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FixedSizeChunking(ChunkingStrategy):
    """Chunking strategy that splits text into fixed-size chunks with optional overlap"""

    def __init__(self, chunk_size: int = 5000, overlap: int = 0):
        """Store the chunk size and overlap, both in characters.

        Raises:
            ValueError: If ``overlap`` is not smaller than ``chunk_size``.
        """
        # overlap must be less than chunk size
        if overlap >= chunk_size:
            raise ValueError(f"Invalid parameters: overlap ({overlap}) must be less than chunk size ({chunk_size}).")

        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk(self, document: Document) -> List[Document]:
        """Split document into fixed-size chunks with optional overlap.

        Each chunk spans up to ``chunk_size`` characters. The end of a chunk is
        moved back to the nearest whitespace so words are not cut, unless the
        whole window is one word. Consecutive chunks share ``overlap``
        characters whenever that still moves the window forward.

        Args:
            document: The document whose content is split.

        Returns:
            Documents carrying ``chunk`` (1-based index) and ``chunk_size``
            (length of the chunk text) in their metadata. Ids are
            ``"<id>_<n>"``, else ``"<name>_<n>"``, else ``None``.
        """
        content = self.clean_text(document.content)
        content_length = len(content)
        chunked_documents: List[Document] = []
        chunk_number = 1
        chunk_meta_data = document.meta_data
        start = 0
        while start + self.overlap < content_length:
            end = min(start + self.chunk_size, content_length)

            # Ensure we're not splitting a word in half
            if end < content_length:
                while end > start and content[end] not in [" ", "\n", "\r", "\t"]:
                    end -= 1

            # If the entire chunk is a word, then just split it at chunk_size
            if end == start:
                end = start + self.chunk_size

            chunk = content[start:end]
            meta_data = chunk_meta_data.copy()
            meta_data["chunk"] = chunk_number
            chunk_id = None
            if document.id:
                chunk_id = f"{document.id}_{chunk_number}"
            elif document.name:
                chunk_id = f"{document.name}_{chunk_number}"
            meta_data["chunk_size"] = len(chunk)
            chunked_documents.append(
                Document(
                    id=chunk_id,
                    name=document.name,
                    meta_data=meta_data,
                    content=chunk,
                )
            )
            chunk_number += 1

            next_start = end - self.overlap
            # The word-boundary backtrack can leave `end` within `overlap`
            # characters of `start`. Stepping back by the overlap would then
            # stall or move the window backwards (even to a negative index),
            # so skip the overlap for this step to guarantee forward progress.
            if next_start <= start:
                next_start = end
            start = next_start
        return chunked_documents
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import tempfile
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from unstructured.chunking.title import chunk_by_title # type: ignore
|
|
7
|
+
from unstructured.partition.md import partition_md # type: ignore
|
|
8
|
+
except ImportError:
|
|
9
|
+
raise ImportError("`unstructured` not installed. Please install it using `pip install unstructured markdown`")
|
|
10
|
+
|
|
11
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
12
|
+
from agno.knowledge.document.base import Document
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MarkdownChunking(ChunkingStrategy):
    """A chunking strategy that splits markdown based on structure like headers, paragraphs and sections"""

    def __init__(self, chunk_size: int = 5000, overlap: int = 0):
        """Store the target chunk size and the overlap, both in characters."""
        self.chunk_size = chunk_size
        self.overlap = overlap

    def _partition_markdown_content(self, content: str) -> List[str]:
        """
        Partition markdown content and return a list of text chunks.
        Falls back to paragraph splitting if the markdown chunking fails.

        Args:
            content: Raw markdown text.

        Returns:
            Stripped, non-empty section texts produced by ``chunk_by_title``,
            or the cleaned content split on blank lines when partitioning
            yields nothing or raises.
        """
        try:
            temp_file_path = None
            try:
                # Create a temporary file with the markdown content.
                # This is the recommended usage of the unstructured library.
                with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False, encoding="utf-8") as temp_file:
                    # Record the path before writing: the file is created with
                    # delete=False, so a failing write must still be cleaned up.
                    temp_file_path = temp_file.name
                    temp_file.write(content)

                elements = partition_md(filename=temp_file_path)

                if not elements:
                    return self.clean_text(content).split("\n\n")

                # Chunk by title with some default values
                chunked_elements = chunk_by_title(
                    elements=elements,
                    max_characters=self.chunk_size,
                    new_after_n_chars=int(self.chunk_size * 0.8),
                    combine_text_under_n_chars=self.chunk_size,
                    overlap=0,
                )

                # Generate the final text chunks
                text_chunks = []
                for chunk_group in chunked_elements:
                    if isinstance(chunk_group, list):
                        chunk_text = "\n\n".join([elem.text for elem in chunk_group if hasattr(elem, "text")])
                    else:
                        chunk_text = chunk_group.text if hasattr(chunk_group, "text") else str(chunk_group)

                    if chunk_text.strip():
                        text_chunks.append(chunk_text.strip())

                return text_chunks if text_chunks else self.clean_text(content).split("\n\n")

            # Always clean up the temporary file
            finally:
                if temp_file_path is not None:
                    os.unlink(temp_file_path)

        # Fallback to simple paragraph splitting if the markdown chunking fails
        except Exception:
            return self.clean_text(content).split("\n\n")

    def chunk(self, document: Document) -> List[Document]:
        """Split markdown document into chunks based on markdown structure.

        Sections from ``_partition_markdown_content`` are packed greedily into
        chunks joined by ``"\\n\\n"``. When ``overlap`` is positive, every chunk
        after the first is prefixed with the last ``overlap`` characters of
        the previous chunk.

        Args:
            document: The document to split. Returned unchanged, as a single
                element list, when it has no content or the content already
                fits in ``chunk_size``.

        Returns:
            Documents carrying ``chunk`` (1-based index) and ``chunk_size``
            (length of the chunk text) in their metadata. Ids are
            ``"<id>_<n>"``, else ``"<name>_<n>"``, else ``None``.
        """
        if not document.content or len(document.content) <= self.chunk_size:
            return [document]

        # Split using markdown chunking logic, or fallback to paragraphs
        sections = self._partition_markdown_content(document.content)

        chunks: List[Document] = []
        current_chunk: List[str] = []
        current_size = 0
        chunk_meta_data = document.meta_data
        chunk_number = 1

        for section in sections:
            section = section.strip()
            section_size = len(section)

            if current_size + section_size <= self.chunk_size:
                current_chunk.append(section)
                current_size += section_size
            else:
                if current_chunk:
                    content = "\n\n".join(current_chunk)
                    meta_data = chunk_meta_data.copy()
                    meta_data["chunk"] = chunk_number
                    chunk_id = None
                    if document.id:
                        chunk_id = f"{document.id}_{chunk_number}"
                    elif document.name:
                        chunk_id = f"{document.name}_{chunk_number}"
                    meta_data["chunk_size"] = len(content)
                    chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=content))
                    chunk_number += 1

                current_chunk = [section]
                current_size = section_size

        if current_chunk:
            content = "\n\n".join(current_chunk)
            meta_data = chunk_meta_data.copy()
            meta_data["chunk"] = chunk_number
            chunk_id = None
            if document.id:
                chunk_id = f"{document.id}_{chunk_number}"
            elif document.name:
                chunk_id = f"{document.name}_{chunk_number}"
            meta_data["chunk_size"] = len(content)
            chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=content))

        # Handle overlap if specified
        if self.overlap > 0:
            overlapped_chunks: List[Document] = []
            for i, current in enumerate(chunks):
                # Add overlap from previous chunk
                prev_text = chunks[i - 1].content[-self.overlap :] if i > 0 else ""
                if not prev_text:
                    # First chunk, or nothing to borrow: keep the chunk as is.
                    overlapped_chunks.append(current)
                    continue

                content = prev_text + current.content
                meta_data = chunk_meta_data.copy()
                meta_data["chunk"] = current.meta_data["chunk"]
                meta_data["chunk_size"] = len(content)
                overlapped_chunks.append(
                    Document(
                        id=current.id,
                        name=document.name,
                        meta_data=meta_data,
                        content=content,
                    )
                )
            chunks = overlapped_chunks
        return chunks
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
5
|
+
from agno.knowledge.document.base import Document
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RecursiveChunking(ChunkingStrategy):
    """Chunking strategy that recursively splits text into chunks by finding natural break points"""

    def __init__(self, chunk_size: int = 5000, overlap: int = 0):
        """Store the chunk size and overlap, both in characters.

        Emits a ``RuntimeWarning`` when ``overlap`` exceeds 15% of
        ``chunk_size``.

        Raises:
            ValueError: If ``overlap`` is not smaller than ``chunk_size``.
        """
        # overlap must be less than chunk size
        if overlap >= chunk_size:
            raise ValueError(f"Invalid parameters: overlap ({overlap}) must be less than chunk size ({chunk_size}).")

        if overlap > chunk_size * 0.15:
            warnings.warn(
                f"High overlap: {overlap} > 15% of chunk size ({chunk_size}). May cause slow processing.",
                RuntimeWarning,
            )

        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk(self, document: Document) -> List[Document]:
        """Recursively chunk text by finding natural break points.

        Each window of up to ``chunk_size`` characters is cut after its last
        newline, or failing that after its last period; with neither, the
        window is used whole. Consecutive chunks share ``overlap`` characters
        when that still moves the window forward.

        Args:
            document: The document to split. Returned unchanged, as a single
                element list, when its content already fits in ``chunk_size``.

        Returns:
            Documents carrying ``chunk`` (1-based index) and ``chunk_size``
            (length of the chunk text) in their metadata. Ids are
            ``"<id>_<n>"``, else ``"<name>_<n>"``, else ``None``.
        """
        if len(document.content) <= self.chunk_size:
            return [document]

        chunks: List[Document] = []
        start = 0
        chunk_meta_data = document.meta_data
        chunk_number = 1
        content = self.clean_text(document.content)
        content_length = len(content)

        while start < content_length:
            end = min(start + self.chunk_size, content_length)

            if end < content_length:
                window = content[start:end]
                for sep in ["\n", "."]:
                    last_sep = window.rfind(sep)
                    if last_sep != -1:
                        end = start + last_sep + 1
                        break

            chunk = content[start:end]
            meta_data = chunk_meta_data.copy()
            meta_data["chunk"] = chunk_number
            chunk_id = None
            if document.id:
                chunk_id = f"{document.id}_{chunk_number}"
            elif document.name:
                # Same name-based fallback the other chunking strategies use.
                chunk_id = f"{document.name}_{chunk_number}"
            chunk_number += 1
            meta_data["chunk_size"] = len(chunk)
            chunks.append(Document(id=chunk_id, name=document.name, meta_data=meta_data, content=chunk))

            # The chunk just emitted reaches the end of the text. Stepping
            # back by the overlap would only emit tail fragments that are
            # already contained in it.
            if end >= content_length:
                break

            new_start = end - self.overlap
            if new_start <= start:  # Prevent infinite loop
                # Move forward by about 10% of chunk size, but never past
                # `end`, otherwise the text between `end` and the new start
                # would be dropped.
                new_start = min(end, start + max(1, self.chunk_size // 10))
            start = new_start

        return chunks
|