agno 2.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,997 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import math
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Path, Query, UploadFile
|
|
7
|
+
|
|
8
|
+
from agno.knowledge.content import Content, FileData
|
|
9
|
+
from agno.knowledge.knowledge import Knowledge
|
|
10
|
+
from agno.knowledge.reader import ReaderFactory
|
|
11
|
+
from agno.knowledge.reader.base import Reader
|
|
12
|
+
from agno.knowledge.utils import get_all_chunkers_info, get_all_readers_info, get_content_types_to_readers_mapping
|
|
13
|
+
from agno.os.auth import get_authentication_dependency
|
|
14
|
+
from agno.os.routers.knowledge.schemas import (
|
|
15
|
+
ChunkerSchema,
|
|
16
|
+
ConfigResponseSchema,
|
|
17
|
+
ContentResponseSchema,
|
|
18
|
+
ContentStatus,
|
|
19
|
+
ContentStatusResponse,
|
|
20
|
+
ContentUpdateSchema,
|
|
21
|
+
ReaderSchema,
|
|
22
|
+
VectorDbSchema,
|
|
23
|
+
VectorSearchRequestSchema,
|
|
24
|
+
VectorSearchResult,
|
|
25
|
+
)
|
|
26
|
+
from agno.os.schema import (
|
|
27
|
+
BadRequestResponse,
|
|
28
|
+
InternalServerErrorResponse,
|
|
29
|
+
NotFoundResponse,
|
|
30
|
+
PaginatedResponse,
|
|
31
|
+
PaginationInfo,
|
|
32
|
+
SortOrder,
|
|
33
|
+
UnauthenticatedResponse,
|
|
34
|
+
ValidationErrorResponse,
|
|
35
|
+
)
|
|
36
|
+
from agno.os.settings import AgnoAPISettings
|
|
37
|
+
from agno.os.utils import get_knowledge_instance_by_db_id
|
|
38
|
+
from agno.utils.log import log_debug, log_info
|
|
39
|
+
from agno.utils.string import generate_id
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_knowledge_router(
    knowledge_instances: List[Knowledge], settings: Optional[AgnoAPISettings] = None
) -> APIRouter:
    """Create knowledge router with comprehensive OpenAPI documentation for content management endpoints.

    Args:
        knowledge_instances: Knowledge bases the routes can target (selected per-request via ``db_id``).
        settings: API settings used for the authentication dependency; a fresh
            instance is created when omitted.

    Returns:
        An ``APIRouter`` with all knowledge endpoints attached.
    """
    # Avoid a mutable default argument: the original signature evaluated
    # AgnoAPISettings() once at import time and shared that single instance
    # across every call. Passing settings explicitly behaves exactly as before.
    if settings is None:
        settings = AgnoAPISettings()
    router = APIRouter(
        dependencies=[Depends(get_authentication_dependency(settings))],
        tags=["Knowledge"],
        responses={
            400: {"description": "Bad Request", "model": BadRequestResponse},
            401: {"description": "Unauthorized", "model": UnauthenticatedResponse},
            404: {"description": "Not Found", "model": NotFoundResponse},
            422: {"description": "Validation Error", "model": ValidationErrorResponse},
            500: {"description": "Internal Server Error", "model": InternalServerErrorResponse},
        },
    )
    return attach_routes(router=router, knowledge_instances=knowledge_instances)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> APIRouter:
|
|
63
|
+
@router.post(
|
|
64
|
+
"/knowledge/content",
|
|
65
|
+
response_model=ContentResponseSchema,
|
|
66
|
+
status_code=202,
|
|
67
|
+
operation_id="upload_content",
|
|
68
|
+
summary="Upload Content",
|
|
69
|
+
description=(
|
|
70
|
+
"Upload content to the knowledge base. Supports file uploads, text content, or URLs. "
|
|
71
|
+
"Content is processed asynchronously in the background. Supports custom readers and chunking strategies."
|
|
72
|
+
),
|
|
73
|
+
responses={
|
|
74
|
+
202: {
|
|
75
|
+
"description": "Content upload accepted for processing",
|
|
76
|
+
"content": {
|
|
77
|
+
"application/json": {
|
|
78
|
+
"example": {
|
|
79
|
+
"id": "content-123",
|
|
80
|
+
"name": "example-document.pdf",
|
|
81
|
+
"description": "Sample document for processing",
|
|
82
|
+
"metadata": {"category": "documentation", "priority": "high"},
|
|
83
|
+
"status": "processing",
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
},
|
|
87
|
+
},
|
|
88
|
+
400: {
|
|
89
|
+
"description": "Invalid request - malformed metadata or missing content",
|
|
90
|
+
"model": BadRequestResponse,
|
|
91
|
+
},
|
|
92
|
+
422: {"description": "Validation error in form data", "model": ValidationErrorResponse},
|
|
93
|
+
},
|
|
94
|
+
)
|
|
95
|
+
async def upload_content(
|
|
96
|
+
background_tasks: BackgroundTasks,
|
|
97
|
+
name: Optional[str] = Form(None, description="Content name (auto-generated from file/URL if not provided)"),
|
|
98
|
+
description: Optional[str] = Form(None, description="Content description for context"),
|
|
99
|
+
url: Optional[str] = Form(None, description="URL to fetch content from (JSON array or single URL string)"),
|
|
100
|
+
metadata: Optional[str] = Form(None, description="JSON metadata object for additional content properties"),
|
|
101
|
+
file: Optional[UploadFile] = File(None, description="File to upload for processing"),
|
|
102
|
+
text_content: Optional[str] = Form(None, description="Raw text content to process"),
|
|
103
|
+
reader_id: Optional[str] = Form(None, description="ID of the reader to use for content processing"),
|
|
104
|
+
chunker: Optional[str] = Form(None, description="Chunking strategy to apply during processing"),
|
|
105
|
+
chunk_size: Optional[int] = Form(None, description="Chunk size to use for processing"),
|
|
106
|
+
chunk_overlap: Optional[int] = Form(None, description="Chunk overlap to use for processing"),
|
|
107
|
+
db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
|
|
108
|
+
):
|
|
109
|
+
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
110
|
+
log_info(f"Adding content: {name}, {description}, {url}, {metadata}")
|
|
111
|
+
|
|
112
|
+
parsed_metadata = None
|
|
113
|
+
if metadata:
|
|
114
|
+
try:
|
|
115
|
+
parsed_metadata = json.loads(metadata)
|
|
116
|
+
except json.JSONDecodeError:
|
|
117
|
+
# If it's not valid JSON, treat as a simple key-value pair
|
|
118
|
+
parsed_metadata = {"value": metadata} if metadata != "string" else None
|
|
119
|
+
if file:
|
|
120
|
+
content_bytes = await file.read()
|
|
121
|
+
elif text_content:
|
|
122
|
+
content_bytes = text_content.encode("utf-8")
|
|
123
|
+
else:
|
|
124
|
+
content_bytes = None
|
|
125
|
+
|
|
126
|
+
parsed_urls = None
|
|
127
|
+
if url and url.strip():
|
|
128
|
+
try:
|
|
129
|
+
parsed_urls = json.loads(url)
|
|
130
|
+
log_debug(f"Parsed URLs: {parsed_urls}")
|
|
131
|
+
except json.JSONDecodeError:
|
|
132
|
+
# If it's not valid JSON, treat as a single URL string
|
|
133
|
+
parsed_urls = url
|
|
134
|
+
|
|
135
|
+
# # Parse metadata with proper error handling
|
|
136
|
+
parsed_metadata = None
|
|
137
|
+
if metadata:
|
|
138
|
+
try:
|
|
139
|
+
parsed_metadata = json.loads(metadata)
|
|
140
|
+
except json.JSONDecodeError:
|
|
141
|
+
# If it's not valid JSON, treat as a simple key-value pair
|
|
142
|
+
parsed_metadata = {"value": metadata}
|
|
143
|
+
|
|
144
|
+
if text_content:
|
|
145
|
+
file_data = FileData(
|
|
146
|
+
content=content_bytes,
|
|
147
|
+
type="manual",
|
|
148
|
+
)
|
|
149
|
+
elif file:
|
|
150
|
+
file_data = FileData(
|
|
151
|
+
content=content_bytes,
|
|
152
|
+
type=file.content_type if file.content_type else None,
|
|
153
|
+
filename=file.filename,
|
|
154
|
+
size=file.size,
|
|
155
|
+
)
|
|
156
|
+
else:
|
|
157
|
+
file_data = None
|
|
158
|
+
|
|
159
|
+
if not name:
|
|
160
|
+
if file and file.filename:
|
|
161
|
+
name = file.filename
|
|
162
|
+
elif url:
|
|
163
|
+
name = parsed_urls
|
|
164
|
+
|
|
165
|
+
content = Content(
|
|
166
|
+
name=name,
|
|
167
|
+
description=description,
|
|
168
|
+
url=parsed_urls,
|
|
169
|
+
metadata=parsed_metadata,
|
|
170
|
+
file_data=file_data,
|
|
171
|
+
size=file.size if file else None if text_content else None,
|
|
172
|
+
)
|
|
173
|
+
content_hash = knowledge._build_content_hash(content)
|
|
174
|
+
content.content_hash = content_hash
|
|
175
|
+
content.id = generate_id(content_hash)
|
|
176
|
+
|
|
177
|
+
background_tasks.add_task(process_content, knowledge, content, reader_id, chunker, chunk_size, chunk_overlap)
|
|
178
|
+
|
|
179
|
+
response = ContentResponseSchema(
|
|
180
|
+
id=content.id,
|
|
181
|
+
name=name,
|
|
182
|
+
description=description,
|
|
183
|
+
metadata=parsed_metadata,
|
|
184
|
+
status=ContentStatus.PROCESSING,
|
|
185
|
+
)
|
|
186
|
+
return response
|
|
187
|
+
|
|
188
|
+
@router.patch(
    "/knowledge/content/{content_id}",
    response_model=ContentResponseSchema,
    status_code=200,
    operation_id="update_content",
    summary="Update Content",
    description=(
        "Update content properties such as name, description, metadata, or processing configuration. "
        "Allows modification of existing content without re-uploading."
    ),
    responses={
        200: {
            "description": "Content updated successfully",
            "content": {
                "application/json": {
                    "example": {
                        "id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
                        "name": "example.pdf",
                        "description": "",
                        "type": "application/pdf",
                        "size": "251261",
                        "linked_to": None,
                        "metadata": {},
                        "access_count": 1,
                        "status": "completed",
                        "status_message": "",
                        "created_at": "2025-09-08T15:22:53Z",
                        "updated_at": "2025-09-08T15:22:54Z",
                    }
                }
            },
        },
        400: {
            "description": "Invalid request - malformed metadata or invalid reader_id",
            "model": BadRequestResponse,
        },
        404: {"description": "Content not found", "model": NotFoundResponse},
    },
)
async def update_content(
    content_id: str = Path(..., description="Content ID"),
    name: Optional[str] = Form(None, description="Content name"),
    description: Optional[str] = Form(None, description="Content description"),
    metadata: Optional[str] = Form(None, description="Content metadata as JSON string"),
    reader_id: Optional[str] = Form(None, description="ID of the reader to use for processing"),
    db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
) -> Optional[ContentResponseSchema]:
    """Patch mutable properties of an existing content item.

    Accepts multipart form fields. Blank or whitespace-only fields are
    normalized to ``None`` before patching, so a field cannot be *cleared*
    by sending an empty string — it is simply left untouched.

    Raises:
        HTTPException(400): if ``metadata`` is not valid JSON, or
            ``reader_id`` does not name a registered reader.
        HTTPException(404): if no content with ``content_id`` exists.
    """
    knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)

    # Parse metadata JSON string if provided
    parsed_metadata = None
    if metadata and metadata.strip():
        try:
            parsed_metadata = json.loads(metadata)
        except json.JSONDecodeError:
            raise HTTPException(status_code=400, detail="Invalid JSON format for metadata")

    # Create ContentUpdateSchema object from form data; blank strings are
    # coerced to None so only meaningful values are applied.
    update_data = ContentUpdateSchema(
        name=name if name and name.strip() else None,
        description=description if description and description.strip() else None,
        metadata=parsed_metadata,
        reader_id=reader_id if reader_id and reader_id.strip() else None,
    )

    # Build a sparse Content carrying only the fields to patch.
    content = Content(
        id=content_id,
        name=update_data.name,
        description=update_data.description,
        metadata=update_data.metadata,
    )

    # Validate reader_id against the knowledge instance's registered readers
    # before attaching it to the content.
    if update_data.reader_id:
        if knowledge.readers and update_data.reader_id in knowledge.readers:
            content.reader = knowledge.readers[update_data.reader_id]
        else:
            raise HTTPException(status_code=400, detail=f"Invalid reader_id: {update_data.reader_id}")

    # NOTE(review): patch_content is a synchronous call inside an async
    # handler — presumably a fast DB write; confirm it does not block the loop.
    updated_content_dict = knowledge.patch_content(content)
    if not updated_content_dict:
        raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")

    return ContentResponseSchema.from_dict(updated_content_dict)
|
|
271
|
+
|
|
272
|
+
@router.get(
    "/knowledge/content",
    response_model=PaginatedResponse[ContentResponseSchema],
    status_code=200,
    operation_id="get_content",
    summary="List Content",
    description=(
        "Retrieve paginated list of all content in the knowledge base with filtering and sorting options. "
        "Filter by status, content type, or metadata properties."
    ),
    responses={
        200: {
            "description": "Content list retrieved successfully",
            "content": {
                "application/json": {
                    "example": {
                        "data": [
                            {
                                "id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
                                "name": "example.pdf",
                                "description": "",
                                "type": "application/pdf",
                                "size": "251261",
                                "linked_to": None,
                                "metadata": {},
                                "access_count": 1,
                                "status": "completed",
                                "status_message": "",
                                "created_at": "2025-09-08T15:22:53Z",
                                "updated_at": "2025-09-08T15:22:54Z",
                            },
                        ],
                        "meta": {"page": 1, "limit": 20, "total_pages": 1, "total_count": 2},
                    }
                }
            },
        }
    },
)
async def get_content(
    limit: Optional[int] = Query(default=20, description="Number of content entries to return"),
    page: Optional[int] = Query(default=1, description="Page number"),
    sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
    sort_order: Optional[SortOrder] = Query(default="desc", description="Sort order (asc or desc)"),
    db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
) -> PaginatedResponse[ContentResponseSchema]:
    """Return one page of content entries for the resolved knowledge base.

    Pagination and sorting are delegated to ``knowledge.aget_content``,
    which returns the page of items plus the total count across all pages.
    """
    knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
    contents, count = await knowledge.aget_content(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)

    return PaginatedResponse(
        data=[
            # Re-shape each Content into the flat dict the response schema expects.
            ContentResponseSchema.from_dict(
                {
                    "id": content.id,
                    "name": content.name,
                    "description": content.description,
                    "file_type": content.file_type,
                    "size": content.size,
                    "metadata": content.metadata,
                    "status": content.status,
                    "status_message": content.status_message,
                    "created_at": content.created_at,
                    "updated_at": content.updated_at,
                }
            )
            for content in contents
        ],
        meta=PaginationInfo(
            page=page,
            limit=limit,
            total_count=count,
            # Guard against limit=None or limit<=0 to avoid division errors.
            total_pages=math.ceil(count / limit) if limit is not None and limit > 0 else 0,
        ),
    )
|
|
346
|
+
|
|
347
|
+
@router.get(
    "/knowledge/content/{content_id}",
    response_model=ContentResponseSchema,
    status_code=200,
    operation_id="get_content_by_id",
    summary="Get Content by ID",
    description="Retrieve detailed information about a specific content item including processing status and metadata.",
    responses={
        200: {
            "description": "Content details retrieved successfully",
            "content": {
                "application/json": {
                    "example": {
                        "id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
                        "name": "example.pdf",
                        "description": "",
                        "type": "application/pdf",
                        "size": "251261",
                        "linked_to": None,
                        "metadata": {},
                        "access_count": 1,
                        "status": "completed",
                        "status_message": "",
                        "created_at": "2025-09-08T15:22:53Z",
                        "updated_at": "2025-09-08T15:22:54Z",
                    }
                }
            },
        },
        404: {"description": "Content not found", "model": NotFoundResponse},
    },
)
async def get_content_by_id(
    content_id: str,
    db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
) -> ContentResponseSchema:
    """Fetch a single content item by id.

    Raises:
        HTTPException(404): if no content with ``content_id`` exists.
    """
    log_info(f"Getting content by id: {content_id}")
    knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
    content = await knowledge.aget_content_by_id(content_id=content_id)
    if not content:
        raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")
    response = ContentResponseSchema.from_dict(
        {
            "id": content_id,
            "name": content.name,
            "description": content.description,
            "file_type": content.file_type,
            # Size is recomputed from the raw bytes when present; 0 when no
            # file payload is attached to the record.
            "size": len(content.file_data.content) if content.file_data and content.file_data.content else 0,
            "metadata": content.metadata,
            "status": content.status,
            "status_message": content.status_message,
            "created_at": content.created_at,
            "updated_at": content.updated_at,
        }
    )

    return response
|
|
404
|
+
|
|
405
|
+
@router.delete(
    "/knowledge/content/{content_id}",
    response_model=ContentResponseSchema,
    status_code=200,
    response_model_exclude_none=True,
    operation_id="delete_content_by_id",
    summary="Delete Content by ID",
    description="Permanently remove a specific content item from the knowledge base. This action cannot be undone.",
    responses={
        200: {},
        404: {"description": "Content not found", "model": NotFoundResponse},
        500: {"description": "Failed to delete content", "model": InternalServerErrorResponse},
    },
)
async def delete_content_by_id(
    content_id: str,
    db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
) -> ContentResponseSchema:
    """Remove one content item and echo back its id.

    The deletion is delegated to the Knowledge instance resolved from
    ``db_id``. Only the ``id`` field is populated in the response; other
    fields are dropped via ``response_model_exclude_none``.
    """
    kb = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
    await kb.aremove_content_by_id(content_id=content_id)
    log_info(f"Deleting content by id: {content_id}")
    return ContentResponseSchema(id=content_id)
|
|
430
|
+
|
|
431
|
+
@router.delete(
    "/knowledge/content",
    status_code=200,
    operation_id="delete_all_content",
    summary="Delete All Content",
    description=(
        "Permanently remove all content from the knowledge base. This is a destructive operation that "
        "cannot be undone. Use with extreme caution."
    ),
    responses={
        200: {},
        500: {"description": "Failed to delete all content", "model": InternalServerErrorResponse},
    },
)
def delete_all_content(
    db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
):
    """Wipe every content item from the resolved knowledge base.

    Destructive and irreversible. Returns the literal string "success"
    when the removal call completes without raising.
    """
    kb = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
    log_info("Deleting all content")
    kb.remove_all_content()
    return "success"
|
|
452
|
+
|
|
453
|
+
@router.get(
    "/knowledge/content/{content_id}/status",
    status_code=200,
    response_model=ContentStatusResponse,
    operation_id="get_content_status",
    summary="Get Content Status",
    description=(
        "Retrieve the current processing status of a content item. Useful for monitoring "
        "asynchronous content processing progress and identifying any processing errors."
    ),
    responses={
        200: {
            "description": "Content status retrieved successfully",
            "content": {
                "application/json": {
                    "examples": {
                        "completed": {
                            "summary": "Example completed content status",
                            "value": {
                                "status": "completed",
                                "status_message": "",
                            },
                        }
                    }
                }
            },
        },
        404: {"description": "Content not found", "model": NotFoundResponse},
    },
)
async def get_content_status(
    content_id: str,
    db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
) -> ContentStatusResponse:
    """Report the processing status of a content item.

    The knowledge layer may hand back an enum member, a plain string, or
    ``None`` (not found); this handler normalizes all of those into the
    schema's ``ContentStatus`` enum.

    NOTE(review): a missing content item is reported as a 200 response with
    status FAILED, even though the route advertises a 404 model — confirm
    this is intentional.
    """
    log_info(f"Getting content status: {content_id}")
    knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
    knowledge_status, status_message = await knowledge.aget_content_status(content_id=content_id)

    # Handle the case where content is not found
    if knowledge_status is None:
        return ContentStatusResponse(
            status=ContentStatus.FAILED, status_message=status_message or "Content not found"
        )

    # Convert knowledge ContentStatus to schema ContentStatus (they have same values)
    if hasattr(knowledge_status, "value"):
        status_value = knowledge_status.value
    else:
        status_value = str(knowledge_status)

    # Convert string status to ContentStatus enum if needed (for backward compatibility and mocks)
    if isinstance(status_value, str):
        try:
            status = ContentStatus(status_value.lower())
        except ValueError:
            # Handle legacy or unknown statuses gracefully: fall back on a
            # substring match, defaulting to PROCESSING for anything unknown.
            if "failed" in status_value.lower():
                status = ContentStatus.FAILED
            elif "completed" in status_value.lower():
                status = ContentStatus.COMPLETED
            else:
                status = ContentStatus.PROCESSING
    else:
        # Non-string enum value (e.g. an int-backed mock): treat as in-progress.
        status = ContentStatus.PROCESSING

    return ContentStatusResponse(status=status, status_message=status_message or "")
|
|
519
|
+
|
|
520
|
+
@router.post(
    "/knowledge/search",
    status_code=200,
    operation_id="search_knowledge",
    summary="Search Knowledge",
    description="Search the knowledge base for relevant documents using query, filters and search type.",
    response_model=PaginatedResponse[VectorSearchResult],
    responses={
        200: {
            "description": "Search results retrieved successfully",
            "content": {
                "application/json": {
                    "example": {
                        "data": [
                            {
                                "id": "doc_123",
                                "content": "Jordan Mitchell - Software Engineer with skills in JavaScript, React, Python",
                                "name": "cv_1",
                                "meta_data": {"page": 1, "chunk": 1},
                                "usage": {"total_tokens": 14},
                                "reranking_score": 0.95,
                                "content_id": "content_456",
                            }
                        ],
                        "meta": {"page": 1, "limit": 20, "total_pages": 2, "total_count": 35},
                    }
                }
            },
        },
        400: {"description": "Invalid search parameters"},
        404: {"description": "No documents found"},
    },
)
def search_knowledge(request: VectorSearchRequestSchema) -> PaginatedResponse[VectorSearchResult]:
    """Search the knowledge base and return one page of results.

    The full result set is fetched from the vector store (bounded by
    ``request.max_results``) and then paginated in memory according to
    ``request.meta``. Search latency is reported in ``meta.search_time_ms``.

    Raises:
        HTTPException(400): if the requested vector DB ids do not match the
            knowledge base's vector DB, or no vector DB is configured.
    """
    import time

    start_time = time.time()

    knowledge = get_knowledge_instance_by_db_id(knowledge_instances, request.db_id)

    # For now, validate the vector db ids exist in the knowledge base
    # We will add more logic around this once we have multi vectordb support
    # If vector db ids are provided, check if any of them match the knowledge's vector db
    if request.vector_db_ids:
        if knowledge.vector_db and knowledge.vector_db.id:
            if knowledge.vector_db.id not in request.vector_db_ids:
                raise HTTPException(
                    status_code=400,
                    detail=f"None of the provided Vector DB IDs {request.vector_db_ids} match the knowledge base Vector DB ID {knowledge.vector_db.id}",
                )
        else:
            raise HTTPException(status_code=400, detail="Knowledge base has no vector database configured")

    # Calculate pagination parameters
    meta = request.meta
    limit = meta.limit if meta and meta.limit is not None else 20
    page = meta.page if meta and meta.page is not None else 1

    # FIX: a client-supplied limit of 0 previously caused a ZeroDivisionError
    # in the total_pages calculation, and page <= 0 produced a negative slice
    # start (silently wrong results). Clamp both to sane defaults.
    if limit <= 0:
        limit = 20
    if page <= 0:
        page = 1

    # Use max_results if specified; when None, the underlying search applies
    # its own default result cap (presumably — confirm against Knowledge.search).
    search_limit = request.max_results

    results = knowledge.search(
        query=request.query, max_results=search_limit, filters=request.filters, search_type=request.search_type
    )

    # Calculate pagination
    total_results = len(results)
    start_idx = (page - 1) * limit

    # Ensure start_idx doesn't exceed the total results
    if start_idx >= total_results and total_results > 0:
        # If page is beyond available results, return empty results
        paginated_results = []
    else:
        end_idx = min(start_idx + limit, total_results)
        paginated_results = results[start_idx:end_idx]

    search_time_ms = (time.time() - start_time) * 1000

    # Convert Document objects to serializable format
    document_results = [VectorSearchResult.from_document(doc) for doc in paginated_results]

    # Calculate pagination info
    total_pages = (total_results + limit - 1) // limit  # Ceiling division (limit > 0 guaranteed above)

    return PaginatedResponse(
        data=document_results,
        meta=PaginationInfo(
            page=page,
            limit=limit,
            total_pages=total_pages,
            total_count=total_results,
            search_time_ms=search_time_ms,
        ),
    )
|
|
615
|
+
|
|
616
|
+
@router.get(
    "/knowledge/config",
    status_code=200,
    operation_id="get_knowledge_config",
    summary="Get Knowledge Configuration",
    description=(
        "Retrieve available readers, chunkers, and configuration options for content processing. "
        "This endpoint provides metadata about supported file types, processing strategies, and filters."
    ),
    responses={
        200: {
            "description": "Knowledge configuration retrieved successfully",
            "content": {
                "application/json": {
                    "example": {
                        "readers": {
                            "website": {
                                "id": "website",
                                "name": "WebsiteReader",
                                "description": "Reads website files",
                                "chunkers": [
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                    "SemanticChunker",
                                    "FixedSizeChunker",
                                ],
                            },
                            "firecrawl": {
                                "id": "firecrawl",
                                "name": "FirecrawlReader",
                                "description": "Reads firecrawl files",
                                "chunkers": [
                                    "SemanticChunker",
                                    "FixedSizeChunker",
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                ],
                            },
                            "youtube": {
                                "id": "youtube",
                                "name": "YoutubeReader",
                                "description": "Reads youtube files",
                                "chunkers": [
                                    "RecursiveChunker",
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "SemanticChunker",
                                    "FixedSizeChunker",
                                ],
                            },
                            "web_search": {
                                "id": "web_search",
                                "name": "WebSearchReader",
                                "description": "Reads web_search files",
                                "chunkers": [
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                    "SemanticChunker",
                                    "FixedSizeChunker",
                                ],
                            },
                            "arxiv": {
                                "id": "arxiv",
                                "name": "ArxivReader",
                                "description": "Reads arxiv files",
                                "chunkers": [
                                    "FixedSizeChunker",
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                    "SemanticChunker",
                                ],
                            },
                            "csv": {
                                "id": "csv",
                                "name": "CsvReader",
                                "description": "Reads csv files",
                                "chunkers": [
                                    "RowChunker",
                                    "FixedSizeChunker",
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                ],
                            },
                            "docx": {
                                "id": "docx",
                                "name": "DocxReader",
                                "description": "Reads docx files",
                                "chunkers": [
                                    "DocumentChunker",
                                    "FixedSizeChunker",
                                    "SemanticChunker",
                                    "AgenticChunker",
                                    "RecursiveChunker",
                                ],
                            },
                            "gcs": {
                                "id": "gcs",
                                "name": "GcsReader",
                                "description": "Reads gcs files",
                                "chunkers": [
                                    "FixedSizeChunker",
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                    "SemanticChunker",
                                ],
                            },
                            "json": {
                                "id": "json",
                                "name": "JsonReader",
                                "description": "Reads json files",
                                "chunkers": [
                                    "FixedSizeChunker",
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                    "SemanticChunker",
                                ],
                            },
                            "markdown": {
                                "id": "markdown",
                                "name": "MarkdownReader",
                                "description": "Reads markdown files",
                                "chunkers": [
                                    "MarkdownChunker",
                                    "DocumentChunker",
                                    "AgenticChunker",
                                    "RecursiveChunker",
                                    "SemanticChunker",
                                    "FixedSizeChunker",
                                ],
                            },
                            "pdf": {
                                "id": "pdf",
                                "name": "PdfReader",
                                "description": "Reads pdf files",
                                "chunkers": [
                                    "DocumentChunker",
                                    "FixedSizeChunker",
                                    "AgenticChunker",
                                    "SemanticChunker",
                                    "RecursiveChunker",
                                ],
                            },
                            "text": {
                                "id": "text",
                                "name": "TextReader",
                                "description": "Reads text files",
                                "chunkers": [
                                    "FixedSizeChunker",
                                    "AgenticChunker",
                                    "DocumentChunker",
                                    "RecursiveChunker",
                                    "SemanticChunker",
                                ],
                            },
                        },
                        "readersForType": {
                            "url": [
                                "url",
                                "website",
                                "firecrawl",
                                "youtube",
                                "web_search",
                                "gcs",
                            ],
                            "youtube": ["youtube"],
                            "text": ["web_search"],
                            "topic": ["arxiv"],
                            "file": ["csv", "gcs"],
                            ".csv": ["csv"],
                            ".xlsx": ["csv"],
                            ".xls": ["csv"],
                            ".docx": ["docx"],
                            ".doc": ["docx"],
                            ".json": ["json"],
                            ".md": ["markdown"],
                            ".pdf": ["pdf"],
                            ".txt": ["text"],
                        },
                        "chunkers": {
                            "AgenticChunker": {
                                "key": "AgenticChunker",
                                "name": "AgenticChunker",
                                "description": "Chunking strategy that uses an LLM to determine natural breakpoints in the text",
                                "metadata": {"chunk_size": 5000},
                            },
                            "DocumentChunker": {
                                "key": "DocumentChunker",
                                "name": "DocumentChunker",
                                "description": "A chunking strategy that splits text based on document structure like paragraphs and sections",
                                "metadata": {
                                    "chunk_size": 5000,
                                    "chunk_overlap": 0,
                                },
                            },
                            "FixedSizeChunker": {
                                "key": "FixedSizeChunker",
                                "name": "FixedSizeChunker",
                                "description": "Chunking strategy that splits text into fixed-size chunks with optional overlap",
                                "metadata": {
                                    "chunk_size": 5000,
                                    "chunk_overlap": 0,
                                },
                            },
                            "MarkdownChunker": {
                                "key": "MarkdownChunker",
                                "name": "MarkdownChunker",
                                "description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
                                "metadata": {
                                    "chunk_size": 5000,
                                    "chunk_overlap": 0,
                                },
                            },
                            "RecursiveChunker": {
                                "key": "RecursiveChunker",
                                "name": "RecursiveChunker",
                                "description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
                                "metadata": {
                                    "chunk_size": 5000,
                                    "chunk_overlap": 0,
                                },
                            },
                            "RowChunker": {
                                "key": "RowChunker",
                                "name": "RowChunker",
                                "description": "RowChunking chunking strategy",
                                "metadata": {},
                            },
                            "SemanticChunker": {
                                "key": "SemanticChunker",
                                "name": "SemanticChunker",
                                "description": "Chunking strategy that splits text into semantic chunks using chonkie",
                                "metadata": {"chunk_size": 5000},
                            },
                        },
                        "vector_dbs": [
                            {
                                "id": "vector_db_1",
                                "name": "Vector DB 1",
                                "description": "Vector DB 1 description",
                                "search_types": ["vector", "keyword", "hybrid"],
                            }
                        ],
                        "filters": ["filter_tag_1", "filter_tag2"],
                    }
                }
            },
        },
    },
)
def get_config(
    db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
) -> ConfigResponseSchema:
    """Assemble the knowledge-base configuration: readers (factory +
    custom), content-type-to-reader mapping, chunkers, vector DBs, and
    available filters.
    """
    knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)

    # Get factory readers info
    readers_info = get_all_readers_info()
    reader_schemas: Dict[str, ReaderSchema] = {}
    # Add factory readers
    for reader_info in readers_info:
        reader_schemas[reader_info["id"]] = ReaderSchema(
            id=reader_info["id"],
            name=reader_info["name"],
            description=reader_info.get("description"),
            chunkers=reader_info.get("chunking_strategies", []),
        )

    # Add custom readers from knowledge.readers
    readers_dict: Dict[str, Reader] = knowledge.get_readers() or {}
    if readers_dict:
        for reader_id, reader in readers_dict.items():
            # Get chunking strategies from the reader; best-effort — a reader
            # that fails here is simply listed with no chunkers.
            chunking_strategies = []
            try:
                strategies = reader.get_supported_chunking_strategies()
                chunking_strategies = [strategy.value for strategy in strategies]
            except Exception:
                chunking_strategies = []

            # Check if this reader ID already exists in factory readers;
            # factory readers take precedence over custom ones with the same id.
            if reader_id not in reader_schemas:
                reader_schemas[reader_id] = ReaderSchema(
                    id=reader_id,
                    name=getattr(reader, "name", reader.__class__.__name__),
                    description=getattr(reader, "description", f"Custom {reader.__class__.__name__}"),
                    chunkers=chunking_strategies,
                )

    # Get content types to readers mapping
    types_of_readers = get_content_types_to_readers_mapping()
    chunkers_list = get_all_chunkers_info()

    # Convert chunkers list to dictionary format expected by schema
    chunkers_dict: Dict[str, ChunkerSchema] = {}
    for chunker_info in chunkers_list:
        chunker_key = chunker_info.get("key")
        if chunker_key:
            chunkers_dict[chunker_key] = ChunkerSchema(
                key=chunker_key,
                name=chunker_info.get("name"),
                description=chunker_info.get("description"),
                metadata=chunker_info.get("metadata", {}),
            )

    vector_dbs = []
    if knowledge.vector_db:
        search_types = knowledge.vector_db.get_supported_search_types()
        name = knowledge.vector_db.name
        # NOTE(review): this rebinds the db_id parameter to the vector DB's
        # own id; the original request value is no longer needed past here.
        db_id = knowledge.vector_db.id
        vector_dbs.append(
            VectorDbSchema(
                id=db_id,
                name=name,
                description=knowledge.vector_db.description,
                search_types=search_types,
            )
        )

    return ConfigResponseSchema(
        readers=reader_schemas,
        vector_dbs=vector_dbs,
        readersForType=types_of_readers,
        chunkers=chunkers_dict,
        filters=knowledge.get_filters(),
    )
|
|
947
|
+
|
|
948
|
+
return router
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
async def process_content(
    knowledge: Knowledge,
    content: Content,
    reader_id: Optional[str] = None,
    chunker: Optional[str] = None,
    chunk_size: Optional[int] = None,
    chunk_overlap: Optional[int] = None,
):
    """Background task to process the content.

    Resolves an optional reader and chunking strategy onto the content,
    then loads it into the knowledge base. On any failure the content row
    is best-effort marked FAILED with the error message; secondary errors
    during that marking are swallowed so the background task never crashes.

    Args:
        knowledge: Knowledge instance the content belongs to.
        content: The content record to process (id/reader already set by caller).
        reader_id: Optional reader key; looked up in knowledge.readers first,
            then resolved via ReaderFactory.
        chunker: Optional chunking-strategy name set on the resolved reader.
        chunk_size: Optional chunk size forwarded to the chunking strategy.
        chunk_overlap: Optional chunk overlap forwarded to the chunking strategy.
    """

    try:
        if reader_id:
            reader = None
            if knowledge.readers and reader_id in knowledge.readers:
                reader = knowledge.readers[reader_id]
            else:
                # Normalize the id ("PDF Reader" -> "pdf_reader") and also try
                # with a trailing "reader" suffix stripped ("pdf_reader" -> "pdf_").
                key = reader_id.lower().strip().replace("-", "_").replace(" ", "_")
                candidates = [key] + ([key[:-6]] if key.endswith("reader") else [])
                for cand in candidates:
                    try:
                        reader = ReaderFactory.create_reader(cand)
                        log_debug(f"Resolved reader: {reader.__class__.__name__}")
                        break
                    except Exception:
                        # Unresolvable candidate — try the next one.
                        continue
            if reader:
                content.reader = reader
        # The chunker applies to whatever reader the content ends up with,
        # whether set above or already attached by the upload handler.
        if chunker and content.reader:
            # Set the chunker name on the reader - let the reader handle it internally
            content.reader.set_chunking_strategy_from_string(chunker, chunk_size=chunk_size, overlap=chunk_overlap)
            log_debug(f"Set chunking strategy: {chunker}")

        log_debug(f"Using reader: {content.reader.__class__.__name__}")
        await knowledge._load_content(content, upsert=False, skip_if_exists=True)
        log_info(f"Content {content.id} processed successfully")
    except Exception as e:
        log_info(f"Error processing content: {e}")
        # Mark content as failed in the contents DB
        try:
            from agno.knowledge.content import ContentStatus as KnowledgeContentStatus

            content.status = KnowledgeContentStatus.FAILED
            content.status_message = str(e)
            knowledge.patch_content(content)
        except Exception:
            # Swallow any secondary errors to avoid crashing the background task
            pass
|