PyPI - agno - Versions diffs - 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

agno 2.0.0rc2-py3-none-any.whl → 2.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (331)

agno/agent/agent.py +6009 -2874
agno/api/api.py +2 -0
agno/api/os.py +1 -1
agno/culture/__init__.py +3 -0
agno/culture/manager.py +956 -0
agno/db/async_postgres/__init__.py +3 -0
agno/db/base.py +385 -6
agno/db/dynamo/dynamo.py +388 -81
agno/db/dynamo/schemas.py +47 -10
agno/db/dynamo/utils.py +63 -4
agno/db/firestore/firestore.py +435 -64
agno/db/firestore/schemas.py +11 -0
agno/db/firestore/utils.py +102 -4
agno/db/gcs_json/gcs_json_db.py +384 -42
agno/db/gcs_json/utils.py +60 -26
agno/db/in_memory/in_memory_db.py +351 -66
agno/db/in_memory/utils.py +60 -2
agno/db/json/json_db.py +339 -48
agno/db/json/utils.py +60 -26
agno/db/migrations/manager.py +199 -0
agno/db/migrations/v1_to_v2.py +510 -37
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +938 -0
agno/db/mongo/__init__.py +15 -1
agno/db/mongo/async_mongo.py +2036 -0
agno/db/mongo/mongo.py +653 -76
agno/db/mongo/schemas.py +13 -0
agno/db/mongo/utils.py +80 -8
agno/db/mysql/mysql.py +687 -25
agno/db/mysql/schemas.py +61 -37
agno/db/mysql/utils.py +60 -2
agno/db/postgres/__init__.py +2 -1
agno/db/postgres/async_postgres.py +2001 -0
agno/db/postgres/postgres.py +676 -57
agno/db/postgres/schemas.py +43 -18
agno/db/postgres/utils.py +164 -2
agno/db/redis/redis.py +344 -38
agno/db/redis/schemas.py +18 -0
agno/db/redis/utils.py +60 -2
agno/db/schemas/__init__.py +2 -1
agno/db/schemas/culture.py +120 -0
agno/db/schemas/memory.py +13 -0
agno/db/singlestore/schemas.py +26 -1
agno/db/singlestore/singlestore.py +687 -53
agno/db/singlestore/utils.py +60 -2
agno/db/sqlite/__init__.py +2 -1
agno/db/sqlite/async_sqlite.py +2371 -0
agno/db/sqlite/schemas.py +24 -0
agno/db/sqlite/sqlite.py +774 -85
agno/db/sqlite/utils.py +168 -5
agno/db/surrealdb/__init__.py +3 -0
agno/db/surrealdb/metrics.py +292 -0
agno/db/surrealdb/models.py +309 -0
agno/db/surrealdb/queries.py +71 -0
agno/db/surrealdb/surrealdb.py +1361 -0
agno/db/surrealdb/utils.py +147 -0
agno/db/utils.py +50 -22
agno/eval/accuracy.py +50 -43
agno/eval/performance.py +6 -3
agno/eval/reliability.py +6 -3
agno/eval/utils.py +33 -16
agno/exceptions.py +68 -1
agno/filters.py +354 -0
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +52 -0
agno/integrations/discord/client.py +1 -0
agno/knowledge/chunking/agentic.py +13 -10
agno/knowledge/chunking/fixed.py +1 -1
agno/knowledge/chunking/semantic.py +40 -8
agno/knowledge/chunking/strategy.py +59 -15
agno/knowledge/embedder/aws_bedrock.py +9 -4
agno/knowledge/embedder/azure_openai.py +54 -0
agno/knowledge/embedder/base.py +2 -0
agno/knowledge/embedder/cohere.py +184 -5
agno/knowledge/embedder/fastembed.py +1 -1
agno/knowledge/embedder/google.py +79 -1
agno/knowledge/embedder/huggingface.py +9 -4
agno/knowledge/embedder/jina.py +63 -0
agno/knowledge/embedder/mistral.py +78 -11
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/ollama.py +13 -0
agno/knowledge/embedder/openai.py +37 -65
agno/knowledge/embedder/sentence_transformer.py +8 -4
agno/knowledge/embedder/vllm.py +262 -0
agno/knowledge/embedder/voyageai.py +69 -16
agno/knowledge/knowledge.py +595 -187
agno/knowledge/reader/base.py +9 -2
agno/knowledge/reader/csv_reader.py +8 -10
agno/knowledge/reader/docx_reader.py +5 -6
agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
agno/knowledge/reader/json_reader.py +6 -5
agno/knowledge/reader/markdown_reader.py +13 -13
agno/knowledge/reader/pdf_reader.py +43 -68
agno/knowledge/reader/pptx_reader.py +101 -0
agno/knowledge/reader/reader_factory.py +51 -6
agno/knowledge/reader/s3_reader.py +3 -15
agno/knowledge/reader/tavily_reader.py +194 -0
agno/knowledge/reader/text_reader.py +13 -13
agno/knowledge/reader/web_search_reader.py +2 -43
agno/knowledge/reader/website_reader.py +43 -25
agno/knowledge/reranker/__init__.py +3 -0
agno/knowledge/types.py +9 -0
agno/knowledge/utils.py +20 -0
agno/media.py +339 -266
agno/memory/manager.py +336 -82
agno/models/aimlapi/aimlapi.py +2 -2
agno/models/anthropic/claude.py +183 -37
agno/models/aws/bedrock.py +52 -112
agno/models/aws/claude.py +33 -1
agno/models/azure/ai_foundry.py +33 -15
agno/models/azure/openai_chat.py +25 -8
agno/models/base.py +1011 -566
agno/models/cerebras/cerebras.py +19 -13
agno/models/cerebras/cerebras_openai.py +8 -5
agno/models/cohere/chat.py +27 -1
agno/models/cometapi/__init__.py +5 -0
agno/models/cometapi/cometapi.py +57 -0
agno/models/dashscope/dashscope.py +1 -0
agno/models/deepinfra/deepinfra.py +2 -2
agno/models/deepseek/deepseek.py +2 -2
agno/models/fireworks/fireworks.py +2 -2
agno/models/google/gemini.py +110 -37
agno/models/groq/groq.py +28 -11
agno/models/huggingface/huggingface.py +2 -1
agno/models/internlm/internlm.py +2 -2
agno/models/langdb/langdb.py +4 -4
agno/models/litellm/chat.py +18 -1
agno/models/litellm/litellm_openai.py +2 -2
agno/models/llama_cpp/__init__.py +5 -0
agno/models/llama_cpp/llama_cpp.py +22 -0
agno/models/message.py +143 -4
agno/models/meta/llama.py +27 -10
agno/models/meta/llama_openai.py +5 -17
agno/models/nebius/nebius.py +6 -6
agno/models/nexus/__init__.py +3 -0
agno/models/nexus/nexus.py +22 -0
agno/models/nvidia/nvidia.py +2 -2
agno/models/ollama/chat.py +60 -6
agno/models/openai/chat.py +102 -43
agno/models/openai/responses.py +103 -106
agno/models/openrouter/openrouter.py +41 -3
agno/models/perplexity/perplexity.py +4 -5
agno/models/portkey/portkey.py +3 -3
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +52 -0
agno/models/response.py +81 -5
agno/models/sambanova/sambanova.py +2 -2
agno/models/siliconflow/__init__.py +5 -0
agno/models/siliconflow/siliconflow.py +25 -0
agno/models/together/together.py +2 -2
agno/models/utils.py +254 -8
agno/models/vercel/v0.py +2 -2
agno/models/vertexai/__init__.py +0 -0
agno/models/vertexai/claude.py +96 -0
agno/models/vllm/vllm.py +1 -0
agno/models/xai/xai.py +3 -2
agno/os/app.py +543 -175
agno/os/auth.py +24 -14
agno/os/config.py +1 -0
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/a2a/__init__.py +3 -0
agno/os/interfaces/a2a/a2a.py +42 -0
agno/os/interfaces/a2a/router.py +250 -0
agno/os/interfaces/a2a/utils.py +924 -0
agno/os/interfaces/agui/agui.py +23 -7
agno/os/interfaces/agui/router.py +27 -3
agno/os/interfaces/agui/utils.py +242 -142
agno/os/interfaces/base.py +6 -2
agno/os/interfaces/slack/router.py +81 -23
agno/os/interfaces/slack/slack.py +29 -14
agno/os/interfaces/whatsapp/router.py +11 -4
agno/os/interfaces/whatsapp/whatsapp.py +14 -7
agno/os/mcp.py +111 -54
agno/os/middleware/__init__.py +7 -0
agno/os/middleware/jwt.py +233 -0
agno/os/router.py +556 -139
agno/os/routers/evals/evals.py +71 -34
agno/os/routers/evals/schemas.py +31 -31
agno/os/routers/evals/utils.py +6 -5
agno/os/routers/health.py +31 -0
agno/os/routers/home.py +52 -0
agno/os/routers/knowledge/knowledge.py +185 -38
agno/os/routers/knowledge/schemas.py +82 -22
agno/os/routers/memory/memory.py +158 -53
agno/os/routers/memory/schemas.py +20 -16
agno/os/routers/metrics/metrics.py +20 -8
agno/os/routers/metrics/schemas.py +16 -16
agno/os/routers/session/session.py +499 -38
agno/os/schema.py +308 -198
agno/os/utils.py +401 -41
agno/reasoning/anthropic.py +80 -0
agno/reasoning/azure_ai_foundry.py +2 -2
agno/reasoning/deepseek.py +2 -2
agno/reasoning/default.py +3 -1
agno/reasoning/gemini.py +73 -0
agno/reasoning/groq.py +2 -2
agno/reasoning/ollama.py +2 -2
agno/reasoning/openai.py +7 -2
agno/reasoning/vertexai.py +76 -0
agno/run/__init__.py +6 -0
agno/run/agent.py +266 -112
agno/run/base.py +53 -24
agno/run/team.py +252 -111
agno/run/workflow.py +156 -45
agno/session/agent.py +105 -89
agno/session/summary.py +65 -25
agno/session/team.py +176 -96
agno/session/workflow.py +406 -40
agno/team/team.py +3854 -1692
agno/tools/brightdata.py +3 -3
agno/tools/cartesia.py +3 -5
agno/tools/dalle.py +9 -8
agno/tools/decorator.py +4 -2
agno/tools/desi_vocal.py +2 -2
agno/tools/duckduckgo.py +15 -11
agno/tools/e2b.py +20 -13
agno/tools/eleven_labs.py +26 -28
agno/tools/exa.py +21 -16
agno/tools/fal.py +4 -4
agno/tools/file.py +153 -23
agno/tools/file_generation.py +350 -0
agno/tools/firecrawl.py +4 -4
agno/tools/function.py +257 -37
agno/tools/giphy.py +2 -2
agno/tools/gmail.py +238 -14
agno/tools/google_drive.py +270 -0
agno/tools/googlecalendar.py +36 -8
agno/tools/googlesheets.py +20 -5
agno/tools/jira.py +20 -0
agno/tools/knowledge.py +3 -3
agno/tools/lumalab.py +3 -3
agno/tools/mcp/__init__.py +10 -0
agno/tools/mcp/mcp.py +331 -0
agno/tools/mcp/multi_mcp.py +347 -0
agno/tools/mcp/params.py +24 -0
agno/tools/mcp_toolbox.py +284 -0
agno/tools/mem0.py +11 -17
agno/tools/memori.py +1 -53
agno/tools/memory.py +419 -0
agno/tools/models/azure_openai.py +2 -2
agno/tools/models/gemini.py +3 -3
agno/tools/models/groq.py +3 -5
agno/tools/models/nebius.py +7 -7
agno/tools/models_labs.py +25 -15
agno/tools/notion.py +204 -0
agno/tools/openai.py +4 -9
agno/tools/opencv.py +3 -3
agno/tools/parallel.py +314 -0
agno/tools/replicate.py +7 -7
agno/tools/scrapegraph.py +58 -31
agno/tools/searxng.py +2 -2
agno/tools/serper.py +2 -2
agno/tools/slack.py +18 -3
agno/tools/spider.py +2 -2
agno/tools/tavily.py +146 -0
agno/tools/whatsapp.py +1 -1
agno/tools/workflow.py +278 -0
agno/tools/yfinance.py +12 -11
agno/utils/agent.py +820 -0
agno/utils/audio.py +27 -0
agno/utils/common.py +90 -1
agno/utils/events.py +222 -7
agno/utils/gemini.py +181 -23
agno/utils/hooks.py +57 -0
agno/utils/http.py +111 -0
agno/utils/knowledge.py +12 -5
agno/utils/log.py +1 -0
agno/utils/mcp.py +95 -5
agno/utils/media.py +188 -10
agno/utils/merge_dict.py +22 -1
agno/utils/message.py +60 -0
agno/utils/models/claude.py +40 -11
agno/utils/models/cohere.py +1 -1
agno/utils/models/watsonx.py +1 -1
agno/utils/openai.py +1 -1
agno/utils/print_response/agent.py +105 -21
agno/utils/print_response/team.py +103 -38
agno/utils/print_response/workflow.py +251 -34
agno/utils/reasoning.py +22 -1
agno/utils/serialize.py +32 -0
agno/utils/streamlit.py +16 -10
agno/utils/string.py +41 -0
agno/utils/team.py +98 -9
agno/utils/tools.py +1 -1
agno/vectordb/base.py +23 -4
agno/vectordb/cassandra/cassandra.py +65 -9
agno/vectordb/chroma/chromadb.py +182 -38
agno/vectordb/clickhouse/clickhousedb.py +64 -11
agno/vectordb/couchbase/couchbase.py +105 -10
agno/vectordb/lancedb/lance_db.py +183 -135
agno/vectordb/langchaindb/langchaindb.py +25 -7
agno/vectordb/lightrag/lightrag.py +17 -3
agno/vectordb/llamaindex/__init__.py +3 -0
agno/vectordb/llamaindex/llamaindexdb.py +46 -7
agno/vectordb/milvus/milvus.py +126 -9
agno/vectordb/mongodb/__init__.py +7 -1
agno/vectordb/mongodb/mongodb.py +112 -7
agno/vectordb/pgvector/pgvector.py +142 -21
agno/vectordb/pineconedb/pineconedb.py +80 -8
agno/vectordb/qdrant/qdrant.py +125 -39
agno/vectordb/redis/__init__.py +9 -0
agno/vectordb/redis/redisdb.py +694 -0
agno/vectordb/singlestore/singlestore.py +111 -25
agno/vectordb/surrealdb/surrealdb.py +31 -5
agno/vectordb/upstashdb/upstashdb.py +76 -8
agno/vectordb/weaviate/weaviate.py +86 -15
agno/workflow/__init__.py +2 -0
agno/workflow/agent.py +299 -0
agno/workflow/condition.py +112 -18
agno/workflow/loop.py +69 -10
agno/workflow/parallel.py +266 -118
agno/workflow/router.py +110 -17
agno/workflow/step.py +645 -136
agno/workflow/steps.py +65 -6
agno/workflow/types.py +71 -33
agno/workflow/workflow.py +2113 -300
agno-2.3.0.dist-info/METADATA +618 -0
agno-2.3.0.dist-info/RECORD +577 -0
agno-2.3.0.dist-info/licenses/LICENSE +201 -0
agno/knowledge/reader/url_reader.py +0 -128
agno/tools/googlesearch.py +0 -98
agno/tools/mcp.py +0 -610
agno/utils/models/aws_claude.py +0 -170
agno-2.0.0rc2.dist-info/METADATA +0 -355
agno-2.0.0rc2.dist-info/RECORD +0 -515
agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0

agno/os/routers/knowledge/knowledge.py CHANGED Viewed

@@ -2,7 +2,6 @@ import json
 import logging
 import math
 from typing import Dict, List, Optional
-from uuid import uuid4
 from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Path, Query, UploadFile
@@ -20,6 +19,9 @@ from agno.os.routers.knowledge.schemas import (
     ContentStatusResponse,
     ContentUpdateSchema,
     ReaderSchema,
+    VectorDbSchema,
+    VectorSearchRequestSchema,
+    VectorSearchResult,
 )
 from agno.os.schema import (
     BadRequestResponse,
@@ -34,6 +36,7 @@ from agno.os.schema import (
 from agno.os.settings import AgnoAPISettings
 from agno.os.utils import get_knowledge_instance_by_db_id
 from agno.utils.log import log_debug, log_info
+from agno.utils.string import generate_id
 logger = logging.getLogger(__name__)
@@ -99,11 +102,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
         text_content: Optional[str] = Form(None, description="Raw text content to process"),
         reader_id: Optional[str] = Form(None, description="ID of the reader to use for content processing"),
         chunker: Optional[str] = Form(None, description="Chunking strategy to apply during processing"),
+        chunk_size: Optional[int] = Form(None, description="Chunk size to use for processing"),
+        chunk_overlap: Optional[int] = Form(None, description="Chunk overlap to use for processing"),
         db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
     ):
         knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
-        content_id = str(uuid4())
-        log_info(f"Adding content: {name}, {description}, {url}, {metadata} with ID: {content_id}")
+        log_info(f"Adding content: {name}, {description}, {url}, {metadata}")
         parsed_metadata = None
         if metadata:
@@ -166,10 +170,14 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
             file_data=file_data,
             size=file.size if file else None if text_content else None,
         )
-        background_tasks.add_task(process_content, knowledge, content_id, content, reader_id, chunker)
+        content_hash = knowledge._build_content_hash(content)
+        content.content_hash = content_hash
+        content.id = generate_id(content_hash)
+        background_tasks.add_task(process_content, knowledge, content, reader_id, chunker, chunk_size, chunk_overlap)
         response = ContentResponseSchema(
-            id=content_id,
+            id=content.id,
             name=name,
             description=description,
             metadata=parsed_metadata,
@@ -300,7 +308,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
             }
         },
     )
-    def get_content(
+    async def get_content(
         limit: Optional[int] = Query(default=20, description="Number of content entries to return"),
         page: Optional[int] = Query(default=1, description="Page number"),
         sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
@@ -308,7 +316,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
         db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
     ) -> PaginatedResponse[ContentResponseSchema]:
         knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
-        contents, count = knowledge.get_content(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)
+        contents, count = await knowledge.aget_content(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)
         return PaginatedResponse(
             data=[
@@ -368,13 +376,13 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
             404: {"description": "Content not found", "model": NotFoundResponse},
         },
     )
-    def get_content_by_id(
+    async def get_content_by_id(
         content_id: str,
         db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
     ) -> ContentResponseSchema:
         log_info(f"Getting content by id: {content_id}")
         knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
-        content = knowledge.get_content_by_id(content_id=content_id)
+        content = await knowledge.aget_content_by_id(content_id=content_id)
         if not content:
             raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")
         response = ContentResponseSchema.from_dict(
@@ -408,12 +416,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
             500: {"description": "Failed to delete content", "model": InternalServerErrorResponse},
         },
     )
-    def delete_content_by_id(
+    async def delete_content_by_id(
         content_id: str,
         db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
     ) -> ContentResponseSchema:
         knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
-        knowledge.remove_content_by_id(content_id=content_id)
+        await knowledge.aremove_content_by_id(content_id=content_id)
         log_info(f"Deleting content by id: {content_id}")
         return ContentResponseSchema(
@@ -440,7 +448,6 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
         knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
         log_info("Deleting all content")
         knowledge.remove_all_content()
         return "success"
     @router.get(
@@ -473,13 +480,13 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
             404: {"description": "Content not found", "model": NotFoundResponse},
         },
     )
-    def get_content_status(
+    async def get_content_status(
         content_id: str,
         db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
     ) -> ContentStatusResponse:
         log_info(f"Getting content status: {content_id}")
         knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
-        knowledge_status, status_message = knowledge.get_content_status(content_id=content_id)
+        knowledge_status, status_message = await knowledge.aget_content_status(content_id=content_id)
         # Handle the case where content is not found
         if knowledge_status is None:
@@ -510,11 +517,107 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
         return ContentStatusResponse(status=status, status_message=status_message or "")
+    @router.post(
+        "/knowledge/search",
+        status_code=200,
+        operation_id="search_knowledge",
+        summary="Search Knowledge",
+        description="Search the knowledge base for relevant documents using query, filters and search type.",
+        response_model=PaginatedResponse[VectorSearchResult],
+        responses={
+            200: {
+                "description": "Search results retrieved successfully",
+                "content": {
+                    "application/json": {
+                        "example": {
+                            "data": [
+                                {
+                                    "id": "doc_123",
+                                    "content": "Jordan Mitchell - Software Engineer with skills in JavaScript, React, Python",
+                                    "name": "cv_1",
+                                    "meta_data": {"page": 1, "chunk": 1},
+                                    "usage": {"total_tokens": 14},
+                                    "reranking_score": 0.95,
+                                    "content_id": "content_456",
+                                }
+                            ],
+                            "meta": {"page": 1, "limit": 20, "total_pages": 2, "total_count": 35},
+                        }
+                    }
+                },
+            },
+            400: {"description": "Invalid search parameters"},
+            404: {"description": "No documents found"},
+        },
+    )
+    def search_knowledge(request: VectorSearchRequestSchema) -> PaginatedResponse[VectorSearchResult]:
+        import time
+        start_time = time.time()
+        knowledge = get_knowledge_instance_by_db_id(knowledge_instances, request.db_id)
+        # For now, validate the vector db ids exist in the knowledge base
+        # We will add more logic around this once we have multi vectordb support
+        # If vector db ids are provided, check if any of them match the knowledge's vector db
+        if request.vector_db_ids:
+            if knowledge.vector_db and knowledge.vector_db.id:
+                if knowledge.vector_db.id not in request.vector_db_ids:
+                    raise HTTPException(
+                        status_code=400,
+                        detail=f"None of the provided Vector DB IDs {request.vector_db_ids} match the knowledge base Vector DB ID {knowledge.vector_db.id}",
+                    )
+            else:
+                raise HTTPException(status_code=400, detail="Knowledge base has no vector database configured")
+        # Calculate pagination parameters
+        meta = request.meta
+        limit = meta.limit if meta and meta.limit is not None else 20
+        page = meta.page if meta and meta.page is not None else 1
+        # Use max_results if specified, otherwise use a higher limit for search then paginate
+        search_limit = request.max_results
+        results = knowledge.search(
+            query=request.query, max_results=search_limit, filters=request.filters, search_type=request.search_type
+        )
+        # Calculate pagination
+        total_results = len(results)
+        start_idx = (page - 1) * limit
+        # Ensure start_idx doesn't exceed the total results
+        if start_idx >= total_results and total_results > 0:
+            # If page is beyond available results, return empty results
+            paginated_results = []
+        else:
+            end_idx = min(start_idx + limit, total_results)
+            paginated_results = results[start_idx:end_idx]
+        search_time_ms = (time.time() - start_time) * 1000
+        # Convert Document objects to serializable format
+        document_results = [VectorSearchResult.from_document(doc) for doc in paginated_results]
+        # Calculate pagination info
+        total_pages = (total_results + limit - 1) // limit  # Ceiling division
+        return PaginatedResponse(
+            data=document_results,
+            meta=PaginationInfo(
+                page=page,
+                limit=limit,
+                total_pages=total_pages,
+                total_count=total_results,
+                search_time_ms=search_time_ms,
+            ),
+        )
     @router.get(
         "/knowledge/config",
         status_code=200,
         operation_id="get_knowledge_config",
-        summary="Get Knowledge Configuration",
+        summary="Get Config",
         description=(
             "Retrieve available readers, chunkers, and configuration options for content processing. "
             "This endpoint provides metadata about supported file types, processing strategies, and filters."
@@ -700,38 +803,65 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
                                     "key": "AgenticChunker",
                                     "name": "AgenticChunker",
                                     "description": "Chunking strategy that uses an LLM to determine natural breakpoints in the text",
+                                    "metadata": {"chunk_size": 5000},
                                 },
                                 "DocumentChunker": {
                                     "key": "DocumentChunker",
                                     "name": "DocumentChunker",
                                     "description": "A chunking strategy that splits text based on document structure like paragraphs and sections",
-                                },
-                                "RecursiveChunker": {
-                                    "key": "RecursiveChunker",
-                                    "name": "RecursiveChunker",
-                                    "description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
-                                },
-                                "SemanticChunker": {
-                                    "key": "SemanticChunker",
-                                    "name": "SemanticChunker",
-                                    "description": "Chunking strategy that splits text into semantic chunks using chonkie",
+                                    "metadata": {
+                                        "chunk_size": 5000,
+                                        "chunk_overlap": 0,
+                                    },
                                 },
                                 "FixedSizeChunker": {
                                     "key": "FixedSizeChunker",
                                     "name": "FixedSizeChunker",
                                     "description": "Chunking strategy that splits text into fixed-size chunks with optional overlap",
+                                    "metadata": {
+                                        "chunk_size": 5000,
+                                        "chunk_overlap": 0,
+                                    },
+                                },
+                                "MarkdownChunker": {
+                                    "key": "MarkdownChunker",
+                                    "name": "MarkdownChunker",
+                                    "description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
+                                    "metadata": {
+                                        "chunk_size": 5000,
+                                        "chunk_overlap": 0,
+                                    },
+                                },
+                                "RecursiveChunker": {
+                                    "key": "RecursiveChunker",
+                                    "name": "RecursiveChunker",
+                                    "description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
+                                    "metadata": {
+                                        "chunk_size": 5000,
+                                        "chunk_overlap": 0,
+                                    },
                                 },
                                 "RowChunker": {
                                     "key": "RowChunker",
                                     "name": "RowChunker",
                                     "description": "RowChunking chunking strategy",
+                                    "metadata": {},
                                 },
-                                "MarkdownChunker": {
-                                    "key": "MarkdownChunker",
-                                    "name": "MarkdownChunker",
-                                    "description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
+                                "SemanticChunker": {
+                                    "key": "SemanticChunker",
+                                    "name": "SemanticChunker",
+                                    "description": "Chunking strategy that splits text into semantic chunks using chonkie",
+                                    "metadata": {"chunk_size": 5000},
                                 },
                             },
+                            "vector_dbs": [
+                                {
+                                    "id": "vector_db_1",
+                                    "name": "Vector DB 1",
+                                    "description": "Vector DB 1 description",
+                                    "search_types": ["vector", "keyword", "hybrid"],
+                                }
+                            ],
                             "filters": ["filter_tag_1", "filter_tag2"],
                         }
                     }
@@ -787,14 +917,32 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
             chunker_key = chunker_info.get("key")
             if chunker_key:
                 chunkers_dict[chunker_key] = ChunkerSchema(
-                    key=chunker_key, name=chunker_info.get("name"), description=chunker_info.get("description")
+                    key=chunker_key,
+                    name=chunker_info.get("name"),
+                    description=chunker_info.get("description"),
+                    metadata=chunker_info.get("metadata", {}),
                 )
+        vector_dbs = []
+        if knowledge.vector_db:
+            search_types = knowledge.vector_db.get_supported_search_types()
+            name = knowledge.vector_db.name
+            db_id = knowledge.vector_db.id
+            vector_dbs.append(
+                VectorDbSchema(
+                    id=db_id,
+                    name=name,
+                    description=knowledge.vector_db.description,
+                    search_types=search_types,
+                )
+            )
         return ConfigResponseSchema(
             readers=reader_schemas,
+            vector_dbs=vector_dbs,
             readersForType=types_of_readers,
             chunkers=chunkers_dict,
-            filters=knowledge.get_filters(),
+            filters=knowledge.get_valid_filters(),
         )
     return router
@@ -802,15 +950,15 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
 async def process_content(
     knowledge: Knowledge,
-    content_id: str,
     content: Content,
     reader_id: Optional[str] = None,
     chunker: Optional[str] = None,
+    chunk_size: Optional[int] = None,
+    chunk_overlap: Optional[int] = None,
 ):
     """Background task to process the content"""
-    log_info(f"Processing content {content_id}")
     try:
-        content.id = content_id
         if reader_id:
             reader = None
             if knowledge.readers and reader_id in knowledge.readers:
@@ -829,21 +977,20 @@ async def process_content(
                 content.reader = reader
         if chunker and content.reader:
             # Set the chunker name on the reader - let the reader handle it internally
-            content.reader.set_chunking_strategy_from_string(chunker)
+            content.reader.set_chunking_strategy_from_string(chunker, chunk_size=chunk_size, overlap=chunk_overlap)
             log_debug(f"Set chunking strategy: {chunker}")
         log_debug(f"Using reader: {content.reader.__class__.__name__}")
         await knowledge._load_content(content, upsert=False, skip_if_exists=True)
-        log_info(f"Content {content_id} processed successfully")
+        log_info(f"Content {content.id} processed successfully")
     except Exception as e:
-        log_info(f"Error processing content {content_id}: {e}")
+        log_info(f"Error processing content: {e}")
         # Mark content as failed in the contents DB
         try:
             from agno.knowledge.content import ContentStatus as KnowledgeContentStatus
             content.status = KnowledgeContentStatus.FAILED
             content.status_message = str(e)
-            content.id = content_id
             knowledge.patch_content(content)
         except Exception:
             # Swallow any secondary errors to avoid crashing the background task

agno/os/routers/knowledge/schemas.py CHANGED Viewed

@@ -16,23 +16,23 @@ class ContentStatus(str, Enum):
 class ContentStatusResponse(BaseModel):
     """Response model for content status endpoint."""
-    status: ContentStatus
-    status_message: str = ""
+    status: ContentStatus = Field(..., description="Current processing status of the content")
+    status_message: str = Field("", description="Status message or error details")
 class ContentResponseSchema(BaseModel):
-    id: str
-    name: Optional[str] = None
-    description: Optional[str] = None
-    type: Optional[str] = None
-    size: Optional[str] = None
-    linked_to: Optional[str] = None
-    metadata: Optional[dict] = None
-    access_count: Optional[int] = None
-    status: Optional[ContentStatus] = None
-    status_message: Optional[str] = None
-    created_at: Optional[datetime] = None
-    updated_at: Optional[datetime] = None
+    id: str = Field(..., description="Unique identifier for the content")
+    name: Optional[str] = Field(None, description="Name of the content")
+    description: Optional[str] = Field(None, description="Description of the content")
+    type: Optional[str] = Field(None, description="MIME type of the content")
+    size: Optional[str] = Field(None, description="Size of the content in bytes")
+    linked_to: Optional[str] = Field(None, description="ID of related content if linked")
+    metadata: Optional[dict] = Field(None, description="Additional metadata as key-value pairs")
+    access_count: Optional[int] = Field(None, description="Number of times content has been accessed", ge=0)
+    status: Optional[ContentStatus] = Field(None, description="Processing status of the content")
+    status_message: Optional[str] = Field(None, description="Status message or error details")
+    created_at: Optional[datetime] = Field(None, description="Timestamp when content was created")
+    updated_at: Optional[datetime] = Field(None, description="Timestamp when content was last updated")
     @classmethod
     def from_dict(cls, content: Dict[str, Any]) -> "ContentResponseSchema":
@@ -99,20 +99,80 @@ class ContentUpdateSchema(BaseModel):
 class ReaderSchema(BaseModel):
-    id: str
-    name: Optional[str] = None
-    description: Optional[str] = None
-    chunkers: Optional[List[str]] = None
+    id: str = Field(..., description="Unique identifier for the reader")
+    name: Optional[str] = Field(None, description="Name of the reader")
+    description: Optional[str] = Field(None, description="Description of the reader's capabilities")
+    chunkers: Optional[List[str]] = Field(None, description="List of supported chunking strategies")
 class ChunkerSchema(BaseModel):
     key: str
     name: Optional[str] = None
     description: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+class VectorDbSchema(BaseModel):
+    id: str = Field(..., description="Unique identifier for the vector database")
+    name: Optional[str] = Field(None, description="Name of the vector database")
+    description: Optional[str] = Field(None, description="Description of the vector database")
+    search_types: Optional[List[str]] = Field(
+        None, description="List of supported search types (vector, keyword, hybrid)"
+    )
+class VectorSearchResult(BaseModel):
+    """Schema for search result documents."""
+    id: str = Field(..., description="Unique identifier for the search result document")
+    content: str = Field(..., description="Content text of the document")
+    name: Optional[str] = Field(None, description="Name of the document")
+    meta_data: Optional[Dict[str, Any]] = Field(None, description="Metadata associated with the document")
+    usage: Optional[Dict[str, Any]] = Field(None, description="Usage statistics (e.g., token counts)")
+    reranking_score: Optional[float] = Field(None, description="Reranking score for relevance", ge=0.0, le=1.0)
+    content_id: Optional[str] = Field(None, description="ID of the source content")
+    content_origin: Optional[str] = Field(None, description="Origin URL or source of the content")
+    size: Optional[int] = Field(None, description="Size of the content in bytes", ge=0)
+    @classmethod
+    def from_document(cls, document) -> "VectorSearchResult":
+        """Convert a Document object to a serializable VectorSearchResult."""
+        return cls(
+            id=document.id,
+            content=document.content,
+            name=getattr(document, "name", None),
+            meta_data=getattr(document, "meta_data", None),
+            usage=getattr(document, "usage", None),
+            reranking_score=getattr(document, "reranking_score", None),
+            content_id=getattr(document, "content_id", None),
+            content_origin=getattr(document, "content_origin", None),
+            size=getattr(document, "size", None),
+        )
+class VectorSearchRequestSchema(BaseModel):
+    """Schema for vector search request."""
+    class Meta(BaseModel):
+        """Inline metadata schema for pagination."""
+        limit: int = Field(20, description="Number of results per page", ge=1, le=100)
+        page: int = Field(1, description="Page number", ge=1)
+    query: str = Field(..., description="The search query text")
+    db_id: Optional[str] = Field(None, description="The content database ID to search in")
+    vector_db_ids: Optional[List[str]] = Field(None, description="List of vector database IDs to search in")
+    search_type: Optional[str] = Field(None, description="The type of search to perform (vector, keyword, hybrid)")
+    max_results: Optional[int] = Field(None, description="The maximum number of results to return", ge=1, le=1000)
+    filters: Optional[Dict[str, Any]] = Field(None, description="Filters to apply to the search results")
+    meta: Optional[Meta] = Field(
+        None, description="Pagination metadata. Limit and page number to return a subset of results."
+    )
 class ConfigResponseSchema(BaseModel):
-    readers: Optional[Dict[str, ReaderSchema]] = None
-    readersForType: Optional[Dict[str, List[str]]] = None
-    chunkers: Optional[Dict[str, ChunkerSchema]] = None
-    filters: Optional[List[str]] = None
+    readers: Optional[Dict[str, ReaderSchema]] = Field(None, description="Available content readers")
+    readersForType: Optional[Dict[str, List[str]]] = Field(None, description="Mapping of content types to reader IDs")
+    chunkers: Optional[Dict[str, ChunkerSchema]] = Field(None, description="Available chunking strategies")
+    filters: Optional[List[str]] = Field(None, description="Available filter tags")
+    vector_dbs: Optional[List[VectorDbSchema]] = Field(None, description="Configured vector databases")

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl