agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +6009 -2874
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +385 -6
- agno/db/dynamo/dynamo.py +388 -81
- agno/db/dynamo/schemas.py +47 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +435 -64
- agno/db/firestore/schemas.py +11 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +384 -42
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +351 -66
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +339 -48
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +510 -37
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2036 -0
- agno/db/mongo/mongo.py +653 -76
- agno/db/mongo/schemas.py +13 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/mysql.py +687 -25
- agno/db/mysql/schemas.py +61 -37
- agno/db/mysql/utils.py +60 -2
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2001 -0
- agno/db/postgres/postgres.py +676 -57
- agno/db/postgres/schemas.py +43 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +344 -38
- agno/db/redis/schemas.py +18 -0
- agno/db/redis/utils.py +60 -2
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/memory.py +13 -0
- agno/db/singlestore/schemas.py +26 -1
- agno/db/singlestore/singlestore.py +687 -53
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2371 -0
- agno/db/sqlite/schemas.py +24 -0
- agno/db/sqlite/sqlite.py +774 -85
- agno/db/sqlite/utils.py +168 -5
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1361 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +50 -22
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +68 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +1 -1
- agno/knowledge/chunking/semantic.py +40 -8
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +13 -0
- agno/knowledge/embedder/openai.py +37 -65
- agno/knowledge/embedder/sentence_transformer.py +8 -4
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +595 -187
- agno/knowledge/reader/base.py +9 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/json_reader.py +6 -5
- agno/knowledge/reader/markdown_reader.py +13 -13
- agno/knowledge/reader/pdf_reader.py +43 -68
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +51 -6
- agno/knowledge/reader/s3_reader.py +3 -15
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +13 -13
- agno/knowledge/reader/web_search_reader.py +2 -43
- agno/knowledge/reader/website_reader.py +43 -25
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/types.py +9 -0
- agno/knowledge/utils.py +20 -0
- agno/media.py +339 -266
- agno/memory/manager.py +336 -82
- agno/models/aimlapi/aimlapi.py +2 -2
- agno/models/anthropic/claude.py +183 -37
- agno/models/aws/bedrock.py +52 -112
- agno/models/aws/claude.py +33 -1
- agno/models/azure/ai_foundry.py +33 -15
- agno/models/azure/openai_chat.py +25 -8
- agno/models/base.py +1011 -566
- agno/models/cerebras/cerebras.py +19 -13
- agno/models/cerebras/cerebras_openai.py +8 -5
- agno/models/cohere/chat.py +27 -1
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/deepinfra/deepinfra.py +2 -2
- agno/models/deepseek/deepseek.py +2 -2
- agno/models/fireworks/fireworks.py +2 -2
- agno/models/google/gemini.py +110 -37
- agno/models/groq/groq.py +28 -11
- agno/models/huggingface/huggingface.py +2 -1
- agno/models/internlm/internlm.py +2 -2
- agno/models/langdb/langdb.py +4 -4
- agno/models/litellm/chat.py +18 -1
- agno/models/litellm/litellm_openai.py +2 -2
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/message.py +143 -4
- agno/models/meta/llama.py +27 -10
- agno/models/meta/llama_openai.py +5 -17
- agno/models/nebius/nebius.py +6 -6
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/nvidia.py +2 -2
- agno/models/ollama/chat.py +60 -6
- agno/models/openai/chat.py +102 -43
- agno/models/openai/responses.py +103 -106
- agno/models/openrouter/openrouter.py +41 -3
- agno/models/perplexity/perplexity.py +4 -5
- agno/models/portkey/portkey.py +3 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +81 -5
- agno/models/sambanova/sambanova.py +2 -2
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/together.py +2 -2
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +2 -2
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +96 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +3 -2
- agno/os/app.py +543 -175
- agno/os/auth.py +24 -14
- agno/os/config.py +1 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/agui.py +23 -7
- agno/os/interfaces/agui/router.py +27 -3
- agno/os/interfaces/agui/utils.py +242 -142
- agno/os/interfaces/base.py +6 -2
- agno/os/interfaces/slack/router.py +81 -23
- agno/os/interfaces/slack/slack.py +29 -14
- agno/os/interfaces/whatsapp/router.py +11 -4
- agno/os/interfaces/whatsapp/whatsapp.py +14 -7
- agno/os/mcp.py +111 -54
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +556 -139
- agno/os/routers/evals/evals.py +71 -34
- agno/os/routers/evals/schemas.py +31 -31
- agno/os/routers/evals/utils.py +6 -5
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +185 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +158 -53
- agno/os/routers/memory/schemas.py +20 -16
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +499 -38
- agno/os/schema.py +308 -198
- agno/os/utils.py +401 -41
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +3 -1
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +2 -2
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +7 -2
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +266 -112
- agno/run/base.py +53 -24
- agno/run/team.py +252 -111
- agno/run/workflow.py +156 -45
- agno/session/agent.py +105 -89
- agno/session/summary.py +65 -25
- agno/session/team.py +176 -96
- agno/session/workflow.py +406 -40
- agno/team/team.py +3854 -1692
- agno/tools/brightdata.py +3 -3
- agno/tools/cartesia.py +3 -5
- agno/tools/dalle.py +9 -8
- agno/tools/decorator.py +4 -2
- agno/tools/desi_vocal.py +2 -2
- agno/tools/duckduckgo.py +15 -11
- agno/tools/e2b.py +20 -13
- agno/tools/eleven_labs.py +26 -28
- agno/tools/exa.py +21 -16
- agno/tools/fal.py +4 -4
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +350 -0
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +257 -37
- agno/tools/giphy.py +2 -2
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +270 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/knowledge.py +3 -3
- agno/tools/lumalab.py +3 -3
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +11 -17
- agno/tools/memori.py +1 -53
- agno/tools/memory.py +419 -0
- agno/tools/models/azure_openai.py +2 -2
- agno/tools/models/gemini.py +3 -3
- agno/tools/models/groq.py +3 -5
- agno/tools/models/nebius.py +7 -7
- agno/tools/models_labs.py +25 -15
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +4 -9
- agno/tools/opencv.py +3 -3
- agno/tools/parallel.py +314 -0
- agno/tools/replicate.py +7 -7
- agno/tools/scrapegraph.py +58 -31
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/slack.py +18 -3
- agno/tools/spider.py +2 -2
- agno/tools/tavily.py +146 -0
- agno/tools/whatsapp.py +1 -1
- agno/tools/workflow.py +278 -0
- agno/tools/yfinance.py +12 -11
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +27 -0
- agno/utils/common.py +90 -1
- agno/utils/events.py +222 -7
- agno/utils/gemini.py +181 -23
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +111 -0
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +95 -5
- agno/utils/media.py +188 -10
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +60 -0
- agno/utils/models/claude.py +40 -11
- agno/utils/models/cohere.py +1 -1
- agno/utils/models/watsonx.py +1 -1
- agno/utils/openai.py +1 -1
- agno/utils/print_response/agent.py +105 -21
- agno/utils/print_response/team.py +103 -38
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/reasoning.py +22 -1
- agno/utils/serialize.py +32 -0
- agno/utils/streamlit.py +16 -10
- agno/utils/string.py +41 -0
- agno/utils/team.py +98 -9
- agno/utils/tools.py +1 -1
- agno/vectordb/base.py +23 -4
- agno/vectordb/cassandra/cassandra.py +65 -9
- agno/vectordb/chroma/chromadb.py +182 -38
- agno/vectordb/clickhouse/clickhousedb.py +64 -11
- agno/vectordb/couchbase/couchbase.py +105 -10
- agno/vectordb/lancedb/lance_db.py +183 -135
- agno/vectordb/langchaindb/langchaindb.py +25 -7
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +46 -7
- agno/vectordb/milvus/milvus.py +126 -9
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +112 -7
- agno/vectordb/pgvector/pgvector.py +142 -21
- agno/vectordb/pineconedb/pineconedb.py +80 -8
- agno/vectordb/qdrant/qdrant.py +125 -39
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/singlestore/singlestore.py +111 -25
- agno/vectordb/surrealdb/surrealdb.py +31 -5
- agno/vectordb/upstashdb/upstashdb.py +76 -8
- agno/vectordb/weaviate/weaviate.py +86 -15
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +112 -18
- agno/workflow/loop.py +69 -10
- agno/workflow/parallel.py +266 -118
- agno/workflow/router.py +110 -17
- agno/workflow/step.py +645 -136
- agno/workflow/steps.py +65 -6
- agno/workflow/types.py +71 -33
- agno/workflow/workflow.py +2113 -300
- agno-2.3.0.dist-info/METADATA +618 -0
- agno-2.3.0.dist-info/RECORD +577 -0
- agno-2.3.0.dist-info/licenses/LICENSE +201 -0
- agno/knowledge/reader/url_reader.py +0 -128
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -610
- agno/utils/models/aws_claude.py +0 -170
- agno-2.0.0rc2.dist-info/METADATA +0 -355
- agno-2.0.0rc2.dist-info/RECORD +0 -515
- agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
- {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
- {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,6 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
import math
|
|
4
4
|
from typing import Dict, List, Optional
|
|
5
|
-
from uuid import uuid4
|
|
6
5
|
|
|
7
6
|
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Path, Query, UploadFile
|
|
8
7
|
|
|
@@ -20,6 +19,9 @@ from agno.os.routers.knowledge.schemas import (
|
|
|
20
19
|
ContentStatusResponse,
|
|
21
20
|
ContentUpdateSchema,
|
|
22
21
|
ReaderSchema,
|
|
22
|
+
VectorDbSchema,
|
|
23
|
+
VectorSearchRequestSchema,
|
|
24
|
+
VectorSearchResult,
|
|
23
25
|
)
|
|
24
26
|
from agno.os.schema import (
|
|
25
27
|
BadRequestResponse,
|
|
@@ -34,6 +36,7 @@ from agno.os.schema import (
|
|
|
34
36
|
from agno.os.settings import AgnoAPISettings
|
|
35
37
|
from agno.os.utils import get_knowledge_instance_by_db_id
|
|
36
38
|
from agno.utils.log import log_debug, log_info
|
|
39
|
+
from agno.utils.string import generate_id
|
|
37
40
|
|
|
38
41
|
logger = logging.getLogger(__name__)
|
|
39
42
|
|
|
@@ -99,11 +102,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
99
102
|
text_content: Optional[str] = Form(None, description="Raw text content to process"),
|
|
100
103
|
reader_id: Optional[str] = Form(None, description="ID of the reader to use for content processing"),
|
|
101
104
|
chunker: Optional[str] = Form(None, description="Chunking strategy to apply during processing"),
|
|
105
|
+
chunk_size: Optional[int] = Form(None, description="Chunk size to use for processing"),
|
|
106
|
+
chunk_overlap: Optional[int] = Form(None, description="Chunk overlap to use for processing"),
|
|
102
107
|
db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
|
|
103
108
|
):
|
|
104
109
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
105
|
-
|
|
106
|
-
log_info(f"Adding content: {name}, {description}, {url}, {metadata} with ID: {content_id}")
|
|
110
|
+
log_info(f"Adding content: {name}, {description}, {url}, {metadata}")
|
|
107
111
|
|
|
108
112
|
parsed_metadata = None
|
|
109
113
|
if metadata:
|
|
@@ -166,10 +170,14 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
166
170
|
file_data=file_data,
|
|
167
171
|
size=file.size if file else None if text_content else None,
|
|
168
172
|
)
|
|
169
|
-
|
|
173
|
+
content_hash = knowledge._build_content_hash(content)
|
|
174
|
+
content.content_hash = content_hash
|
|
175
|
+
content.id = generate_id(content_hash)
|
|
176
|
+
|
|
177
|
+
background_tasks.add_task(process_content, knowledge, content, reader_id, chunker, chunk_size, chunk_overlap)
|
|
170
178
|
|
|
171
179
|
response = ContentResponseSchema(
|
|
172
|
-
id=
|
|
180
|
+
id=content.id,
|
|
173
181
|
name=name,
|
|
174
182
|
description=description,
|
|
175
183
|
metadata=parsed_metadata,
|
|
@@ -300,7 +308,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
300
308
|
}
|
|
301
309
|
},
|
|
302
310
|
)
|
|
303
|
-
def get_content(
|
|
311
|
+
async def get_content(
|
|
304
312
|
limit: Optional[int] = Query(default=20, description="Number of content entries to return"),
|
|
305
313
|
page: Optional[int] = Query(default=1, description="Page number"),
|
|
306
314
|
sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
|
|
@@ -308,7 +316,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
308
316
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
309
317
|
) -> PaginatedResponse[ContentResponseSchema]:
|
|
310
318
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
311
|
-
contents, count = knowledge.
|
|
319
|
+
contents, count = await knowledge.aget_content(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)
|
|
312
320
|
|
|
313
321
|
return PaginatedResponse(
|
|
314
322
|
data=[
|
|
@@ -368,13 +376,13 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
368
376
|
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
369
377
|
},
|
|
370
378
|
)
|
|
371
|
-
def get_content_by_id(
|
|
379
|
+
async def get_content_by_id(
|
|
372
380
|
content_id: str,
|
|
373
381
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
374
382
|
) -> ContentResponseSchema:
|
|
375
383
|
log_info(f"Getting content by id: {content_id}")
|
|
376
384
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
377
|
-
content = knowledge.
|
|
385
|
+
content = await knowledge.aget_content_by_id(content_id=content_id)
|
|
378
386
|
if not content:
|
|
379
387
|
raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")
|
|
380
388
|
response = ContentResponseSchema.from_dict(
|
|
@@ -408,12 +416,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
408
416
|
500: {"description": "Failed to delete content", "model": InternalServerErrorResponse},
|
|
409
417
|
},
|
|
410
418
|
)
|
|
411
|
-
def delete_content_by_id(
|
|
419
|
+
async def delete_content_by_id(
|
|
412
420
|
content_id: str,
|
|
413
421
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
414
422
|
) -> ContentResponseSchema:
|
|
415
423
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
416
|
-
knowledge.
|
|
424
|
+
await knowledge.aremove_content_by_id(content_id=content_id)
|
|
417
425
|
log_info(f"Deleting content by id: {content_id}")
|
|
418
426
|
|
|
419
427
|
return ContentResponseSchema(
|
|
@@ -440,7 +448,6 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
440
448
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
441
449
|
log_info("Deleting all content")
|
|
442
450
|
knowledge.remove_all_content()
|
|
443
|
-
|
|
444
451
|
return "success"
|
|
445
452
|
|
|
446
453
|
@router.get(
|
|
@@ -473,13 +480,13 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
473
480
|
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
474
481
|
},
|
|
475
482
|
)
|
|
476
|
-
def get_content_status(
|
|
483
|
+
async def get_content_status(
|
|
477
484
|
content_id: str,
|
|
478
485
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
479
486
|
) -> ContentStatusResponse:
|
|
480
487
|
log_info(f"Getting content status: {content_id}")
|
|
481
488
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
482
|
-
knowledge_status, status_message = knowledge.
|
|
489
|
+
knowledge_status, status_message = await knowledge.aget_content_status(content_id=content_id)
|
|
483
490
|
|
|
484
491
|
# Handle the case where content is not found
|
|
485
492
|
if knowledge_status is None:
|
|
@@ -510,11 +517,107 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
510
517
|
|
|
511
518
|
return ContentStatusResponse(status=status, status_message=status_message or "")
|
|
512
519
|
|
|
520
|
+
@router.post(
|
|
521
|
+
"/knowledge/search",
|
|
522
|
+
status_code=200,
|
|
523
|
+
operation_id="search_knowledge",
|
|
524
|
+
summary="Search Knowledge",
|
|
525
|
+
description="Search the knowledge base for relevant documents using query, filters and search type.",
|
|
526
|
+
response_model=PaginatedResponse[VectorSearchResult],
|
|
527
|
+
responses={
|
|
528
|
+
200: {
|
|
529
|
+
"description": "Search results retrieved successfully",
|
|
530
|
+
"content": {
|
|
531
|
+
"application/json": {
|
|
532
|
+
"example": {
|
|
533
|
+
"data": [
|
|
534
|
+
{
|
|
535
|
+
"id": "doc_123",
|
|
536
|
+
"content": "Jordan Mitchell - Software Engineer with skills in JavaScript, React, Python",
|
|
537
|
+
"name": "cv_1",
|
|
538
|
+
"meta_data": {"page": 1, "chunk": 1},
|
|
539
|
+
"usage": {"total_tokens": 14},
|
|
540
|
+
"reranking_score": 0.95,
|
|
541
|
+
"content_id": "content_456",
|
|
542
|
+
}
|
|
543
|
+
],
|
|
544
|
+
"meta": {"page": 1, "limit": 20, "total_pages": 2, "total_count": 35},
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
},
|
|
548
|
+
},
|
|
549
|
+
400: {"description": "Invalid search parameters"},
|
|
550
|
+
404: {"description": "No documents found"},
|
|
551
|
+
},
|
|
552
|
+
)
|
|
553
|
+
def search_knowledge(request: VectorSearchRequestSchema) -> PaginatedResponse[VectorSearchResult]:
|
|
554
|
+
import time
|
|
555
|
+
|
|
556
|
+
start_time = time.time()
|
|
557
|
+
|
|
558
|
+
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, request.db_id)
|
|
559
|
+
|
|
560
|
+
# For now, validate the vector db ids exist in the knowledge base
|
|
561
|
+
# We will add more logic around this once we have multi vectordb support
|
|
562
|
+
# If vector db ids are provided, check if any of them match the knowledge's vector db
|
|
563
|
+
if request.vector_db_ids:
|
|
564
|
+
if knowledge.vector_db and knowledge.vector_db.id:
|
|
565
|
+
if knowledge.vector_db.id not in request.vector_db_ids:
|
|
566
|
+
raise HTTPException(
|
|
567
|
+
status_code=400,
|
|
568
|
+
detail=f"None of the provided Vector DB IDs {request.vector_db_ids} match the knowledge base Vector DB ID {knowledge.vector_db.id}",
|
|
569
|
+
)
|
|
570
|
+
else:
|
|
571
|
+
raise HTTPException(status_code=400, detail="Knowledge base has no vector database configured")
|
|
572
|
+
|
|
573
|
+
# Calculate pagination parameters
|
|
574
|
+
meta = request.meta
|
|
575
|
+
limit = meta.limit if meta and meta.limit is not None else 20
|
|
576
|
+
page = meta.page if meta and meta.page is not None else 1
|
|
577
|
+
|
|
578
|
+
# Use max_results if specified, otherwise use a higher limit for search then paginate
|
|
579
|
+
search_limit = request.max_results
|
|
580
|
+
|
|
581
|
+
results = knowledge.search(
|
|
582
|
+
query=request.query, max_results=search_limit, filters=request.filters, search_type=request.search_type
|
|
583
|
+
)
|
|
584
|
+
|
|
585
|
+
# Calculate pagination
|
|
586
|
+
total_results = len(results)
|
|
587
|
+
start_idx = (page - 1) * limit
|
|
588
|
+
|
|
589
|
+
# Ensure start_idx doesn't exceed the total results
|
|
590
|
+
if start_idx >= total_results and total_results > 0:
|
|
591
|
+
# If page is beyond available results, return empty results
|
|
592
|
+
paginated_results = []
|
|
593
|
+
else:
|
|
594
|
+
end_idx = min(start_idx + limit, total_results)
|
|
595
|
+
paginated_results = results[start_idx:end_idx]
|
|
596
|
+
|
|
597
|
+
search_time_ms = (time.time() - start_time) * 1000
|
|
598
|
+
|
|
599
|
+
# Convert Document objects to serializable format
|
|
600
|
+
document_results = [VectorSearchResult.from_document(doc) for doc in paginated_results]
|
|
601
|
+
|
|
602
|
+
# Calculate pagination info
|
|
603
|
+
total_pages = (total_results + limit - 1) // limit # Ceiling division
|
|
604
|
+
|
|
605
|
+
return PaginatedResponse(
|
|
606
|
+
data=document_results,
|
|
607
|
+
meta=PaginationInfo(
|
|
608
|
+
page=page,
|
|
609
|
+
limit=limit,
|
|
610
|
+
total_pages=total_pages,
|
|
611
|
+
total_count=total_results,
|
|
612
|
+
search_time_ms=search_time_ms,
|
|
613
|
+
),
|
|
614
|
+
)
|
|
615
|
+
|
|
513
616
|
@router.get(
|
|
514
617
|
"/knowledge/config",
|
|
515
618
|
status_code=200,
|
|
516
619
|
operation_id="get_knowledge_config",
|
|
517
|
-
summary="Get
|
|
620
|
+
summary="Get Config",
|
|
518
621
|
description=(
|
|
519
622
|
"Retrieve available readers, chunkers, and configuration options for content processing. "
|
|
520
623
|
"This endpoint provides metadata about supported file types, processing strategies, and filters."
|
|
@@ -700,38 +803,65 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
700
803
|
"key": "AgenticChunker",
|
|
701
804
|
"name": "AgenticChunker",
|
|
702
805
|
"description": "Chunking strategy that uses an LLM to determine natural breakpoints in the text",
|
|
806
|
+
"metadata": {"chunk_size": 5000},
|
|
703
807
|
},
|
|
704
808
|
"DocumentChunker": {
|
|
705
809
|
"key": "DocumentChunker",
|
|
706
810
|
"name": "DocumentChunker",
|
|
707
811
|
"description": "A chunking strategy that splits text based on document structure like paragraphs and sections",
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
"description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
|
|
713
|
-
},
|
|
714
|
-
"SemanticChunker": {
|
|
715
|
-
"key": "SemanticChunker",
|
|
716
|
-
"name": "SemanticChunker",
|
|
717
|
-
"description": "Chunking strategy that splits text into semantic chunks using chonkie",
|
|
812
|
+
"metadata": {
|
|
813
|
+
"chunk_size": 5000,
|
|
814
|
+
"chunk_overlap": 0,
|
|
815
|
+
},
|
|
718
816
|
},
|
|
719
817
|
"FixedSizeChunker": {
|
|
720
818
|
"key": "FixedSizeChunker",
|
|
721
819
|
"name": "FixedSizeChunker",
|
|
722
820
|
"description": "Chunking strategy that splits text into fixed-size chunks with optional overlap",
|
|
821
|
+
"metadata": {
|
|
822
|
+
"chunk_size": 5000,
|
|
823
|
+
"chunk_overlap": 0,
|
|
824
|
+
},
|
|
825
|
+
},
|
|
826
|
+
"MarkdownChunker": {
|
|
827
|
+
"key": "MarkdownChunker",
|
|
828
|
+
"name": "MarkdownChunker",
|
|
829
|
+
"description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
|
|
830
|
+
"metadata": {
|
|
831
|
+
"chunk_size": 5000,
|
|
832
|
+
"chunk_overlap": 0,
|
|
833
|
+
},
|
|
834
|
+
},
|
|
835
|
+
"RecursiveChunker": {
|
|
836
|
+
"key": "RecursiveChunker",
|
|
837
|
+
"name": "RecursiveChunker",
|
|
838
|
+
"description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
|
|
839
|
+
"metadata": {
|
|
840
|
+
"chunk_size": 5000,
|
|
841
|
+
"chunk_overlap": 0,
|
|
842
|
+
},
|
|
723
843
|
},
|
|
724
844
|
"RowChunker": {
|
|
725
845
|
"key": "RowChunker",
|
|
726
846
|
"name": "RowChunker",
|
|
727
847
|
"description": "RowChunking chunking strategy",
|
|
848
|
+
"metadata": {},
|
|
728
849
|
},
|
|
729
|
-
"
|
|
730
|
-
"key": "
|
|
731
|
-
"name": "
|
|
732
|
-
"description": "
|
|
850
|
+
"SemanticChunker": {
|
|
851
|
+
"key": "SemanticChunker",
|
|
852
|
+
"name": "SemanticChunker",
|
|
853
|
+
"description": "Chunking strategy that splits text into semantic chunks using chonkie",
|
|
854
|
+
"metadata": {"chunk_size": 5000},
|
|
733
855
|
},
|
|
734
856
|
},
|
|
857
|
+
"vector_dbs": [
|
|
858
|
+
{
|
|
859
|
+
"id": "vector_db_1",
|
|
860
|
+
"name": "Vector DB 1",
|
|
861
|
+
"description": "Vector DB 1 description",
|
|
862
|
+
"search_types": ["vector", "keyword", "hybrid"],
|
|
863
|
+
}
|
|
864
|
+
],
|
|
735
865
|
"filters": ["filter_tag_1", "filter_tag2"],
|
|
736
866
|
}
|
|
737
867
|
}
|
|
@@ -787,14 +917,32 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
787
917
|
chunker_key = chunker_info.get("key")
|
|
788
918
|
if chunker_key:
|
|
789
919
|
chunkers_dict[chunker_key] = ChunkerSchema(
|
|
790
|
-
key=chunker_key,
|
|
920
|
+
key=chunker_key,
|
|
921
|
+
name=chunker_info.get("name"),
|
|
922
|
+
description=chunker_info.get("description"),
|
|
923
|
+
metadata=chunker_info.get("metadata", {}),
|
|
791
924
|
)
|
|
792
925
|
|
|
926
|
+
vector_dbs = []
|
|
927
|
+
if knowledge.vector_db:
|
|
928
|
+
search_types = knowledge.vector_db.get_supported_search_types()
|
|
929
|
+
name = knowledge.vector_db.name
|
|
930
|
+
db_id = knowledge.vector_db.id
|
|
931
|
+
vector_dbs.append(
|
|
932
|
+
VectorDbSchema(
|
|
933
|
+
id=db_id,
|
|
934
|
+
name=name,
|
|
935
|
+
description=knowledge.vector_db.description,
|
|
936
|
+
search_types=search_types,
|
|
937
|
+
)
|
|
938
|
+
)
|
|
939
|
+
|
|
793
940
|
return ConfigResponseSchema(
|
|
794
941
|
readers=reader_schemas,
|
|
942
|
+
vector_dbs=vector_dbs,
|
|
795
943
|
readersForType=types_of_readers,
|
|
796
944
|
chunkers=chunkers_dict,
|
|
797
|
-
filters=knowledge.
|
|
945
|
+
filters=knowledge.get_valid_filters(),
|
|
798
946
|
)
|
|
799
947
|
|
|
800
948
|
return router
|
|
@@ -802,15 +950,15 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
802
950
|
|
|
803
951
|
async def process_content(
|
|
804
952
|
knowledge: Knowledge,
|
|
805
|
-
content_id: str,
|
|
806
953
|
content: Content,
|
|
807
954
|
reader_id: Optional[str] = None,
|
|
808
955
|
chunker: Optional[str] = None,
|
|
956
|
+
chunk_size: Optional[int] = None,
|
|
957
|
+
chunk_overlap: Optional[int] = None,
|
|
809
958
|
):
|
|
810
959
|
"""Background task to process the content"""
|
|
811
|
-
|
|
960
|
+
|
|
812
961
|
try:
|
|
813
|
-
content.id = content_id
|
|
814
962
|
if reader_id:
|
|
815
963
|
reader = None
|
|
816
964
|
if knowledge.readers and reader_id in knowledge.readers:
|
|
@@ -829,21 +977,20 @@ async def process_content(
|
|
|
829
977
|
content.reader = reader
|
|
830
978
|
if chunker and content.reader:
|
|
831
979
|
# Set the chunker name on the reader - let the reader handle it internally
|
|
832
|
-
content.reader.set_chunking_strategy_from_string(chunker)
|
|
980
|
+
content.reader.set_chunking_strategy_from_string(chunker, chunk_size=chunk_size, overlap=chunk_overlap)
|
|
833
981
|
log_debug(f"Set chunking strategy: {chunker}")
|
|
834
982
|
|
|
835
983
|
log_debug(f"Using reader: {content.reader.__class__.__name__}")
|
|
836
984
|
await knowledge._load_content(content, upsert=False, skip_if_exists=True)
|
|
837
|
-
log_info(f"Content {
|
|
985
|
+
log_info(f"Content {content.id} processed successfully")
|
|
838
986
|
except Exception as e:
|
|
839
|
-
log_info(f"Error processing content
|
|
987
|
+
log_info(f"Error processing content: {e}")
|
|
840
988
|
# Mark content as failed in the contents DB
|
|
841
989
|
try:
|
|
842
990
|
from agno.knowledge.content import ContentStatus as KnowledgeContentStatus
|
|
843
991
|
|
|
844
992
|
content.status = KnowledgeContentStatus.FAILED
|
|
845
993
|
content.status_message = str(e)
|
|
846
|
-
content.id = content_id
|
|
847
994
|
knowledge.patch_content(content)
|
|
848
995
|
except Exception:
|
|
849
996
|
# Swallow any secondary errors to avoid crashing the background task
|
|
@@ -16,23 +16,23 @@ class ContentStatus(str, Enum):
|
|
|
16
16
|
class ContentStatusResponse(BaseModel):
|
|
17
17
|
"""Response model for content status endpoint."""
|
|
18
18
|
|
|
19
|
-
status: ContentStatus
|
|
20
|
-
status_message: str = ""
|
|
19
|
+
status: ContentStatus = Field(..., description="Current processing status of the content")
|
|
20
|
+
status_message: str = Field("", description="Status message or error details")
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class ContentResponseSchema(BaseModel):
|
|
24
|
-
id: str
|
|
25
|
-
name: Optional[str] = None
|
|
26
|
-
description: Optional[str] = None
|
|
27
|
-
type: Optional[str] = None
|
|
28
|
-
size: Optional[str] = None
|
|
29
|
-
linked_to: Optional[str] = None
|
|
30
|
-
metadata: Optional[dict] = None
|
|
31
|
-
access_count: Optional[int] = None
|
|
32
|
-
status: Optional[ContentStatus] = None
|
|
33
|
-
status_message: Optional[str] = None
|
|
34
|
-
created_at: Optional[datetime] = None
|
|
35
|
-
updated_at: Optional[datetime] = None
|
|
24
|
+
id: str = Field(..., description="Unique identifier for the content")
|
|
25
|
+
name: Optional[str] = Field(None, description="Name of the content")
|
|
26
|
+
description: Optional[str] = Field(None, description="Description of the content")
|
|
27
|
+
type: Optional[str] = Field(None, description="MIME type of the content")
|
|
28
|
+
size: Optional[str] = Field(None, description="Size of the content in bytes")
|
|
29
|
+
linked_to: Optional[str] = Field(None, description="ID of related content if linked")
|
|
30
|
+
metadata: Optional[dict] = Field(None, description="Additional metadata as key-value pairs")
|
|
31
|
+
access_count: Optional[int] = Field(None, description="Number of times content has been accessed", ge=0)
|
|
32
|
+
status: Optional[ContentStatus] = Field(None, description="Processing status of the content")
|
|
33
|
+
status_message: Optional[str] = Field(None, description="Status message or error details")
|
|
34
|
+
created_at: Optional[datetime] = Field(None, description="Timestamp when content was created")
|
|
35
|
+
updated_at: Optional[datetime] = Field(None, description="Timestamp when content was last updated")
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
38
|
def from_dict(cls, content: Dict[str, Any]) -> "ContentResponseSchema":
|
|
@@ -99,20 +99,80 @@ class ContentUpdateSchema(BaseModel):
|
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
class ReaderSchema(BaseModel):
|
|
102
|
-
id: str
|
|
103
|
-
name: Optional[str] = None
|
|
104
|
-
description: Optional[str] = None
|
|
105
|
-
chunkers: Optional[List[str]] = None
|
|
102
|
+
id: str = Field(..., description="Unique identifier for the reader")
|
|
103
|
+
name: Optional[str] = Field(None, description="Name of the reader")
|
|
104
|
+
description: Optional[str] = Field(None, description="Description of the reader's capabilities")
|
|
105
|
+
chunkers: Optional[List[str]] = Field(None, description="List of supported chunking strategies")
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
class ChunkerSchema(BaseModel):
|
|
109
109
|
key: str
|
|
110
110
|
name: Optional[str] = None
|
|
111
111
|
description: Optional[str] = None
|
|
112
|
+
metadata: Optional[Dict[str, Any]] = None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class VectorDbSchema(BaseModel):
|
|
116
|
+
id: str = Field(..., description="Unique identifier for the vector database")
|
|
117
|
+
name: Optional[str] = Field(None, description="Name of the vector database")
|
|
118
|
+
description: Optional[str] = Field(None, description="Description of the vector database")
|
|
119
|
+
search_types: Optional[List[str]] = Field(
|
|
120
|
+
None, description="List of supported search types (vector, keyword, hybrid)"
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class VectorSearchResult(BaseModel):
|
|
125
|
+
"""Schema for search result documents."""
|
|
126
|
+
|
|
127
|
+
id: str = Field(..., description="Unique identifier for the search result document")
|
|
128
|
+
content: str = Field(..., description="Content text of the document")
|
|
129
|
+
name: Optional[str] = Field(None, description="Name of the document")
|
|
130
|
+
meta_data: Optional[Dict[str, Any]] = Field(None, description="Metadata associated with the document")
|
|
131
|
+
usage: Optional[Dict[str, Any]] = Field(None, description="Usage statistics (e.g., token counts)")
|
|
132
|
+
reranking_score: Optional[float] = Field(None, description="Reranking score for relevance", ge=0.0, le=1.0)
|
|
133
|
+
content_id: Optional[str] = Field(None, description="ID of the source content")
|
|
134
|
+
content_origin: Optional[str] = Field(None, description="Origin URL or source of the content")
|
|
135
|
+
size: Optional[int] = Field(None, description="Size of the content in bytes", ge=0)
|
|
136
|
+
|
|
137
|
+
@classmethod
|
|
138
|
+
def from_document(cls, document) -> "VectorSearchResult":
|
|
139
|
+
"""Convert a Document object to a serializable VectorSearchResult."""
|
|
140
|
+
return cls(
|
|
141
|
+
id=document.id,
|
|
142
|
+
content=document.content,
|
|
143
|
+
name=getattr(document, "name", None),
|
|
144
|
+
meta_data=getattr(document, "meta_data", None),
|
|
145
|
+
usage=getattr(document, "usage", None),
|
|
146
|
+
reranking_score=getattr(document, "reranking_score", None),
|
|
147
|
+
content_id=getattr(document, "content_id", None),
|
|
148
|
+
content_origin=getattr(document, "content_origin", None),
|
|
149
|
+
size=getattr(document, "size", None),
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class VectorSearchRequestSchema(BaseModel):
|
|
154
|
+
"""Schema for vector search request."""
|
|
155
|
+
|
|
156
|
+
class Meta(BaseModel):
|
|
157
|
+
"""Inline metadata schema for pagination."""
|
|
158
|
+
|
|
159
|
+
limit: int = Field(20, description="Number of results per page", ge=1, le=100)
|
|
160
|
+
page: int = Field(1, description="Page number", ge=1)
|
|
161
|
+
|
|
162
|
+
query: str = Field(..., description="The search query text")
|
|
163
|
+
db_id: Optional[str] = Field(None, description="The content database ID to search in")
|
|
164
|
+
vector_db_ids: Optional[List[str]] = Field(None, description="List of vector database IDs to search in")
|
|
165
|
+
search_type: Optional[str] = Field(None, description="The type of search to perform (vector, keyword, hybrid)")
|
|
166
|
+
max_results: Optional[int] = Field(None, description="The maximum number of results to return", ge=1, le=1000)
|
|
167
|
+
filters: Optional[Dict[str, Any]] = Field(None, description="Filters to apply to the search results")
|
|
168
|
+
meta: Optional[Meta] = Field(
|
|
169
|
+
None, description="Pagination metadata. Limit and page number to return a subset of results."
|
|
170
|
+
)
|
|
112
171
|
|
|
113
172
|
|
|
114
173
|
class ConfigResponseSchema(BaseModel):
|
|
115
|
-
readers: Optional[Dict[str, ReaderSchema]] = None
|
|
116
|
-
readersForType: Optional[Dict[str, List[str]]] = None
|
|
117
|
-
chunkers: Optional[Dict[str, ChunkerSchema]] = None
|
|
118
|
-
filters: Optional[List[str]] = None
|
|
174
|
+
readers: Optional[Dict[str, ReaderSchema]] = Field(None, description="Available content readers")
|
|
175
|
+
readersForType: Optional[Dict[str, List[str]]] = Field(None, description="Mapping of content types to reader IDs")
|
|
176
|
+
chunkers: Optional[Dict[str, ChunkerSchema]] = Field(None, description="Available chunking strategies")
|
|
177
|
+
filters: Optional[List[str]] = Field(None, description="Available filter tags")
|
|
178
|
+
vector_dbs: Optional[List[VectorDbSchema]] = Field(None, description="Configured vector databases")
|