agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import math
|
|
4
|
-
from typing import Dict, List, Optional
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
5
5
|
|
|
6
|
-
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Path, Query, UploadFile
|
|
6
|
+
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Path, Query, Request, UploadFile
|
|
7
7
|
|
|
8
|
+
from agno.db.base import AsyncBaseDb
|
|
8
9
|
from agno.knowledge.content import Content, FileData
|
|
9
10
|
from agno.knowledge.knowledge import Knowledge
|
|
10
11
|
from agno.knowledge.reader import ReaderFactory
|
|
11
12
|
from agno.knowledge.reader.base import Reader
|
|
12
13
|
from agno.knowledge.utils import get_all_chunkers_info, get_all_readers_info, get_content_types_to_readers_mapping
|
|
13
|
-
from agno.os.auth import get_authentication_dependency
|
|
14
|
+
from agno.os.auth import get_auth_token_from_request, get_authentication_dependency
|
|
14
15
|
from agno.os.routers.knowledge.schemas import (
|
|
15
16
|
ChunkerSchema,
|
|
16
17
|
ConfigResponseSchema,
|
|
@@ -35,14 +36,15 @@ from agno.os.schema import (
|
|
|
35
36
|
)
|
|
36
37
|
from agno.os.settings import AgnoAPISettings
|
|
37
38
|
from agno.os.utils import get_knowledge_instance_by_db_id
|
|
38
|
-
from agno.
|
|
39
|
+
from agno.remote.base import RemoteKnowledge
|
|
40
|
+
from agno.utils.log import log_debug, log_error, log_info
|
|
39
41
|
from agno.utils.string import generate_id
|
|
40
42
|
|
|
41
43
|
logger = logging.getLogger(__name__)
|
|
42
44
|
|
|
43
45
|
|
|
44
46
|
def get_knowledge_router(
|
|
45
|
-
knowledge_instances: List[Knowledge], settings: AgnoAPISettings = AgnoAPISettings()
|
|
47
|
+
knowledge_instances: List[Union[Knowledge, RemoteKnowledge]], settings: AgnoAPISettings = AgnoAPISettings()
|
|
46
48
|
) -> APIRouter:
|
|
47
49
|
"""Create knowledge router with comprehensive OpenAPI documentation for content management endpoints."""
|
|
48
50
|
router = APIRouter(
|
|
@@ -59,7 +61,7 @@ def get_knowledge_router(
|
|
|
59
61
|
return attach_routes(router=router, knowledge_instances=knowledge_instances)
|
|
60
62
|
|
|
61
63
|
|
|
62
|
-
def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> APIRouter:
|
|
64
|
+
def attach_routes(router: APIRouter, knowledge_instances: List[Union[Knowledge, RemoteKnowledge]]) -> APIRouter:
|
|
63
65
|
@router.post(
|
|
64
66
|
"/knowledge/content",
|
|
65
67
|
response_model=ContentResponseSchema,
|
|
@@ -93,6 +95,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
93
95
|
},
|
|
94
96
|
)
|
|
95
97
|
async def upload_content(
|
|
98
|
+
request: Request,
|
|
96
99
|
background_tasks: BackgroundTasks,
|
|
97
100
|
name: Optional[str] = Form(None, description="Content name (auto-generated from file/URL if not provided)"),
|
|
98
101
|
description: Optional[str] = Form(None, description="Content description for context"),
|
|
@@ -107,7 +110,6 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
107
110
|
db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
|
|
108
111
|
):
|
|
109
112
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
110
|
-
log_info(f"Adding content: {name}, {description}, {url}, {metadata}")
|
|
111
113
|
|
|
112
114
|
parsed_metadata = None
|
|
113
115
|
if metadata:
|
|
@@ -116,6 +118,25 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
116
118
|
except json.JSONDecodeError:
|
|
117
119
|
# If it's not valid JSON, treat as a simple key-value pair
|
|
118
120
|
parsed_metadata = {"value": metadata} if metadata != "string" else None
|
|
121
|
+
|
|
122
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
123
|
+
auth_token = get_auth_token_from_request(request)
|
|
124
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
125
|
+
return await knowledge.upload_content(
|
|
126
|
+
name=name,
|
|
127
|
+
description=description,
|
|
128
|
+
url=url,
|
|
129
|
+
metadata=parsed_metadata,
|
|
130
|
+
file=file,
|
|
131
|
+
text_content=text_content,
|
|
132
|
+
reader_id=reader_id,
|
|
133
|
+
chunker=chunker,
|
|
134
|
+
chunk_size=chunk_size,
|
|
135
|
+
chunk_overlap=chunk_overlap,
|
|
136
|
+
db_id=db_id,
|
|
137
|
+
headers=headers,
|
|
138
|
+
)
|
|
139
|
+
|
|
119
140
|
if file:
|
|
120
141
|
content_bytes = await file.read()
|
|
121
142
|
elif text_content:
|
|
@@ -185,6 +206,113 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
185
206
|
)
|
|
186
207
|
return response
|
|
187
208
|
|
|
209
|
+
@router.post(
|
|
210
|
+
"/knowledge/remote-content",
|
|
211
|
+
response_model=ContentResponseSchema,
|
|
212
|
+
status_code=202,
|
|
213
|
+
operation_id="upload_remote_content",
|
|
214
|
+
summary="Upload Remote Content",
|
|
215
|
+
description=(
|
|
216
|
+
"Upload content from a remote source (S3, GCS, SharePoint, GitHub) to the knowledge base. "
|
|
217
|
+
"Content is processed asynchronously in the background. "
|
|
218
|
+
"Use the /knowledge/config endpoint to see available remote content sources."
|
|
219
|
+
),
|
|
220
|
+
responses={
|
|
221
|
+
202: {
|
|
222
|
+
"description": "Remote content upload accepted for processing",
|
|
223
|
+
"content": {
|
|
224
|
+
"application/json": {
|
|
225
|
+
"example": {
|
|
226
|
+
"id": "content-456",
|
|
227
|
+
"name": "reports/q1-2024.pdf",
|
|
228
|
+
"description": "Q1 Report from S3",
|
|
229
|
+
"metadata": {"source": "s3-docs"},
|
|
230
|
+
"status": "processing",
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
},
|
|
234
|
+
},
|
|
235
|
+
400: {
|
|
236
|
+
"description": "Invalid request - unknown config or missing path",
|
|
237
|
+
"model": BadRequestResponse,
|
|
238
|
+
},
|
|
239
|
+
422: {"description": "Validation error in request body", "model": ValidationErrorResponse},
|
|
240
|
+
},
|
|
241
|
+
)
|
|
242
|
+
async def upload_remote_content(
|
|
243
|
+
request: Request,
|
|
244
|
+
background_tasks: BackgroundTasks,
|
|
245
|
+
config_id: str = Form(..., description="ID of the configured remote content source (from /knowledge/config)"),
|
|
246
|
+
path: str = Form(..., description="Path to file or folder in the remote source"),
|
|
247
|
+
name: Optional[str] = Form(None, description="Content name (auto-generated if not provided)"),
|
|
248
|
+
description: Optional[str] = Form(None, description="Content description"),
|
|
249
|
+
metadata: Optional[str] = Form(None, description="JSON metadata object"),
|
|
250
|
+
reader_id: Optional[str] = Form(None, description="ID of the reader to use for processing"),
|
|
251
|
+
chunker: Optional[str] = Form(None, description="Chunking strategy to apply"),
|
|
252
|
+
chunk_size: Optional[int] = Form(None, description="Chunk size for processing"),
|
|
253
|
+
chunk_overlap: Optional[int] = Form(None, description="Chunk overlap for processing"),
|
|
254
|
+
db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
|
|
255
|
+
):
|
|
256
|
+
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
257
|
+
|
|
258
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
259
|
+
# TODO: Forward to remote knowledge instance
|
|
260
|
+
raise HTTPException(status_code=501, detail="Remote content upload not yet supported for RemoteKnowledge")
|
|
261
|
+
|
|
262
|
+
# Validate that the config_id exists in configured sources
|
|
263
|
+
config = knowledge._get_remote_config_by_id(config_id)
|
|
264
|
+
if config is None:
|
|
265
|
+
raise HTTPException(
|
|
266
|
+
status_code=400,
|
|
267
|
+
detail=f"Unknown content source: {config_id}. Check /knowledge/config for available sources.",
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
# Parse metadata if provided
|
|
271
|
+
parsed_metadata = None
|
|
272
|
+
if metadata:
|
|
273
|
+
try:
|
|
274
|
+
parsed_metadata = json.loads(metadata)
|
|
275
|
+
except json.JSONDecodeError:
|
|
276
|
+
parsed_metadata = {"value": metadata}
|
|
277
|
+
|
|
278
|
+
# Use the config's factory methods to create the remote content object
|
|
279
|
+
# If path ends with '/', treat as folder, otherwise treat as file
|
|
280
|
+
is_folder = path.endswith("/")
|
|
281
|
+
if is_folder:
|
|
282
|
+
if hasattr(config, "folder"):
|
|
283
|
+
remote_content = config.folder(path.rstrip("/"))
|
|
284
|
+
else:
|
|
285
|
+
raise HTTPException(status_code=400, detail=f"Config {config_id} does not support folder uploads")
|
|
286
|
+
else:
|
|
287
|
+
if hasattr(config, "file"):
|
|
288
|
+
remote_content = config.file(path)
|
|
289
|
+
else:
|
|
290
|
+
raise HTTPException(status_code=400, detail=f"Config {config_id} does not support file uploads")
|
|
291
|
+
|
|
292
|
+
# Set name from path if not provided
|
|
293
|
+
content_name = name or path
|
|
294
|
+
|
|
295
|
+
content = Content(
|
|
296
|
+
name=content_name,
|
|
297
|
+
description=description,
|
|
298
|
+
metadata=parsed_metadata,
|
|
299
|
+
remote_content=remote_content,
|
|
300
|
+
)
|
|
301
|
+
content_hash = knowledge._build_content_hash(content)
|
|
302
|
+
content.content_hash = content_hash
|
|
303
|
+
content.id = generate_id(content_hash)
|
|
304
|
+
|
|
305
|
+
background_tasks.add_task(process_content, knowledge, content, reader_id, chunker, chunk_size, chunk_overlap)
|
|
306
|
+
|
|
307
|
+
response = ContentResponseSchema(
|
|
308
|
+
id=content.id,
|
|
309
|
+
name=content_name,
|
|
310
|
+
description=description,
|
|
311
|
+
metadata=parsed_metadata,
|
|
312
|
+
status=ContentStatus.PROCESSING,
|
|
313
|
+
)
|
|
314
|
+
return response
|
|
315
|
+
|
|
188
316
|
@router.patch(
|
|
189
317
|
"/knowledge/content/{content_id}",
|
|
190
318
|
response_model=ContentResponseSchema,
|
|
@@ -225,6 +353,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
225
353
|
},
|
|
226
354
|
)
|
|
227
355
|
async def update_content(
|
|
356
|
+
request: Request,
|
|
228
357
|
content_id: str = Path(..., description="Content ID"),
|
|
229
358
|
name: Optional[str] = Form(None, description="Content name"),
|
|
230
359
|
description: Optional[str] = Form(None, description="Content description"),
|
|
@@ -242,6 +371,19 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
242
371
|
except json.JSONDecodeError:
|
|
243
372
|
raise HTTPException(status_code=400, detail="Invalid JSON format for metadata")
|
|
244
373
|
|
|
374
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
375
|
+
auth_token = get_auth_token_from_request(request)
|
|
376
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
377
|
+
return await knowledge.update_content(
|
|
378
|
+
content_id=content_id,
|
|
379
|
+
name=name,
|
|
380
|
+
description=description,
|
|
381
|
+
metadata=parsed_metadata,
|
|
382
|
+
reader_id=reader_id,
|
|
383
|
+
db_id=db_id,
|
|
384
|
+
headers=headers,
|
|
385
|
+
)
|
|
386
|
+
|
|
245
387
|
# Create ContentUpdateSchema object from form data
|
|
246
388
|
update_data = ContentUpdateSchema(
|
|
247
389
|
name=name if name and name.strip() else None,
|
|
@@ -263,7 +405,17 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
263
405
|
else:
|
|
264
406
|
raise HTTPException(status_code=400, detail=f"Invalid reader_id: {update_data.reader_id}")
|
|
265
407
|
|
|
266
|
-
|
|
408
|
+
# Use async patch method if contents_db is an AsyncBaseDb, otherwise use sync patch method
|
|
409
|
+
updated_content_dict = None
|
|
410
|
+
try:
|
|
411
|
+
if knowledge.contents_db is not None and isinstance(knowledge.contents_db, AsyncBaseDb):
|
|
412
|
+
updated_content_dict = await knowledge.apatch_content(content)
|
|
413
|
+
else:
|
|
414
|
+
updated_content_dict = knowledge.patch_content(content)
|
|
415
|
+
except Exception as e:
|
|
416
|
+
log_error(f"Error updating content: {str(e)}")
|
|
417
|
+
raise HTTPException(status_code=500, detail=f"Error updating content: {str(e)}")
|
|
418
|
+
|
|
267
419
|
if not updated_content_dict:
|
|
268
420
|
raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")
|
|
269
421
|
|
|
@@ -309,13 +461,27 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
309
461
|
},
|
|
310
462
|
)
|
|
311
463
|
async def get_content(
|
|
312
|
-
|
|
313
|
-
|
|
464
|
+
request: Request,
|
|
465
|
+
limit: Optional[int] = Query(default=20, description="Number of content entries to return", ge=1),
|
|
466
|
+
page: Optional[int] = Query(default=1, description="Page number", ge=0),
|
|
314
467
|
sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
|
|
315
468
|
sort_order: Optional[SortOrder] = Query(default="desc", description="Sort order (asc or desc)"),
|
|
316
469
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
317
470
|
) -> PaginatedResponse[ContentResponseSchema]:
|
|
318
471
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
472
|
+
|
|
473
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
474
|
+
auth_token = get_auth_token_from_request(request)
|
|
475
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
476
|
+
return await knowledge.get_content(
|
|
477
|
+
limit=limit,
|
|
478
|
+
page=page,
|
|
479
|
+
sort_by=sort_by,
|
|
480
|
+
sort_order=sort_order.value if sort_order else None,
|
|
481
|
+
db_id=db_id,
|
|
482
|
+
headers=headers,
|
|
483
|
+
)
|
|
484
|
+
|
|
319
485
|
contents, count = await knowledge.aget_content(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)
|
|
320
486
|
|
|
321
487
|
return PaginatedResponse(
|
|
@@ -377,11 +543,16 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
377
543
|
},
|
|
378
544
|
)
|
|
379
545
|
async def get_content_by_id(
|
|
546
|
+
request: Request,
|
|
380
547
|
content_id: str,
|
|
381
548
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
382
549
|
) -> ContentResponseSchema:
|
|
383
|
-
log_info(f"Getting content by id: {content_id}")
|
|
384
550
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
551
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
552
|
+
auth_token = get_auth_token_from_request(request)
|
|
553
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
554
|
+
return await knowledge.get_content_by_id(content_id=content_id, db_id=db_id, headers=headers)
|
|
555
|
+
|
|
385
556
|
content = await knowledge.aget_content_by_id(content_id=content_id)
|
|
386
557
|
if not content:
|
|
387
558
|
raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")
|
|
@@ -417,12 +588,17 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
417
588
|
},
|
|
418
589
|
)
|
|
419
590
|
async def delete_content_by_id(
|
|
591
|
+
request: Request,
|
|
420
592
|
content_id: str,
|
|
421
593
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
422
594
|
) -> ContentResponseSchema:
|
|
423
595
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
424
|
-
|
|
425
|
-
|
|
596
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
597
|
+
auth_token = get_auth_token_from_request(request)
|
|
598
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
599
|
+
await knowledge.delete_content_by_id(content_id=content_id, db_id=db_id, headers=headers)
|
|
600
|
+
else:
|
|
601
|
+
await knowledge.aremove_content_by_id(content_id=content_id)
|
|
426
602
|
|
|
427
603
|
return ContentResponseSchema(
|
|
428
604
|
id=content_id,
|
|
@@ -442,12 +618,17 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
442
618
|
500: {"description": "Failed to delete all content", "model": InternalServerErrorResponse},
|
|
443
619
|
},
|
|
444
620
|
)
|
|
445
|
-
def delete_all_content(
|
|
621
|
+
async def delete_all_content(
|
|
622
|
+
request: Request,
|
|
446
623
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
447
624
|
):
|
|
448
625
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
449
|
-
|
|
450
|
-
|
|
626
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
627
|
+
auth_token = get_auth_token_from_request(request)
|
|
628
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
629
|
+
return await knowledge.delete_all_content(db_id=db_id, headers=headers)
|
|
630
|
+
|
|
631
|
+
await knowledge.aremove_all_content()
|
|
451
632
|
return "success"
|
|
452
633
|
|
|
453
634
|
@router.get(
|
|
@@ -481,11 +662,16 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
481
662
|
},
|
|
482
663
|
)
|
|
483
664
|
async def get_content_status(
|
|
665
|
+
request: Request,
|
|
484
666
|
content_id: str,
|
|
485
667
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
486
668
|
) -> ContentStatusResponse:
|
|
487
|
-
log_info(f"Getting content status: {content_id}")
|
|
488
669
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
670
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
671
|
+
auth_token = get_auth_token_from_request(request)
|
|
672
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
673
|
+
return await knowledge.get_content_status(content_id=content_id, db_id=db_id, headers=headers)
|
|
674
|
+
|
|
489
675
|
knowledge_status, status_message = await knowledge.aget_content_status(content_id=content_id)
|
|
490
676
|
|
|
491
677
|
# Handle the case where content is not found
|
|
@@ -550,13 +736,27 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
550
736
|
404: {"description": "No documents found"},
|
|
551
737
|
},
|
|
552
738
|
)
|
|
553
|
-
def search_knowledge(
|
|
739
|
+
async def search_knowledge(
|
|
740
|
+
http_request: Request, request: VectorSearchRequestSchema
|
|
741
|
+
) -> PaginatedResponse[VectorSearchResult]:
|
|
554
742
|
import time
|
|
555
743
|
|
|
556
744
|
start_time = time.time()
|
|
557
745
|
|
|
558
746
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, request.db_id)
|
|
559
747
|
|
|
748
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
749
|
+
auth_token = get_auth_token_from_request(http_request)
|
|
750
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
751
|
+
return await knowledge.search_knowledge(
|
|
752
|
+
query=request.query,
|
|
753
|
+
max_results=request.max_results,
|
|
754
|
+
filters=request.filters,
|
|
755
|
+
search_type=request.search_type,
|
|
756
|
+
db_id=request.db_id,
|
|
757
|
+
headers=headers,
|
|
758
|
+
)
|
|
759
|
+
|
|
560
760
|
# For now, validate the vector db ids exist in the knowledge base
|
|
561
761
|
# We will add more logic around this once we have multi vectordb support
|
|
562
762
|
# If vector db ids are provided, check if any of them match the knowledge's vector db
|
|
@@ -578,7 +778,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
578
778
|
# Use max_results if specified, otherwise use a higher limit for search then paginate
|
|
579
779
|
search_limit = request.max_results
|
|
580
780
|
|
|
581
|
-
results = knowledge.
|
|
781
|
+
results = await knowledge.asearch(
|
|
582
782
|
query=request.query, max_results=search_limit, filters=request.filters, search_type=request.search_type
|
|
583
783
|
)
|
|
584
784
|
|
|
@@ -617,7 +817,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
617
817
|
"/knowledge/config",
|
|
618
818
|
status_code=200,
|
|
619
819
|
operation_id="get_knowledge_config",
|
|
620
|
-
summary="Get
|
|
820
|
+
summary="Get Config",
|
|
621
821
|
description=(
|
|
622
822
|
"Retrieve available readers, chunkers, and configuration options for content processing. "
|
|
623
823
|
"This endpoint provides metadata about supported file types, processing strategies, and filters."
|
|
@@ -767,6 +967,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
767
967
|
"name": "TextReader",
|
|
768
968
|
"description": "Reads text files",
|
|
769
969
|
"chunkers": [
|
|
970
|
+
"CodeChunker",
|
|
770
971
|
"FixedSizeChunker",
|
|
771
972
|
"AgenticChunker",
|
|
772
973
|
"DocumentChunker",
|
|
@@ -788,9 +989,11 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
788
989
|
"text": ["web_search"],
|
|
789
990
|
"topic": ["arxiv"],
|
|
790
991
|
"file": ["csv", "gcs"],
|
|
791
|
-
".csv": ["csv"],
|
|
792
|
-
".xlsx": ["
|
|
793
|
-
".xls": ["
|
|
992
|
+
".csv": ["csv", "field_labeled_csv"],
|
|
993
|
+
".xlsx": ["excel"],
|
|
994
|
+
".xls": ["excel"],
|
|
995
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ["excel"],
|
|
996
|
+
"application/vnd.ms-excel": ["excel"],
|
|
794
997
|
".docx": ["docx"],
|
|
795
998
|
".doc": ["docx"],
|
|
796
999
|
".json": ["json"],
|
|
@@ -805,6 +1008,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
805
1008
|
"description": "Chunking strategy that uses an LLM to determine natural breakpoints in the text",
|
|
806
1009
|
"metadata": {"chunk_size": 5000},
|
|
807
1010
|
},
|
|
1011
|
+
"CodeChunker": {
|
|
1012
|
+
"key": "CodeChunker",
|
|
1013
|
+
"name": "CodeChunker",
|
|
1014
|
+
"description": "The CodeChunker splits code into chunks based on its structure, leveraging Abstract Syntax Trees (ASTs) to create contextually relevant segments",
|
|
1015
|
+
"metadata": {"chunk_size": 2048},
|
|
1016
|
+
},
|
|
808
1017
|
"DocumentChunker": {
|
|
809
1018
|
"key": "DocumentChunker",
|
|
810
1019
|
"name": "DocumentChunker",
|
|
@@ -869,13 +1078,19 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
869
1078
|
}
|
|
870
1079
|
},
|
|
871
1080
|
)
|
|
872
|
-
def get_config(
|
|
1081
|
+
async def get_config(
|
|
1082
|
+
request: Request,
|
|
873
1083
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
874
1084
|
) -> ConfigResponseSchema:
|
|
875
1085
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
876
1086
|
|
|
877
|
-
|
|
878
|
-
|
|
1087
|
+
if isinstance(knowledge, RemoteKnowledge):
|
|
1088
|
+
auth_token = get_auth_token_from_request(request)
|
|
1089
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
1090
|
+
return await knowledge.get_config(headers=headers)
|
|
1091
|
+
|
|
1092
|
+
# Get factory readers info (including custom readers from this knowledge instance)
|
|
1093
|
+
readers_info = get_all_readers_info(knowledge)
|
|
879
1094
|
reader_schemas = {}
|
|
880
1095
|
# Add factory readers
|
|
881
1096
|
for reader_info in readers_info:
|
|
@@ -887,7 +1102,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
887
1102
|
)
|
|
888
1103
|
|
|
889
1104
|
# Add custom readers from knowledge.readers
|
|
890
|
-
|
|
1105
|
+
readers_result: Any = knowledge.get_readers() or {}
|
|
1106
|
+
# Ensure readers_dict is a dictionary (defensive check)
|
|
1107
|
+
if not isinstance(readers_result, dict):
|
|
1108
|
+
readers_dict: Dict[str, Reader] = {}
|
|
1109
|
+
else:
|
|
1110
|
+
readers_dict = readers_result
|
|
891
1111
|
if readers_dict:
|
|
892
1112
|
for reader_id, reader in readers_dict.items():
|
|
893
1113
|
# Get chunking strategies from the reader
|
|
@@ -907,8 +1127,8 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
907
1127
|
chunkers=chunking_strategies,
|
|
908
1128
|
)
|
|
909
1129
|
|
|
910
|
-
# Get content types to readers mapping
|
|
911
|
-
types_of_readers = get_content_types_to_readers_mapping()
|
|
1130
|
+
# Get content types to readers mapping (including custom readers from this knowledge instance)
|
|
1131
|
+
types_of_readers = get_content_types_to_readers_mapping(knowledge)
|
|
912
1132
|
chunkers_list = get_all_chunkers_info()
|
|
913
1133
|
|
|
914
1134
|
# Convert chunkers list to dictionary format expected by schema
|
|
@@ -936,13 +1156,31 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
936
1156
|
search_types=search_types,
|
|
937
1157
|
)
|
|
938
1158
|
)
|
|
1159
|
+
filters = await knowledge.aget_valid_filters()
|
|
939
1160
|
|
|
1161
|
+
# Get remote content sources if available
|
|
1162
|
+
remote_content_sources = None
|
|
1163
|
+
if hasattr(knowledge, "_get_remote_configs") and callable(knowledge._get_remote_configs):
|
|
1164
|
+
remote_configs = knowledge._get_remote_configs()
|
|
1165
|
+
if remote_configs:
|
|
1166
|
+
from agno.os.routers.knowledge.schemas import RemoteContentSourceSchema
|
|
1167
|
+
|
|
1168
|
+
remote_content_sources = [
|
|
1169
|
+
RemoteContentSourceSchema(
|
|
1170
|
+
id=config.id,
|
|
1171
|
+
name=config.name,
|
|
1172
|
+
type=config.__class__.__name__.replace("Config", "").lower(),
|
|
1173
|
+
metadata=config.metadata,
|
|
1174
|
+
)
|
|
1175
|
+
for config in remote_configs
|
|
1176
|
+
]
|
|
940
1177
|
return ConfigResponseSchema(
|
|
941
1178
|
readers=reader_schemas,
|
|
942
1179
|
vector_dbs=vector_dbs,
|
|
943
1180
|
readersForType=types_of_readers,
|
|
944
1181
|
chunkers=chunkers_dict,
|
|
945
|
-
filters=
|
|
1182
|
+
filters=filters,
|
|
1183
|
+
remote_content_sources=remote_content_sources,
|
|
946
1184
|
)
|
|
947
1185
|
|
|
948
1186
|
return router
|
|
@@ -961,27 +1199,33 @@ async def process_content(
|
|
|
961
1199
|
try:
|
|
962
1200
|
if reader_id:
|
|
963
1201
|
reader = None
|
|
964
|
-
|
|
965
|
-
|
|
1202
|
+
# Use get_readers() to ensure we get a dict (handles list conversion)
|
|
1203
|
+
custom_readers = knowledge.get_readers()
|
|
1204
|
+
if custom_readers and reader_id in custom_readers:
|
|
1205
|
+
reader = custom_readers[reader_id]
|
|
1206
|
+
log_debug(f"Found custom reader: {reader.__class__.__name__}")
|
|
966
1207
|
else:
|
|
1208
|
+
# Try to resolve from factory readers
|
|
967
1209
|
key = reader_id.lower().strip().replace("-", "_").replace(" ", "_")
|
|
968
1210
|
candidates = [key] + ([key[:-6]] if key.endswith("reader") else [])
|
|
969
1211
|
for cand in candidates:
|
|
970
1212
|
try:
|
|
971
1213
|
reader = ReaderFactory.create_reader(cand)
|
|
972
|
-
log_debug(f"Resolved reader: {reader.__class__.__name__}")
|
|
1214
|
+
log_debug(f"Resolved reader from factory: {reader.__class__.__name__}")
|
|
973
1215
|
break
|
|
974
1216
|
except Exception:
|
|
975
1217
|
continue
|
|
976
1218
|
if reader:
|
|
977
1219
|
content.reader = reader
|
|
1220
|
+
else:
|
|
1221
|
+
log_debug(f"Could not resolve reader with id: {reader_id}")
|
|
978
1222
|
if chunker and content.reader:
|
|
979
1223
|
# Set the chunker name on the reader - let the reader handle it internally
|
|
980
1224
|
content.reader.set_chunking_strategy_from_string(chunker, chunk_size=chunk_size, overlap=chunk_overlap)
|
|
981
1225
|
log_debug(f"Set chunking strategy: {chunker}")
|
|
982
1226
|
|
|
983
1227
|
log_debug(f"Using reader: {content.reader.__class__.__name__}")
|
|
984
|
-
await knowledge.
|
|
1228
|
+
await knowledge._aload_content(content, upsert=False, skip_if_exists=True)
|
|
985
1229
|
log_info(f"Content {content.id} processed successfully")
|
|
986
1230
|
except Exception as e:
|
|
987
1231
|
log_info(f"Error processing content: {e}")
|
|
@@ -991,7 +1235,12 @@ async def process_content(
|
|
|
991
1235
|
|
|
992
1236
|
content.status = KnowledgeContentStatus.FAILED
|
|
993
1237
|
content.status_message = str(e)
|
|
994
|
-
|
|
1238
|
+
# Use async patch method if contents_db is an AsyncBaseDb, otherwise use sync patch method
|
|
1239
|
+
if knowledge.contents_db is not None and isinstance(knowledge.contents_db, AsyncBaseDb):
|
|
1240
|
+
await knowledge.apatch_content(content)
|
|
1241
|
+
else:
|
|
1242
|
+
knowledge.patch_content(content)
|
|
1243
|
+
|
|
995
1244
|
except Exception:
|
|
996
1245
|
# Swallow any secondary errors to avoid crashing the background task
|
|
997
1246
|
pass
|
|
@@ -82,7 +82,7 @@ class ContentResponseSchema(BaseModel):
|
|
|
82
82
|
status=status,
|
|
83
83
|
status_message=content.get("status_message"),
|
|
84
84
|
created_at=parse_timestamp(content.get("created_at")),
|
|
85
|
-
updated_at=parse_timestamp(content.get("updated_at")),
|
|
85
|
+
updated_at=parse_timestamp(content.get("updated_at", content.get("created_at", 0))),
|
|
86
86
|
# TODO: These fields are not available in the Content class. Fix the inconsistency
|
|
87
87
|
access_count=None,
|
|
88
88
|
linked_to=None,
|
|
@@ -156,7 +156,7 @@ class VectorSearchRequestSchema(BaseModel):
|
|
|
156
156
|
class Meta(BaseModel):
|
|
157
157
|
"""Inline metadata schema for pagination."""
|
|
158
158
|
|
|
159
|
-
limit: int = Field(20, description="Number of results per page", ge=1
|
|
159
|
+
limit: int = Field(20, description="Number of results per page", ge=1)
|
|
160
160
|
page: int = Field(1, description="Page number", ge=1)
|
|
161
161
|
|
|
162
162
|
query: str = Field(..., description="The search query text")
|
|
@@ -170,9 +170,21 @@ class VectorSearchRequestSchema(BaseModel):
|
|
|
170
170
|
)
|
|
171
171
|
|
|
172
172
|
|
|
173
|
+
class RemoteContentSourceSchema(BaseModel):
|
|
174
|
+
"""Schema for remote content source configuration."""
|
|
175
|
+
|
|
176
|
+
id: str = Field(..., description="Unique identifier for the content source")
|
|
177
|
+
name: str = Field(..., description="Display name for the content source")
|
|
178
|
+
type: str = Field(..., description="Type of content source (s3, gcs, sharepoint, github, azureblob)")
|
|
179
|
+
metadata: Optional[Dict[str, Any]] = Field(None, description="Custom metadata for the content source")
|
|
180
|
+
|
|
181
|
+
|
|
173
182
|
class ConfigResponseSchema(BaseModel):
|
|
174
183
|
readers: Optional[Dict[str, ReaderSchema]] = Field(None, description="Available content readers")
|
|
175
184
|
readersForType: Optional[Dict[str, List[str]]] = Field(None, description="Mapping of content types to reader IDs")
|
|
176
185
|
chunkers: Optional[Dict[str, ChunkerSchema]] = Field(None, description="Available chunking strategies")
|
|
177
186
|
filters: Optional[List[str]] = Field(None, description="Available filter tags")
|
|
178
187
|
vector_dbs: Optional[List[VectorDbSchema]] = Field(None, description="Configured vector databases")
|
|
188
|
+
remote_content_sources: Optional[List[RemoteContentSourceSchema]] = Field(
|
|
189
|
+
None, description="Configured remote content sources (S3, GCS, SharePoint, GitHub)"
|
|
190
|
+
)
|