agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. agno/agent/agent.py +6015 -2823
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +594 -186
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +2 -8
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +72 -0
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +999 -519
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +103 -31
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +139 -0
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +59 -5
  142. agno/models/openai/chat.py +69 -29
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +77 -1
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -178
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +248 -94
  205. agno/run/base.py +44 -5
  206. agno/run/team.py +238 -97
  207. agno/run/workflow.py +144 -33
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1610
  213. agno/tools/dalle.py +2 -4
  214. agno/tools/decorator.py +4 -2
  215. agno/tools/duckduckgo.py +15 -11
  216. agno/tools/e2b.py +14 -7
  217. agno/tools/eleven_labs.py +23 -25
  218. agno/tools/exa.py +21 -16
  219. agno/tools/file.py +153 -23
  220. agno/tools/file_generation.py +350 -0
  221. agno/tools/firecrawl.py +4 -4
  222. agno/tools/function.py +250 -30
  223. agno/tools/gmail.py +238 -14
  224. agno/tools/google_drive.py +270 -0
  225. agno/tools/googlecalendar.py +36 -8
  226. agno/tools/googlesheets.py +20 -5
  227. agno/tools/jira.py +20 -0
  228. agno/tools/knowledge.py +3 -3
  229. agno/tools/mcp/__init__.py +10 -0
  230. agno/tools/mcp/mcp.py +331 -0
  231. agno/tools/mcp/multi_mcp.py +347 -0
  232. agno/tools/mcp/params.py +24 -0
  233. agno/tools/mcp_toolbox.py +284 -0
  234. agno/tools/mem0.py +11 -17
  235. agno/tools/memori.py +1 -53
  236. agno/tools/memory.py +419 -0
  237. agno/tools/models/nebius.py +5 -5
  238. agno/tools/models_labs.py +20 -10
  239. agno/tools/notion.py +204 -0
  240. agno/tools/parallel.py +314 -0
  241. agno/tools/scrapegraph.py +58 -31
  242. agno/tools/searxng.py +2 -2
  243. agno/tools/serper.py +2 -2
  244. agno/tools/slack.py +18 -3
  245. agno/tools/spider.py +2 -2
  246. agno/tools/tavily.py +146 -0
  247. agno/tools/whatsapp.py +1 -1
  248. agno/tools/workflow.py +278 -0
  249. agno/tools/yfinance.py +12 -11
  250. agno/utils/agent.py +820 -0
  251. agno/utils/audio.py +27 -0
  252. agno/utils/common.py +90 -1
  253. agno/utils/events.py +217 -2
  254. agno/utils/gemini.py +180 -22
  255. agno/utils/hooks.py +57 -0
  256. agno/utils/http.py +111 -0
  257. agno/utils/knowledge.py +12 -5
  258. agno/utils/log.py +1 -0
  259. agno/utils/mcp.py +92 -2
  260. agno/utils/media.py +188 -10
  261. agno/utils/merge_dict.py +22 -1
  262. agno/utils/message.py +60 -0
  263. agno/utils/models/claude.py +40 -11
  264. agno/utils/print_response/agent.py +105 -21
  265. agno/utils/print_response/team.py +103 -38
  266. agno/utils/print_response/workflow.py +251 -34
  267. agno/utils/reasoning.py +22 -1
  268. agno/utils/serialize.py +32 -0
  269. agno/utils/streamlit.py +16 -10
  270. agno/utils/string.py +41 -0
  271. agno/utils/team.py +98 -9
  272. agno/utils/tools.py +1 -1
  273. agno/vectordb/base.py +23 -4
  274. agno/vectordb/cassandra/cassandra.py +65 -9
  275. agno/vectordb/chroma/chromadb.py +182 -38
  276. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  277. agno/vectordb/couchbase/couchbase.py +105 -10
  278. agno/vectordb/lancedb/lance_db.py +124 -133
  279. agno/vectordb/langchaindb/langchaindb.py +25 -7
  280. agno/vectordb/lightrag/lightrag.py +17 -3
  281. agno/vectordb/llamaindex/__init__.py +3 -0
  282. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  283. agno/vectordb/milvus/milvus.py +126 -9
  284. agno/vectordb/mongodb/__init__.py +7 -1
  285. agno/vectordb/mongodb/mongodb.py +112 -7
  286. agno/vectordb/pgvector/pgvector.py +142 -21
  287. agno/vectordb/pineconedb/pineconedb.py +80 -8
  288. agno/vectordb/qdrant/qdrant.py +125 -39
  289. agno/vectordb/redis/__init__.py +9 -0
  290. agno/vectordb/redis/redisdb.py +694 -0
  291. agno/vectordb/singlestore/singlestore.py +111 -25
  292. agno/vectordb/surrealdb/surrealdb.py +31 -5
  293. agno/vectordb/upstashdb/upstashdb.py +76 -8
  294. agno/vectordb/weaviate/weaviate.py +86 -15
  295. agno/workflow/__init__.py +2 -0
  296. agno/workflow/agent.py +299 -0
  297. agno/workflow/condition.py +112 -18
  298. agno/workflow/loop.py +69 -10
  299. agno/workflow/parallel.py +266 -118
  300. agno/workflow/router.py +110 -17
  301. agno/workflow/step.py +638 -129
  302. agno/workflow/steps.py +65 -6
  303. agno/workflow/types.py +61 -23
  304. agno/workflow/workflow.py +2085 -272
  305. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
  306. agno-2.3.0.dist-info/RECORD +577 -0
  307. agno/knowledge/reader/url_reader.py +0 -128
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -610
  310. agno/utils/models/aws_claude.py +0 -170
  311. agno-2.0.1.dist-info/RECORD +0 -515
  312. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  313. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,6 @@ import json
2
2
  import logging
3
3
  import math
4
4
  from typing import Dict, List, Optional
5
- from uuid import uuid4
6
5
 
7
6
  from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, HTTPException, Path, Query, UploadFile
8
7
 
@@ -20,6 +19,9 @@ from agno.os.routers.knowledge.schemas import (
20
19
  ContentStatusResponse,
21
20
  ContentUpdateSchema,
22
21
  ReaderSchema,
22
+ VectorDbSchema,
23
+ VectorSearchRequestSchema,
24
+ VectorSearchResult,
23
25
  )
24
26
  from agno.os.schema import (
25
27
  BadRequestResponse,
@@ -34,6 +36,7 @@ from agno.os.schema import (
34
36
  from agno.os.settings import AgnoAPISettings
35
37
  from agno.os.utils import get_knowledge_instance_by_db_id
36
38
  from agno.utils.log import log_debug, log_info
39
+ from agno.utils.string import generate_id
37
40
 
38
41
  logger = logging.getLogger(__name__)
39
42
 
@@ -99,11 +102,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
99
102
  text_content: Optional[str] = Form(None, description="Raw text content to process"),
100
103
  reader_id: Optional[str] = Form(None, description="ID of the reader to use for content processing"),
101
104
  chunker: Optional[str] = Form(None, description="Chunking strategy to apply during processing"),
105
+ chunk_size: Optional[int] = Form(None, description="Chunk size to use for processing"),
106
+ chunk_overlap: Optional[int] = Form(None, description="Chunk overlap to use for processing"),
102
107
  db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
103
108
  ):
104
109
  knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
105
- content_id = str(uuid4())
106
- log_info(f"Adding content: {name}, {description}, {url}, {metadata} with ID: {content_id}")
110
+ log_info(f"Adding content: {name}, {description}, {url}, {metadata}")
107
111
 
108
112
  parsed_metadata = None
109
113
  if metadata:
@@ -166,10 +170,14 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
166
170
  file_data=file_data,
167
171
  size=file.size if file else None if text_content else None,
168
172
  )
169
- background_tasks.add_task(process_content, knowledge, content_id, content, reader_id, chunker)
173
+ content_hash = knowledge._build_content_hash(content)
174
+ content.content_hash = content_hash
175
+ content.id = generate_id(content_hash)
176
+
177
+ background_tasks.add_task(process_content, knowledge, content, reader_id, chunker, chunk_size, chunk_overlap)
170
178
 
171
179
  response = ContentResponseSchema(
172
- id=content_id,
180
+ id=content.id,
173
181
  name=name,
174
182
  description=description,
175
183
  metadata=parsed_metadata,
@@ -300,7 +308,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
300
308
  }
301
309
  },
302
310
  )
303
- def get_content(
311
+ async def get_content(
304
312
  limit: Optional[int] = Query(default=20, description="Number of content entries to return"),
305
313
  page: Optional[int] = Query(default=1, description="Page number"),
306
314
  sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
@@ -308,7 +316,7 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
308
316
  db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
309
317
  ) -> PaginatedResponse[ContentResponseSchema]:
310
318
  knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
311
- contents, count = knowledge.get_content(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)
319
+ contents, count = await knowledge.aget_content(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)
312
320
 
313
321
  return PaginatedResponse(
314
322
  data=[
@@ -368,13 +376,13 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
368
376
  404: {"description": "Content not found", "model": NotFoundResponse},
369
377
  },
370
378
  )
371
- def get_content_by_id(
379
+ async def get_content_by_id(
372
380
  content_id: str,
373
381
  db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
374
382
  ) -> ContentResponseSchema:
375
383
  log_info(f"Getting content by id: {content_id}")
376
384
  knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
377
- content = knowledge.get_content_by_id(content_id=content_id)
385
+ content = await knowledge.aget_content_by_id(content_id=content_id)
378
386
  if not content:
379
387
  raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")
380
388
  response = ContentResponseSchema.from_dict(
@@ -408,12 +416,12 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
408
416
  500: {"description": "Failed to delete content", "model": InternalServerErrorResponse},
409
417
  },
410
418
  )
411
- def delete_content_by_id(
419
+ async def delete_content_by_id(
412
420
  content_id: str,
413
421
  db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
414
422
  ) -> ContentResponseSchema:
415
423
  knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
416
- knowledge.remove_content_by_id(content_id=content_id)
424
+ await knowledge.aremove_content_by_id(content_id=content_id)
417
425
  log_info(f"Deleting content by id: {content_id}")
418
426
 
419
427
  return ContentResponseSchema(
@@ -440,7 +448,6 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
440
448
  knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
441
449
  log_info("Deleting all content")
442
450
  knowledge.remove_all_content()
443
-
444
451
  return "success"
445
452
 
446
453
  @router.get(
@@ -473,13 +480,13 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
473
480
  404: {"description": "Content not found", "model": NotFoundResponse},
474
481
  },
475
482
  )
476
- def get_content_status(
483
+ async def get_content_status(
477
484
  content_id: str,
478
485
  db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
479
486
  ) -> ContentStatusResponse:
480
487
  log_info(f"Getting content status: {content_id}")
481
488
  knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
482
- knowledge_status, status_message = knowledge.get_content_status(content_id=content_id)
489
+ knowledge_status, status_message = await knowledge.aget_content_status(content_id=content_id)
483
490
 
484
491
  # Handle the case where content is not found
485
492
  if knowledge_status is None:
@@ -510,11 +517,107 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
510
517
 
511
518
  return ContentStatusResponse(status=status, status_message=status_message or "")
512
519
 
520
+ @router.post(
521
+ "/knowledge/search",
522
+ status_code=200,
523
+ operation_id="search_knowledge",
524
+ summary="Search Knowledge",
525
+ description="Search the knowledge base for relevant documents using query, filters and search type.",
526
+ response_model=PaginatedResponse[VectorSearchResult],
527
+ responses={
528
+ 200: {
529
+ "description": "Search results retrieved successfully",
530
+ "content": {
531
+ "application/json": {
532
+ "example": {
533
+ "data": [
534
+ {
535
+ "id": "doc_123",
536
+ "content": "Jordan Mitchell - Software Engineer with skills in JavaScript, React, Python",
537
+ "name": "cv_1",
538
+ "meta_data": {"page": 1, "chunk": 1},
539
+ "usage": {"total_tokens": 14},
540
+ "reranking_score": 0.95,
541
+ "content_id": "content_456",
542
+ }
543
+ ],
544
+ "meta": {"page": 1, "limit": 20, "total_pages": 2, "total_count": 35},
545
+ }
546
+ }
547
+ },
548
+ },
549
+ 400: {"description": "Invalid search parameters"},
550
+ 404: {"description": "No documents found"},
551
+ },
552
+ )
553
+ def search_knowledge(request: VectorSearchRequestSchema) -> PaginatedResponse[VectorSearchResult]:
554
+ import time
555
+
556
+ start_time = time.time()
557
+
558
+ knowledge = get_knowledge_instance_by_db_id(knowledge_instances, request.db_id)
559
+
560
+ # For now, validate the vector db ids exist in the knowledge base
561
+ # We will add more logic around this once we have multi vectordb support
562
+ # If vector db ids are provided, check if any of them match the knowledge's vector db
563
+ if request.vector_db_ids:
564
+ if knowledge.vector_db and knowledge.vector_db.id:
565
+ if knowledge.vector_db.id not in request.vector_db_ids:
566
+ raise HTTPException(
567
+ status_code=400,
568
+ detail=f"None of the provided Vector DB IDs {request.vector_db_ids} match the knowledge base Vector DB ID {knowledge.vector_db.id}",
569
+ )
570
+ else:
571
+ raise HTTPException(status_code=400, detail="Knowledge base has no vector database configured")
572
+
573
+ # Calculate pagination parameters
574
+ meta = request.meta
575
+ limit = meta.limit if meta and meta.limit is not None else 20
576
+ page = meta.page if meta and meta.page is not None else 1
577
+
578
+ # Use max_results if specified, otherwise use a higher limit for search then paginate
579
+ search_limit = request.max_results
580
+
581
+ results = knowledge.search(
582
+ query=request.query, max_results=search_limit, filters=request.filters, search_type=request.search_type
583
+ )
584
+
585
+ # Calculate pagination
586
+ total_results = len(results)
587
+ start_idx = (page - 1) * limit
588
+
589
+ # Ensure start_idx doesn't exceed the total results
590
+ if start_idx >= total_results and total_results > 0:
591
+ # If page is beyond available results, return empty results
592
+ paginated_results = []
593
+ else:
594
+ end_idx = min(start_idx + limit, total_results)
595
+ paginated_results = results[start_idx:end_idx]
596
+
597
+ search_time_ms = (time.time() - start_time) * 1000
598
+
599
+ # Convert Document objects to serializable format
600
+ document_results = [VectorSearchResult.from_document(doc) for doc in paginated_results]
601
+
602
+ # Calculate pagination info
603
+ total_pages = (total_results + limit - 1) // limit # Ceiling division
604
+
605
+ return PaginatedResponse(
606
+ data=document_results,
607
+ meta=PaginationInfo(
608
+ page=page,
609
+ limit=limit,
610
+ total_pages=total_pages,
611
+ total_count=total_results,
612
+ search_time_ms=search_time_ms,
613
+ ),
614
+ )
615
+
513
616
  @router.get(
514
617
  "/knowledge/config",
515
618
  status_code=200,
516
619
  operation_id="get_knowledge_config",
517
- summary="Get Knowledge Configuration",
620
+ summary="Get Config",
518
621
  description=(
519
622
  "Retrieve available readers, chunkers, and configuration options for content processing. "
520
623
  "This endpoint provides metadata about supported file types, processing strategies, and filters."
@@ -700,38 +803,65 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
700
803
  "key": "AgenticChunker",
701
804
  "name": "AgenticChunker",
702
805
  "description": "Chunking strategy that uses an LLM to determine natural breakpoints in the text",
806
+ "metadata": {"chunk_size": 5000},
703
807
  },
704
808
  "DocumentChunker": {
705
809
  "key": "DocumentChunker",
706
810
  "name": "DocumentChunker",
707
811
  "description": "A chunking strategy that splits text based on document structure like paragraphs and sections",
708
- },
709
- "RecursiveChunker": {
710
- "key": "RecursiveChunker",
711
- "name": "RecursiveChunker",
712
- "description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
713
- },
714
- "SemanticChunker": {
715
- "key": "SemanticChunker",
716
- "name": "SemanticChunker",
717
- "description": "Chunking strategy that splits text into semantic chunks using chonkie",
812
+ "metadata": {
813
+ "chunk_size": 5000,
814
+ "chunk_overlap": 0,
815
+ },
718
816
  },
719
817
  "FixedSizeChunker": {
720
818
  "key": "FixedSizeChunker",
721
819
  "name": "FixedSizeChunker",
722
820
  "description": "Chunking strategy that splits text into fixed-size chunks with optional overlap",
821
+ "metadata": {
822
+ "chunk_size": 5000,
823
+ "chunk_overlap": 0,
824
+ },
825
+ },
826
+ "MarkdownChunker": {
827
+ "key": "MarkdownChunker",
828
+ "name": "MarkdownChunker",
829
+ "description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
830
+ "metadata": {
831
+ "chunk_size": 5000,
832
+ "chunk_overlap": 0,
833
+ },
834
+ },
835
+ "RecursiveChunker": {
836
+ "key": "RecursiveChunker",
837
+ "name": "RecursiveChunker",
838
+ "description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
839
+ "metadata": {
840
+ "chunk_size": 5000,
841
+ "chunk_overlap": 0,
842
+ },
723
843
  },
724
844
  "RowChunker": {
725
845
  "key": "RowChunker",
726
846
  "name": "RowChunker",
727
847
  "description": "RowChunking chunking strategy",
848
+ "metadata": {},
728
849
  },
729
- "MarkdownChunker": {
730
- "key": "MarkdownChunker",
731
- "name": "MarkdownChunker",
732
- "description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
850
+ "SemanticChunker": {
851
+ "key": "SemanticChunker",
852
+ "name": "SemanticChunker",
853
+ "description": "Chunking strategy that splits text into semantic chunks using chonkie",
854
+ "metadata": {"chunk_size": 5000},
733
855
  },
734
856
  },
857
+ "vector_dbs": [
858
+ {
859
+ "id": "vector_db_1",
860
+ "name": "Vector DB 1",
861
+ "description": "Vector DB 1 description",
862
+ "search_types": ["vector", "keyword", "hybrid"],
863
+ }
864
+ ],
735
865
  "filters": ["filter_tag_1", "filter_tag2"],
736
866
  }
737
867
  }
@@ -787,14 +917,32 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
787
917
  chunker_key = chunker_info.get("key")
788
918
  if chunker_key:
789
919
  chunkers_dict[chunker_key] = ChunkerSchema(
790
- key=chunker_key, name=chunker_info.get("name"), description=chunker_info.get("description")
920
+ key=chunker_key,
921
+ name=chunker_info.get("name"),
922
+ description=chunker_info.get("description"),
923
+ metadata=chunker_info.get("metadata", {}),
791
924
  )
792
925
 
926
+ vector_dbs = []
927
+ if knowledge.vector_db:
928
+ search_types = knowledge.vector_db.get_supported_search_types()
929
+ name = knowledge.vector_db.name
930
+ db_id = knowledge.vector_db.id
931
+ vector_dbs.append(
932
+ VectorDbSchema(
933
+ id=db_id,
934
+ name=name,
935
+ description=knowledge.vector_db.description,
936
+ search_types=search_types,
937
+ )
938
+ )
939
+
793
940
  return ConfigResponseSchema(
794
941
  readers=reader_schemas,
942
+ vector_dbs=vector_dbs,
795
943
  readersForType=types_of_readers,
796
944
  chunkers=chunkers_dict,
797
- filters=knowledge.get_filters(),
945
+ filters=knowledge.get_valid_filters(),
798
946
  )
799
947
 
800
948
  return router
@@ -802,15 +950,15 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
802
950
 
803
951
  async def process_content(
804
952
  knowledge: Knowledge,
805
- content_id: str,
806
953
  content: Content,
807
954
  reader_id: Optional[str] = None,
808
955
  chunker: Optional[str] = None,
956
+ chunk_size: Optional[int] = None,
957
+ chunk_overlap: Optional[int] = None,
809
958
  ):
810
959
  """Background task to process the content"""
811
- log_info(f"Processing content {content_id}")
960
+
812
961
  try:
813
- content.id = content_id
814
962
  if reader_id:
815
963
  reader = None
816
964
  if knowledge.readers and reader_id in knowledge.readers:
@@ -829,21 +977,20 @@ async def process_content(
829
977
  content.reader = reader
830
978
  if chunker and content.reader:
831
979
  # Set the chunker name on the reader - let the reader handle it internally
832
- content.reader.set_chunking_strategy_from_string(chunker)
980
+ content.reader.set_chunking_strategy_from_string(chunker, chunk_size=chunk_size, overlap=chunk_overlap)
833
981
  log_debug(f"Set chunking strategy: {chunker}")
834
982
 
835
983
  log_debug(f"Using reader: {content.reader.__class__.__name__}")
836
984
  await knowledge._load_content(content, upsert=False, skip_if_exists=True)
837
- log_info(f"Content {content_id} processed successfully")
985
+ log_info(f"Content {content.id} processed successfully")
838
986
  except Exception as e:
839
- log_info(f"Error processing content {content_id}: {e}")
987
+ log_info(f"Error processing content: {e}")
840
988
  # Mark content as failed in the contents DB
841
989
  try:
842
990
  from agno.knowledge.content import ContentStatus as KnowledgeContentStatus
843
991
 
844
992
  content.status = KnowledgeContentStatus.FAILED
845
993
  content.status_message = str(e)
846
- content.id = content_id
847
994
  knowledge.patch_content(content)
848
995
  except Exception:
849
996
  # Swallow any secondary errors to avoid crashing the background task
@@ -16,23 +16,23 @@ class ContentStatus(str, Enum):
16
16
  class ContentStatusResponse(BaseModel):
17
17
  """Response model for content status endpoint."""
18
18
 
19
- status: ContentStatus
20
- status_message: str = ""
19
+ status: ContentStatus = Field(..., description="Current processing status of the content")
20
+ status_message: str = Field("", description="Status message or error details")
21
21
 
22
22
 
23
23
  class ContentResponseSchema(BaseModel):
24
- id: str
25
- name: Optional[str] = None
26
- description: Optional[str] = None
27
- type: Optional[str] = None
28
- size: Optional[str] = None
29
- linked_to: Optional[str] = None
30
- metadata: Optional[dict] = None
31
- access_count: Optional[int] = None
32
- status: Optional[ContentStatus] = None
33
- status_message: Optional[str] = None
34
- created_at: Optional[datetime] = None
35
- updated_at: Optional[datetime] = None
24
+ id: str = Field(..., description="Unique identifier for the content")
25
+ name: Optional[str] = Field(None, description="Name of the content")
26
+ description: Optional[str] = Field(None, description="Description of the content")
27
+ type: Optional[str] = Field(None, description="MIME type of the content")
28
+ size: Optional[str] = Field(None, description="Size of the content in bytes")
29
+ linked_to: Optional[str] = Field(None, description="ID of related content if linked")
30
+ metadata: Optional[dict] = Field(None, description="Additional metadata as key-value pairs")
31
+ access_count: Optional[int] = Field(None, description="Number of times content has been accessed", ge=0)
32
+ status: Optional[ContentStatus] = Field(None, description="Processing status of the content")
33
+ status_message: Optional[str] = Field(None, description="Status message or error details")
34
+ created_at: Optional[datetime] = Field(None, description="Timestamp when content was created")
35
+ updated_at: Optional[datetime] = Field(None, description="Timestamp when content was last updated")
36
36
 
37
37
  @classmethod
38
38
  def from_dict(cls, content: Dict[str, Any]) -> "ContentResponseSchema":
@@ -99,20 +99,80 @@ class ContentUpdateSchema(BaseModel):
99
99
 
100
100
 
101
101
  class ReaderSchema(BaseModel):
102
- id: str
103
- name: Optional[str] = None
104
- description: Optional[str] = None
105
- chunkers: Optional[List[str]] = None
102
+ id: str = Field(..., description="Unique identifier for the reader")
103
+ name: Optional[str] = Field(None, description="Name of the reader")
104
+ description: Optional[str] = Field(None, description="Description of the reader's capabilities")
105
+ chunkers: Optional[List[str]] = Field(None, description="List of supported chunking strategies")
106
106
 
107
107
 
108
108
  class ChunkerSchema(BaseModel):
109
109
  key: str
110
110
  name: Optional[str] = None
111
111
  description: Optional[str] = None
112
+ metadata: Optional[Dict[str, Any]] = None
113
+
114
+
115
+ class VectorDbSchema(BaseModel):
116
+ id: str = Field(..., description="Unique identifier for the vector database")
117
+ name: Optional[str] = Field(None, description="Name of the vector database")
118
+ description: Optional[str] = Field(None, description="Description of the vector database")
119
+ search_types: Optional[List[str]] = Field(
120
+ None, description="List of supported search types (vector, keyword, hybrid)"
121
+ )
122
+
123
+
124
+ class VectorSearchResult(BaseModel):
125
+ """Schema for search result documents."""
126
+
127
+ id: str = Field(..., description="Unique identifier for the search result document")
128
+ content: str = Field(..., description="Content text of the document")
129
+ name: Optional[str] = Field(None, description="Name of the document")
130
+ meta_data: Optional[Dict[str, Any]] = Field(None, description="Metadata associated with the document")
131
+ usage: Optional[Dict[str, Any]] = Field(None, description="Usage statistics (e.g., token counts)")
132
+ reranking_score: Optional[float] = Field(None, description="Reranking score for relevance", ge=0.0, le=1.0)
133
+ content_id: Optional[str] = Field(None, description="ID of the source content")
134
+ content_origin: Optional[str] = Field(None, description="Origin URL or source of the content")
135
+ size: Optional[int] = Field(None, description="Size of the content in bytes", ge=0)
136
+
137
+ @classmethod
138
+ def from_document(cls, document) -> "VectorSearchResult":
139
+ """Convert a Document object to a serializable VectorSearchResult."""
140
+ return cls(
141
+ id=document.id,
142
+ content=document.content,
143
+ name=getattr(document, "name", None),
144
+ meta_data=getattr(document, "meta_data", None),
145
+ usage=getattr(document, "usage", None),
146
+ reranking_score=getattr(document, "reranking_score", None),
147
+ content_id=getattr(document, "content_id", None),
148
+ content_origin=getattr(document, "content_origin", None),
149
+ size=getattr(document, "size", None),
150
+ )
151
+
152
+
153
+ class VectorSearchRequestSchema(BaseModel):
154
+ """Schema for vector search request."""
155
+
156
+ class Meta(BaseModel):
157
+ """Inline metadata schema for pagination."""
158
+
159
+ limit: int = Field(20, description="Number of results per page", ge=1, le=100)
160
+ page: int = Field(1, description="Page number", ge=1)
161
+
162
+ query: str = Field(..., description="The search query text")
163
+ db_id: Optional[str] = Field(None, description="The content database ID to search in")
164
+ vector_db_ids: Optional[List[str]] = Field(None, description="List of vector database IDs to search in")
165
+ search_type: Optional[str] = Field(None, description="The type of search to perform (vector, keyword, hybrid)")
166
+ max_results: Optional[int] = Field(None, description="The maximum number of results to return", ge=1, le=1000)
167
+ filters: Optional[Dict[str, Any]] = Field(None, description="Filters to apply to the search results")
168
+ meta: Optional[Meta] = Field(
169
+ None, description="Pagination metadata. Limit and page number to return a subset of results."
170
+ )
112
171
 
113
172
 
114
173
  class ConfigResponseSchema(BaseModel):
115
- readers: Optional[Dict[str, ReaderSchema]] = None
116
- readersForType: Optional[Dict[str, List[str]]] = None
117
- chunkers: Optional[Dict[str, ChunkerSchema]] = None
118
- filters: Optional[List[str]] = None
174
+ readers: Optional[Dict[str, ReaderSchema]] = Field(None, description="Available content readers")
175
+ readersForType: Optional[Dict[str, List[str]]] = Field(None, description="Mapping of content types to reader IDs")
176
+ chunkers: Optional[Dict[str, ChunkerSchema]] = Field(None, description="Available chunking strategies")
177
+ filters: Optional[List[str]] = Field(None, description="Available filter tags")
178
+ vector_dbs: Optional[List[VectorDbSchema]] = Field(None, description="Configured vector databases")