agno 2.0.0a1__py3-none-any.whl → 2.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +416 -41
- agno/api/agent.py +2 -2
- agno/api/evals.py +2 -2
- agno/api/os.py +1 -1
- agno/api/settings.py +2 -2
- agno/api/team.py +2 -2
- agno/db/dynamo/dynamo.py +0 -6
- agno/db/firestore/firestore.py +0 -6
- agno/db/in_memory/in_memory_db.py +0 -6
- agno/db/json/json_db.py +0 -6
- agno/db/mongo/mongo.py +8 -9
- agno/db/mysql/utils.py +0 -1
- agno/db/postgres/postgres.py +0 -10
- agno/db/postgres/utils.py +0 -1
- agno/db/redis/redis.py +0 -4
- agno/db/singlestore/singlestore.py +0 -10
- agno/db/singlestore/utils.py +0 -1
- agno/db/sqlite/sqlite.py +0 -4
- agno/db/sqlite/utils.py +0 -1
- agno/eval/accuracy.py +12 -5
- agno/integrations/discord/client.py +5 -1
- agno/knowledge/chunking/strategy.py +14 -14
- agno/knowledge/embedder/aws_bedrock.py +2 -2
- agno/knowledge/knowledge.py +156 -120
- agno/knowledge/reader/arxiv_reader.py +5 -5
- agno/knowledge/reader/csv_reader.py +6 -77
- agno/knowledge/reader/docx_reader.py +5 -5
- agno/knowledge/reader/firecrawl_reader.py +5 -5
- agno/knowledge/reader/json_reader.py +5 -5
- agno/knowledge/reader/markdown_reader.py +31 -9
- agno/knowledge/reader/pdf_reader.py +10 -123
- agno/knowledge/reader/reader_factory.py +65 -72
- agno/knowledge/reader/s3_reader.py +44 -114
- agno/knowledge/reader/text_reader.py +5 -5
- agno/knowledge/reader/url_reader.py +75 -31
- agno/knowledge/reader/web_search_reader.py +6 -29
- agno/knowledge/reader/website_reader.py +5 -5
- agno/knowledge/reader/wikipedia_reader.py +5 -5
- agno/knowledge/reader/youtube_reader.py +6 -6
- agno/knowledge/utils.py +10 -10
- agno/models/anthropic/claude.py +2 -49
- agno/models/aws/bedrock.py +3 -7
- agno/models/base.py +37 -6
- agno/models/message.py +7 -6
- agno/os/app.py +168 -64
- agno/os/interfaces/agui/agui.py +1 -1
- agno/os/interfaces/agui/utils.py +16 -9
- agno/os/interfaces/slack/slack.py +2 -3
- agno/os/interfaces/whatsapp/whatsapp.py +2 -3
- agno/os/mcp.py +235 -0
- agno/os/router.py +576 -19
- agno/os/routers/evals/evals.py +201 -12
- agno/os/routers/knowledge/knowledge.py +455 -18
- agno/os/routers/memory/memory.py +260 -29
- agno/os/routers/metrics/metrics.py +127 -7
- agno/os/routers/session/session.py +398 -25
- agno/os/schema.py +55 -2
- agno/os/settings.py +0 -1
- agno/run/agent.py +96 -2
- agno/run/cancel.py +0 -2
- agno/run/team.py +93 -2
- agno/run/workflow.py +25 -12
- agno/team/team.py +863 -1053
- agno/tools/function.py +65 -7
- agno/tools/linear.py +1 -1
- agno/tools/mcp.py +1 -2
- agno/utils/gemini.py +31 -1
- agno/utils/log.py +52 -2
- agno/utils/mcp.py +55 -3
- agno/utils/models/claude.py +41 -0
- agno/utils/print_response/team.py +177 -73
- agno/utils/streamlit.py +481 -0
- agno/workflow/workflow.py +17 -1
- {agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/METADATA +1 -1
- {agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/RECORD +78 -77
- agno/knowledge/reader/gcs_reader.py +0 -67
- {agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/WHEEL +0 -0
- {agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,16 @@ from agno.os.routers.knowledge.schemas import (
|
|
|
21
21
|
ContentUpdateSchema,
|
|
22
22
|
ReaderSchema,
|
|
23
23
|
)
|
|
24
|
-
from agno.os.schema import
|
|
24
|
+
from agno.os.schema import (
|
|
25
|
+
BadRequestResponse,
|
|
26
|
+
InternalServerErrorResponse,
|
|
27
|
+
NotFoundResponse,
|
|
28
|
+
PaginatedResponse,
|
|
29
|
+
PaginationInfo,
|
|
30
|
+
SortOrder,
|
|
31
|
+
UnauthenticatedResponse,
|
|
32
|
+
ValidationErrorResponse,
|
|
33
|
+
)
|
|
25
34
|
from agno.os.settings import AgnoAPISettings
|
|
26
35
|
from agno.os.utils import get_knowledge_instance_by_db_id
|
|
27
36
|
from agno.utils.log import log_debug, log_info
|
|
@@ -32,23 +41,65 @@ logger = logging.getLogger(__name__)
|
|
|
32
41
|
def get_knowledge_router(
|
|
33
42
|
knowledge_instances: List[Knowledge], settings: AgnoAPISettings = AgnoAPISettings()
|
|
34
43
|
) -> APIRouter:
|
|
35
|
-
router
|
|
44
|
+
"""Create knowledge router with comprehensive OpenAPI documentation for content management endpoints."""
|
|
45
|
+
router = APIRouter(
|
|
46
|
+
dependencies=[Depends(get_authentication_dependency(settings))],
|
|
47
|
+
tags=["Knowledge"],
|
|
48
|
+
responses={
|
|
49
|
+
400: {"description": "Bad Request", "model": BadRequestResponse},
|
|
50
|
+
401: {"description": "Unauthorized", "model": UnauthenticatedResponse},
|
|
51
|
+
404: {"description": "Not Found", "model": NotFoundResponse},
|
|
52
|
+
422: {"description": "Validation Error", "model": ValidationErrorResponse},
|
|
53
|
+
500: {"description": "Internal Server Error", "model": InternalServerErrorResponse},
|
|
54
|
+
},
|
|
55
|
+
)
|
|
36
56
|
return attach_routes(router=router, knowledge_instances=knowledge_instances)
|
|
37
57
|
|
|
38
58
|
|
|
39
59
|
def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> APIRouter:
|
|
40
|
-
@router.post(
|
|
60
|
+
@router.post(
|
|
61
|
+
"/knowledge/content",
|
|
62
|
+
response_model=ContentResponseSchema,
|
|
63
|
+
status_code=202,
|
|
64
|
+
operation_id="upload_content",
|
|
65
|
+
summary="Upload Content",
|
|
66
|
+
description=(
|
|
67
|
+
"Upload content to the knowledge base. Supports file uploads, text content, or URLs. "
|
|
68
|
+
"Content is processed asynchronously in the background. Supports custom readers and chunking strategies."
|
|
69
|
+
),
|
|
70
|
+
responses={
|
|
71
|
+
202: {
|
|
72
|
+
"description": "Content upload accepted for processing",
|
|
73
|
+
"content": {
|
|
74
|
+
"application/json": {
|
|
75
|
+
"example": {
|
|
76
|
+
"id": "content-123",
|
|
77
|
+
"name": "example-document.pdf",
|
|
78
|
+
"description": "Sample document for processing",
|
|
79
|
+
"metadata": {"category": "documentation", "priority": "high"},
|
|
80
|
+
"status": "processing",
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
},
|
|
85
|
+
400: {
|
|
86
|
+
"description": "Invalid request - malformed metadata or missing content",
|
|
87
|
+
"model": BadRequestResponse,
|
|
88
|
+
},
|
|
89
|
+
422: {"description": "Validation error in form data", "model": ValidationErrorResponse},
|
|
90
|
+
},
|
|
91
|
+
)
|
|
41
92
|
async def upload_content(
|
|
42
93
|
background_tasks: BackgroundTasks,
|
|
43
|
-
name: Optional[str] = Form(None),
|
|
44
|
-
description: Optional[str] = Form(None),
|
|
45
|
-
url: Optional[str] = Form(None),
|
|
46
|
-
metadata: Optional[str] = Form(None, description="JSON metadata"),
|
|
47
|
-
file: Optional[UploadFile] = File(None),
|
|
48
|
-
text_content: Optional[str] = Form(None),
|
|
49
|
-
reader_id: Optional[str] = Form(None),
|
|
50
|
-
chunker: Optional[str] = Form(None),
|
|
51
|
-
db_id: Optional[str] = Query(default=None, description="
|
|
94
|
+
name: Optional[str] = Form(None, description="Content name (auto-generated from file/URL if not provided)"),
|
|
95
|
+
description: Optional[str] = Form(None, description="Content description for context"),
|
|
96
|
+
url: Optional[str] = Form(None, description="URL to fetch content from (JSON array or single URL string)"),
|
|
97
|
+
metadata: Optional[str] = Form(None, description="JSON metadata object for additional content properties"),
|
|
98
|
+
file: Optional[UploadFile] = File(None, description="File to upload for processing"),
|
|
99
|
+
text_content: Optional[str] = Form(None, description="Raw text content to process"),
|
|
100
|
+
reader_id: Optional[str] = Form(None, description="ID of the reader to use for content processing"),
|
|
101
|
+
chunker: Optional[str] = Form(None, description="Chunking strategy to apply during processing"),
|
|
102
|
+
db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
|
|
52
103
|
):
|
|
53
104
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
54
105
|
content_id = str(uuid4())
|
|
@@ -126,7 +177,45 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
126
177
|
)
|
|
127
178
|
return response
|
|
128
179
|
|
|
129
|
-
@router.patch(
|
|
180
|
+
@router.patch(
|
|
181
|
+
"/knowledge/content/{content_id}",
|
|
182
|
+
response_model=ContentResponseSchema,
|
|
183
|
+
status_code=200,
|
|
184
|
+
operation_id="update_content",
|
|
185
|
+
summary="Update Content",
|
|
186
|
+
description=(
|
|
187
|
+
"Update content properties such as name, description, metadata, or processing configuration. "
|
|
188
|
+
"Allows modification of existing content without re-uploading."
|
|
189
|
+
),
|
|
190
|
+
responses={
|
|
191
|
+
200: {
|
|
192
|
+
"description": "Content updated successfully",
|
|
193
|
+
"content": {
|
|
194
|
+
"application/json": {
|
|
195
|
+
"example": {
|
|
196
|
+
"id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
|
|
197
|
+
"name": "example.pdf",
|
|
198
|
+
"description": "",
|
|
199
|
+
"type": "application/pdf",
|
|
200
|
+
"size": "251261",
|
|
201
|
+
"linked_to": None,
|
|
202
|
+
"metadata": {},
|
|
203
|
+
"access_count": 1,
|
|
204
|
+
"status": "completed",
|
|
205
|
+
"status_message": "",
|
|
206
|
+
"created_at": "2025-09-08T15:22:53Z",
|
|
207
|
+
"updated_at": "2025-09-08T15:22:54Z",
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
},
|
|
211
|
+
},
|
|
212
|
+
400: {
|
|
213
|
+
"description": "Invalid request - malformed metadata or invalid reader_id",
|
|
214
|
+
"model": BadRequestResponse,
|
|
215
|
+
},
|
|
216
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
217
|
+
},
|
|
218
|
+
)
|
|
130
219
|
async def update_content(
|
|
131
220
|
content_id: str = Path(..., description="Content ID"),
|
|
132
221
|
name: Optional[str] = Form(None, description="Content name"),
|
|
@@ -172,7 +261,45 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
172
261
|
|
|
173
262
|
return ContentResponseSchema.from_dict(updated_content_dict)
|
|
174
263
|
|
|
175
|
-
@router.get(
|
|
264
|
+
@router.get(
|
|
265
|
+
"/knowledge/content",
|
|
266
|
+
response_model=PaginatedResponse[ContentResponseSchema],
|
|
267
|
+
status_code=200,
|
|
268
|
+
operation_id="get_content",
|
|
269
|
+
summary="List Content",
|
|
270
|
+
description=(
|
|
271
|
+
"Retrieve paginated list of all content in the knowledge base with filtering and sorting options. "
|
|
272
|
+
"Filter by status, content type, or metadata properties."
|
|
273
|
+
),
|
|
274
|
+
responses={
|
|
275
|
+
200: {
|
|
276
|
+
"description": "Content list retrieved successfully",
|
|
277
|
+
"content": {
|
|
278
|
+
"application/json": {
|
|
279
|
+
"example": {
|
|
280
|
+
"data": [
|
|
281
|
+
{
|
|
282
|
+
"id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
|
|
283
|
+
"name": "example.pdf",
|
|
284
|
+
"description": "",
|
|
285
|
+
"type": "application/pdf",
|
|
286
|
+
"size": "251261",
|
|
287
|
+
"linked_to": None,
|
|
288
|
+
"metadata": {},
|
|
289
|
+
"access_count": 1,
|
|
290
|
+
"status": "completed",
|
|
291
|
+
"status_message": "",
|
|
292
|
+
"created_at": "2025-09-08T15:22:53Z",
|
|
293
|
+
"updated_at": "2025-09-08T15:22:54Z",
|
|
294
|
+
},
|
|
295
|
+
],
|
|
296
|
+
"meta": {"page": 1, "limit": 20, "total_pages": 1, "total_count": 2},
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
},
|
|
300
|
+
}
|
|
301
|
+
},
|
|
302
|
+
)
|
|
176
303
|
def get_content(
|
|
177
304
|
limit: Optional[int] = Query(default=20, description="Number of content entries to return"),
|
|
178
305
|
page: Optional[int] = Query(default=1, description="Page number"),
|
|
@@ -209,7 +336,38 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
209
336
|
),
|
|
210
337
|
)
|
|
211
338
|
|
|
212
|
-
@router.get(
|
|
339
|
+
@router.get(
|
|
340
|
+
"/knowledge/content/{content_id}",
|
|
341
|
+
response_model=ContentResponseSchema,
|
|
342
|
+
status_code=200,
|
|
343
|
+
operation_id="get_content_by_id",
|
|
344
|
+
summary="Get Content by ID",
|
|
345
|
+
description="Retrieve detailed information about a specific content item including processing status and metadata.",
|
|
346
|
+
responses={
|
|
347
|
+
200: {
|
|
348
|
+
"description": "Content details retrieved successfully",
|
|
349
|
+
"content": {
|
|
350
|
+
"application/json": {
|
|
351
|
+
"example": {
|
|
352
|
+
"id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
|
|
353
|
+
"name": "example.pdf",
|
|
354
|
+
"description": "",
|
|
355
|
+
"type": "application/pdf",
|
|
356
|
+
"size": "251261",
|
|
357
|
+
"linked_to": None,
|
|
358
|
+
"metadata": {},
|
|
359
|
+
"access_count": 1,
|
|
360
|
+
"status": "completed",
|
|
361
|
+
"status_message": "",
|
|
362
|
+
"created_at": "2025-09-08T15:22:53Z",
|
|
363
|
+
"updated_at": "2025-09-08T15:22:54Z",
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
},
|
|
367
|
+
},
|
|
368
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
369
|
+
},
|
|
370
|
+
)
|
|
213
371
|
def get_content_by_id(
|
|
214
372
|
content_id: str,
|
|
215
373
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
@@ -241,6 +399,14 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
241
399
|
response_model=ContentResponseSchema,
|
|
242
400
|
status_code=200,
|
|
243
401
|
response_model_exclude_none=True,
|
|
402
|
+
operation_id="delete_content_by_id",
|
|
403
|
+
summary="Delete Content by ID",
|
|
404
|
+
description="Permanently remove a specific content item from the knowledge base. This action cannot be undone.",
|
|
405
|
+
responses={
|
|
406
|
+
200: {},
|
|
407
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
408
|
+
500: {"description": "Failed to delete content", "model": InternalServerErrorResponse},
|
|
409
|
+
},
|
|
244
410
|
)
|
|
245
411
|
def delete_content_by_id(
|
|
246
412
|
content_id: str,
|
|
@@ -254,16 +420,59 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
254
420
|
id=content_id,
|
|
255
421
|
)
|
|
256
422
|
|
|
257
|
-
@router.delete(
|
|
423
|
+
@router.delete(
|
|
424
|
+
"/knowledge/content",
|
|
425
|
+
status_code=200,
|
|
426
|
+
operation_id="delete_all_content",
|
|
427
|
+
summary="Delete All Content",
|
|
428
|
+
description=(
|
|
429
|
+
"Permanently remove all content from the knowledge base. This is a destructive operation that "
|
|
430
|
+
"cannot be undone. Use with extreme caution."
|
|
431
|
+
),
|
|
432
|
+
responses={
|
|
433
|
+
200: {},
|
|
434
|
+
500: {"description": "Failed to delete all content", "model": InternalServerErrorResponse},
|
|
435
|
+
},
|
|
436
|
+
)
|
|
258
437
|
def delete_all_content(
|
|
259
438
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
260
439
|
):
|
|
261
440
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
262
441
|
log_info("Deleting all content")
|
|
263
442
|
knowledge.remove_all_content()
|
|
443
|
+
|
|
264
444
|
return "success"
|
|
265
445
|
|
|
266
|
-
@router.get(
|
|
446
|
+
@router.get(
|
|
447
|
+
"/knowledge/content/{content_id}/status",
|
|
448
|
+
status_code=200,
|
|
449
|
+
response_model=ContentStatusResponse,
|
|
450
|
+
operation_id="get_content_status",
|
|
451
|
+
summary="Get Content Status",
|
|
452
|
+
description=(
|
|
453
|
+
"Retrieve the current processing status of a content item. Useful for monitoring "
|
|
454
|
+
"asynchronous content processing progress and identifying any processing errors."
|
|
455
|
+
),
|
|
456
|
+
responses={
|
|
457
|
+
200: {
|
|
458
|
+
"description": "Content status retrieved successfully",
|
|
459
|
+
"content": {
|
|
460
|
+
"application/json": {
|
|
461
|
+
"examples": {
|
|
462
|
+
"completed": {
|
|
463
|
+
"summary": "Example completed content status",
|
|
464
|
+
"value": {
|
|
465
|
+
"status": "completed",
|
|
466
|
+
"status_message": "",
|
|
467
|
+
},
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
},
|
|
472
|
+
},
|
|
473
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
474
|
+
},
|
|
475
|
+
)
|
|
267
476
|
def get_content_status(
|
|
268
477
|
content_id: str,
|
|
269
478
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
@@ -301,7 +510,235 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
301
510
|
|
|
302
511
|
return ContentStatusResponse(status=status, status_message=status_message or "")
|
|
303
512
|
|
|
304
|
-
@router.get(
|
|
513
|
+
@router.get(
|
|
514
|
+
"/knowledge/config",
|
|
515
|
+
status_code=200,
|
|
516
|
+
operation_id="get_knowledge_config",
|
|
517
|
+
summary="Get Knowledge Configuration",
|
|
518
|
+
description=(
|
|
519
|
+
"Retrieve available readers, chunkers, and configuration options for content processing. "
|
|
520
|
+
"This endpoint provides metadata about supported file types, processing strategies, and filters."
|
|
521
|
+
),
|
|
522
|
+
responses={
|
|
523
|
+
200: {
|
|
524
|
+
"description": "Knowledge configuration retrieved successfully",
|
|
525
|
+
"content": {
|
|
526
|
+
"application/json": {
|
|
527
|
+
"example": {
|
|
528
|
+
"readers": {
|
|
529
|
+
"website": {
|
|
530
|
+
"id": "website",
|
|
531
|
+
"name": "WebsiteReader",
|
|
532
|
+
"description": "Reads website files",
|
|
533
|
+
"chunkers": [
|
|
534
|
+
"AgenticChunker",
|
|
535
|
+
"DocumentChunker",
|
|
536
|
+
"RecursiveChunker",
|
|
537
|
+
"SemanticChunker",
|
|
538
|
+
"FixedSizeChunker",
|
|
539
|
+
],
|
|
540
|
+
},
|
|
541
|
+
"firecrawl": {
|
|
542
|
+
"id": "firecrawl",
|
|
543
|
+
"name": "FirecrawlReader",
|
|
544
|
+
"description": "Reads firecrawl files",
|
|
545
|
+
"chunkers": [
|
|
546
|
+
"SemanticChunker",
|
|
547
|
+
"FixedSizeChunker",
|
|
548
|
+
"AgenticChunker",
|
|
549
|
+
"DocumentChunker",
|
|
550
|
+
"RecursiveChunker",
|
|
551
|
+
],
|
|
552
|
+
},
|
|
553
|
+
"youtube": {
|
|
554
|
+
"id": "youtube",
|
|
555
|
+
"name": "YoutubeReader",
|
|
556
|
+
"description": "Reads youtube files",
|
|
557
|
+
"chunkers": [
|
|
558
|
+
"RecursiveChunker",
|
|
559
|
+
"AgenticChunker",
|
|
560
|
+
"DocumentChunker",
|
|
561
|
+
"SemanticChunker",
|
|
562
|
+
"FixedSizeChunker",
|
|
563
|
+
],
|
|
564
|
+
},
|
|
565
|
+
"web_search": {
|
|
566
|
+
"id": "web_search",
|
|
567
|
+
"name": "WebSearchReader",
|
|
568
|
+
"description": "Reads web_search files",
|
|
569
|
+
"chunkers": [
|
|
570
|
+
"AgenticChunker",
|
|
571
|
+
"DocumentChunker",
|
|
572
|
+
"RecursiveChunker",
|
|
573
|
+
"SemanticChunker",
|
|
574
|
+
"FixedSizeChunker",
|
|
575
|
+
],
|
|
576
|
+
},
|
|
577
|
+
"arxiv": {
|
|
578
|
+
"id": "arxiv",
|
|
579
|
+
"name": "ArxivReader",
|
|
580
|
+
"description": "Reads arxiv files",
|
|
581
|
+
"chunkers": [
|
|
582
|
+
"FixedSizeChunker",
|
|
583
|
+
"AgenticChunker",
|
|
584
|
+
"DocumentChunker",
|
|
585
|
+
"RecursiveChunker",
|
|
586
|
+
"SemanticChunker",
|
|
587
|
+
],
|
|
588
|
+
},
|
|
589
|
+
"csv": {
|
|
590
|
+
"id": "csv",
|
|
591
|
+
"name": "CsvReader",
|
|
592
|
+
"description": "Reads csv files",
|
|
593
|
+
"chunkers": [
|
|
594
|
+
"RowChunker",
|
|
595
|
+
"FixedSizeChunker",
|
|
596
|
+
"AgenticChunker",
|
|
597
|
+
"DocumentChunker",
|
|
598
|
+
"RecursiveChunker",
|
|
599
|
+
],
|
|
600
|
+
},
|
|
601
|
+
"docx": {
|
|
602
|
+
"id": "docx",
|
|
603
|
+
"name": "DocxReader",
|
|
604
|
+
"description": "Reads docx files",
|
|
605
|
+
"chunkers": [
|
|
606
|
+
"DocumentChunker",
|
|
607
|
+
"FixedSizeChunker",
|
|
608
|
+
"SemanticChunker",
|
|
609
|
+
"AgenticChunker",
|
|
610
|
+
"RecursiveChunker",
|
|
611
|
+
],
|
|
612
|
+
},
|
|
613
|
+
"gcs": {
|
|
614
|
+
"id": "gcs",
|
|
615
|
+
"name": "GcsReader",
|
|
616
|
+
"description": "Reads gcs files",
|
|
617
|
+
"chunkers": [
|
|
618
|
+
"FixedSizeChunker",
|
|
619
|
+
"AgenticChunker",
|
|
620
|
+
"DocumentChunker",
|
|
621
|
+
"RecursiveChunker",
|
|
622
|
+
"SemanticChunker",
|
|
623
|
+
],
|
|
624
|
+
},
|
|
625
|
+
"json": {
|
|
626
|
+
"id": "json",
|
|
627
|
+
"name": "JsonReader",
|
|
628
|
+
"description": "Reads json files",
|
|
629
|
+
"chunkers": [
|
|
630
|
+
"FixedSizeChunker",
|
|
631
|
+
"AgenticChunker",
|
|
632
|
+
"DocumentChunker",
|
|
633
|
+
"RecursiveChunker",
|
|
634
|
+
"SemanticChunker",
|
|
635
|
+
],
|
|
636
|
+
},
|
|
637
|
+
"markdown": {
|
|
638
|
+
"id": "markdown",
|
|
639
|
+
"name": "MarkdownReader",
|
|
640
|
+
"description": "Reads markdown files",
|
|
641
|
+
"chunkers": [
|
|
642
|
+
"MarkdownChunker",
|
|
643
|
+
"DocumentChunker",
|
|
644
|
+
"AgenticChunker",
|
|
645
|
+
"RecursiveChunker",
|
|
646
|
+
"SemanticChunker",
|
|
647
|
+
"FixedSizeChunker",
|
|
648
|
+
],
|
|
649
|
+
},
|
|
650
|
+
"pdf": {
|
|
651
|
+
"id": "pdf",
|
|
652
|
+
"name": "PdfReader",
|
|
653
|
+
"description": "Reads pdf files",
|
|
654
|
+
"chunkers": [
|
|
655
|
+
"DocumentChunker",
|
|
656
|
+
"FixedSizeChunker",
|
|
657
|
+
"AgenticChunker",
|
|
658
|
+
"SemanticChunker",
|
|
659
|
+
"RecursiveChunker",
|
|
660
|
+
],
|
|
661
|
+
},
|
|
662
|
+
"text": {
|
|
663
|
+
"id": "text",
|
|
664
|
+
"name": "TextReader",
|
|
665
|
+
"description": "Reads text files",
|
|
666
|
+
"chunkers": [
|
|
667
|
+
"FixedSizeChunker",
|
|
668
|
+
"AgenticChunker",
|
|
669
|
+
"DocumentChunker",
|
|
670
|
+
"RecursiveChunker",
|
|
671
|
+
"SemanticChunker",
|
|
672
|
+
],
|
|
673
|
+
},
|
|
674
|
+
},
|
|
675
|
+
"readersForType": {
|
|
676
|
+
"url": [
|
|
677
|
+
"url",
|
|
678
|
+
"website",
|
|
679
|
+
"firecrawl",
|
|
680
|
+
"youtube",
|
|
681
|
+
"web_search",
|
|
682
|
+
"gcs",
|
|
683
|
+
],
|
|
684
|
+
"youtube": ["youtube"],
|
|
685
|
+
"text": ["web_search"],
|
|
686
|
+
"topic": ["arxiv"],
|
|
687
|
+
"file": ["csv", "gcs"],
|
|
688
|
+
".csv": ["csv"],
|
|
689
|
+
".xlsx": ["csv"],
|
|
690
|
+
".xls": ["csv"],
|
|
691
|
+
".docx": ["docx"],
|
|
692
|
+
".doc": ["docx"],
|
|
693
|
+
".json": ["json"],
|
|
694
|
+
".md": ["markdown"],
|
|
695
|
+
".pdf": ["pdf"],
|
|
696
|
+
".txt": ["text"],
|
|
697
|
+
},
|
|
698
|
+
"chunkers": {
|
|
699
|
+
"AgenticChunker": {
|
|
700
|
+
"key": "AgenticChunker",
|
|
701
|
+
"name": "AgenticChunker",
|
|
702
|
+
"description": "Chunking strategy that uses an LLM to determine natural breakpoints in the text",
|
|
703
|
+
},
|
|
704
|
+
"DocumentChunker": {
|
|
705
|
+
"key": "DocumentChunker",
|
|
706
|
+
"name": "DocumentChunker",
|
|
707
|
+
"description": "A chunking strategy that splits text based on document structure like paragraphs and sections",
|
|
708
|
+
},
|
|
709
|
+
"RecursiveChunker": {
|
|
710
|
+
"key": "RecursiveChunker",
|
|
711
|
+
"name": "RecursiveChunker",
|
|
712
|
+
"description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
|
|
713
|
+
},
|
|
714
|
+
"SemanticChunker": {
|
|
715
|
+
"key": "SemanticChunker",
|
|
716
|
+
"name": "SemanticChunker",
|
|
717
|
+
"description": "Chunking strategy that splits text into semantic chunks using chonkie",
|
|
718
|
+
},
|
|
719
|
+
"FixedSizeChunker": {
|
|
720
|
+
"key": "FixedSizeChunker",
|
|
721
|
+
"name": "FixedSizeChunker",
|
|
722
|
+
"description": "Chunking strategy that splits text into fixed-size chunks with optional overlap",
|
|
723
|
+
},
|
|
724
|
+
"RowChunker": {
|
|
725
|
+
"key": "RowChunker",
|
|
726
|
+
"name": "RowChunker",
|
|
727
|
+
"description": "RowChunking chunking strategy",
|
|
728
|
+
},
|
|
729
|
+
"MarkdownChunker": {
|
|
730
|
+
"key": "MarkdownChunker",
|
|
731
|
+
"name": "MarkdownChunker",
|
|
732
|
+
"description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
|
|
733
|
+
},
|
|
734
|
+
},
|
|
735
|
+
"filters": ["filter_tag_1", "filter_tag2"],
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
},
|
|
739
|
+
}
|
|
740
|
+
},
|
|
741
|
+
)
|
|
305
742
|
def get_config(
|
|
306
743
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
307
744
|
) -> ConfigResponseSchema:
|