agno 2.0.0rc1__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +101 -140
- agno/db/mongo/mongo.py +8 -3
- agno/eval/accuracy.py +12 -5
- agno/knowledge/chunking/strategy.py +14 -14
- agno/knowledge/knowledge.py +156 -120
- agno/knowledge/reader/arxiv_reader.py +5 -5
- agno/knowledge/reader/csv_reader.py +6 -77
- agno/knowledge/reader/docx_reader.py +5 -5
- agno/knowledge/reader/firecrawl_reader.py +5 -5
- agno/knowledge/reader/json_reader.py +5 -5
- agno/knowledge/reader/markdown_reader.py +31 -9
- agno/knowledge/reader/pdf_reader.py +10 -123
- agno/knowledge/reader/reader_factory.py +65 -72
- agno/knowledge/reader/s3_reader.py +44 -114
- agno/knowledge/reader/text_reader.py +5 -5
- agno/knowledge/reader/url_reader.py +75 -31
- agno/knowledge/reader/web_search_reader.py +6 -29
- agno/knowledge/reader/website_reader.py +5 -5
- agno/knowledge/reader/wikipedia_reader.py +5 -5
- agno/knowledge/reader/youtube_reader.py +6 -6
- agno/knowledge/reranker/__init__.py +9 -0
- agno/knowledge/utils.py +10 -10
- agno/media.py +269 -268
- agno/models/aws/bedrock.py +3 -7
- agno/models/base.py +50 -54
- agno/models/google/gemini.py +11 -10
- agno/models/message.py +4 -4
- agno/models/ollama/chat.py +1 -1
- agno/models/openai/chat.py +33 -14
- agno/models/response.py +5 -5
- agno/os/app.py +40 -29
- agno/os/mcp.py +39 -59
- agno/os/router.py +547 -16
- agno/os/routers/evals/evals.py +197 -12
- agno/os/routers/knowledge/knowledge.py +428 -14
- agno/os/routers/memory/memory.py +250 -28
- agno/os/routers/metrics/metrics.py +125 -7
- agno/os/routers/session/session.py +393 -25
- agno/os/schema.py +55 -2
- agno/run/agent.py +37 -28
- agno/run/base.py +9 -19
- agno/run/team.py +110 -19
- agno/run/workflow.py +41 -28
- agno/team/team.py +808 -1080
- agno/tools/brightdata.py +3 -3
- agno/tools/cartesia.py +3 -5
- agno/tools/dalle.py +7 -4
- agno/tools/desi_vocal.py +2 -2
- agno/tools/e2b.py +6 -6
- agno/tools/eleven_labs.py +3 -3
- agno/tools/fal.py +4 -4
- agno/tools/function.py +7 -7
- agno/tools/giphy.py +2 -2
- agno/tools/lumalab.py +3 -3
- agno/tools/mcp.py +1 -2
- agno/tools/models/azure_openai.py +2 -2
- agno/tools/models/gemini.py +3 -3
- agno/tools/models/groq.py +3 -5
- agno/tools/models/nebius.py +2 -2
- agno/tools/models_labs.py +5 -5
- agno/tools/openai.py +4 -9
- agno/tools/opencv.py +3 -3
- agno/tools/replicate.py +7 -7
- agno/utils/events.py +5 -5
- agno/utils/gemini.py +1 -1
- agno/utils/log.py +52 -2
- agno/utils/mcp.py +57 -5
- agno/utils/models/aws_claude.py +1 -1
- agno/utils/models/claude.py +0 -8
- agno/utils/models/cohere.py +1 -1
- agno/utils/models/watsonx.py +1 -1
- agno/utils/openai.py +1 -1
- agno/utils/print_response/team.py +177 -73
- agno/utils/streamlit.py +27 -0
- agno/vectordb/lancedb/lance_db.py +82 -25
- agno/workflow/step.py +7 -7
- agno/workflow/types.py +13 -13
- agno/workflow/workflow.py +37 -28
- {agno-2.0.0rc1.dist-info → agno-2.0.1.dist-info}/METADATA +140 -1
- {agno-2.0.0rc1.dist-info → agno-2.0.1.dist-info}/RECORD +83 -84
- agno-2.0.1.dist-info/licenses/LICENSE +201 -0
- agno/knowledge/reader/gcs_reader.py +0 -67
- agno-2.0.0rc1.dist-info/licenses/LICENSE +0 -375
- {agno-2.0.0rc1.dist-info → agno-2.0.1.dist-info}/WHEEL +0 -0
- {agno-2.0.0rc1.dist-info → agno-2.0.1.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,16 @@ from agno.os.routers.knowledge.schemas import (
|
|
|
21
21
|
ContentUpdateSchema,
|
|
22
22
|
ReaderSchema,
|
|
23
23
|
)
|
|
24
|
-
from agno.os.schema import
|
|
24
|
+
from agno.os.schema import (
|
|
25
|
+
BadRequestResponse,
|
|
26
|
+
InternalServerErrorResponse,
|
|
27
|
+
NotFoundResponse,
|
|
28
|
+
PaginatedResponse,
|
|
29
|
+
PaginationInfo,
|
|
30
|
+
SortOrder,
|
|
31
|
+
UnauthenticatedResponse,
|
|
32
|
+
ValidationErrorResponse,
|
|
33
|
+
)
|
|
25
34
|
from agno.os.settings import AgnoAPISettings
|
|
26
35
|
from agno.os.utils import get_knowledge_instance_by_db_id
|
|
27
36
|
from agno.utils.log import log_debug, log_info
|
|
@@ -32,25 +41,65 @@ logger = logging.getLogger(__name__)
|
|
|
32
41
|
def get_knowledge_router(
|
|
33
42
|
knowledge_instances: List[Knowledge], settings: AgnoAPISettings = AgnoAPISettings()
|
|
34
43
|
) -> APIRouter:
|
|
35
|
-
router
|
|
44
|
+
"""Create knowledge router with comprehensive OpenAPI documentation for content management endpoints."""
|
|
45
|
+
router = APIRouter(
|
|
46
|
+
dependencies=[Depends(get_authentication_dependency(settings))],
|
|
47
|
+
tags=["Knowledge"],
|
|
48
|
+
responses={
|
|
49
|
+
400: {"description": "Bad Request", "model": BadRequestResponse},
|
|
50
|
+
401: {"description": "Unauthorized", "model": UnauthenticatedResponse},
|
|
51
|
+
404: {"description": "Not Found", "model": NotFoundResponse},
|
|
52
|
+
422: {"description": "Validation Error", "model": ValidationErrorResponse},
|
|
53
|
+
500: {"description": "Internal Server Error", "model": InternalServerErrorResponse},
|
|
54
|
+
},
|
|
55
|
+
)
|
|
36
56
|
return attach_routes(router=router, knowledge_instances=knowledge_instances)
|
|
37
57
|
|
|
38
58
|
|
|
39
59
|
def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> APIRouter:
|
|
40
60
|
@router.post(
|
|
41
|
-
"/knowledge/content",
|
|
61
|
+
"/knowledge/content",
|
|
62
|
+
response_model=ContentResponseSchema,
|
|
63
|
+
status_code=202,
|
|
64
|
+
operation_id="upload_content",
|
|
65
|
+
summary="Upload Content",
|
|
66
|
+
description=(
|
|
67
|
+
"Upload content to the knowledge base. Supports file uploads, text content, or URLs. "
|
|
68
|
+
"Content is processed asynchronously in the background. Supports custom readers and chunking strategies."
|
|
69
|
+
),
|
|
70
|
+
responses={
|
|
71
|
+
202: {
|
|
72
|
+
"description": "Content upload accepted for processing",
|
|
73
|
+
"content": {
|
|
74
|
+
"application/json": {
|
|
75
|
+
"example": {
|
|
76
|
+
"id": "content-123",
|
|
77
|
+
"name": "example-document.pdf",
|
|
78
|
+
"description": "Sample document for processing",
|
|
79
|
+
"metadata": {"category": "documentation", "priority": "high"},
|
|
80
|
+
"status": "processing",
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
},
|
|
85
|
+
400: {
|
|
86
|
+
"description": "Invalid request - malformed metadata or missing content",
|
|
87
|
+
"model": BadRequestResponse,
|
|
88
|
+
},
|
|
89
|
+
422: {"description": "Validation error in form data", "model": ValidationErrorResponse},
|
|
90
|
+
},
|
|
42
91
|
)
|
|
43
92
|
async def upload_content(
|
|
44
93
|
background_tasks: BackgroundTasks,
|
|
45
|
-
name: Optional[str] = Form(None),
|
|
46
|
-
description: Optional[str] = Form(None),
|
|
47
|
-
url: Optional[str] = Form(None),
|
|
48
|
-
metadata: Optional[str] = Form(None, description="JSON metadata"),
|
|
49
|
-
file: Optional[UploadFile] = File(None),
|
|
50
|
-
text_content: Optional[str] = Form(None),
|
|
51
|
-
reader_id: Optional[str] = Form(None),
|
|
52
|
-
chunker: Optional[str] = Form(None),
|
|
53
|
-
db_id: Optional[str] = Query(default=None, description="
|
|
94
|
+
name: Optional[str] = Form(None, description="Content name (auto-generated from file/URL if not provided)"),
|
|
95
|
+
description: Optional[str] = Form(None, description="Content description for context"),
|
|
96
|
+
url: Optional[str] = Form(None, description="URL to fetch content from (JSON array or single URL string)"),
|
|
97
|
+
metadata: Optional[str] = Form(None, description="JSON metadata object for additional content properties"),
|
|
98
|
+
file: Optional[UploadFile] = File(None, description="File to upload for processing"),
|
|
99
|
+
text_content: Optional[str] = Form(None, description="Raw text content to process"),
|
|
100
|
+
reader_id: Optional[str] = Form(None, description="ID of the reader to use for content processing"),
|
|
101
|
+
chunker: Optional[str] = Form(None, description="Chunking strategy to apply during processing"),
|
|
102
|
+
db_id: Optional[str] = Query(default=None, description="Database ID to use for content storage"),
|
|
54
103
|
):
|
|
55
104
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
56
105
|
content_id = str(uuid4())
|
|
@@ -133,6 +182,39 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
133
182
|
response_model=ContentResponseSchema,
|
|
134
183
|
status_code=200,
|
|
135
184
|
operation_id="update_content",
|
|
185
|
+
summary="Update Content",
|
|
186
|
+
description=(
|
|
187
|
+
"Update content properties such as name, description, metadata, or processing configuration. "
|
|
188
|
+
"Allows modification of existing content without re-uploading."
|
|
189
|
+
),
|
|
190
|
+
responses={
|
|
191
|
+
200: {
|
|
192
|
+
"description": "Content updated successfully",
|
|
193
|
+
"content": {
|
|
194
|
+
"application/json": {
|
|
195
|
+
"example": {
|
|
196
|
+
"id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
|
|
197
|
+
"name": "example.pdf",
|
|
198
|
+
"description": "",
|
|
199
|
+
"type": "application/pdf",
|
|
200
|
+
"size": "251261",
|
|
201
|
+
"linked_to": None,
|
|
202
|
+
"metadata": {},
|
|
203
|
+
"access_count": 1,
|
|
204
|
+
"status": "completed",
|
|
205
|
+
"status_message": "",
|
|
206
|
+
"created_at": "2025-09-08T15:22:53Z",
|
|
207
|
+
"updated_at": "2025-09-08T15:22:54Z",
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
},
|
|
211
|
+
},
|
|
212
|
+
400: {
|
|
213
|
+
"description": "Invalid request - malformed metadata or invalid reader_id",
|
|
214
|
+
"model": BadRequestResponse,
|
|
215
|
+
},
|
|
216
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
217
|
+
},
|
|
136
218
|
)
|
|
137
219
|
async def update_content(
|
|
138
220
|
content_id: str = Path(..., description="Content ID"),
|
|
@@ -184,6 +266,39 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
184
266
|
response_model=PaginatedResponse[ContentResponseSchema],
|
|
185
267
|
status_code=200,
|
|
186
268
|
operation_id="get_content",
|
|
269
|
+
summary="List Content",
|
|
270
|
+
description=(
|
|
271
|
+
"Retrieve paginated list of all content in the knowledge base with filtering and sorting options. "
|
|
272
|
+
"Filter by status, content type, or metadata properties."
|
|
273
|
+
),
|
|
274
|
+
responses={
|
|
275
|
+
200: {
|
|
276
|
+
"description": "Content list retrieved successfully",
|
|
277
|
+
"content": {
|
|
278
|
+
"application/json": {
|
|
279
|
+
"example": {
|
|
280
|
+
"data": [
|
|
281
|
+
{
|
|
282
|
+
"id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
|
|
283
|
+
"name": "example.pdf",
|
|
284
|
+
"description": "",
|
|
285
|
+
"type": "application/pdf",
|
|
286
|
+
"size": "251261",
|
|
287
|
+
"linked_to": None,
|
|
288
|
+
"metadata": {},
|
|
289
|
+
"access_count": 1,
|
|
290
|
+
"status": "completed",
|
|
291
|
+
"status_message": "",
|
|
292
|
+
"created_at": "2025-09-08T15:22:53Z",
|
|
293
|
+
"updated_at": "2025-09-08T15:22:54Z",
|
|
294
|
+
},
|
|
295
|
+
],
|
|
296
|
+
"meta": {"page": 1, "limit": 20, "total_pages": 1, "total_count": 2},
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
},
|
|
300
|
+
}
|
|
301
|
+
},
|
|
187
302
|
)
|
|
188
303
|
def get_content(
|
|
189
304
|
limit: Optional[int] = Query(default=20, description="Number of content entries to return"),
|
|
@@ -226,6 +341,32 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
226
341
|
response_model=ContentResponseSchema,
|
|
227
342
|
status_code=200,
|
|
228
343
|
operation_id="get_content_by_id",
|
|
344
|
+
summary="Get Content by ID",
|
|
345
|
+
description="Retrieve detailed information about a specific content item including processing status and metadata.",
|
|
346
|
+
responses={
|
|
347
|
+
200: {
|
|
348
|
+
"description": "Content details retrieved successfully",
|
|
349
|
+
"content": {
|
|
350
|
+
"application/json": {
|
|
351
|
+
"example": {
|
|
352
|
+
"id": "3c2fc685-d451-4d47-b0c0-b9a544c672b7",
|
|
353
|
+
"name": "example.pdf",
|
|
354
|
+
"description": "",
|
|
355
|
+
"type": "application/pdf",
|
|
356
|
+
"size": "251261",
|
|
357
|
+
"linked_to": None,
|
|
358
|
+
"metadata": {},
|
|
359
|
+
"access_count": 1,
|
|
360
|
+
"status": "completed",
|
|
361
|
+
"status_message": "",
|
|
362
|
+
"created_at": "2025-09-08T15:22:53Z",
|
|
363
|
+
"updated_at": "2025-09-08T15:22:54Z",
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
},
|
|
367
|
+
},
|
|
368
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
369
|
+
},
|
|
229
370
|
)
|
|
230
371
|
def get_content_by_id(
|
|
231
372
|
content_id: str,
|
|
@@ -259,6 +400,13 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
259
400
|
status_code=200,
|
|
260
401
|
response_model_exclude_none=True,
|
|
261
402
|
operation_id="delete_content_by_id",
|
|
403
|
+
summary="Delete Content by ID",
|
|
404
|
+
description="Permanently remove a specific content item from the knowledge base. This action cannot be undone.",
|
|
405
|
+
responses={
|
|
406
|
+
200: {},
|
|
407
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
408
|
+
500: {"description": "Failed to delete content", "model": InternalServerErrorResponse},
|
|
409
|
+
},
|
|
262
410
|
)
|
|
263
411
|
def delete_content_by_id(
|
|
264
412
|
content_id: str,
|
|
@@ -272,13 +420,27 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
272
420
|
id=content_id,
|
|
273
421
|
)
|
|
274
422
|
|
|
275
|
-
@router.delete(
|
|
423
|
+
@router.delete(
|
|
424
|
+
"/knowledge/content",
|
|
425
|
+
status_code=200,
|
|
426
|
+
operation_id="delete_all_content",
|
|
427
|
+
summary="Delete All Content",
|
|
428
|
+
description=(
|
|
429
|
+
"Permanently remove all content from the knowledge base. This is a destructive operation that "
|
|
430
|
+
"cannot be undone. Use with extreme caution."
|
|
431
|
+
),
|
|
432
|
+
responses={
|
|
433
|
+
200: {},
|
|
434
|
+
500: {"description": "Failed to delete all content", "model": InternalServerErrorResponse},
|
|
435
|
+
},
|
|
436
|
+
)
|
|
276
437
|
def delete_all_content(
|
|
277
438
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
278
439
|
):
|
|
279
440
|
knowledge = get_knowledge_instance_by_db_id(knowledge_instances, db_id)
|
|
280
441
|
log_info("Deleting all content")
|
|
281
442
|
knowledge.remove_all_content()
|
|
443
|
+
|
|
282
444
|
return "success"
|
|
283
445
|
|
|
284
446
|
@router.get(
|
|
@@ -286,6 +448,30 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
286
448
|
status_code=200,
|
|
287
449
|
response_model=ContentStatusResponse,
|
|
288
450
|
operation_id="get_content_status",
|
|
451
|
+
summary="Get Content Status",
|
|
452
|
+
description=(
|
|
453
|
+
"Retrieve the current processing status of a content item. Useful for monitoring "
|
|
454
|
+
"asynchronous content processing progress and identifying any processing errors."
|
|
455
|
+
),
|
|
456
|
+
responses={
|
|
457
|
+
200: {
|
|
458
|
+
"description": "Content status retrieved successfully",
|
|
459
|
+
"content": {
|
|
460
|
+
"application/json": {
|
|
461
|
+
"examples": {
|
|
462
|
+
"completed": {
|
|
463
|
+
"summary": "Example completed content status",
|
|
464
|
+
"value": {
|
|
465
|
+
"status": "completed",
|
|
466
|
+
"status_message": "",
|
|
467
|
+
},
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
},
|
|
472
|
+
},
|
|
473
|
+
404: {"description": "Content not found", "model": NotFoundResponse},
|
|
474
|
+
},
|
|
289
475
|
)
|
|
290
476
|
def get_content_status(
|
|
291
477
|
content_id: str,
|
|
@@ -324,7 +510,235 @@ def attach_routes(router: APIRouter, knowledge_instances: List[Knowledge]) -> AP
|
|
|
324
510
|
|
|
325
511
|
return ContentStatusResponse(status=status, status_message=status_message or "")
|
|
326
512
|
|
|
327
|
-
@router.get(
|
|
513
|
+
@router.get(
|
|
514
|
+
"/knowledge/config",
|
|
515
|
+
status_code=200,
|
|
516
|
+
operation_id="get_knowledge_config",
|
|
517
|
+
summary="Get Knowledge Configuration",
|
|
518
|
+
description=(
|
|
519
|
+
"Retrieve available readers, chunkers, and configuration options for content processing. "
|
|
520
|
+
"This endpoint provides metadata about supported file types, processing strategies, and filters."
|
|
521
|
+
),
|
|
522
|
+
responses={
|
|
523
|
+
200: {
|
|
524
|
+
"description": "Knowledge configuration retrieved successfully",
|
|
525
|
+
"content": {
|
|
526
|
+
"application/json": {
|
|
527
|
+
"example": {
|
|
528
|
+
"readers": {
|
|
529
|
+
"website": {
|
|
530
|
+
"id": "website",
|
|
531
|
+
"name": "WebsiteReader",
|
|
532
|
+
"description": "Reads website files",
|
|
533
|
+
"chunkers": [
|
|
534
|
+
"AgenticChunker",
|
|
535
|
+
"DocumentChunker",
|
|
536
|
+
"RecursiveChunker",
|
|
537
|
+
"SemanticChunker",
|
|
538
|
+
"FixedSizeChunker",
|
|
539
|
+
],
|
|
540
|
+
},
|
|
541
|
+
"firecrawl": {
|
|
542
|
+
"id": "firecrawl",
|
|
543
|
+
"name": "FirecrawlReader",
|
|
544
|
+
"description": "Reads firecrawl files",
|
|
545
|
+
"chunkers": [
|
|
546
|
+
"SemanticChunker",
|
|
547
|
+
"FixedSizeChunker",
|
|
548
|
+
"AgenticChunker",
|
|
549
|
+
"DocumentChunker",
|
|
550
|
+
"RecursiveChunker",
|
|
551
|
+
],
|
|
552
|
+
},
|
|
553
|
+
"youtube": {
|
|
554
|
+
"id": "youtube",
|
|
555
|
+
"name": "YoutubeReader",
|
|
556
|
+
"description": "Reads youtube files",
|
|
557
|
+
"chunkers": [
|
|
558
|
+
"RecursiveChunker",
|
|
559
|
+
"AgenticChunker",
|
|
560
|
+
"DocumentChunker",
|
|
561
|
+
"SemanticChunker",
|
|
562
|
+
"FixedSizeChunker",
|
|
563
|
+
],
|
|
564
|
+
},
|
|
565
|
+
"web_search": {
|
|
566
|
+
"id": "web_search",
|
|
567
|
+
"name": "WebSearchReader",
|
|
568
|
+
"description": "Reads web_search files",
|
|
569
|
+
"chunkers": [
|
|
570
|
+
"AgenticChunker",
|
|
571
|
+
"DocumentChunker",
|
|
572
|
+
"RecursiveChunker",
|
|
573
|
+
"SemanticChunker",
|
|
574
|
+
"FixedSizeChunker",
|
|
575
|
+
],
|
|
576
|
+
},
|
|
577
|
+
"arxiv": {
|
|
578
|
+
"id": "arxiv",
|
|
579
|
+
"name": "ArxivReader",
|
|
580
|
+
"description": "Reads arxiv files",
|
|
581
|
+
"chunkers": [
|
|
582
|
+
"FixedSizeChunker",
|
|
583
|
+
"AgenticChunker",
|
|
584
|
+
"DocumentChunker",
|
|
585
|
+
"RecursiveChunker",
|
|
586
|
+
"SemanticChunker",
|
|
587
|
+
],
|
|
588
|
+
},
|
|
589
|
+
"csv": {
|
|
590
|
+
"id": "csv",
|
|
591
|
+
"name": "CsvReader",
|
|
592
|
+
"description": "Reads csv files",
|
|
593
|
+
"chunkers": [
|
|
594
|
+
"RowChunker",
|
|
595
|
+
"FixedSizeChunker",
|
|
596
|
+
"AgenticChunker",
|
|
597
|
+
"DocumentChunker",
|
|
598
|
+
"RecursiveChunker",
|
|
599
|
+
],
|
|
600
|
+
},
|
|
601
|
+
"docx": {
|
|
602
|
+
"id": "docx",
|
|
603
|
+
"name": "DocxReader",
|
|
604
|
+
"description": "Reads docx files",
|
|
605
|
+
"chunkers": [
|
|
606
|
+
"DocumentChunker",
|
|
607
|
+
"FixedSizeChunker",
|
|
608
|
+
"SemanticChunker",
|
|
609
|
+
"AgenticChunker",
|
|
610
|
+
"RecursiveChunker",
|
|
611
|
+
],
|
|
612
|
+
},
|
|
613
|
+
"gcs": {
|
|
614
|
+
"id": "gcs",
|
|
615
|
+
"name": "GcsReader",
|
|
616
|
+
"description": "Reads gcs files",
|
|
617
|
+
"chunkers": [
|
|
618
|
+
"FixedSizeChunker",
|
|
619
|
+
"AgenticChunker",
|
|
620
|
+
"DocumentChunker",
|
|
621
|
+
"RecursiveChunker",
|
|
622
|
+
"SemanticChunker",
|
|
623
|
+
],
|
|
624
|
+
},
|
|
625
|
+
"json": {
|
|
626
|
+
"id": "json",
|
|
627
|
+
"name": "JsonReader",
|
|
628
|
+
"description": "Reads json files",
|
|
629
|
+
"chunkers": [
|
|
630
|
+
"FixedSizeChunker",
|
|
631
|
+
"AgenticChunker",
|
|
632
|
+
"DocumentChunker",
|
|
633
|
+
"RecursiveChunker",
|
|
634
|
+
"SemanticChunker",
|
|
635
|
+
],
|
|
636
|
+
},
|
|
637
|
+
"markdown": {
|
|
638
|
+
"id": "markdown",
|
|
639
|
+
"name": "MarkdownReader",
|
|
640
|
+
"description": "Reads markdown files",
|
|
641
|
+
"chunkers": [
|
|
642
|
+
"MarkdownChunker",
|
|
643
|
+
"DocumentChunker",
|
|
644
|
+
"AgenticChunker",
|
|
645
|
+
"RecursiveChunker",
|
|
646
|
+
"SemanticChunker",
|
|
647
|
+
"FixedSizeChunker",
|
|
648
|
+
],
|
|
649
|
+
},
|
|
650
|
+
"pdf": {
|
|
651
|
+
"id": "pdf",
|
|
652
|
+
"name": "PdfReader",
|
|
653
|
+
"description": "Reads pdf files",
|
|
654
|
+
"chunkers": [
|
|
655
|
+
"DocumentChunker",
|
|
656
|
+
"FixedSizeChunker",
|
|
657
|
+
"AgenticChunker",
|
|
658
|
+
"SemanticChunker",
|
|
659
|
+
"RecursiveChunker",
|
|
660
|
+
],
|
|
661
|
+
},
|
|
662
|
+
"text": {
|
|
663
|
+
"id": "text",
|
|
664
|
+
"name": "TextReader",
|
|
665
|
+
"description": "Reads text files",
|
|
666
|
+
"chunkers": [
|
|
667
|
+
"FixedSizeChunker",
|
|
668
|
+
"AgenticChunker",
|
|
669
|
+
"DocumentChunker",
|
|
670
|
+
"RecursiveChunker",
|
|
671
|
+
"SemanticChunker",
|
|
672
|
+
],
|
|
673
|
+
},
|
|
674
|
+
},
|
|
675
|
+
"readersForType": {
|
|
676
|
+
"url": [
|
|
677
|
+
"url",
|
|
678
|
+
"website",
|
|
679
|
+
"firecrawl",
|
|
680
|
+
"youtube",
|
|
681
|
+
"web_search",
|
|
682
|
+
"gcs",
|
|
683
|
+
],
|
|
684
|
+
"youtube": ["youtube"],
|
|
685
|
+
"text": ["web_search"],
|
|
686
|
+
"topic": ["arxiv"],
|
|
687
|
+
"file": ["csv", "gcs"],
|
|
688
|
+
".csv": ["csv"],
|
|
689
|
+
".xlsx": ["csv"],
|
|
690
|
+
".xls": ["csv"],
|
|
691
|
+
".docx": ["docx"],
|
|
692
|
+
".doc": ["docx"],
|
|
693
|
+
".json": ["json"],
|
|
694
|
+
".md": ["markdown"],
|
|
695
|
+
".pdf": ["pdf"],
|
|
696
|
+
".txt": ["text"],
|
|
697
|
+
},
|
|
698
|
+
"chunkers": {
|
|
699
|
+
"AgenticChunker": {
|
|
700
|
+
"key": "AgenticChunker",
|
|
701
|
+
"name": "AgenticChunker",
|
|
702
|
+
"description": "Chunking strategy that uses an LLM to determine natural breakpoints in the text",
|
|
703
|
+
},
|
|
704
|
+
"DocumentChunker": {
|
|
705
|
+
"key": "DocumentChunker",
|
|
706
|
+
"name": "DocumentChunker",
|
|
707
|
+
"description": "A chunking strategy that splits text based on document structure like paragraphs and sections",
|
|
708
|
+
},
|
|
709
|
+
"RecursiveChunker": {
|
|
710
|
+
"key": "RecursiveChunker",
|
|
711
|
+
"name": "RecursiveChunker",
|
|
712
|
+
"description": "Chunking strategy that recursively splits text into chunks by finding natural break points",
|
|
713
|
+
},
|
|
714
|
+
"SemanticChunker": {
|
|
715
|
+
"key": "SemanticChunker",
|
|
716
|
+
"name": "SemanticChunker",
|
|
717
|
+
"description": "Chunking strategy that splits text into semantic chunks using chonkie",
|
|
718
|
+
},
|
|
719
|
+
"FixedSizeChunker": {
|
|
720
|
+
"key": "FixedSizeChunker",
|
|
721
|
+
"name": "FixedSizeChunker",
|
|
722
|
+
"description": "Chunking strategy that splits text into fixed-size chunks with optional overlap",
|
|
723
|
+
},
|
|
724
|
+
"RowChunker": {
|
|
725
|
+
"key": "RowChunker",
|
|
726
|
+
"name": "RowChunker",
|
|
727
|
+
"description": "RowChunking chunking strategy",
|
|
728
|
+
},
|
|
729
|
+
"MarkdownChunker": {
|
|
730
|
+
"key": "MarkdownChunker",
|
|
731
|
+
"name": "MarkdownChunker",
|
|
732
|
+
"description": "A chunking strategy that splits markdown based on structure like headers, paragraphs and sections",
|
|
733
|
+
},
|
|
734
|
+
},
|
|
735
|
+
"filters": ["filter_tag_1", "filter_tag2"],
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
},
|
|
739
|
+
}
|
|
740
|
+
},
|
|
741
|
+
)
|
|
328
742
|
def get_config(
|
|
329
743
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
330
744
|
) -> ConfigResponseSchema:
|