llama-stack-api 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack_api/__init__.py +1100 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents/__init__.py +38 -0
- llama_stack_api/agents/api.py +52 -0
- llama_stack_api/agents/fastapi_routes.py +268 -0
- llama_stack_api/agents/models.py +181 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- llama_stack_api/common/__init__.py +5 -0
- llama_stack_api/common/content_types.py +101 -0
- llama_stack_api/common/errors.py +110 -0
- llama_stack_api/common/job_types.py +38 -0
- llama_stack_api/common/responses.py +77 -0
- llama_stack_api/common/training_types.py +47 -0
- llama_stack_api/common/type_system.py +146 -0
- llama_stack_api/connectors/__init__.py +38 -0
- llama_stack_api/connectors/api.py +50 -0
- llama_stack_api/connectors/fastapi_routes.py +103 -0
- llama_stack_api/connectors/models.py +103 -0
- llama_stack_api/conversations/__init__.py +61 -0
- llama_stack_api/conversations/api.py +44 -0
- llama_stack_api/conversations/fastapi_routes.py +177 -0
- llama_stack_api/conversations/models.py +245 -0
- llama_stack_api/datasetio/__init__.py +34 -0
- llama_stack_api/datasetio/api.py +42 -0
- llama_stack_api/datasetio/fastapi_routes.py +94 -0
- llama_stack_api/datasetio/models.py +48 -0
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- llama_stack_api/datatypes.py +373 -0
- llama_stack_api/eval/__init__.py +55 -0
- llama_stack_api/eval/api.py +51 -0
- llama_stack_api/eval/compat.py +300 -0
- llama_stack_api/eval/fastapi_routes.py +126 -0
- llama_stack_api/eval/models.py +141 -0
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- llama_stack_api/inference/__init__.py +207 -0
- llama_stack_api/inference/api.py +93 -0
- llama_stack_api/inference/fastapi_routes.py +243 -0
- llama_stack_api/inference/models.py +1035 -0
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- llama_stack_api/internal/__init__.py +9 -0
- llama_stack_api/internal/kvstore.py +28 -0
- llama_stack_api/internal/sqlstore.py +81 -0
- llama_stack_api/models/__init__.py +47 -0
- llama_stack_api/models/api.py +38 -0
- llama_stack_api/models/fastapi_routes.py +104 -0
- llama_stack_api/models/models.py +157 -0
- llama_stack_api/openai_responses.py +1494 -0
- llama_stack_api/post_training/__init__.py +73 -0
- llama_stack_api/post_training/api.py +36 -0
- llama_stack_api/post_training/fastapi_routes.py +116 -0
- llama_stack_api/post_training/models.py +339 -0
- llama_stack_api/prompts/__init__.py +47 -0
- llama_stack_api/prompts/api.py +44 -0
- llama_stack_api/prompts/fastapi_routes.py +163 -0
- llama_stack_api/prompts/models.py +177 -0
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- llama_stack_api/rag_tool.py +168 -0
- llama_stack_api/resource.py +36 -0
- llama_stack_api/router_utils.py +160 -0
- llama_stack_api/safety/__init__.py +37 -0
- llama_stack_api/safety/api.py +29 -0
- llama_stack_api/safety/datatypes.py +83 -0
- llama_stack_api/safety/fastapi_routes.py +55 -0
- llama_stack_api/safety/models.py +38 -0
- llama_stack_api/schema_utils.py +251 -0
- llama_stack_api/scoring/__init__.py +66 -0
- llama_stack_api/scoring/api.py +35 -0
- llama_stack_api/scoring/fastapi_routes.py +67 -0
- llama_stack_api/scoring/models.py +81 -0
- llama_stack_api/scoring_functions/__init__.py +50 -0
- llama_stack_api/scoring_functions/api.py +39 -0
- llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
- llama_stack_api/scoring_functions/models.py +214 -0
- llama_stack_api/shields/__init__.py +41 -0
- llama_stack_api/shields/api.py +39 -0
- llama_stack_api/shields/fastapi_routes.py +104 -0
- llama_stack_api/shields/models.py +74 -0
- llama_stack_api/tools.py +226 -0
- llama_stack_api/validators.py +46 -0
- llama_stack_api/vector_io/__init__.py +88 -0
- llama_stack_api/vector_io/api.py +234 -0
- llama_stack_api/vector_io/fastapi_routes.py +447 -0
- llama_stack_api/vector_io/models.py +663 -0
- llama_stack_api/vector_stores.py +53 -0
- llama_stack_api/version.py +9 -0
- {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
- llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
- llama_stack_api-0.5.0rc1.dist-info/top_level.txt +1 -0
- llama_stack_api-0.4.3.dist-info/RECORD +0 -4
- llama_stack_api-0.4.3.dist-info/top_level.txt +0 -1
- {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""FastAPI router for the File Processors API.
|
|
8
|
+
|
|
9
|
+
This module defines the FastAPI router for the File Processors API using standard
|
|
10
|
+
FastAPI route decorators. The router is defined in the API package to keep
|
|
11
|
+
all API-related code together.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Annotated, Any
|
|
15
|
+
|
|
16
|
+
from fastapi import APIRouter, File, Form, UploadFile
|
|
17
|
+
|
|
18
|
+
from llama_stack_api.router_utils import standard_responses
|
|
19
|
+
from llama_stack_api.vector_io import VectorStoreChunkingStrategy
|
|
20
|
+
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
|
|
21
|
+
|
|
22
|
+
from .api import FileProcessors
|
|
23
|
+
from .models import ProcessFileResponse
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_router(impl: FileProcessors) -> APIRouter:
    """Create a FastAPI router for the File Processors API.

    Args:
        impl: The FileProcessors implementation instance

    Returns:
        APIRouter configured for the File Processors API
    """
    # All routes mount under the v1alpha prefix — this API surface is pre-stable.
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
        tags=["File Processors"],
        responses=standard_responses,
    )

    @router.post(
        "/file-processors/process",
        response_model=ProcessFileResponse,
        summary="Process a file into chunks ready for vector database storage.",
        description="Process a file into chunks ready for vector database storage. Supports direct upload via multipart form or processing files already uploaded to file storage via file_id. Exactly one of file or file_id must be provided.",
        responses={
            200: {"description": "The processed file chunks."},
        },
    )
    async def process_file(
        # Direct multipart upload. The "exactly one of file/file_id" rule from the
        # route description is not validated here — presumably enforced by impl; verify.
        file: Annotated[
            UploadFile | None,
            File(description="The File object to be uploaded and processed. Mutually exclusive with file_id."),
        ] = None,
        file_id: Annotated[
            str | None, Form(description="ID of file already uploaded to file storage. Mutually exclusive with file.")
        ] = None,
        # NOTE(review): a dict-typed Form field generally requires the client to send a
        # JSON-encoded string for FastAPI to parse it — confirm clients do this.
        options: Annotated[
            dict[str, Any] | None,
            Form(
                description="Optional processing options. Provider-specific parameters (e.g., OCR settings, output format)."
            ),
        ] = None,
        chunking_strategy: Annotated[
            VectorStoreChunkingStrategy | None,
            Form(description="Optional chunking strategy for splitting content into chunks."),
        ] = None,
    ) -> ProcessFileResponse:
        # Pass the parameters directly to the implementation
        # The protocol method signature expects individual parameters for multipart handling
        return await impl.process_file(
            file=file,
            file_id=file_id,
            options=options,
            chunking_strategy=chunking_strategy,
        )

    return router
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""Pydantic models for File Processors API responses.
|
|
8
|
+
|
|
9
|
+
This module defines the response models for the File Processors API
|
|
10
|
+
using Pydantic with Field descriptions for OpenAPI schema generation.
|
|
11
|
+
|
|
12
|
+
Request models are not needed for this API since it uses multipart form data
|
|
13
|
+
with individual parameters rather than a JSON request body.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, Field
|
|
19
|
+
|
|
20
|
+
from llama_stack_api.schema_utils import json_schema_type
|
|
21
|
+
from llama_stack_api.vector_io import Chunk
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@json_schema_type
class ProcessFileResponse(BaseModel):
    """Response model for file processing operation.

    Returns a list of chunks ready for storage in vector databases.
    Each chunk contains the content and metadata.
    """

    # NOTE(review): the description promises at least one chunk, but no min-length
    # validation enforces it — providers are trusted to uphold the invariant.
    chunks: list[Chunk] = Field(..., description="Processed chunks from the file. Always returns at least one chunk.")

    # Free-form provider metadata; keys listed in the description are conventions,
    # not a validated schema.
    metadata: dict[str, Any] = Field(
        ...,
        description="Processing-run metadata such as processor name/version, processing_time_ms, page_count, extraction_method (e.g. docling/pypdf/ocr), confidence scores, plus provider-specific fields.",
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Explicit public surface of this module.
__all__ = [
    "ProcessFileResponse",
]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from . import fastapi_routes
|
|
8
|
+
from .api import Files
|
|
9
|
+
from .models import (
|
|
10
|
+
DeleteFileRequest,
|
|
11
|
+
ExpiresAfter,
|
|
12
|
+
ListFilesRequest,
|
|
13
|
+
ListOpenAIFileResponse,
|
|
14
|
+
OpenAIFileDeleteResponse,
|
|
15
|
+
OpenAIFileObject,
|
|
16
|
+
OpenAIFilePurpose,
|
|
17
|
+
RetrieveFileContentRequest,
|
|
18
|
+
RetrieveFileRequest,
|
|
19
|
+
UploadFileRequest,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Public re-exports of the Files API package: the protocol, the router module,
# and every request/response model. Kept alphabetical (case-insensitive).
__all__ = [
    "DeleteFileRequest",
    "ExpiresAfter",
    "fastapi_routes",
    "Files",
    "ListFilesRequest",
    "ListOpenAIFileResponse",
    "OpenAIFileDeleteResponse",
    "OpenAIFileObject",
    "OpenAIFilePurpose",
    "RetrieveFileContentRequest",
    "RetrieveFileRequest",
    "UploadFileRequest",
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Protocol, runtime_checkable
|
|
8
|
+
|
|
9
|
+
from fastapi import Response, UploadFile
|
|
10
|
+
|
|
11
|
+
from .models import (
|
|
12
|
+
DeleteFileRequest,
|
|
13
|
+
ListFilesRequest,
|
|
14
|
+
ListOpenAIFileResponse,
|
|
15
|
+
OpenAIFileDeleteResponse,
|
|
16
|
+
OpenAIFileObject,
|
|
17
|
+
RetrieveFileContentRequest,
|
|
18
|
+
RetrieveFileRequest,
|
|
19
|
+
UploadFileRequest,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@runtime_checkable
class Files(Protocol):
    """Files API for managing file uploads and retrieval.

    Structural protocol implemented by Files providers; each method mirrors one
    route of the OpenAI-compatible Files HTTP API.
    """

    # Upload a file; multipart payload arrives as `file`, metadata in `request`.
    async def openai_upload_file(
        self,
        request: UploadFileRequest,
        file: UploadFile,
    ) -> OpenAIFileObject: ...

    # List files with pagination/filtering carried in the request model.
    async def openai_list_files(
        self,
        request: ListFilesRequest,
    ) -> ListOpenAIFileResponse: ...

    # Fetch metadata for a single file by ID.
    async def openai_retrieve_file(
        self,
        request: RetrieveFileRequest,
    ) -> OpenAIFileObject: ...

    # Delete a file by ID.
    async def openai_delete_file(
        self,
        request: DeleteFileRequest,
    ) -> OpenAIFileDeleteResponse: ...

    # Return the raw file bytes as an HTTP response rather than a JSON model.
    async def openai_retrieve_file_content(
        self,
        request: RetrieveFileContentRequest,
    ) -> Response: ...
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter, Depends, UploadFile
|
|
10
|
+
from fastapi.param_functions import File, Form
|
|
11
|
+
from fastapi.responses import Response
|
|
12
|
+
|
|
13
|
+
from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
|
|
14
|
+
from llama_stack_api.version import LLAMA_STACK_API_V1
|
|
15
|
+
|
|
16
|
+
from .api import Files
|
|
17
|
+
from .models import (
|
|
18
|
+
DeleteFileRequest,
|
|
19
|
+
ExpiresAfter,
|
|
20
|
+
ListFilesRequest,
|
|
21
|
+
ListOpenAIFileResponse,
|
|
22
|
+
OpenAIFileDeleteResponse,
|
|
23
|
+
OpenAIFileObject,
|
|
24
|
+
OpenAIFilePurpose,
|
|
25
|
+
RetrieveFileContentRequest,
|
|
26
|
+
RetrieveFileRequest,
|
|
27
|
+
UploadFileRequest,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Automatically generate dependency functions from Pydantic models
# This ensures the models are the single source of truth for descriptions
# (query-string params for list; path params for the per-file routes).
get_list_files_request = create_query_dependency(ListFilesRequest)
get_get_files_request = create_path_dependency(RetrieveFileRequest)
get_delete_files_request = create_path_dependency(DeleteFileRequest)
get_retrieve_file_content_request = create_path_dependency(RetrieveFileContentRequest)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def create_router(impl: Files) -> APIRouter:
    """Create a FastAPI router for the Files API.

    Each route is a thin adapter: it builds/receives the request model and
    forwards to the matching method on *impl*.

    Args:
        impl: The Files implementation instance.

    Returns:
        APIRouter configured for the Files API (mounted under the stable v1 prefix).
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1}",
        tags=["Files"],
        responses=standard_responses,
    )

    # GET /files — pagination/filter params come from the auto-generated query dependency.
    @router.get(
        "/files",
        response_model=ListOpenAIFileResponse,
        summary="List files",
        description="List files",
        responses={
            200: {"description": "The list of files."},
        },
    )
    async def list_files(
        request: Annotated[ListFilesRequest, Depends(get_list_files_request)],
    ) -> ListOpenAIFileResponse:
        return await impl.openai_list_files(request)

    # GET /files/{file_id} — file metadata only (content has its own route below).
    @router.get(
        "/files/{file_id}",
        response_model=OpenAIFileObject,
        summary="Get file",
        description="Get file",
        responses={
            200: {"description": "The file."},
        },
    )
    async def get_file(
        request: Annotated[RetrieveFileRequest, Depends(get_get_files_request)],
    ) -> OpenAIFileObject:
        return await impl.openai_retrieve_file(request)

    # DELETE /files/{file_id}
    @router.delete(
        "/files/{file_id}",
        response_model=OpenAIFileDeleteResponse,
        summary="Delete file",
        description="Delete file",
        responses={
            200: {"description": "The file was deleted."},
        },
    )
    async def delete_file(
        request: Annotated[DeleteFileRequest, Depends(get_delete_files_request)],
    ) -> OpenAIFileDeleteResponse:
        return await impl.openai_delete_file(request)

    # GET /files/{file_id}/content — returns a raw Response (bytes), so no
    # response_model; the OpenAPI schema for 200 is declared manually instead.
    @router.get(
        "/files/{file_id}/content",
        status_code=200,
        summary="Retrieve file content",
        description="Retrieve file content",
        responses={
            200: {
                "description": "The raw file content as a binary response.",
                "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Response"}}},
            },
        },
    )
    async def retrieve_file_content(
        request: Annotated[RetrieveFileContentRequest, Depends(get_retrieve_file_content_request)],
    ) -> Response:
        return await impl.openai_retrieve_file_content(request)

    # POST /files — multipart upload; form fields are re-packed into the
    # UploadFileRequest model before delegating, keeping the protocol JSON-model based.
    @router.post(
        "/files",
        response_model=OpenAIFileObject,
        summary="Upload file",
        description="Upload a file.",
        responses={
            200: {"description": "The uploaded file."},
        },
    )
    async def upload_file(
        file: Annotated[UploadFile, File(description="The file to upload.")],
        purpose: Annotated[OpenAIFilePurpose, Form(description="The intended purpose of the uploaded file.")],
        expires_after: Annotated[ExpiresAfter | None, Form(description="Optional expiration settings.")] = None,
    ) -> OpenAIFileObject:
        request = UploadFileRequest(
            purpose=purpose,
            expires_after=expires_after,
        )
        return await impl.openai_upload_file(request, file)

    return router
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from enum import StrEnum
|
|
8
|
+
from typing import ClassVar, Literal
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
from llama_stack_api.common.responses import Order
|
|
13
|
+
from llama_stack_api.schema_utils import json_schema_type
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class OpenAIFilePurpose(StrEnum):
|
|
17
|
+
"""
|
|
18
|
+
Valid purpose values for OpenAI Files API.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
ASSISTANTS = "assistants"
|
|
22
|
+
BATCH = "batch"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@json_schema_type
class OpenAIFileObject(BaseModel):
    """OpenAI File object as defined in the OpenAI Files API."""

    # Field names `object`, `id`, and `bytes` shadow builtins deliberately —
    # they must match the OpenAI wire format exactly.
    object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
    id: str = Field(..., description="The file identifier, which can be referenced in the API endpoints.")
    bytes: int = Field(..., description="The size of the file, in bytes.")
    created_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file was created.")
    expires_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file expires.")
    filename: str = Field(..., description="The name of the file.")
    purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the file.")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@json_schema_type
class ExpiresAfter(BaseModel):
    """Control expiration of uploaded files.

    The `seconds` field is validated against the MIN/MAX class constants, so
    the bounds live in exactly one place.
    """

    MIN: ClassVar[int] = 3600  # 1 hour
    MAX: ClassVar[int] = 2592000  # 30 days

    anchor: Literal["created_at"] = Field(..., description="The anchor point for expiration, must be 'created_at'.")
    seconds: int = Field(
        ..., ge=MIN, le=MAX, description="Seconds until expiration, between 3600 (1 hour) and 2592000 (30 days)."
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@json_schema_type
class ListOpenAIFileResponse(BaseModel):
    """Response for listing files in OpenAI Files API."""

    data: list[OpenAIFileObject] = Field(..., description="The list of files.")
    has_more: bool = Field(..., description="Whether there are more files available beyond this page.")
    # first_id/last_id are required (non-optional) even for an empty page —
    # NOTE(review): confirm what providers return when `data` is empty.
    first_id: str = Field(..., description="The ID of the first file in the list for pagination.")
    last_id: str = Field(..., description="The ID of the last file in the list for pagination.")
    object: Literal["list"] = Field(default="list", description="The object type, which is always 'list'.")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@json_schema_type
class OpenAIFileDeleteResponse(BaseModel):
    """Response for deleting a file in OpenAI Files API."""

    id: str = Field(..., description="The file identifier that was deleted.")
    # `object` stays "file" (not "file.deleted") to mirror the wire format used here.
    object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
    deleted: bool = Field(..., description="Whether the file was successfully deleted.")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@json_schema_type
class ListFilesRequest(BaseModel):
    """Request model for listing files.

    Mirrors the query parameters of the OpenAI ``GET /files`` endpoint:
    cursor pagination (`after`), page size (`limit`), sort order, and an
    optional purpose filter.
    """

    after: str | None = Field(default=None, description="A cursor for pagination. Returns files after this ID.")
    # ge/le enforce the documented 1-10,000 window; previously the range lived
    # only in the description and out-of-range values passed validation silently.
    limit: int | None = Field(
        default=10000, ge=1, le=10000, description="Maximum number of files to return (1-10,000)."
    )
    order: Order | None = Field(default=Order.desc, description="Sort order by created_at timestamp ('asc' or 'desc').")
    purpose: OpenAIFilePurpose | None = Field(default=None, description="Filter files by purpose.")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@json_schema_type
class RetrieveFileRequest(BaseModel):
    """Request model for retrieving a file.

    `file_id` is bound from the URL path by the auto-generated path dependency.
    """

    file_id: str = Field(..., description="The ID of the file to retrieve.")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@json_schema_type
class DeleteFileRequest(BaseModel):
    """Request model for deleting a file.

    `file_id` is bound from the URL path by the auto-generated path dependency.
    """

    file_id: str = Field(..., description="The ID of the file to delete.")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@json_schema_type
class RetrieveFileContentRequest(BaseModel):
    """Request model for retrieving file content.

    `file_id` is bound from the URL path by the auto-generated path dependency.
    """

    file_id: str = Field(..., description="The ID of the file to retrieve content from.")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@json_schema_type
class UploadFileRequest(BaseModel):
    """Request model for uploading a file.

    Carries the multipart form metadata only; the file payload itself is
    passed alongside this model as an UploadFile.
    """

    purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the uploaded file.")
    expires_after: ExpiresAfter | None = Field(default=None, description="Optional expiration settings for the file.")
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""Inference API protocol and models.
|
|
8
|
+
|
|
9
|
+
This module contains the Inference protocol definition.
|
|
10
|
+
Pydantic models are defined in llama_stack_api.inference.models.
|
|
11
|
+
The FastAPI router is defined in llama_stack_api.inference.fastapi_routes.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
# Import common types for backward compatibility
|
|
15
|
+
# (these were previously available from the old inference.py)
|
|
16
|
+
from llama_stack_api.common.content_types import InterleavedContent
|
|
17
|
+
|
|
18
|
+
# Import fastapi_routes for router factory access
|
|
19
|
+
from . import fastapi_routes
|
|
20
|
+
|
|
21
|
+
# Import protocol for re-export
|
|
22
|
+
from .api import Inference, InferenceProvider, ModelStore
|
|
23
|
+
|
|
24
|
+
# Import models for re-export
|
|
25
|
+
from .models import (
|
|
26
|
+
AllowedToolsConfig,
|
|
27
|
+
Bf16QuantizationConfig,
|
|
28
|
+
ChatCompletionResponseEventType,
|
|
29
|
+
CompletionRequest,
|
|
30
|
+
CustomToolConfig,
|
|
31
|
+
EmbeddingsResponse,
|
|
32
|
+
EmbeddingTaskType,
|
|
33
|
+
Fp8QuantizationConfig,
|
|
34
|
+
FunctionToolConfig,
|
|
35
|
+
GetChatCompletionRequest,
|
|
36
|
+
GrammarResponseFormat,
|
|
37
|
+
GreedySamplingStrategy,
|
|
38
|
+
Int4QuantizationConfig,
|
|
39
|
+
JsonSchemaResponseFormat,
|
|
40
|
+
ListChatCompletionsRequest,
|
|
41
|
+
ListOpenAIChatCompletionResponse,
|
|
42
|
+
LogProbConfig,
|
|
43
|
+
OpenAIAssistantMessageParam,
|
|
44
|
+
OpenAIChatCompletion,
|
|
45
|
+
OpenAIChatCompletionChunk,
|
|
46
|
+
OpenAIChatCompletionContentPartImageParam,
|
|
47
|
+
OpenAIChatCompletionContentPartParam,
|
|
48
|
+
OpenAIChatCompletionContentPartTextParam,
|
|
49
|
+
OpenAIChatCompletionMessageContent,
|
|
50
|
+
OpenAIChatCompletionRequestWithExtraBody,
|
|
51
|
+
OpenAIChatCompletionTextOnlyMessageContent,
|
|
52
|
+
OpenAIChatCompletionToolCall,
|
|
53
|
+
OpenAIChatCompletionToolCallFunction,
|
|
54
|
+
OpenAIChatCompletionToolChoice,
|
|
55
|
+
OpenAIChatCompletionToolChoiceAllowedTools,
|
|
56
|
+
OpenAIChatCompletionToolChoiceCustomTool,
|
|
57
|
+
OpenAIChatCompletionToolChoiceFunctionTool,
|
|
58
|
+
OpenAIChatCompletionUsage,
|
|
59
|
+
OpenAIChatCompletionUsageCompletionTokensDetails,
|
|
60
|
+
OpenAIChatCompletionUsagePromptTokensDetails,
|
|
61
|
+
OpenAIChoice,
|
|
62
|
+
OpenAIChoiceDelta,
|
|
63
|
+
OpenAIChoiceLogprobs,
|
|
64
|
+
OpenAIChunkChoice,
|
|
65
|
+
OpenAICompletion,
|
|
66
|
+
OpenAICompletionChoice,
|
|
67
|
+
OpenAICompletionLogprobs,
|
|
68
|
+
OpenAICompletionRequestWithExtraBody,
|
|
69
|
+
OpenAICompletionWithInputMessages,
|
|
70
|
+
OpenAIDeveloperMessageParam,
|
|
71
|
+
OpenAIEmbeddingData,
|
|
72
|
+
OpenAIEmbeddingsRequestWithExtraBody,
|
|
73
|
+
OpenAIEmbeddingsResponse,
|
|
74
|
+
OpenAIEmbeddingUsage,
|
|
75
|
+
OpenAIFile,
|
|
76
|
+
OpenAIFileFile,
|
|
77
|
+
OpenAIFinishReason,
|
|
78
|
+
OpenAIImageURL,
|
|
79
|
+
OpenAIJSONSchema,
|
|
80
|
+
OpenAIMessageParam,
|
|
81
|
+
OpenAIResponseFormatJSONObject,
|
|
82
|
+
OpenAIResponseFormatJSONSchema,
|
|
83
|
+
OpenAIResponseFormatParam,
|
|
84
|
+
OpenAIResponseFormatText,
|
|
85
|
+
OpenAISystemMessageParam,
|
|
86
|
+
OpenAITokenLogProb,
|
|
87
|
+
OpenAIToolMessageParam,
|
|
88
|
+
OpenAITopLogProb,
|
|
89
|
+
OpenAIUserMessageParam,
|
|
90
|
+
QuantizationConfig,
|
|
91
|
+
QuantizationType,
|
|
92
|
+
RerankData,
|
|
93
|
+
RerankRequest,
|
|
94
|
+
RerankResponse,
|
|
95
|
+
ResponseFormat,
|
|
96
|
+
ResponseFormatType,
|
|
97
|
+
SamplingParams,
|
|
98
|
+
SamplingStrategy,
|
|
99
|
+
SystemMessage,
|
|
100
|
+
SystemMessageBehavior,
|
|
101
|
+
TextTruncation,
|
|
102
|
+
TokenLogProbs,
|
|
103
|
+
ToolChoice,
|
|
104
|
+
ToolResponseMessage,
|
|
105
|
+
TopKSamplingStrategy,
|
|
106
|
+
TopPSamplingStrategy,
|
|
107
|
+
UserMessage,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Explicit public API of the inference package, grouped by concern.
# Names must stay in sync with the imports above.
__all__ = [
    # Protocol
    "Inference",
    "InferenceProvider",
    "ModelStore",
    # Common types (for backward compatibility)
    "InterleavedContent",
    # Sampling
    "GreedySamplingStrategy",
    "TopPSamplingStrategy",
    "TopKSamplingStrategy",
    "SamplingStrategy",
    "SamplingParams",
    "LogProbConfig",
    # Quantization
    "QuantizationType",
    "Fp8QuantizationConfig",
    "Bf16QuantizationConfig",
    "Int4QuantizationConfig",
    "QuantizationConfig",
    # Messages
    "UserMessage",
    "SystemMessage",
    "ToolResponseMessage",
    "ToolChoice",
    "TokenLogProbs",
    # Response
    "ChatCompletionResponseEventType",
    "ResponseFormatType",
    "JsonSchemaResponseFormat",
    "GrammarResponseFormat",
    "ResponseFormat",
    "CompletionRequest",
    "SystemMessageBehavior",
    "EmbeddingsResponse",
    "RerankData",
    "RerankResponse",
    # OpenAI Compatibility
    "OpenAIChatCompletionContentPartTextParam",
    "OpenAIImageURL",
    "OpenAIChatCompletionContentPartImageParam",
    "OpenAIFileFile",
    "OpenAIFile",
    "OpenAIChatCompletionContentPartParam",
    "OpenAIChatCompletionMessageContent",
    "OpenAIChatCompletionTextOnlyMessageContent",
    "OpenAIUserMessageParam",
    "OpenAISystemMessageParam",
    "OpenAIChatCompletionToolCallFunction",
    "OpenAIChatCompletionToolCall",
    "OpenAIAssistantMessageParam",
    "OpenAIToolMessageParam",
    "OpenAIDeveloperMessageParam",
    "OpenAIMessageParam",
    "OpenAIResponseFormatText",
    "OpenAIJSONSchema",
    "OpenAIResponseFormatJSONSchema",
    "OpenAIResponseFormatJSONObject",
    "OpenAIResponseFormatParam",
    "FunctionToolConfig",
    "OpenAIChatCompletionToolChoiceFunctionTool",
    "CustomToolConfig",
    "OpenAIChatCompletionToolChoiceCustomTool",
    "AllowedToolsConfig",
    "OpenAIChatCompletionToolChoiceAllowedTools",
    "OpenAIChatCompletionToolChoice",
    "OpenAITopLogProb",
    "OpenAITokenLogProb",
    "OpenAIChoiceLogprobs",
    "OpenAIChoiceDelta",
    "OpenAIChunkChoice",
    "OpenAIChoice",
    "OpenAIChatCompletionUsageCompletionTokensDetails",
    "OpenAIChatCompletionUsagePromptTokensDetails",
    "OpenAIChatCompletionUsage",
    "OpenAIChatCompletion",
    "OpenAIChatCompletionChunk",
    "OpenAICompletionLogprobs",
    "OpenAICompletionChoice",
    "OpenAICompletion",
    "OpenAIFinishReason",
    "OpenAIEmbeddingData",
    "OpenAIEmbeddingUsage",
    "OpenAIEmbeddingsResponse",
    "TextTruncation",
    "EmbeddingTaskType",
    "OpenAICompletionWithInputMessages",
    "ListOpenAIChatCompletionResponse",
    "OpenAICompletionRequestWithExtraBody",
    "OpenAIChatCompletionRequestWithExtraBody",
    "OpenAIEmbeddingsRequestWithExtraBody",
    # Request Models
    "ListChatCompletionsRequest",
    "GetChatCompletionRequest",
    "RerankRequest",
    # Router factory module
    "fastapi_routes",
]
|