llama-stack-api 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. llama_stack_api/__init__.py +1100 -0
  2. llama_stack_api/admin/__init__.py +45 -0
  3. llama_stack_api/admin/api.py +72 -0
  4. llama_stack_api/admin/fastapi_routes.py +117 -0
  5. llama_stack_api/admin/models.py +113 -0
  6. llama_stack_api/agents/__init__.py +38 -0
  7. llama_stack_api/agents/api.py +52 -0
  8. llama_stack_api/agents/fastapi_routes.py +268 -0
  9. llama_stack_api/agents/models.py +181 -0
  10. llama_stack_api/batches/__init__.py +40 -0
  11. llama_stack_api/batches/api.py +53 -0
  12. llama_stack_api/batches/fastapi_routes.py +113 -0
  13. llama_stack_api/batches/models.py +78 -0
  14. llama_stack_api/benchmarks/__init__.py +43 -0
  15. llama_stack_api/benchmarks/api.py +39 -0
  16. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  17. llama_stack_api/benchmarks/models.py +109 -0
  18. llama_stack_api/common/__init__.py +5 -0
  19. llama_stack_api/common/content_types.py +101 -0
  20. llama_stack_api/common/errors.py +110 -0
  21. llama_stack_api/common/job_types.py +38 -0
  22. llama_stack_api/common/responses.py +77 -0
  23. llama_stack_api/common/training_types.py +47 -0
  24. llama_stack_api/common/type_system.py +146 -0
  25. llama_stack_api/connectors/__init__.py +38 -0
  26. llama_stack_api/connectors/api.py +50 -0
  27. llama_stack_api/connectors/fastapi_routes.py +103 -0
  28. llama_stack_api/connectors/models.py +103 -0
  29. llama_stack_api/conversations/__init__.py +61 -0
  30. llama_stack_api/conversations/api.py +44 -0
  31. llama_stack_api/conversations/fastapi_routes.py +177 -0
  32. llama_stack_api/conversations/models.py +245 -0
  33. llama_stack_api/datasetio/__init__.py +34 -0
  34. llama_stack_api/datasetio/api.py +42 -0
  35. llama_stack_api/datasetio/fastapi_routes.py +94 -0
  36. llama_stack_api/datasetio/models.py +48 -0
  37. llama_stack_api/datasets/__init__.py +61 -0
  38. llama_stack_api/datasets/api.py +35 -0
  39. llama_stack_api/datasets/fastapi_routes.py +104 -0
  40. llama_stack_api/datasets/models.py +152 -0
  41. llama_stack_api/datatypes.py +373 -0
  42. llama_stack_api/eval/__init__.py +55 -0
  43. llama_stack_api/eval/api.py +51 -0
  44. llama_stack_api/eval/compat.py +300 -0
  45. llama_stack_api/eval/fastapi_routes.py +126 -0
  46. llama_stack_api/eval/models.py +141 -0
  47. llama_stack_api/file_processors/__init__.py +27 -0
  48. llama_stack_api/file_processors/api.py +64 -0
  49. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  50. llama_stack_api/file_processors/models.py +42 -0
  51. llama_stack_api/files/__init__.py +35 -0
  52. llama_stack_api/files/api.py +51 -0
  53. llama_stack_api/files/fastapi_routes.py +124 -0
  54. llama_stack_api/files/models.py +107 -0
  55. llama_stack_api/inference/__init__.py +207 -0
  56. llama_stack_api/inference/api.py +93 -0
  57. llama_stack_api/inference/fastapi_routes.py +243 -0
  58. llama_stack_api/inference/models.py +1035 -0
  59. llama_stack_api/inspect_api/__init__.py +37 -0
  60. llama_stack_api/inspect_api/api.py +25 -0
  61. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  62. llama_stack_api/inspect_api/models.py +28 -0
  63. llama_stack_api/internal/__init__.py +9 -0
  64. llama_stack_api/internal/kvstore.py +28 -0
  65. llama_stack_api/internal/sqlstore.py +81 -0
  66. llama_stack_api/models/__init__.py +47 -0
  67. llama_stack_api/models/api.py +38 -0
  68. llama_stack_api/models/fastapi_routes.py +104 -0
  69. llama_stack_api/models/models.py +157 -0
  70. llama_stack_api/openai_responses.py +1494 -0
  71. llama_stack_api/post_training/__init__.py +73 -0
  72. llama_stack_api/post_training/api.py +36 -0
  73. llama_stack_api/post_training/fastapi_routes.py +116 -0
  74. llama_stack_api/post_training/models.py +339 -0
  75. llama_stack_api/prompts/__init__.py +47 -0
  76. llama_stack_api/prompts/api.py +44 -0
  77. llama_stack_api/prompts/fastapi_routes.py +163 -0
  78. llama_stack_api/prompts/models.py +177 -0
  79. llama_stack_api/providers/__init__.py +33 -0
  80. llama_stack_api/providers/api.py +16 -0
  81. llama_stack_api/providers/fastapi_routes.py +57 -0
  82. llama_stack_api/providers/models.py +24 -0
  83. llama_stack_api/rag_tool.py +168 -0
  84. llama_stack_api/resource.py +36 -0
  85. llama_stack_api/router_utils.py +160 -0
  86. llama_stack_api/safety/__init__.py +37 -0
  87. llama_stack_api/safety/api.py +29 -0
  88. llama_stack_api/safety/datatypes.py +83 -0
  89. llama_stack_api/safety/fastapi_routes.py +55 -0
  90. llama_stack_api/safety/models.py +38 -0
  91. llama_stack_api/schema_utils.py +251 -0
  92. llama_stack_api/scoring/__init__.py +66 -0
  93. llama_stack_api/scoring/api.py +35 -0
  94. llama_stack_api/scoring/fastapi_routes.py +67 -0
  95. llama_stack_api/scoring/models.py +81 -0
  96. llama_stack_api/scoring_functions/__init__.py +50 -0
  97. llama_stack_api/scoring_functions/api.py +39 -0
  98. llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
  99. llama_stack_api/scoring_functions/models.py +214 -0
  100. llama_stack_api/shields/__init__.py +41 -0
  101. llama_stack_api/shields/api.py +39 -0
  102. llama_stack_api/shields/fastapi_routes.py +104 -0
  103. llama_stack_api/shields/models.py +74 -0
  104. llama_stack_api/tools.py +226 -0
  105. llama_stack_api/validators.py +46 -0
  106. llama_stack_api/vector_io/__init__.py +88 -0
  107. llama_stack_api/vector_io/api.py +234 -0
  108. llama_stack_api/vector_io/fastapi_routes.py +447 -0
  109. llama_stack_api/vector_io/models.py +663 -0
  110. llama_stack_api/vector_stores.py +53 -0
  111. llama_stack_api/version.py +9 -0
  112. {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
  113. llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
  114. llama_stack_api-0.5.0rc1.dist-info/top_level.txt +1 -0
  115. llama_stack_api-0.4.3.dist-info/RECORD +0 -4
  116. llama_stack_api-0.4.3.dist-info/top_level.txt +0 -1
  117. {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0
--- /dev/null
+++ b/llama_stack_api/file_processors/fastapi_routes.py
@@ -0,0 +1,78 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the File Processors API.
+
+This module defines the FastAPI router for the File Processors API using standard
+FastAPI route decorators. The router is defined in the API package to keep
+all API-related code together.
+"""
+
+from typing import Annotated, Any
+
+from fastapi import APIRouter, File, Form, UploadFile
+
+from llama_stack_api.router_utils import standard_responses
+from llama_stack_api.vector_io import VectorStoreChunkingStrategy
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
+
+from .api import FileProcessors
+from .models import ProcessFileResponse
+
+
+def create_router(impl: FileProcessors) -> APIRouter:
+    """Create a FastAPI router for the File Processors API.
+
+    Args:
+        impl: The FileProcessors implementation instance
+
+    Returns:
+        APIRouter configured for the File Processors API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
+        tags=["File Processors"],
+        responses=standard_responses,
+    )
+
+    @router.post(
+        "/file-processors/process",
+        response_model=ProcessFileResponse,
+        summary="Process a file into chunks ready for vector database storage.",
+        description="Process a file into chunks ready for vector database storage. Supports direct upload via multipart form or processing files already uploaded to file storage via file_id. Exactly one of file or file_id must be provided.",
+        responses={
+            200: {"description": "The processed file chunks."},
+        },
+    )
+    async def process_file(
+        file: Annotated[
+            UploadFile | None,
+            File(description="The File object to be uploaded and processed. Mutually exclusive with file_id."),
+        ] = None,
+        file_id: Annotated[
+            str | None, Form(description="ID of file already uploaded to file storage. Mutually exclusive with file.")
+        ] = None,
+        options: Annotated[
+            dict[str, Any] | None,
+            Form(
+                description="Optional processing options. Provider-specific parameters (e.g., OCR settings, output format)."
+            ),
+        ] = None,
+        chunking_strategy: Annotated[
+            VectorStoreChunkingStrategy | None,
+            Form(description="Optional chunking strategy for splitting content into chunks."),
+        ] = None,
+    ) -> ProcessFileResponse:
+        # Pass the parameters directly to the implementation
+        # The protocol method signature expects individual parameters for multipart handling
+        return await impl.process_file(
+            file=file,
+            file_id=file_id,
+            options=options,
+            chunking_strategy=chunking_strategy,
+        )
+
+    return router
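For orientation while reviewing, here is a minimal wiring sketch showing how the new factory could be mounted on an application. It assumes this wheel is installed; StubFileProcessors is a hypothetical placeholder, not part of the package.

# Hypothetical sketch: mount the generated router on a FastAPI app.
from fastapi import FastAPI

from llama_stack_api.file_processors.fastapi_routes import create_router


class StubFileProcessors:
    """Placeholder with the shape create_router() expects from an implementation."""

    async def process_file(self, file=None, file_id=None, options=None, chunking_strategy=None):
        raise NotImplementedError  # a real provider would return a ProcessFileResponse


app = FastAPI()
app.include_router(create_router(StubFileProcessors()))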
--- /dev/null
+++ b/llama_stack_api/file_processors/models.py
@@ -0,0 +1,42 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Pydantic models for File Processors API responses.
+
+This module defines the response models for the File Processors API
+using Pydantic with Field descriptions for OpenAPI schema generation.
+
+Request models are not needed for this API since it uses multipart form data
+with individual parameters rather than a JSON request body.
+"""
+
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.schema_utils import json_schema_type
+from llama_stack_api.vector_io import Chunk
+
+
+@json_schema_type
+class ProcessFileResponse(BaseModel):
+    """Response model for file processing operation.
+
+    Returns a list of chunks ready for storage in vector databases.
+    Each chunk contains the content and metadata.
+    """
+
+    chunks: list[Chunk] = Field(..., description="Processed chunks from the file. Always returns at least one chunk.")
+
+    metadata: dict[str, Any] = Field(
+        ...,
+        description="Processing-run metadata such as processor name/version, processing_time_ms, page_count, extraction_method (e.g. docling/pypdf/ocr), confidence scores, plus provider-specific fields.",
+    )
+
+
+__all__ = [
+    "ProcessFileResponse",
+]
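Because the endpoint takes multipart form data rather than a JSON body (hence no request models above), a client call looks roughly like the sketch below. The host and the "v1alpha" path prefix are assumptions (the actual prefix comes from LLAMA_STACK_API_V1ALPHA).

import httpx

# Hypothetical client call; host, path prefix, and file name are assumptions.
response = httpx.post(
    "http://localhost:8000/v1alpha/file-processors/process",
    files={"file": ("notes.pdf", open("notes.pdf", "rb"), "application/pdf")},
)
payload = response.json()
print(len(payload["chunks"]), payload["metadata"])  # chunks plus processing-run metadata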
--- /dev/null
+++ b/llama_stack_api/files/__init__.py
@@ -0,0 +1,35 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from . import fastapi_routes
+from .api import Files
+from .models import (
+    DeleteFileRequest,
+    ExpiresAfter,
+    ListFilesRequest,
+    ListOpenAIFileResponse,
+    OpenAIFileDeleteResponse,
+    OpenAIFileObject,
+    OpenAIFilePurpose,
+    RetrieveFileContentRequest,
+    RetrieveFileRequest,
+    UploadFileRequest,
+)
+
+__all__ = [
+    "DeleteFileRequest",
+    "ExpiresAfter",
+    "fastapi_routes",
+    "Files",
+    "ListFilesRequest",
+    "ListOpenAIFileResponse",
+    "OpenAIFileDeleteResponse",
+    "OpenAIFileObject",
+    "OpenAIFilePurpose",
+    "RetrieveFileContentRequest",
+    "RetrieveFileRequest",
+    "UploadFileRequest",
+]
--- /dev/null
+++ b/llama_stack_api/files/api.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Protocol, runtime_checkable
+
+from fastapi import Response, UploadFile
+
+from .models import (
+    DeleteFileRequest,
+    ListFilesRequest,
+    ListOpenAIFileResponse,
+    OpenAIFileDeleteResponse,
+    OpenAIFileObject,
+    RetrieveFileContentRequest,
+    RetrieveFileRequest,
+    UploadFileRequest,
+)
+
+
+@runtime_checkable
+class Files(Protocol):
+    """Files API for managing file uploads and retrieval."""
+
+    async def openai_upload_file(
+        self,
+        request: UploadFileRequest,
+        file: UploadFile,
+    ) -> OpenAIFileObject: ...
+
+    async def openai_list_files(
+        self,
+        request: ListFilesRequest,
+    ) -> ListOpenAIFileResponse: ...
+
+    async def openai_retrieve_file(
+        self,
+        request: RetrieveFileRequest,
+    ) -> OpenAIFileObject: ...
+
+    async def openai_delete_file(
+        self,
+        request: DeleteFileRequest,
+    ) -> OpenAIFileDeleteResponse: ...
+
+    async def openai_retrieve_file_content(
+        self,
+        request: RetrieveFileContentRequest,
+    ) -> Response: ...
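Since Files is declared @runtime_checkable, isinstance() can verify that a candidate implementation exposes the right method names (signatures are not checked). A hypothetical sketch; InMemoryFiles is not part of the package:

from llama_stack_api.files.api import Files


class InMemoryFiles:  # hypothetical; any class with matching methods satisfies the Protocol
    async def openai_upload_file(self, request, file): ...
    async def openai_list_files(self, request): ...
    async def openai_retrieve_file(self, request): ...
    async def openai_delete_file(self, request): ...
    async def openai_retrieve_file_content(self, request): ...


assert isinstance(InMemoryFiles(), Files)  # checks method presence only, not signatures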
--- /dev/null
+++ b/llama_stack_api/files/fastapi_routes.py
@@ -0,0 +1,124 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Annotated
+
+from fastapi import APIRouter, Depends, UploadFile
+from fastapi.param_functions import File, Form
+from fastapi.responses import Response
+
+from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .api import Files
+from .models import (
+    DeleteFileRequest,
+    ExpiresAfter,
+    ListFilesRequest,
+    ListOpenAIFileResponse,
+    OpenAIFileDeleteResponse,
+    OpenAIFileObject,
+    OpenAIFilePurpose,
+    RetrieveFileContentRequest,
+    RetrieveFileRequest,
+    UploadFileRequest,
+)
+
+# Automatically generate dependency functions from Pydantic models
+# This ensures the models are the single source of truth for descriptions
+get_list_files_request = create_query_dependency(ListFilesRequest)
+get_get_files_request = create_path_dependency(RetrieveFileRequest)
+get_delete_files_request = create_path_dependency(DeleteFileRequest)
+get_retrieve_file_content_request = create_path_dependency(RetrieveFileContentRequest)
+
+
+def create_router(impl: Files) -> APIRouter:
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Files"],
+        responses=standard_responses,
+    )
+
+    @router.get(
+        "/files",
+        response_model=ListOpenAIFileResponse,
+        summary="List files",
+        description="List files",
+        responses={
+            200: {"description": "The list of files."},
+        },
+    )
+    async def list_files(
+        request: Annotated[ListFilesRequest, Depends(get_list_files_request)],
+    ) -> ListOpenAIFileResponse:
+        return await impl.openai_list_files(request)
+
+    @router.get(
+        "/files/{file_id}",
+        response_model=OpenAIFileObject,
+        summary="Get file",
+        description="Get file",
+        responses={
+            200: {"description": "The file."},
+        },
+    )
+    async def get_file(
+        request: Annotated[RetrieveFileRequest, Depends(get_get_files_request)],
+    ) -> OpenAIFileObject:
+        return await impl.openai_retrieve_file(request)
+
+    @router.delete(
+        "/files/{file_id}",
+        response_model=OpenAIFileDeleteResponse,
+        summary="Delete file",
+        description="Delete file",
+        responses={
+            200: {"description": "The file was deleted."},
+        },
+    )
+    async def delete_file(
+        request: Annotated[DeleteFileRequest, Depends(get_delete_files_request)],
+    ) -> OpenAIFileDeleteResponse:
+        return await impl.openai_delete_file(request)
+
+    @router.get(
+        "/files/{file_id}/content",
+        status_code=200,
+        summary="Retrieve file content",
+        description="Retrieve file content",
+        responses={
+            200: {
+                "description": "The raw file content as a binary response.",
+                "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Response"}}},
+            },
+        },
+    )
+    async def retrieve_file_content(
+        request: Annotated[RetrieveFileContentRequest, Depends(get_retrieve_file_content_request)],
+    ) -> Response:
+        return await impl.openai_retrieve_file_content(request)
+
+    @router.post(
+        "/files",
+        response_model=OpenAIFileObject,
+        summary="Upload file",
+        description="Upload a file.",
+        responses={
+            200: {"description": "The uploaded file."},
+        },
+    )
+    async def upload_file(
+        file: Annotated[UploadFile, File(description="The file to upload.")],
+        purpose: Annotated[OpenAIFilePurpose, Form(description="The intended purpose of the uploaded file.")],
+        expires_after: Annotated[ExpiresAfter | None, Form(description="Optional expiration settings.")] = None,
+    ) -> OpenAIFileObject:
+        request = UploadFileRequest(
+            purpose=purpose,
+            expires_after=expires_after,
+        )
+        return await impl.openai_upload_file(request, file)
+
+    return router
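A quick smoke-test sketch of the query-dependency pattern above: the list endpoint's query parameters come straight from ListFilesRequest, so a GET with query params round-trips through the generated dependency. EmptyFiles is hypothetical, and the "v1" prefix assumes LLAMA_STACK_API_V1 renders as "v1".

from fastapi import FastAPI
from fastapi.testclient import TestClient

from llama_stack_api.files import fastapi_routes
from llama_stack_api.files.models import ListOpenAIFileResponse


class EmptyFiles:  # hypothetical; only the route under test is implemented
    async def openai_list_files(self, request):
        return ListOpenAIFileResponse(data=[], has_more=False, first_id="", last_id="")


app = FastAPI()
app.include_router(fastapi_routes.create_router(EmptyFiles()))
client = TestClient(app)
assert client.get("/v1/files", params={"limit": 5}).json()["object"] == "list"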
--- /dev/null
+++ b/llama_stack_api/files/models.py
@@ -0,0 +1,107 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from enum import StrEnum
+from typing import ClassVar, Literal
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.common.responses import Order
+from llama_stack_api.schema_utils import json_schema_type
+
+
+class OpenAIFilePurpose(StrEnum):
+    """
+    Valid purpose values for OpenAI Files API.
+    """
+
+    ASSISTANTS = "assistants"
+    BATCH = "batch"
+
+
+@json_schema_type
+class OpenAIFileObject(BaseModel):
+    """OpenAI File object as defined in the OpenAI Files API."""
+
+    object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+    id: str = Field(..., description="The file identifier, which can be referenced in the API endpoints.")
+    bytes: int = Field(..., description="The size of the file, in bytes.")
+    created_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file was created.")
+    expires_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file expires.")
+    filename: str = Field(..., description="The name of the file.")
+    purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the file.")
+
+
+@json_schema_type
+class ExpiresAfter(BaseModel):
+    """Control expiration of uploaded files."""
+
+    MIN: ClassVar[int] = 3600  # 1 hour
+    MAX: ClassVar[int] = 2592000  # 30 days
+
+    anchor: Literal["created_at"] = Field(..., description="The anchor point for expiration, must be 'created_at'.")
+    seconds: int = Field(
+        ..., ge=MIN, le=MAX, description="Seconds until expiration, between 3600 (1 hour) and 2592000 (30 days)."
+    )
+
+
+@json_schema_type
+class ListOpenAIFileResponse(BaseModel):
+    """Response for listing files in OpenAI Files API."""
+
+    data: list[OpenAIFileObject] = Field(..., description="The list of files.")
+    has_more: bool = Field(..., description="Whether there are more files available beyond this page.")
+    first_id: str = Field(..., description="The ID of the first file in the list for pagination.")
+    last_id: str = Field(..., description="The ID of the last file in the list for pagination.")
+    object: Literal["list"] = Field(default="list", description="The object type, which is always 'list'.")
+
+
+@json_schema_type
+class OpenAIFileDeleteResponse(BaseModel):
+    """Response for deleting a file in OpenAI Files API."""
+
+    id: str = Field(..., description="The file identifier that was deleted.")
+    object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+    deleted: bool = Field(..., description="Whether the file was successfully deleted.")
+
+
+@json_schema_type
+class ListFilesRequest(BaseModel):
+    """Request model for listing files."""
+
+    after: str | None = Field(default=None, description="A cursor for pagination. Returns files after this ID.")
+    limit: int | None = Field(default=10000, description="Maximum number of files to return (1-10,000).")
+    order: Order | None = Field(default=Order.desc, description="Sort order by created_at timestamp ('asc' or 'desc').")
+    purpose: OpenAIFilePurpose | None = Field(default=None, description="Filter files by purpose.")
+
+
+@json_schema_type
+class RetrieveFileRequest(BaseModel):
+    """Request model for retrieving a file."""
+
+    file_id: str = Field(..., description="The ID of the file to retrieve.")
+
+
+@json_schema_type
+class DeleteFileRequest(BaseModel):
+    """Request model for deleting a file."""
+
+    file_id: str = Field(..., description="The ID of the file to delete.")
+
+
+@json_schema_type
+class RetrieveFileContentRequest(BaseModel):
+    """Request model for retrieving file content."""
+
+    file_id: str = Field(..., description="The ID of the file to retrieve content from.")
+
+
+@json_schema_type
+class UploadFileRequest(BaseModel):
+    """Request model for uploading a file."""
+
+    purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the uploaded file.")
+    expires_after: ExpiresAfter | None = Field(default=None, description="Optional expiration settings for the file.")
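The ge/le bounds on ExpiresAfter.seconds mean out-of-range values fail validation before reaching any implementation. A small sketch, assuming Pydantic v2 error types:

from pydantic import ValidationError

from llama_stack_api.files.models import ExpiresAfter

ExpiresAfter(anchor="created_at", seconds=ExpiresAfter.MIN)  # smallest allowed value (1 hour)
try:
    ExpiresAfter(anchor="created_at", seconds=60)  # below MIN, rejected by ge=MIN
except ValidationError as err:
    print(err.errors()[0]["type"])  # "greater_than_equal" under Pydantic v2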
--- /dev/null
+++ b/llama_stack_api/inference/__init__.py
@@ -0,0 +1,207 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Inference API protocol and models.
+
+This module contains the Inference protocol definition.
+Pydantic models are defined in llama_stack_api.inference.models.
+The FastAPI router is defined in llama_stack_api.inference.fastapi_routes.
+"""
+
+# Import common types for backward compatibility
+# (these were previously available from the old inference.py)
+from llama_stack_api.common.content_types import InterleavedContent
+
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+
+# Import protocol for re-export
+from .api import Inference, InferenceProvider, ModelStore
+
+# Import models for re-export
+from .models import (
+    AllowedToolsConfig,
+    Bf16QuantizationConfig,
+    ChatCompletionResponseEventType,
+    CompletionRequest,
+    CustomToolConfig,
+    EmbeddingsResponse,
+    EmbeddingTaskType,
+    Fp8QuantizationConfig,
+    FunctionToolConfig,
+    GetChatCompletionRequest,
+    GrammarResponseFormat,
+    GreedySamplingStrategy,
+    Int4QuantizationConfig,
+    JsonSchemaResponseFormat,
+    ListChatCompletionsRequest,
+    ListOpenAIChatCompletionResponse,
+    LogProbConfig,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionMessageContent,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionTextOnlyMessageContent,
+    OpenAIChatCompletionToolCall,
+    OpenAIChatCompletionToolCallFunction,
+    OpenAIChatCompletionToolChoice,
+    OpenAIChatCompletionToolChoiceAllowedTools,
+    OpenAIChatCompletionToolChoiceCustomTool,
+    OpenAIChatCompletionToolChoiceFunctionTool,
+    OpenAIChatCompletionUsage,
+    OpenAIChatCompletionUsageCompletionTokensDetails,
+    OpenAIChatCompletionUsagePromptTokensDetails,
+    OpenAIChoice,
+    OpenAIChoiceDelta,
+    OpenAIChoiceLogprobs,
+    OpenAIChunkChoice,
+    OpenAICompletion,
+    OpenAICompletionChoice,
+    OpenAICompletionLogprobs,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAICompletionWithInputMessages,
+    OpenAIDeveloperMessageParam,
+    OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+    OpenAIFile,
+    OpenAIFileFile,
+    OpenAIFinishReason,
+    OpenAIImageURL,
+    OpenAIJSONSchema,
+    OpenAIMessageParam,
+    OpenAIResponseFormatJSONObject,
+    OpenAIResponseFormatJSONSchema,
+    OpenAIResponseFormatParam,
+    OpenAIResponseFormatText,
+    OpenAISystemMessageParam,
+    OpenAITokenLogProb,
+    OpenAIToolMessageParam,
+    OpenAITopLogProb,
+    OpenAIUserMessageParam,
+    QuantizationConfig,
+    QuantizationType,
+    RerankData,
+    RerankRequest,
+    RerankResponse,
+    ResponseFormat,
+    ResponseFormatType,
+    SamplingParams,
+    SamplingStrategy,
+    SystemMessage,
+    SystemMessageBehavior,
+    TextTruncation,
+    TokenLogProbs,
+    ToolChoice,
+    ToolResponseMessage,
+    TopKSamplingStrategy,
+    TopPSamplingStrategy,
+    UserMessage,
+)
+
+__all__ = [
+    # Protocol
+    "Inference",
+    "InferenceProvider",
+    "ModelStore",
+    # Common types (for backward compatibility)
+    "InterleavedContent",
+    # Sampling
+    "GreedySamplingStrategy",
+    "TopPSamplingStrategy",
+    "TopKSamplingStrategy",
+    "SamplingStrategy",
+    "SamplingParams",
+    "LogProbConfig",
+    # Quantization
+    "QuantizationType",
+    "Fp8QuantizationConfig",
+    "Bf16QuantizationConfig",
+    "Int4QuantizationConfig",
+    "QuantizationConfig",
+    # Messages
+    "UserMessage",
+    "SystemMessage",
+    "ToolResponseMessage",
+    "ToolChoice",
+    "TokenLogProbs",
+    # Response
+    "ChatCompletionResponseEventType",
+    "ResponseFormatType",
+    "JsonSchemaResponseFormat",
+    "GrammarResponseFormat",
+    "ResponseFormat",
+    "CompletionRequest",
+    "SystemMessageBehavior",
+    "EmbeddingsResponse",
+    "RerankData",
+    "RerankResponse",
+    # OpenAI Compatibility
+    "OpenAIChatCompletionContentPartTextParam",
+    "OpenAIImageURL",
+    "OpenAIChatCompletionContentPartImageParam",
+    "OpenAIFileFile",
+    "OpenAIFile",
+    "OpenAIChatCompletionContentPartParam",
+    "OpenAIChatCompletionMessageContent",
+    "OpenAIChatCompletionTextOnlyMessageContent",
+    "OpenAIUserMessageParam",
+    "OpenAISystemMessageParam",
+    "OpenAIChatCompletionToolCallFunction",
+    "OpenAIChatCompletionToolCall",
+    "OpenAIAssistantMessageParam",
+    "OpenAIToolMessageParam",
+    "OpenAIDeveloperMessageParam",
+    "OpenAIMessageParam",
+    "OpenAIResponseFormatText",
+    "OpenAIJSONSchema",
+    "OpenAIResponseFormatJSONSchema",
+    "OpenAIResponseFormatJSONObject",
+    "OpenAIResponseFormatParam",
+    "FunctionToolConfig",
+    "OpenAIChatCompletionToolChoiceFunctionTool",
+    "CustomToolConfig",
+    "OpenAIChatCompletionToolChoiceCustomTool",
+    "AllowedToolsConfig",
+    "OpenAIChatCompletionToolChoiceAllowedTools",
+    "OpenAIChatCompletionToolChoice",
+    "OpenAITopLogProb",
+    "OpenAITokenLogProb",
+    "OpenAIChoiceLogprobs",
+    "OpenAIChoiceDelta",
+    "OpenAIChunkChoice",
+    "OpenAIChoice",
+    "OpenAIChatCompletionUsageCompletionTokensDetails",
+    "OpenAIChatCompletionUsagePromptTokensDetails",
+    "OpenAIChatCompletionUsage",
+    "OpenAIChatCompletion",
+    "OpenAIChatCompletionChunk",
+    "OpenAICompletionLogprobs",
+    "OpenAICompletionChoice",
+    "OpenAICompletion",
+    "OpenAIFinishReason",
+    "OpenAIEmbeddingData",
+    "OpenAIEmbeddingUsage",
+    "OpenAIEmbeddingsResponse",
+    "TextTruncation",
+    "EmbeddingTaskType",
+    "OpenAICompletionWithInputMessages",
+    "ListOpenAIChatCompletionResponse",
+    "OpenAICompletionRequestWithExtraBody",
+    "OpenAIChatCompletionRequestWithExtraBody",
+    "OpenAIEmbeddingsRequestWithExtraBody",
+    # Request Models
+    "ListChatCompletionsRequest",
+    "GetChatCompletionRequest",
+    "RerankRequest",
+    # Router factory module
+    "fastapi_routes",
+]
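The practical effect of these re-exports is that imports written against the package path and against the models module resolve to the same objects, so code using the old layout keeps working. A two-line sketch:

# Both paths yield the identical class object via the re-export above.
from llama_stack_api.inference import OpenAIUserMessageParam
from llama_stack_api.inference.models import OpenAIUserMessageParam as FromModels

assert OpenAIUserMessageParam is FromModels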