llama-stack-api 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack_api/__init__.py +1100 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents/__init__.py +38 -0
- llama_stack_api/agents/api.py +52 -0
- llama_stack_api/agents/fastapi_routes.py +268 -0
- llama_stack_api/agents/models.py +181 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- llama_stack_api/common/__init__.py +5 -0
- llama_stack_api/common/content_types.py +101 -0
- llama_stack_api/common/errors.py +110 -0
- llama_stack_api/common/job_types.py +38 -0
- llama_stack_api/common/responses.py +77 -0
- llama_stack_api/common/training_types.py +47 -0
- llama_stack_api/common/type_system.py +146 -0
- llama_stack_api/connectors/__init__.py +38 -0
- llama_stack_api/connectors/api.py +50 -0
- llama_stack_api/connectors/fastapi_routes.py +103 -0
- llama_stack_api/connectors/models.py +103 -0
- llama_stack_api/conversations/__init__.py +61 -0
- llama_stack_api/conversations/api.py +44 -0
- llama_stack_api/conversations/fastapi_routes.py +177 -0
- llama_stack_api/conversations/models.py +245 -0
- llama_stack_api/datasetio/__init__.py +34 -0
- llama_stack_api/datasetio/api.py +42 -0
- llama_stack_api/datasetio/fastapi_routes.py +94 -0
- llama_stack_api/datasetio/models.py +48 -0
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- llama_stack_api/datatypes.py +373 -0
- llama_stack_api/eval/__init__.py +55 -0
- llama_stack_api/eval/api.py +51 -0
- llama_stack_api/eval/compat.py +300 -0
- llama_stack_api/eval/fastapi_routes.py +126 -0
- llama_stack_api/eval/models.py +141 -0
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- llama_stack_api/inference/__init__.py +207 -0
- llama_stack_api/inference/api.py +93 -0
- llama_stack_api/inference/fastapi_routes.py +243 -0
- llama_stack_api/inference/models.py +1035 -0
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- llama_stack_api/internal/__init__.py +9 -0
- llama_stack_api/internal/kvstore.py +28 -0
- llama_stack_api/internal/sqlstore.py +81 -0
- llama_stack_api/models/__init__.py +47 -0
- llama_stack_api/models/api.py +38 -0
- llama_stack_api/models/fastapi_routes.py +104 -0
- llama_stack_api/models/models.py +157 -0
- llama_stack_api/openai_responses.py +1494 -0
- llama_stack_api/post_training/__init__.py +73 -0
- llama_stack_api/post_training/api.py +36 -0
- llama_stack_api/post_training/fastapi_routes.py +116 -0
- llama_stack_api/post_training/models.py +339 -0
- llama_stack_api/prompts/__init__.py +47 -0
- llama_stack_api/prompts/api.py +44 -0
- llama_stack_api/prompts/fastapi_routes.py +163 -0
- llama_stack_api/prompts/models.py +177 -0
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- llama_stack_api/rag_tool.py +168 -0
- llama_stack_api/resource.py +36 -0
- llama_stack_api/router_utils.py +160 -0
- llama_stack_api/safety/__init__.py +37 -0
- llama_stack_api/safety/api.py +29 -0
- llama_stack_api/safety/datatypes.py +83 -0
- llama_stack_api/safety/fastapi_routes.py +55 -0
- llama_stack_api/safety/models.py +38 -0
- llama_stack_api/schema_utils.py +251 -0
- llama_stack_api/scoring/__init__.py +66 -0
- llama_stack_api/scoring/api.py +35 -0
- llama_stack_api/scoring/fastapi_routes.py +67 -0
- llama_stack_api/scoring/models.py +81 -0
- llama_stack_api/scoring_functions/__init__.py +50 -0
- llama_stack_api/scoring_functions/api.py +39 -0
- llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
- llama_stack_api/scoring_functions/models.py +214 -0
- llama_stack_api/shields/__init__.py +41 -0
- llama_stack_api/shields/api.py +39 -0
- llama_stack_api/shields/fastapi_routes.py +104 -0
- llama_stack_api/shields/models.py +74 -0
- llama_stack_api/tools.py +226 -0
- llama_stack_api/validators.py +46 -0
- llama_stack_api/vector_io/__init__.py +88 -0
- llama_stack_api/vector_io/api.py +234 -0
- llama_stack_api/vector_io/fastapi_routes.py +447 -0
- llama_stack_api/vector_io/models.py +663 -0
- llama_stack_api/vector_stores.py +53 -0
- llama_stack_api/version.py +9 -0
- {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
- llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
- llama_stack_api-0.5.0rc1.dist-info/top_level.txt +1 -0
- llama_stack_api-0.4.3.dist-info/RECORD +0 -4
- llama_stack_api-0.4.3.dist-info/top_level.txt +0 -1
- {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0

llama_stack_api/agents/fastapi_routes.py (new file)
@@ -0,0 +1,268 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the Agents API.
+
+This module defines the FastAPI router for the Agents API using standard
+FastAPI route decorators.
+"""
+
+import asyncio
+import contextvars
+import json
+import logging  # allow-direct-logging
+from collections.abc import AsyncIterator
+from typing import Annotated, Any
+
+from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+
+from llama_stack_api.common.responses import Order
+from llama_stack_api.openai_responses import (
+    ListOpenAIResponseInputItem,
+    ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
+    OpenAIResponseObject,
+)
+from llama_stack_api.router_utils import (
+    create_path_dependency,
+    create_query_dependency,
+    standard_responses,
+)
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .api import Agents
+from .models import (
+    CreateResponseRequest,
+    DeleteResponseRequest,
+    ListResponseInputItemsRequest,
+    ListResponsesRequest,
+    ResponseItemInclude,
+    RetrieveResponseRequest,
+)
+
+logger = logging.LoggerAdapter(logging.getLogger(__name__), {"category": "agents"})
+
+
+def create_sse_event(data: Any) -> str:
+    """Create a Server-Sent Event string from data."""
+    if isinstance(data, BaseModel):
+        data = data.model_dump_json()
+    else:
+        data = json.dumps(data)
+    return f"data: {data}\n\n"
+
+
+async def sse_generator(event_gen):
+    """Convert an async generator to SSE format.
+
+    This function iterates over an async generator and formats each yielded
+    item as a Server-Sent Event.
+    """
+    try:
+        async for item in event_gen:
+            yield create_sse_event(item)
+    except asyncio.CancelledError:
+        if hasattr(event_gen, "aclose"):
+            await event_gen.aclose()
+        raise  # Re-raise to maintain proper cancellation semantics
+    except Exception as e:
+        logger.exception("Error in SSE generator")
+        exc = _http_exception_from_sse_error(e)
+        yield create_sse_event({"error": {"status_code": exc.status_code, "message": exc.detail}})
+
+
+# Automatically generate dependency functions from Pydantic models
+get_retrieve_response_request = create_path_dependency(RetrieveResponseRequest)
+get_delete_response_request = create_path_dependency(DeleteResponseRequest)
+get_list_responses_request = create_query_dependency(ListResponsesRequest)
+
+
+# Manual dependency for ListResponseInputItemsRequest since it mixes Path and Query parameters
+async def get_list_response_input_items_request(
+    response_id: Annotated[str, Path(description="The ID of the response to retrieve input items for.")],
+    after: Annotated[
+        str | None,
+        Query(description="An item ID to list items after, used for pagination."),
+    ] = None,
+    before: Annotated[
+        str | None,
+        Query(description="An item ID to list items before, used for pagination."),
+    ] = None,
+    include: Annotated[
+        list[ResponseItemInclude] | None,
+        Query(description="Additional fields to include in the response."),
+    ] = None,
+    limit: Annotated[
+        int | None,
+        Query(
+            description="A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20."
+        ),
+    ] = 20,
+    order: Annotated[Order | None, Query(description="The order to return the input items in.")] = Order.desc,
+) -> ListResponseInputItemsRequest:
+    return ListResponseInputItemsRequest(
+        response_id=response_id,
+        after=after,
+        before=before,
+        include=include,
+        limit=limit,
+        order=order,
+    )
+
+
+def _http_exception_from_value_error(exc: ValueError) -> HTTPException:
+    """Convert implementation `ValueError` into an OpenAI-compatible HTTP error.
+
+    The compatibility OpenAI client maps HTTP 400 -> `BadRequestError`.
+    The existing API surface (and integration tests) expect "not found" cases
+    to be represented as a 400, not a 404.
+    """
+
+    detail = str(exc) or "Invalid value"
+    return HTTPException(status_code=400, detail=detail)
+
+
+def _http_exception_from_sse_error(exc: Exception) -> HTTPException:
+    if isinstance(exc, HTTPException):
+        return exc
+    if isinstance(exc, ValueError):
+        return _http_exception_from_value_error(exc)
+    status_code = getattr(exc, "status_code", None)
+    if isinstance(status_code, int):
+        return HTTPException(status_code=status_code, detail=str(exc))
+    return HTTPException(status_code=500, detail="Internal server error: An unexpected error occurred.")
+
+
+def _preserve_context_for_sse(event_gen):
+    # StreamingResponse runs in a different task, losing request contextvars.
+    # create_task inside context.run captures the context at task creation.
+    context = contextvars.copy_context()
+
+    async def wrapper():
+        try:
+            while True:
+                try:
+                    task = context.run(asyncio.create_task, event_gen.__anext__())
+                    item = await task
+                except StopAsyncIteration:
+                    break
+                yield item
+        except (asyncio.CancelledError, GeneratorExit):
+            if hasattr(event_gen, "aclose"):
+                await event_gen.aclose()
+            raise
+
+    return wrapper()
+
+
+def create_router(impl: Agents) -> APIRouter:
+    """Create a FastAPI router for the Agents API.
+
+    Args:
+        impl: The Agents implementation instance
+
+    Returns:
+        APIRouter configured for the Agents API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Agents"],
+        responses=standard_responses,
+    )
+
+    @router.get(
+        "/responses/{response_id}",
+        response_model=OpenAIResponseObject,
+        summary="Get a model response.",
+        description="Get a model response.",
+    )
+    async def get_openai_response(
+        request: Annotated[RetrieveResponseRequest, Depends(get_retrieve_response_request)],
+    ) -> OpenAIResponseObject:
+        try:
+            return await impl.get_openai_response(request)
+        except ValueError as exc:
+            raise _http_exception_from_value_error(exc) from exc
+
+    @router.post(
+        "/responses",
+        summary="Create a model response.",
+        description="Create a model response.",
+        status_code=200,
+        response_model=None,
+        responses={
+            200: {
+                "description": "An OpenAIResponseObject or a stream of OpenAIResponseObjectStream.",
+                "content": {
+                    "application/json": {"schema": {"$ref": "#/components/schemas/OpenAIResponseObject"}},
+                    "text/event-stream": {"schema": {"$ref": "#/components/schemas/OpenAIResponseObjectStream"}},
+                },
+            }
+        },
+    )
+    async def create_openai_response(
+        request: Annotated[CreateResponseRequest, Body(...)],
+    ) -> OpenAIResponseObject | StreamingResponse:
+        try:
+            result = await impl.create_openai_response(request)
+        except ValueError as exc:
+            raise _http_exception_from_value_error(exc) from exc
+
+        # For streaming responses, wrap in StreamingResponse for HTTP requests.
+        # The implementation is typed to return an `AsyncIterator` for streaming.
+        if isinstance(result, AsyncIterator):
+            return StreamingResponse(
+                _preserve_context_for_sse(sse_generator(result)),
+                media_type="text/event-stream",
+            )
+
+        return result
+
+    @router.get(
+        "/responses",
+        response_model=ListOpenAIResponseObject,
+        summary="List all responses.",
+        description="List all responses.",
+    )
+    async def list_openai_responses(
+        request: Annotated[ListResponsesRequest, Depends(get_list_responses_request)],
+    ) -> ListOpenAIResponseObject:
+        try:
+            return await impl.list_openai_responses(request)
+        except ValueError as exc:
+            raise _http_exception_from_value_error(exc) from exc
+
+    @router.get(
+        "/responses/{response_id}/input_items",
+        response_model=ListOpenAIResponseInputItem,
+        summary="List input items.",
+        description="List input items.",
+    )
+    async def list_openai_response_input_items(
+        request: Annotated[ListResponseInputItemsRequest, Depends(get_list_response_input_items_request)],
+    ) -> ListOpenAIResponseInputItem:
+        try:
+            return await impl.list_openai_response_input_items(request)
+        except ValueError as exc:
+            raise _http_exception_from_value_error(exc) from exc
+
+    @router.delete(
+        "/responses/{response_id}",
+        response_model=OpenAIDeleteResponseObject,
+        summary="Delete a response.",
+        description="Delete a response.",
+    )
+    async def delete_openai_response(
+        request: Annotated[DeleteResponseRequest, Depends(get_delete_response_request)],
+    ) -> OpenAIDeleteResponseObject:
+        try:
+            return await impl.delete_openai_response(request)
+        except ValueError as exc:
+            raise _http_exception_from_value_error(exc) from exc
+
+    return router
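
The router above is only a factory: it closes over an Agents implementation rather than importing one, so the hosting application decides what actually backs the endpoints. Below is a minimal sketch (not part of the package) of mounting it on a FastAPI app, assuming a hypothetical StubAgents placeholder whose methods mirror the calls the router makes on impl.

from fastapi import FastAPI

from llama_stack_api.agents.fastapi_routes import create_router
from llama_stack_api.agents.models import (
    CreateResponseRequest,
    DeleteResponseRequest,
    ListResponseInputItemsRequest,
    ListResponsesRequest,
    RetrieveResponseRequest,
)
from llama_stack_api.openai_responses import (
    ListOpenAIResponseInputItem,
    ListOpenAIResponseObject,
    OpenAIDeleteResponseObject,
    OpenAIResponseObject,
)


class StubAgents:
    """Hypothetical stand-in; a real implementation performs inference and storage."""

    async def create_openai_response(self, request: CreateResponseRequest) -> OpenAIResponseObject:
        # A streaming implementation would return an async iterator of events instead.
        raise NotImplementedError

    async def get_openai_response(self, request: RetrieveResponseRequest) -> OpenAIResponseObject:
        raise NotImplementedError

    async def list_openai_responses(self, request: ListResponsesRequest) -> ListOpenAIResponseObject:
        raise NotImplementedError

    async def list_openai_response_input_items(
        self, request: ListResponseInputItemsRequest
    ) -> ListOpenAIResponseInputItem:
        raise NotImplementedError

    async def delete_openai_response(self, request: DeleteResponseRequest) -> OpenAIDeleteResponseObject:
        raise NotImplementedError


# Mount the generated routes; paths live under the version prefix from llama_stack_api.version.
app = FastAPI()
app.include_router(create_router(StubAgents()))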

llama_stack_api/agents/models.py (new file)
@@ -0,0 +1,181 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Pydantic models for Agents API requests and responses.
+
+This module defines the request and response models for the Agents API
+using Pydantic with Field descriptions for OpenAPI schema generation.
+"""
+
+from enum import StrEnum
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from llama_stack_api.common.responses import Order
+from llama_stack_api.openai_responses import (
+    OpenAIResponseInput,
+    OpenAIResponseInputTool,
+    OpenAIResponseInputToolChoice,
+    OpenAIResponsePrompt,
+    OpenAIResponseReasoning,
+    OpenAIResponseText,
+)
+
+
+class ResponseItemInclude(StrEnum):
+    """Specify additional output data to include in the model response."""
+
+    web_search_call_action_sources = "web_search_call.action.sources"
+    code_interpreter_call_outputs = "code_interpreter_call.outputs"
+    computer_call_output_output_image_url = "computer_call_output.output.image_url"
+    file_search_call_results = "file_search_call.results"
+    message_input_image_image_url = "message.input_image.image_url"
+    message_output_text_logprobs = "message.output_text.logprobs"
+    reasoning_encrypted_content = "reasoning.encrypted_content"
+
+
+class ResponseGuardrailSpec(BaseModel):
+    """Specification for a guardrail to apply during response generation."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    type: str
+    # TODO: more fields to be added for guardrail configuration
+
+
+ResponseGuardrail = str | ResponseGuardrailSpec
+
+
+class CreateResponseRequest(BaseModel):
+    """Request model for creating a response."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    input: str | list[OpenAIResponseInput] = Field(..., description="Input message(s) to create the response.")
+    model: str = Field(..., description="The underlying LLM used for completions.")
+    prompt: OpenAIResponsePrompt | None = Field(
+        default=None, description="Prompt object with ID, version, and variables."
+    )
+    instructions: str | None = Field(default=None, description="Instructions to guide the model's behavior.")
+    parallel_tool_calls: bool | None = Field(
+        default=True,
+        description="Whether to enable parallel tool calls.",
+    )
+    previous_response_id: str | None = Field(
+        default=None,
+        description="Optional ID of a previous response to continue from.",
+    )
+    conversation: str | None = Field(
+        default=None,
+        description="Optional ID of a conversation to add the response to.",
+    )
+    store: bool | None = Field(
+        default=True,
+        description="Whether to store the response in the database.",
+    )
+    stream: bool | None = Field(
+        default=False,
+        description="Whether to stream the response.",
+    )
+    temperature: float | None = Field(
+        default=None,
+        ge=0.0,
+        le=2.0,
+        description="Sampling temperature.",
+    )
+    text: OpenAIResponseText | None = Field(
+        default=None,
+        description="Configuration for text response generation.",
+    )
+    tool_choice: OpenAIResponseInputToolChoice | None = Field(
+        default=None,
+        description="How the model should select which tool to call (if any).",
+    )
+    tools: list[OpenAIResponseInputTool] | None = Field(
+        default=None,
+        description="List of tools available to the model.",
+    )
+    include: list[ResponseItemInclude] | None = Field(
+        default=None,
+        description="Additional fields to include in the response.",
+    )
+    max_infer_iters: int | None = Field(
+        default=10,
+        ge=1,
+        description="Maximum number of inference iterations.",
+    )
+    guardrails: list[ResponseGuardrail] | None = Field(
+        default=None,
+        description="List of guardrails to apply during response generation.",
+    )
+    max_tool_calls: int | None = Field(
+        default=None,
+        ge=1,
+        description="Max number of total calls to built-in tools that can be processed in a response.",
+    )
+    max_output_tokens: int | None = Field(
+        default=None,
+        ge=16,
+        description="Upper bound for the number of tokens that can be generated for a response.",
+    )
+    reasoning: OpenAIResponseReasoning | None = Field(
+        default=None,
+        description="Configuration for reasoning effort in responses.",
+    )
+    metadata: dict[str, str] | None = Field(
+        default=None,
+        description="Dictionary of metadata key-value pairs to attach to the response.",
+    )
+
+
+class RetrieveResponseRequest(BaseModel):
+    """Request model for retrieving a response."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    response_id: str = Field(..., min_length=1, description="The ID of the OpenAI response to retrieve.")
+
+
+class ListResponsesRequest(BaseModel):
+    """Request model for listing responses."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    after: str | None = Field(default=None, description="The ID of the last response to return.")
+    limit: int | None = Field(default=50, ge=1, le=100, description="The number of responses to return.")
+    model: str | None = Field(default=None, description="The model to filter responses by.")
+    order: Order | None = Field(
+        default=Order.desc,
+        description="The order to sort responses by when sorted by created_at ('asc' or 'desc').",
+    )
+
+
+class ListResponseInputItemsRequest(BaseModel):
+    """Request model for listing input items of a response."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    response_id: str = Field(..., min_length=1, description="The ID of the response to retrieve input items for.")
+    after: str | None = Field(default=None, description="An item ID to list items after, used for pagination.")
+    before: str | None = Field(default=None, description="An item ID to list items before, used for pagination.")
+    include: list[ResponseItemInclude] | None = Field(
+        default=None, description="Additional fields to include in the response."
+    )
+    limit: int | None = Field(
+        default=20,
+        ge=1,
+        le=100,
+        description="A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
+    )
+    order: Order | None = Field(default=Order.desc, description="The order to return the input items in.")
+
+
+class DeleteResponseRequest(BaseModel):
+    """Request model for deleting a response."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    response_id: str = Field(..., min_length=1, description="The ID of the OpenAI response to delete.")
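
All of the request models above set extra="forbid", so unrecognized fields fail validation instead of being silently dropped, and only input and model are required on CreateResponseRequest. A small illustrative sketch of constructing a request body (the model identifier and input text are placeholder values, not from the package):

from llama_stack_api.agents.models import CreateResponseRequest, ListResponsesRequest

# Only `input` and `model` are required; everything else falls back to the
# defaults declared on the model (stream=False, store=True, max_infer_iters=10, ...).
req = CreateResponseRequest(
    input="Summarize the release notes for 0.5.0rc1.",
    model="example-model-id",  # placeholder identifier
    temperature=0.2,
)
print(req.model_dump_json(exclude_none=True))

# Passing an unknown field would raise a pydantic ValidationError because of extra="forbid".
listing = ListResponsesRequest(limit=10)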

llama_stack_api/batches/__init__.py (new file)
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Batches API protocol and models.
+
+This module contains the Batches protocol definition.
+Pydantic models are defined in llama_stack_api.batches.models.
+The FastAPI router is defined in llama_stack_api.batches.fastapi_routes.
+"""
+
+from openai.types import Batch as BatchObject
+
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+
+# Import protocol for re-export
+from .api import Batches
+
+# Import models for re-export
+from .models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    ListBatchesResponse,
+    RetrieveBatchRequest,
+)
+
+__all__ = [
+    "Batches",
+    "BatchObject",
+    "CancelBatchRequest",
+    "CreateBatchRequest",
+    "ListBatchesRequest",
+    "ListBatchesResponse",
+    "RetrieveBatchRequest",
+    "fastapi_routes",
+]
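
Because of the __all__ list above, downstream code can import the protocol, the models, and the router factory from the package root rather than from the individual submodules. Illustrative only:

from llama_stack_api.batches import Batches, BatchObject, CreateBatchRequest, fastapi_routes

# The router factory is reachable through the re-exported submodule.
assert hasattr(fastapi_routes, "create_router")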

llama_stack_api/batches/api.py (new file)
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Protocol, runtime_checkable
+
+from openai.types import Batch as BatchObject
+
+from .models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    ListBatchesResponse,
+    RetrieveBatchRequest,
+)
+
+
+@runtime_checkable
+class Batches(Protocol):
+    """
+    The Batches API enables efficient processing of multiple requests in a single operation,
+    particularly useful for processing large datasets, batch evaluation workflows, and
+    cost-effective inference at scale.
+
+    The API is designed to allow use of openai client libraries for seamless integration.
+
+    This API provides the following extensions:
+    - idempotent batch creation
+
+    Note: This API is currently under active development and may undergo changes.
+    """
+
+    async def create_batch(
+        self,
+        request: CreateBatchRequest,
+    ) -> BatchObject: ...
+
+    async def retrieve_batch(
+        self,
+        request: RetrieveBatchRequest,
+    ) -> BatchObject: ...
+
+    async def cancel_batch(
+        self,
+        request: CancelBatchRequest,
+    ) -> BatchObject: ...
+
+    async def list_batches(
+        self,
+        request: ListBatchesRequest,
+    ) -> ListBatchesResponse: ...
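
Since the protocol is decorated with @runtime_checkable, an isinstance check only verifies that the matching method names exist on an object. A minimal sketch under that assumption; the stub class is hypothetical and not part of the package:

from openai.types import Batch as BatchObject

from llama_stack_api.batches.api import Batches
from llama_stack_api.batches.models import (
    CancelBatchRequest,
    CreateBatchRequest,
    ListBatchesRequest,
    ListBatchesResponse,
    RetrieveBatchRequest,
)


class StubBatches:
    """Hypothetical stand-in; a real provider would enqueue and track batch jobs."""

    async def create_batch(self, request: CreateBatchRequest) -> BatchObject:
        raise NotImplementedError

    async def retrieve_batch(self, request: RetrieveBatchRequest) -> BatchObject:
        raise NotImplementedError

    async def cancel_batch(self, request: CancelBatchRequest) -> BatchObject:
        raise NotImplementedError

    async def list_batches(self, request: ListBatchesRequest) -> ListBatchesResponse:
        raise NotImplementedError


# runtime_checkable protocols check method presence, not signatures or return types.
assert isinstance(StubBatches(), Batches)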

llama_stack_api/batches/fastapi_routes.py (new file)
@@ -0,0 +1,113 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the Batches API.
+
+This module defines the FastAPI router for the Batches API using standard
+FastAPI route decorators. The router is defined in the API package to keep
+all API-related code together.
+"""
+
+from typing import Annotated
+
+from fastapi import APIRouter, Body, Depends
+
+from llama_stack_api.batches.models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    RetrieveBatchRequest,
+)
+from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .api import Batches
+from .models import BatchObject, ListBatchesResponse
+
+# Automatically generate dependency functions from Pydantic models
+# This ensures the models are the single source of truth for descriptions
+get_retrieve_batch_request = create_path_dependency(RetrieveBatchRequest)
+get_cancel_batch_request = create_path_dependency(CancelBatchRequest)
+
+
+# Automatically generate dependency function from Pydantic model
+# This ensures the model is the single source of truth for descriptions and defaults
+get_list_batches_request = create_query_dependency(ListBatchesRequest)
+
+
+def create_router(impl: Batches) -> APIRouter:
+    """Create a FastAPI router for the Batches API.
+
+    Args:
+        impl: The Batches implementation instance
+
+    Returns:
+        APIRouter configured for the Batches API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Batches"],
+        responses=standard_responses,
+    )
+
+    @router.post(
+        "/batches",
+        response_model=BatchObject,
+        summary="Create a new batch for processing multiple API requests.",
+        description="Create a new batch for processing multiple API requests.",
+        responses={
+            200: {"description": "The created batch object."},
+            409: {"description": "Conflict: The idempotency key was previously used with different parameters."},
+        },
+    )
+    async def create_batch(
+        request: Annotated[CreateBatchRequest, Body(...)],
+    ) -> BatchObject:
+        return await impl.create_batch(request)
+
+    @router.get(
+        "/batches/{batch_id}",
+        response_model=BatchObject,
+        summary="Retrieve information about a specific batch.",
+        description="Retrieve information about a specific batch.",
+        responses={
+            200: {"description": "The batch object."},
+        },
+    )
+    async def retrieve_batch(
+        request: Annotated[RetrieveBatchRequest, Depends(get_retrieve_batch_request)],
+    ) -> BatchObject:
+        return await impl.retrieve_batch(request)
+
+    @router.post(
+        "/batches/{batch_id}/cancel",
+        response_model=BatchObject,
+        summary="Cancel a batch that is in progress.",
+        description="Cancel a batch that is in progress.",
+        responses={
+            200: {"description": "The updated batch object."},
+        },
+    )
+    async def cancel_batch(
+        request: Annotated[CancelBatchRequest, Depends(get_cancel_batch_request)],
+    ) -> BatchObject:
+        return await impl.cancel_batch(request)
+
+    @router.get(
+        "/batches",
+        response_model=ListBatchesResponse,
+        summary="List all batches for the current user.",
+        description="List all batches for the current user.",
+        responses={
+            200: {"description": "A list of batch objects."},
+        },
+    )
+    async def list_batches(
+        request: Annotated[ListBatchesRequest, Depends(get_list_batches_request)],
+    ) -> ListBatchesResponse:
+        return await impl.list_batches(request)
+
+    return router
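
As with the Agents router, create_router here takes an implementation and returns an APIRouter mounted under the version prefix from llama_stack_api.version. A wiring sketch, reusing the hypothetical StubBatches class from the protocol example above:

from fastapi import FastAPI

from llama_stack_api.batches.fastapi_routes import create_router

app = FastAPI()
app.include_router(create_router(StubBatches()))  # StubBatches: hypothetical impl from the earlier sketch

# The registered paths include POST /{LLAMA_STACK_API_V1}/batches, GET /{LLAMA_STACK_API_V1}/batches/{batch_id},
# POST /{LLAMA_STACK_API_V1}/batches/{batch_id}/cancel, and GET /{LLAMA_STACK_API_V1}/batches.
for route in app.routes:
    print(getattr(route, "path", route))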