llama-stack-api 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (117)
  1. llama_stack_api/__init__.py +1100 -0
  2. llama_stack_api/admin/__init__.py +45 -0
  3. llama_stack_api/admin/api.py +72 -0
  4. llama_stack_api/admin/fastapi_routes.py +117 -0
  5. llama_stack_api/admin/models.py +113 -0
  6. llama_stack_api/agents/__init__.py +38 -0
  7. llama_stack_api/agents/api.py +52 -0
  8. llama_stack_api/agents/fastapi_routes.py +268 -0
  9. llama_stack_api/agents/models.py +181 -0
  10. llama_stack_api/batches/__init__.py +40 -0
  11. llama_stack_api/batches/api.py +53 -0
  12. llama_stack_api/batches/fastapi_routes.py +113 -0
  13. llama_stack_api/batches/models.py +78 -0
  14. llama_stack_api/benchmarks/__init__.py +43 -0
  15. llama_stack_api/benchmarks/api.py +39 -0
  16. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  17. llama_stack_api/benchmarks/models.py +109 -0
  18. llama_stack_api/common/__init__.py +5 -0
  19. llama_stack_api/common/content_types.py +101 -0
  20. llama_stack_api/common/errors.py +110 -0
  21. llama_stack_api/common/job_types.py +38 -0
  22. llama_stack_api/common/responses.py +77 -0
  23. llama_stack_api/common/training_types.py +47 -0
  24. llama_stack_api/common/type_system.py +146 -0
  25. llama_stack_api/connectors/__init__.py +38 -0
  26. llama_stack_api/connectors/api.py +50 -0
  27. llama_stack_api/connectors/fastapi_routes.py +103 -0
  28. llama_stack_api/connectors/models.py +103 -0
  29. llama_stack_api/conversations/__init__.py +61 -0
  30. llama_stack_api/conversations/api.py +44 -0
  31. llama_stack_api/conversations/fastapi_routes.py +177 -0
  32. llama_stack_api/conversations/models.py +245 -0
  33. llama_stack_api/datasetio/__init__.py +34 -0
  34. llama_stack_api/datasetio/api.py +42 -0
  35. llama_stack_api/datasetio/fastapi_routes.py +94 -0
  36. llama_stack_api/datasetio/models.py +48 -0
  37. llama_stack_api/datasets/__init__.py +61 -0
  38. llama_stack_api/datasets/api.py +35 -0
  39. llama_stack_api/datasets/fastapi_routes.py +104 -0
  40. llama_stack_api/datasets/models.py +152 -0
  41. llama_stack_api/datatypes.py +373 -0
  42. llama_stack_api/eval/__init__.py +55 -0
  43. llama_stack_api/eval/api.py +51 -0
  44. llama_stack_api/eval/compat.py +300 -0
  45. llama_stack_api/eval/fastapi_routes.py +126 -0
  46. llama_stack_api/eval/models.py +141 -0
  47. llama_stack_api/file_processors/__init__.py +27 -0
  48. llama_stack_api/file_processors/api.py +64 -0
  49. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  50. llama_stack_api/file_processors/models.py +42 -0
  51. llama_stack_api/files/__init__.py +35 -0
  52. llama_stack_api/files/api.py +51 -0
  53. llama_stack_api/files/fastapi_routes.py +124 -0
  54. llama_stack_api/files/models.py +107 -0
  55. llama_stack_api/inference/__init__.py +207 -0
  56. llama_stack_api/inference/api.py +93 -0
  57. llama_stack_api/inference/fastapi_routes.py +243 -0
  58. llama_stack_api/inference/models.py +1035 -0
  59. llama_stack_api/inspect_api/__init__.py +37 -0
  60. llama_stack_api/inspect_api/api.py +25 -0
  61. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  62. llama_stack_api/inspect_api/models.py +28 -0
  63. llama_stack_api/internal/__init__.py +9 -0
  64. llama_stack_api/internal/kvstore.py +28 -0
  65. llama_stack_api/internal/sqlstore.py +81 -0
  66. llama_stack_api/models/__init__.py +47 -0
  67. llama_stack_api/models/api.py +38 -0
  68. llama_stack_api/models/fastapi_routes.py +104 -0
  69. llama_stack_api/models/models.py +157 -0
  70. llama_stack_api/openai_responses.py +1494 -0
  71. llama_stack_api/post_training/__init__.py +73 -0
  72. llama_stack_api/post_training/api.py +36 -0
  73. llama_stack_api/post_training/fastapi_routes.py +116 -0
  74. llama_stack_api/post_training/models.py +339 -0
  75. llama_stack_api/prompts/__init__.py +47 -0
  76. llama_stack_api/prompts/api.py +44 -0
  77. llama_stack_api/prompts/fastapi_routes.py +163 -0
  78. llama_stack_api/prompts/models.py +177 -0
  79. llama_stack_api/providers/__init__.py +33 -0
  80. llama_stack_api/providers/api.py +16 -0
  81. llama_stack_api/providers/fastapi_routes.py +57 -0
  82. llama_stack_api/providers/models.py +24 -0
  83. llama_stack_api/rag_tool.py +168 -0
  84. llama_stack_api/resource.py +36 -0
  85. llama_stack_api/router_utils.py +160 -0
  86. llama_stack_api/safety/__init__.py +37 -0
  87. llama_stack_api/safety/api.py +29 -0
  88. llama_stack_api/safety/datatypes.py +83 -0
  89. llama_stack_api/safety/fastapi_routes.py +55 -0
  90. llama_stack_api/safety/models.py +38 -0
  91. llama_stack_api/schema_utils.py +251 -0
  92. llama_stack_api/scoring/__init__.py +66 -0
  93. llama_stack_api/scoring/api.py +35 -0
  94. llama_stack_api/scoring/fastapi_routes.py +67 -0
  95. llama_stack_api/scoring/models.py +81 -0
  96. llama_stack_api/scoring_functions/__init__.py +50 -0
  97. llama_stack_api/scoring_functions/api.py +39 -0
  98. llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
  99. llama_stack_api/scoring_functions/models.py +214 -0
  100. llama_stack_api/shields/__init__.py +41 -0
  101. llama_stack_api/shields/api.py +39 -0
  102. llama_stack_api/shields/fastapi_routes.py +104 -0
  103. llama_stack_api/shields/models.py +74 -0
  104. llama_stack_api/tools.py +226 -0
  105. llama_stack_api/validators.py +46 -0
  106. llama_stack_api/vector_io/__init__.py +88 -0
  107. llama_stack_api/vector_io/api.py +234 -0
  108. llama_stack_api/vector_io/fastapi_routes.py +447 -0
  109. llama_stack_api/vector_io/models.py +663 -0
  110. llama_stack_api/vector_stores.py +53 -0
  111. llama_stack_api/version.py +9 -0
  112. {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
  113. llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
  114. llama_stack_api-0.5.0rc1.dist-info/top_level.txt +1 -0
  115. llama_stack_api-0.4.3.dist-info/RECORD +0 -4
  116. llama_stack_api-0.4.3.dist-info/top_level.txt +0 -1
  117. {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0
llama_stack_api/agents/fastapi_routes.py
@@ -0,0 +1,268 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """FastAPI router for the Agents API.
+
+ This module defines the FastAPI router for the Agents API using standard
+ FastAPI route decorators.
+ """
+
+ import asyncio
+ import contextvars
+ import json
+ import logging  # allow-direct-logging
+ from collections.abc import AsyncIterator
+ from typing import Annotated, Any
+
+ from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
+ from fastapi.responses import StreamingResponse
+ from pydantic import BaseModel
+
+ from llama_stack_api.common.responses import Order
+ from llama_stack_api.openai_responses import (
+     ListOpenAIResponseInputItem,
+     ListOpenAIResponseObject,
+     OpenAIDeleteResponseObject,
+     OpenAIResponseObject,
+ )
+ from llama_stack_api.router_utils import (
+     create_path_dependency,
+     create_query_dependency,
+     standard_responses,
+ )
+ from llama_stack_api.version import LLAMA_STACK_API_V1
+
+ from .api import Agents
+ from .models import (
+     CreateResponseRequest,
+     DeleteResponseRequest,
+     ListResponseInputItemsRequest,
+     ListResponsesRequest,
+     ResponseItemInclude,
+     RetrieveResponseRequest,
+ )
+
+ logger = logging.LoggerAdapter(logging.getLogger(__name__), {"category": "agents"})
+
+
+ def create_sse_event(data: Any) -> str:
+     """Create a Server-Sent Event string from data."""
+     if isinstance(data, BaseModel):
+         data = data.model_dump_json()
+     else:
+         data = json.dumps(data)
+     return f"data: {data}\n\n"
+
+
+ async def sse_generator(event_gen):
+     """Convert an async generator to SSE format.
+
+     This function iterates over an async generator and formats each yielded
+     item as a Server-Sent Event.
+     """
+     try:
+         async for item in event_gen:
+             yield create_sse_event(item)
+     except asyncio.CancelledError:
+         if hasattr(event_gen, "aclose"):
+             await event_gen.aclose()
+         raise  # Re-raise to maintain proper cancellation semantics
+     except Exception as e:
+         logger.exception("Error in SSE generator")
+         exc = _http_exception_from_sse_error(e)
+         yield create_sse_event({"error": {"status_code": exc.status_code, "message": exc.detail}})
+
+
+ # Automatically generate dependency functions from Pydantic models
+ get_retrieve_response_request = create_path_dependency(RetrieveResponseRequest)
+ get_delete_response_request = create_path_dependency(DeleteResponseRequest)
+ get_list_responses_request = create_query_dependency(ListResponsesRequest)
+
+
+ # Manual dependency for ListResponseInputItemsRequest since it mixes Path and Query parameters
+ async def get_list_response_input_items_request(
+     response_id: Annotated[str, Path(description="The ID of the response to retrieve input items for.")],
+     after: Annotated[
+         str | None,
+         Query(description="An item ID to list items after, used for pagination."),
+     ] = None,
+     before: Annotated[
+         str | None,
+         Query(description="An item ID to list items before, used for pagination."),
+     ] = None,
+     include: Annotated[
+         list[ResponseItemInclude] | None,
+         Query(description="Additional fields to include in the response."),
+     ] = None,
+     limit: Annotated[
+         int | None,
+         Query(
+             description="A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20."
+         ),
+     ] = 20,
+     order: Annotated[Order | None, Query(description="The order to return the input items in.")] = Order.desc,
+ ) -> ListResponseInputItemsRequest:
+     return ListResponseInputItemsRequest(
+         response_id=response_id,
+         after=after,
+         before=before,
+         include=include,
+         limit=limit,
+         order=order,
+     )
+
+
+ def _http_exception_from_value_error(exc: ValueError) -> HTTPException:
+     """Convert implementation `ValueError` into an OpenAI-compatible HTTP error.
+
+     The compatibility OpenAI client maps HTTP 400 -> `BadRequestError`.
+     The existing API surface (and integration tests) expect "not found" cases
+     to be represented as a 400, not a 404.
+     """
+
+     detail = str(exc) or "Invalid value"
+     return HTTPException(status_code=400, detail=detail)
+
+
+ def _http_exception_from_sse_error(exc: Exception) -> HTTPException:
+     if isinstance(exc, HTTPException):
+         return exc
+     if isinstance(exc, ValueError):
+         return _http_exception_from_value_error(exc)
+     status_code = getattr(exc, "status_code", None)
+     if isinstance(status_code, int):
+         return HTTPException(status_code=status_code, detail=str(exc))
+     return HTTPException(status_code=500, detail="Internal server error: An unexpected error occurred.")
+
+
+ def _preserve_context_for_sse(event_gen):
+     # StreamingResponse runs in a different task, losing request contextvars.
+     # create_task inside context.run captures the context at task creation.
+     context = contextvars.copy_context()
+
+     async def wrapper():
+         try:
+             while True:
+                 try:
+                     task = context.run(asyncio.create_task, event_gen.__anext__())
+                     item = await task
+                 except StopAsyncIteration:
+                     break
+                 yield item
+         except (asyncio.CancelledError, GeneratorExit):
+             if hasattr(event_gen, "aclose"):
+                 await event_gen.aclose()
+             raise
+
+     return wrapper()
+
+
+ def create_router(impl: Agents) -> APIRouter:
+     """Create a FastAPI router for the Agents API.
+
+     Args:
+         impl: The Agents implementation instance
+
+     Returns:
+         APIRouter configured for the Agents API
+     """
+     router = APIRouter(
+         prefix=f"/{LLAMA_STACK_API_V1}",
+         tags=["Agents"],
+         responses=standard_responses,
+     )
+
+     @router.get(
+         "/responses/{response_id}",
+         response_model=OpenAIResponseObject,
+         summary="Get a model response.",
+         description="Get a model response.",
+     )
+     async def get_openai_response(
+         request: Annotated[RetrieveResponseRequest, Depends(get_retrieve_response_request)],
+     ) -> OpenAIResponseObject:
+         try:
+             return await impl.get_openai_response(request)
+         except ValueError as exc:
+             raise _http_exception_from_value_error(exc) from exc
+
+     @router.post(
+         "/responses",
+         summary="Create a model response.",
+         description="Create a model response.",
+         status_code=200,
+         response_model=None,
+         responses={
+             200: {
+                 "description": "An OpenAIResponseObject or a stream of OpenAIResponseObjectStream.",
+                 "content": {
+                     "application/json": {"schema": {"$ref": "#/components/schemas/OpenAIResponseObject"}},
+                     "text/event-stream": {"schema": {"$ref": "#/components/schemas/OpenAIResponseObjectStream"}},
+                 },
+             }
+         },
+     )
+     async def create_openai_response(
+         request: Annotated[CreateResponseRequest, Body(...)],
+     ) -> OpenAIResponseObject | StreamingResponse:
+         try:
+             result = await impl.create_openai_response(request)
+         except ValueError as exc:
+             raise _http_exception_from_value_error(exc) from exc
+
+         # For streaming responses, wrap in StreamingResponse for HTTP requests.
+         # The implementation is typed to return an `AsyncIterator` for streaming.
+         if isinstance(result, AsyncIterator):
+             return StreamingResponse(
+                 _preserve_context_for_sse(sse_generator(result)),
+                 media_type="text/event-stream",
+             )
+
+         return result
+
+     @router.get(
+         "/responses",
+         response_model=ListOpenAIResponseObject,
+         summary="List all responses.",
+         description="List all responses.",
+     )
+     async def list_openai_responses(
+         request: Annotated[ListResponsesRequest, Depends(get_list_responses_request)],
+     ) -> ListOpenAIResponseObject:
+         try:
+             return await impl.list_openai_responses(request)
+         except ValueError as exc:
+             raise _http_exception_from_value_error(exc) from exc
+
+     @router.get(
+         "/responses/{response_id}/input_items",
+         response_model=ListOpenAIResponseInputItem,
+         summary="List input items.",
+         description="List input items.",
+     )
+     async def list_openai_response_input_items(
+         request: Annotated[ListResponseInputItemsRequest, Depends(get_list_response_input_items_request)],
+     ) -> ListOpenAIResponseInputItem:
+         try:
+             return await impl.list_openai_response_input_items(request)
+         except ValueError as exc:
+             raise _http_exception_from_value_error(exc) from exc
+
+     @router.delete(
+         "/responses/{response_id}",
+         response_model=OpenAIDeleteResponseObject,
+         summary="Delete a response.",
+         description="Delete a response.",
+     )
+     async def delete_openai_response(
+         request: Annotated[DeleteResponseRequest, Depends(get_delete_response_request)],
+     ) -> OpenAIDeleteResponseObject:
+         try:
+             return await impl.delete_openai_response(request)
+         except ValueError as exc:
+             raise _http_exception_from_value_error(exc) from exc
+
+     return router
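
As a rough orientation for readers of this new module, the sketch below (not part of the diff) shows how the generated Agents router might be mounted on a FastAPI app. NotFoundAgents is a hypothetical stub used only to illustrate the ValueError-to-HTTP-400 mapping performed by _http_exception_from_value_error; a real deployment would pass the stack's concrete Agents provider instead.

# Hedged sketch, not part of the diff: mounting the Agents router on an app.
from fastapi import FastAPI

from llama_stack_api.agents import fastapi_routes as agents_routes


class NotFoundAgents:
    """Hypothetical stub; implements only the endpoint exercised here."""

    async def get_openai_response(self, request):
        # The router converts ValueError into an HTTP 400 (OpenAI BadRequestError).
        raise ValueError(f"Response {request.response_id} not found")


app = FastAPI()
app.include_router(agents_routes.create_router(NotFoundAgents()))
# GET /<LLAMA_STACK_API_V1>/responses/{response_id} now returns a 400 with the error detail.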
llama_stack_api/agents/models.py
@@ -0,0 +1,181 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """Pydantic models for Agents API requests and responses.
+
+ This module defines the request and response models for the Agents API
+ using Pydantic with Field descriptions for OpenAPI schema generation.
+ """
+
+ from enum import StrEnum
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ from llama_stack_api.common.responses import Order
+ from llama_stack_api.openai_responses import (
+     OpenAIResponseInput,
+     OpenAIResponseInputTool,
+     OpenAIResponseInputToolChoice,
+     OpenAIResponsePrompt,
+     OpenAIResponseReasoning,
+     OpenAIResponseText,
+ )
+
+
+ class ResponseItemInclude(StrEnum):
+     """Specify additional output data to include in the model response."""
+
+     web_search_call_action_sources = "web_search_call.action.sources"
+     code_interpreter_call_outputs = "code_interpreter_call.outputs"
+     computer_call_output_output_image_url = "computer_call_output.output.image_url"
+     file_search_call_results = "file_search_call.results"
+     message_input_image_image_url = "message.input_image.image_url"
+     message_output_text_logprobs = "message.output_text.logprobs"
+     reasoning_encrypted_content = "reasoning.encrypted_content"
+
+
+ class ResponseGuardrailSpec(BaseModel):
+     """Specification for a guardrail to apply during response generation."""
+
+     model_config = ConfigDict(extra="forbid")
+
+     type: str
+     # TODO: more fields to be added for guardrail configuration
+
+
+ ResponseGuardrail = str | ResponseGuardrailSpec
+
+
+ class CreateResponseRequest(BaseModel):
+     """Request model for creating a response."""
+
+     model_config = ConfigDict(extra="forbid")
+
+     input: str | list[OpenAIResponseInput] = Field(..., description="Input message(s) to create the response.")
+     model: str = Field(..., description="The underlying LLM used for completions.")
+     prompt: OpenAIResponsePrompt | None = Field(
+         default=None, description="Prompt object with ID, version, and variables."
+     )
+     instructions: str | None = Field(default=None, description="Instructions to guide the model's behavior.")
+     parallel_tool_calls: bool | None = Field(
+         default=True,
+         description="Whether to enable parallel tool calls.",
+     )
+     previous_response_id: str | None = Field(
+         default=None,
+         description="Optional ID of a previous response to continue from.",
+     )
+     conversation: str | None = Field(
+         default=None,
+         description="Optional ID of a conversation to add the response to.",
+     )
+     store: bool | None = Field(
+         default=True,
+         description="Whether to store the response in the database.",
+     )
+     stream: bool | None = Field(
+         default=False,
+         description="Whether to stream the response.",
+     )
+     temperature: float | None = Field(
+         default=None,
+         ge=0.0,
+         le=2.0,
+         description="Sampling temperature.",
+     )
+     text: OpenAIResponseText | None = Field(
+         default=None,
+         description="Configuration for text response generation.",
+     )
+     tool_choice: OpenAIResponseInputToolChoice | None = Field(
+         default=None,
+         description="How the model should select which tool to call (if any).",
+     )
+     tools: list[OpenAIResponseInputTool] | None = Field(
+         default=None,
+         description="List of tools available to the model.",
+     )
+     include: list[ResponseItemInclude] | None = Field(
+         default=None,
+         description="Additional fields to include in the response.",
+     )
+     max_infer_iters: int | None = Field(
+         default=10,
+         ge=1,
+         description="Maximum number of inference iterations.",
+     )
+     guardrails: list[ResponseGuardrail] | None = Field(
+         default=None,
+         description="List of guardrails to apply during response generation.",
+     )
+     max_tool_calls: int | None = Field(
+         default=None,
+         ge=1,
+         description="Max number of total calls to built-in tools that can be processed in a response.",
+     )
+     max_output_tokens: int | None = Field(
+         default=None,
+         ge=16,
+         description="Upper bound for the number of tokens that can be generated for a response.",
+     )
+     reasoning: OpenAIResponseReasoning | None = Field(
+         default=None,
+         description="Configuration for reasoning effort in responses.",
+     )
+     metadata: dict[str, str] | None = Field(
+         default=None,
+         description="Dictionary of metadata key-value pairs to attach to the response.",
+     )
+
+
+ class RetrieveResponseRequest(BaseModel):
+     """Request model for retrieving a response."""
+
+     model_config = ConfigDict(extra="forbid")
+
+     response_id: str = Field(..., min_length=1, description="The ID of the OpenAI response to retrieve.")
+
+
+ class ListResponsesRequest(BaseModel):
+     """Request model for listing responses."""
+
+     model_config = ConfigDict(extra="forbid")
+
+     after: str | None = Field(default=None, description="The ID of the last response to return.")
+     limit: int | None = Field(default=50, ge=1, le=100, description="The number of responses to return.")
+     model: str | None = Field(default=None, description="The model to filter responses by.")
+     order: Order | None = Field(
+         default=Order.desc,
+         description="The order to sort responses by when sorted by created_at ('asc' or 'desc').",
+     )
+
+
+ class ListResponseInputItemsRequest(BaseModel):
+     """Request model for listing input items of a response."""
+
+     model_config = ConfigDict(extra="forbid")
+
+     response_id: str = Field(..., min_length=1, description="The ID of the response to retrieve input items for.")
+     after: str | None = Field(default=None, description="An item ID to list items after, used for pagination.")
+     before: str | None = Field(default=None, description="An item ID to list items before, used for pagination.")
+     include: list[ResponseItemInclude] | None = Field(
+         default=None, description="Additional fields to include in the response."
+     )
+     limit: int | None = Field(
+         default=20,
+         ge=1,
+         le=100,
+         description="A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
+     )
+     order: Order | None = Field(default=Order.desc, description="The order to return the input items in.")
+
+
+ class DeleteResponseRequest(BaseModel):
+     """Request model for deleting a response."""
+
+     model_config = ConfigDict(extra="forbid")
+
+     response_id: str = Field(..., min_length=1, description="The ID of the OpenAI response to delete.")
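
A short sketch (not part of the diff) of how these request models behave under validation; the model identifier used below is a placeholder, not a name defined by the package.

# Hedged sketch, not part of the diff: request-model validation behavior.
from pydantic import ValidationError

from llama_stack_api.agents.models import CreateResponseRequest, ListResponsesRequest

# Only `input` and `model` are required; the model name here is a placeholder.
req = CreateResponseRequest(input="Summarize the release notes.", model="example-model")
assert req.store is True and req.stream is False  # defaults declared on the model

# Field constraints and extra="forbid" reject out-of-range or unknown values.
try:
    ListResponsesRequest(limit=500)  # limit is capped at 100
except ValidationError as exc:
    print(f"{exc.error_count()} validation error(s)")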
llama_stack_api/batches/__init__.py
@@ -0,0 +1,40 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """Batches API protocol and models.
+
+ This module contains the Batches protocol definition.
+ Pydantic models are defined in llama_stack_api.batches.models.
+ The FastAPI router is defined in llama_stack_api.batches.fastapi_routes.
+ """
+
+ from openai.types import Batch as BatchObject
+
+ # Import fastapi_routes for router factory access
+ from . import fastapi_routes
+
+ # Import protocol for re-export
+ from .api import Batches
+
+ # Import models for re-export
+ from .models import (
+     CancelBatchRequest,
+     CreateBatchRequest,
+     ListBatchesRequest,
+     ListBatchesResponse,
+     RetrieveBatchRequest,
+ )
+
+ __all__ = [
+     "Batches",
+     "BatchObject",
+     "CancelBatchRequest",
+     "CreateBatchRequest",
+     "ListBatchesRequest",
+     "ListBatchesResponse",
+     "RetrieveBatchRequest",
+     "fastapi_routes",
+ ]
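
In practice these re-exports mean callers can import the protocol, the request models, and the router factory from the package root, for example:

# The names below are exactly those listed in __all__ above.
from llama_stack_api.batches import Batches, CreateBatchRequest, fastapi_routes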
llama_stack_api/batches/api.py
@@ -0,0 +1,53 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Protocol, runtime_checkable
+
+ from openai.types import Batch as BatchObject
+
+ from .models import (
+     CancelBatchRequest,
+     CreateBatchRequest,
+     ListBatchesRequest,
+     ListBatchesResponse,
+     RetrieveBatchRequest,
+ )
+
+
+ @runtime_checkable
+ class Batches(Protocol):
+     """
+     The Batches API enables efficient processing of multiple requests in a single operation,
+     particularly useful for processing large datasets, batch evaluation workflows, and
+     cost-effective inference at scale.
+
+     The API is designed to allow use of openai client libraries for seamless integration.
+
+     This API provides the following extensions:
+     - idempotent batch creation
+
+     Note: This API is currently under active development and may undergo changes.
+     """
+
+     async def create_batch(
+         self,
+         request: CreateBatchRequest,
+     ) -> BatchObject: ...
+
+     async def retrieve_batch(
+         self,
+         request: RetrieveBatchRequest,
+     ) -> BatchObject: ...
+
+     async def cancel_batch(
+         self,
+         request: CancelBatchRequest,
+     ) -> BatchObject: ...
+
+     async def list_batches(
+         self,
+         request: ListBatchesRequest,
+     ) -> ListBatchesResponse: ...
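
Because the protocol is decorated with @runtime_checkable, conformance can be checked structurally at runtime. A minimal sketch follows (not part of the diff; NullBatches is hypothetical and its methods are deliberately left unimplemented):

from llama_stack_api.batches import Batches


class NullBatches:
    """Hypothetical provider skeleton used only for the structural check below."""

    async def create_batch(self, request): ...
    async def retrieve_batch(self, request): ...
    async def cancel_batch(self, request): ...
    async def list_batches(self, request): ...


# runtime_checkable protocols verify method presence, not signatures or return types.
assert isinstance(NullBatches(), Batches)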
llama_stack_api/batches/fastapi_routes.py
@@ -0,0 +1,113 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """FastAPI router for the Batches API.
+
+ This module defines the FastAPI router for the Batches API using standard
+ FastAPI route decorators. The router is defined in the API package to keep
+ all API-related code together.
+ """
+
+ from typing import Annotated
+
+ from fastapi import APIRouter, Body, Depends
+
+ from llama_stack_api.batches.models import (
+     CancelBatchRequest,
+     CreateBatchRequest,
+     ListBatchesRequest,
+     RetrieveBatchRequest,
+ )
+ from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+ from llama_stack_api.version import LLAMA_STACK_API_V1
+
+ from .api import Batches
+ from .models import BatchObject, ListBatchesResponse
+
+ # Automatically generate dependency functions from Pydantic models
+ # This ensures the models are the single source of truth for descriptions
+ get_retrieve_batch_request = create_path_dependency(RetrieveBatchRequest)
+ get_cancel_batch_request = create_path_dependency(CancelBatchRequest)
+
+
+ # Automatically generate dependency function from Pydantic model
+ # This ensures the model is the single source of truth for descriptions and defaults
+ get_list_batches_request = create_query_dependency(ListBatchesRequest)
+
+
+ def create_router(impl: Batches) -> APIRouter:
+     """Create a FastAPI router for the Batches API.
+
+     Args:
+         impl: The Batches implementation instance
+
+     Returns:
+         APIRouter configured for the Batches API
+     """
+     router = APIRouter(
+         prefix=f"/{LLAMA_STACK_API_V1}",
+         tags=["Batches"],
+         responses=standard_responses,
+     )
+
+     @router.post(
+         "/batches",
+         response_model=BatchObject,
+         summary="Create a new batch for processing multiple API requests.",
+         description="Create a new batch for processing multiple API requests.",
+         responses={
+             200: {"description": "The created batch object."},
+             409: {"description": "Conflict: The idempotency key was previously used with different parameters."},
+         },
+     )
+     async def create_batch(
+         request: Annotated[CreateBatchRequest, Body(...)],
+     ) -> BatchObject:
+         return await impl.create_batch(request)
+
+     @router.get(
+         "/batches/{batch_id}",
+         response_model=BatchObject,
+         summary="Retrieve information about a specific batch.",
+         description="Retrieve information about a specific batch.",
+         responses={
+             200: {"description": "The batch object."},
+         },
+     )
+     async def retrieve_batch(
+         request: Annotated[RetrieveBatchRequest, Depends(get_retrieve_batch_request)],
+     ) -> BatchObject:
+         return await impl.retrieve_batch(request)
+
+     @router.post(
+         "/batches/{batch_id}/cancel",
+         response_model=BatchObject,
+         summary="Cancel a batch that is in progress.",
+         description="Cancel a batch that is in progress.",
+         responses={
+             200: {"description": "The updated batch object."},
+         },
+     )
+     async def cancel_batch(
+         request: Annotated[CancelBatchRequest, Depends(get_cancel_batch_request)],
+     ) -> BatchObject:
+         return await impl.cancel_batch(request)
+
+     @router.get(
+         "/batches",
+         response_model=ListBatchesResponse,
+         summary="List all batches for the current user.",
+         description="List all batches for the current user.",
+         responses={
+             200: {"description": "A list of batch objects."},
+         },
+     )
+     async def list_batches(
+         request: Annotated[ListBatchesRequest, Depends(get_list_batches_request)],
+     ) -> ListBatchesResponse:
+         return await impl.list_batches(request)
+
+     return router
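
A final sketch (not part of the diff) of wiring the Batches router into an application. NullBatches is the same kind of hypothetical placeholder as above; create_router only needs an object exposing the four async methods, and each route simply awaits the matching one with the parsed request model.

# Hedged sketch, not part of the diff: building and mounting the Batches router.
from fastapi import FastAPI

from llama_stack_api.batches import fastapi_routes as batches_routes


class NullBatches:
    """Hypothetical placeholder; routes fail with NotImplementedError until filled in."""

    async def create_batch(self, request):
        raise NotImplementedError

    async def retrieve_batch(self, request):
        raise NotImplementedError

    async def cancel_batch(self, request):
        raise NotImplementedError

    async def list_batches(self, request):
        raise NotImplementedError


app = FastAPI()
app.include_router(batches_routes.create_router(NullBatches()))
# Routes are registered under the versioned prefix, e.g. POST /<LLAMA_STACK_API_V1>/batches.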