llama-stack-api 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. llama_stack_api/__init__.py +175 -20
  2. llama_stack_api/agents/__init__.py +38 -0
  3. llama_stack_api/agents/api.py +52 -0
  4. llama_stack_api/agents/fastapi_routes.py +268 -0
  5. llama_stack_api/agents/models.py +181 -0
  6. llama_stack_api/common/errors.py +15 -0
  7. llama_stack_api/connectors/__init__.py +38 -0
  8. llama_stack_api/connectors/api.py +50 -0
  9. llama_stack_api/connectors/fastapi_routes.py +103 -0
  10. llama_stack_api/connectors/models.py +103 -0
  11. llama_stack_api/conversations/__init__.py +61 -0
  12. llama_stack_api/conversations/api.py +44 -0
  13. llama_stack_api/conversations/fastapi_routes.py +177 -0
  14. llama_stack_api/conversations/models.py +245 -0
  15. llama_stack_api/datasetio/__init__.py +34 -0
  16. llama_stack_api/datasetio/api.py +42 -0
  17. llama_stack_api/datasetio/fastapi_routes.py +94 -0
  18. llama_stack_api/datasetio/models.py +48 -0
  19. llama_stack_api/eval/__init__.py +55 -0
  20. llama_stack_api/eval/api.py +51 -0
  21. llama_stack_api/eval/compat.py +300 -0
  22. llama_stack_api/eval/fastapi_routes.py +126 -0
  23. llama_stack_api/eval/models.py +141 -0
  24. llama_stack_api/inference/__init__.py +207 -0
  25. llama_stack_api/inference/api.py +93 -0
  26. llama_stack_api/inference/fastapi_routes.py +243 -0
  27. llama_stack_api/inference/models.py +1035 -0
  28. llama_stack_api/models/__init__.py +47 -0
  29. llama_stack_api/models/api.py +38 -0
  30. llama_stack_api/models/fastapi_routes.py +104 -0
  31. llama_stack_api/{models.py → models/models.py} +65 -79
  32. llama_stack_api/openai_responses.py +32 -6
  33. llama_stack_api/post_training/__init__.py +73 -0
  34. llama_stack_api/post_training/api.py +36 -0
  35. llama_stack_api/post_training/fastapi_routes.py +116 -0
  36. llama_stack_api/{post_training.py → post_training/models.py} +55 -86
  37. llama_stack_api/prompts/__init__.py +47 -0
  38. llama_stack_api/prompts/api.py +44 -0
  39. llama_stack_api/prompts/fastapi_routes.py +163 -0
  40. llama_stack_api/prompts/models.py +177 -0
  41. llama_stack_api/resource.py +0 -1
  42. llama_stack_api/safety/__init__.py +37 -0
  43. llama_stack_api/safety/api.py +29 -0
  44. llama_stack_api/safety/datatypes.py +83 -0
  45. llama_stack_api/safety/fastapi_routes.py +55 -0
  46. llama_stack_api/safety/models.py +38 -0
  47. llama_stack_api/schema_utils.py +47 -4
  48. llama_stack_api/scoring/__init__.py +66 -0
  49. llama_stack_api/scoring/api.py +35 -0
  50. llama_stack_api/scoring/fastapi_routes.py +67 -0
  51. llama_stack_api/scoring/models.py +81 -0
  52. llama_stack_api/scoring_functions/__init__.py +50 -0
  53. llama_stack_api/scoring_functions/api.py +39 -0
  54. llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
  55. llama_stack_api/{scoring_functions.py → scoring_functions/models.py} +67 -64
  56. llama_stack_api/shields/__init__.py +41 -0
  57. llama_stack_api/shields/api.py +39 -0
  58. llama_stack_api/shields/fastapi_routes.py +104 -0
  59. llama_stack_api/shields/models.py +74 -0
  60. llama_stack_api/validators.py +46 -0
  61. llama_stack_api/vector_io/__init__.py +88 -0
  62. llama_stack_api/vector_io/api.py +234 -0
  63. llama_stack_api/vector_io/fastapi_routes.py +447 -0
  64. llama_stack_api/{vector_io.py → vector_io/models.py} +99 -377
  65. {llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
  66. llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
  67. llama_stack_api/agents.py +0 -173
  68. llama_stack_api/connectors.py +0 -146
  69. llama_stack_api/conversations.py +0 -270
  70. llama_stack_api/datasetio.py +0 -55
  71. llama_stack_api/eval.py +0 -137
  72. llama_stack_api/inference.py +0 -1169
  73. llama_stack_api/prompts.py +0 -203
  74. llama_stack_api/safety.py +0 -132
  75. llama_stack_api/scoring.py +0 -93
  76. llama_stack_api/shields.py +0 -93
  77. llama_stack_api-0.4.4.dist-info/RECORD +0 -70
  78. {llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0
  79. {llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/top_level.txt +0 -0
llama_stack_api/inference/models.py (new file)
@@ -0,0 +1,1035 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """Pydantic models for Inference API requests and responses.
+
+ This module defines all request and response models for the Inference API
+ using Pydantic with Field descriptions for OpenAPI schema generation.
+ """
+
+ from enum import Enum, StrEnum
+ from typing import Annotated, Any, Literal, Self
+
+ from pydantic import BaseModel, Field, field_validator, model_validator
+ from typing_extensions import TypedDict
+
+ from llama_stack_api.common.content_types import InterleavedContent
+ from llama_stack_api.common.responses import Order
+ from llama_stack_api.schema_utils import json_schema_type, nullable_openai_style, register_schema
+
+
+ # Sampling Strategies
+ @json_schema_type
+ class GreedySamplingStrategy(BaseModel):
+     """Greedy sampling strategy that selects the highest probability token at each step."""
+
+     type: Literal["greedy"] = Field(
+         default="greedy", description="Must be 'greedy' to identify this sampling strategy."
+     )
+
+
+ @json_schema_type
+ class TopPSamplingStrategy(BaseModel):
+     """Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."""
+
+     type: Literal["top_p"] = Field(default="top_p", description="Must be 'top_p' to identify this sampling strategy.")
+     temperature: float = Field(
+         ..., gt=0.0, le=2.0, description="Controls randomness in sampling. Higher values increase randomness."
+     )
+     top_p: float = Field(
+         default=0.95, ge=0.0, le=1.0, description="Cumulative probability threshold for nucleus sampling."
+     )
+
+
+ @json_schema_type
+ class TopKSamplingStrategy(BaseModel):
+     """Top-k sampling strategy that restricts sampling to the k most likely tokens."""
+
+     type: Literal["top_k"] = Field(default="top_k", description="Must be 'top_k' to identify this sampling strategy.")
+     top_k: int = Field(..., ge=1, description="Number of top tokens to consider for sampling. Must be at least 1.")
+
+
+ SamplingStrategy = Annotated[
+     GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy,
+     Field(discriminator="type"),
+ ]
+ register_schema(SamplingStrategy, name="SamplingStrategy")
+
+
+ @json_schema_type
+ class SamplingParams(BaseModel):
+     """Sampling parameters for text generation."""
+
+     strategy: SamplingStrategy = Field(
+         default_factory=GreedySamplingStrategy, description="The sampling strategy to use."
+     )
+     max_tokens: int | None = Field(
+         default=None,
+         ge=1,
+         description="The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length.",
+     )
+     repetition_penalty: float | None = Field(
+         default=1.0,
+         ge=-2.0,
+         le=2.0,
+         description="Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.",
+     )
+     stop: list[str] | None = Field(
+         default=None,
+         max_length=4,
+         description="Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
+     )
+
+
+ class LogProbConfig(BaseModel):
+     """Configuration for log probability output."""
+
+     top_k: int | None = Field(
+         default=0, ge=0, description="How many tokens (for each position) to return log probabilities for."
+     )
+
+
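The "type" discriminator on these strategy models drives validation of SamplingParams. A minimal sketch of that round trip (illustrative only, not part of the published wheel; it assumes the module path llama_stack_api.inference.models from the file list above):

from llama_stack_api.inference.models import SamplingParams, TopPSamplingStrategy

# "type" selects the strategy subclass during validation.
params = SamplingParams.model_validate(
    {"strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.9}, "max_tokens": 256}
)
assert isinstance(params.strategy, TopPSamplingStrategy)

# Omitting "strategy" falls back to GreedySamplingStrategy via default_factory.
print(SamplingParams().strategy.type)  # "greedy"
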
+ class QuantizationType(Enum):
+     """Type of model quantization to run inference with."""
+
+     bf16 = "bf16"
+     fp8_mixed = "fp8_mixed"
+     int4_mixed = "int4_mixed"
+
+
+ @json_schema_type
+ class Fp8QuantizationConfig(BaseModel):
+     """Configuration for 8-bit floating point quantization."""
+
+     type: Literal["fp8_mixed"] = Field(
+         default="fp8_mixed", description="Must be 'fp8_mixed' to identify this quantization type."
+     )
+
+
+ @json_schema_type
+ class Bf16QuantizationConfig(BaseModel):
+     """Configuration for BFloat16 precision (typically no quantization)."""
+
+     type: Literal["bf16"] = Field(default="bf16", description="Must be 'bf16' to identify this quantization type.")
+
+
+ @json_schema_type
+ class Int4QuantizationConfig(BaseModel):
+     """Configuration for 4-bit integer quantization."""
+
+     type: Literal["int4_mixed"] = Field(
+         default="int4_mixed", description="Must be 'int4_mixed' to identify this quantization type."
+     )
+     scheme: str | None = Field(default="int4_weight_int8_dynamic_activation", description="Quantization scheme to use.")
+
+
+ QuantizationConfig = Annotated[
+     Bf16QuantizationConfig | Fp8QuantizationConfig | Int4QuantizationConfig,
+     Field(discriminator="type"),
+ ]
+
+
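A sketch of validating the QuantizationConfig union (illustrative only; TypeAdapter is standard Pydantic v2 for validating an Annotated union directly):

from pydantic import TypeAdapter

from llama_stack_api.inference.models import Int4QuantizationConfig, QuantizationConfig

cfg = TypeAdapter(QuantizationConfig).validate_python({"type": "int4_mixed"})
assert isinstance(cfg, Int4QuantizationConfig)
print(cfg.scheme)  # "int4_weight_int8_dynamic_activation" (the default scheme)
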
+ # Message Models
+ @json_schema_type
+ class UserMessage(BaseModel):
+     """A message from the user in a chat conversation."""
+
+     role: Literal["user"] = Field(default="user", description="Must be 'user' to identify this as a user message.")
+     content: InterleavedContent = Field(
+         ..., description="The content of the message, which can include text and other media."
+     )
+     context: InterleavedContent | None = Field(
+         default=None,
+         description="This field is used internally by Llama Stack to pass RAG context. This field may be removed in the API in the future.",
+     )
+
+
+ @json_schema_type
+ class SystemMessage(BaseModel):
+     """A system message providing instructions or context to the model."""
+
+     role: Literal["system"] = Field(
+         default="system", description="Must be 'system' to identify this as a system message."
+     )
+     content: InterleavedContent = Field(
+         ...,
+         description="The content of the 'system prompt'. If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages.",
+     )
+
+
+ @json_schema_type
+ class ToolResponseMessage(BaseModel):
+     """A message representing the result of a tool invocation."""
+
+     role: Literal["tool"] = Field(default="tool", description="Must be 'tool' to identify this as a tool response.")
+     call_id: str = Field(..., description="Unique identifier for the tool call this response is for.")
+     content: InterleavedContent = Field(..., description="The response content from the tool.")
+
+
+ class ToolChoice(Enum):
+     """Whether tool use is required or automatic. This is a hint to the model which may not be followed."""
+
+     auto = "auto"
+     required = "required"
+     none = "none"
+
+
+ @json_schema_type
+ class TokenLogProbs(BaseModel):
+     """Log probabilities for generated tokens."""
+
+     logprobs_by_token: dict[str, float] = Field(
+         ..., description="Dictionary mapping tokens to their log probabilities."
+     )
+
+
+ class ChatCompletionResponseEventType(Enum):
+     """Types of events that can occur during chat completion."""
+
+     start = "start"
+     complete = "complete"
+     progress = "progress"
+
+
+ class ResponseFormatType(StrEnum):
+     """Types of formats for structured (guided) decoding."""
+
+     json_schema = "json_schema"
+     grammar = "grammar"
+
+
+ @json_schema_type
+ class JsonSchemaResponseFormat(BaseModel):
+     """Configuration for JSON schema-guided response generation."""
+
+     type: Literal[ResponseFormatType.json_schema] = Field(
+         default=ResponseFormatType.json_schema, description="Must be 'json_schema' to identify this format type."
+     )
+     json_schema: dict[str, Any] = Field(..., description="The JSON schema the response should conform to.")
+
+
+ @json_schema_type
+ class GrammarResponseFormat(BaseModel):
+     """Configuration for grammar-guided response generation."""
+
+     type: Literal[ResponseFormatType.grammar] = Field(
+         default=ResponseFormatType.grammar, description="Must be 'grammar' to identify this format type."
+     )
+     bnf: dict[str, Any] = Field(..., description="The BNF grammar specification the response should conform to.")
+
+
+ ResponseFormat = Annotated[
+     JsonSchemaResponseFormat | GrammarResponseFormat,
+     Field(discriminator="type"),
+ ]
+ register_schema(ResponseFormat, name="ResponseFormat")
+
+
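A sketch of constructing one of these formats (illustrative only; the schema dict is a hypothetical example):

from llama_stack_api.inference.models import JsonSchemaResponseFormat

fmt = JsonSchemaResponseFormat(
    json_schema={
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    }
)
print(fmt.type)  # "json_schema" (ResponseFormatType is a StrEnum)
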
+ # This is an internally used class
+ class CompletionRequest(BaseModel):
+     model: str
+     content: InterleavedContent
+     sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
+     response_format: ResponseFormat | None = None
+     stream: bool | None = False
+     logprobs: LogProbConfig | None = None
+
+
+ class SystemMessageBehavior(Enum):
+     """Config for how to override the default system prompt."""
+
+     append = "append"
+     replace = "replace"
+
+
+ @json_schema_type
+ class EmbeddingsResponse(BaseModel):
+     """Response containing generated embeddings."""
+
+     embeddings: list[list[float]] = Field(
+         ...,
+         description="List of embedding vectors, one per input content. Each embedding is a list of floats. The dimensionality is model-specific.",
+     )
+
+
+ @json_schema_type
+ class RerankData(BaseModel):
+     """A single rerank result from a reranking response."""
+
+     index: int = Field(..., ge=0, description="The original index of the document in the input list.")
+     relevance_score: float = Field(
+         ..., description="The relevance score from the model output. Higher scores indicate greater relevance."
+     )
+
+
+ @json_schema_type
+ class RerankResponse(BaseModel):
+     """Response from a reranking request."""
+
+     data: list[RerankData] = Field(
+         ..., description="List of rerank result objects, sorted by relevance score (descending)."
+     )
+
+
+ # OpenAI Compatibility Models
+ @json_schema_type
+ class OpenAIChatCompletionContentPartTextParam(BaseModel):
+     """Text content part for OpenAI-compatible chat completion messages."""
+
+     type: Literal["text"] = Field(default="text", description="Must be 'text' to identify this as text content.")
+     text: str = Field(..., description="The text content of the message.")
+
+
+ @json_schema_type
+ class OpenAIImageURL(BaseModel):
+     """Image URL specification for OpenAI-compatible chat completion messages."""
+
+     url: str = Field(..., description="URL of the image to include in the message.")
+     detail: Literal["low", "high", "auto"] | None = Field(
+         default=None, description="Level of detail for image processing. Can be 'low', 'high', or 'auto'."
+     )
+
+
+ @json_schema_type
+ class OpenAIChatCompletionContentPartImageParam(BaseModel):
+     """Image content part for OpenAI-compatible chat completion messages."""
+
+     type: Literal["image_url"] = Field(
+         default="image_url", description="Must be 'image_url' to identify this as image content."
+     )
+     image_url: OpenAIImageURL = Field(..., description="Image URL specification and processing details.")
+
+
+ @json_schema_type
+ class OpenAIFileFile(BaseModel):
+     """File reference for OpenAI-compatible file content."""
+
+     file_data: str | None = Field(default=None, description="Base64-encoded file data.")
+     file_id: str | None = Field(default=None, description="ID of an uploaded file.")
+     filename: str | None = Field(default=None, description="Name of the file.")
+
+     @model_validator(mode="after")
+     def validate_file_reference(self) -> Self:
+         """Ensure at least one of file_data or file_id is provided."""
+         if self.file_data is None and self.file_id is None:
+             raise ValueError("Either file_data or file_id must be provided")
+         return self
+
+
+ @json_schema_type
+ class OpenAIFile(BaseModel):
+     type: Literal["file"] = Field(default="file", description="Must be 'file' to identify this as file content.")
+     file: OpenAIFileFile = Field(..., description="File specification.")
+
+
+ OpenAIChatCompletionContentPartParam = Annotated[
+     OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile,
+     Field(discriminator="type"),
+ ]
+ register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
+
+
+ OpenAIChatCompletionMessageContent = str | list[OpenAIChatCompletionContentPartParam]
+
+ OpenAIChatCompletionTextOnlyMessageContent = str | list[OpenAIChatCompletionContentPartTextParam]
+
+
+ @json_schema_type
+ class OpenAIUserMessageParam(BaseModel):
+     """A message from the user in an OpenAI-compatible chat completion request."""
+
+     role: Literal["user"] = Field(default="user", description="Must be 'user' to identify this as a user message.")
+     content: OpenAIChatCompletionMessageContent = Field(
+         ..., description="The content of the message, which can include text and other media."
+     )
+     name: str | None = Field(default=None, description="The name of the user message participant.")
+
+
+ @json_schema_type
+ class OpenAISystemMessageParam(BaseModel):
+     """A system message providing instructions or context to the model."""
+
+     role: Literal["system"] = Field(
+         default="system", description="Must be 'system' to identify this as a system message."
+     )
+     content: OpenAIChatCompletionTextOnlyMessageContent = Field(
+         ...,
+         description="The content of the 'system prompt'. If multiple system messages are provided, they are concatenated.",
+     )
+     name: str | None = Field(default=None, description="The name of the system message participant.")
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolCallFunction(BaseModel):
+     """Function call details for OpenAI-compatible tool calls."""
+
+     name: str | None = Field(default=None, description="Name of the function to call.")
+     arguments: str | None = Field(default=None, description="Arguments to pass to the function as a JSON string.")
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolCall(BaseModel):
+     """Tool call specification for OpenAI-compatible chat completion responses."""
+
+     index: int | None = Field(default=None, ge=0, description="Index of the tool call in the list.")
+     id: str | None = Field(default=None, description="Unique identifier for the tool call.")
+     type: Literal["function"] = Field(
+         default="function", description="Must be 'function' to identify this as a function call."
+     )
+     function: OpenAIChatCompletionToolCallFunction | None = Field(default=None, description="Function call details.")
+
+
+ @json_schema_type
+ class OpenAIAssistantMessageParam(BaseModel):
+     """A message containing the model's (assistant) response in an OpenAI-compatible chat completion request."""
+
+     role: Literal["assistant"] = Field(
+         default="assistant", description="Must be 'assistant' to identify this as the model's response."
+     )
+     content: OpenAIChatCompletionTextOnlyMessageContent | None = Field(
+         default=None, description="The content of the model's response."
+     )
+     name: str | None = Field(default=None, description="The name of the assistant message participant.")
+     tool_calls: list[OpenAIChatCompletionToolCall] | None = Field(
+         default=None, description="List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object."
+     )
+
+
+ @json_schema_type
+ class OpenAIToolMessageParam(BaseModel):
+     """A message representing the result of a tool invocation in an OpenAI-compatible chat completion request."""
+
+     role: Literal["tool"] = Field(default="tool", description="Must be 'tool' to identify this as a tool response.")
+     tool_call_id: str = Field(..., description="Unique identifier for the tool call this response is for.")
+     content: OpenAIChatCompletionTextOnlyMessageContent = Field(..., description="The response content from the tool.")
+
+
+ @json_schema_type
+ class OpenAIDeveloperMessageParam(BaseModel):
+     """A message from the developer in an OpenAI-compatible chat completion request."""
+
+     role: Literal["developer"] = Field(
+         default="developer", description="Must be 'developer' to identify this as a developer message."
+     )
+     content: OpenAIChatCompletionTextOnlyMessageContent = Field(
+         ..., description="The content of the developer message."
+     )
+     name: str | None = Field(default=None, description="The name of the developer message participant.")
+
+
+ OpenAIMessageParam = Annotated[
+     OpenAIUserMessageParam
+     | OpenAISystemMessageParam
+     | OpenAIAssistantMessageParam
+     | OpenAIToolMessageParam
+     | OpenAIDeveloperMessageParam,
+     Field(discriminator="role"),
+ ]
+ register_schema(OpenAIMessageParam, name="OpenAIMessageParam")
+
+
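Because the union discriminates on "role", a plain dict validates straight to the right subclass. A sketch (illustrative only; the field values are hypothetical):

from pydantic import TypeAdapter

from llama_stack_api.inference.models import OpenAIMessageParam, OpenAIToolMessageParam

msg = TypeAdapter(OpenAIMessageParam).validate_python(
    {"role": "tool", "tool_call_id": "call_123", "content": "42"}
)
assert isinstance(msg, OpenAIToolMessageParam)
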
+ @json_schema_type
+ class OpenAIResponseFormatText(BaseModel):
+     """Text response format for OpenAI-compatible chat completion requests."""
+
+     type: Literal["text"] = Field(default="text", description="Must be 'text' to indicate plain text response format.")
+
+
+ @json_schema_type
+ class OpenAIJSONSchema(TypedDict, total=False):
+     """JSON schema specification for OpenAI-compatible structured response format."""
+
+     name: str
+     description: str | None
+     strict: bool | None
+
+     # Pydantic BaseModel cannot be used with a schema param, since it already
+     # has one. And, we don't want to alias here because then we have to handle
+     # that alias when converting to OpenAI params. So, to support schema,
+     # we use a TypedDict.
+     schema: dict[str, Any] | None
+
+
+ @json_schema_type
+ class OpenAIResponseFormatJSONSchema(BaseModel):
+     """JSON schema response format for OpenAI-compatible chat completion requests."""
+
+     type: Literal["json_schema"] = Field(
+         default="json_schema", description="Must be 'json_schema' to indicate structured JSON response format."
+     )
+     json_schema: OpenAIJSONSchema = Field(..., description="The JSON schema specification for the response.")
+
+
+ @json_schema_type
+ class OpenAIResponseFormatJSONObject(BaseModel):
+     """JSON object response format for OpenAI-compatible chat completion requests."""
+
+     type: Literal["json_object"] = Field(
+         default="json_object", description="Must be 'json_object' to indicate generic JSON object response format."
+     )
+
+
+ OpenAIResponseFormatParam = Annotated[
+     OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject,
+     Field(discriminator="type"),
+ ]
+ register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam")
+
+
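As the inline comment notes, OpenAIJSONSchema is a TypedDict so a plain "schema" key can be used without colliding with BaseModel. A sketch of building a structured response format with it (illustrative only; the schema contents are hypothetical):

from llama_stack_api.inference.models import OpenAIJSONSchema, OpenAIResponseFormatJSONSchema

spec: OpenAIJSONSchema = {
    "name": "math_answer",
    "strict": True,
    "schema": {"type": "object", "properties": {"result": {"type": "number"}}},
}
fmt = OpenAIResponseFormatJSONSchema(json_schema=spec)
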
+ @json_schema_type
+ class FunctionToolConfig(BaseModel):
+     name: str = Field(..., description="Name of the function.")
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceFunctionTool(BaseModel):
+     """Function tool choice for OpenAI-compatible chat completion requests."""
+
+     type: Literal["function"] = Field(
+         default="function", description="Must be 'function' to indicate function tool choice."
+     )
+     function: FunctionToolConfig = Field(..., description="The function tool configuration.")
+
+     def __init__(self, name: str):
+         super().__init__(type="function", function=FunctionToolConfig(name=name))
+
+
+ @json_schema_type
+ class CustomToolConfig(BaseModel):
+     """Custom tool configuration for OpenAI-compatible chat completion requests."""
+
+     name: str = Field(..., description="Name of the custom tool.")
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceCustomTool(BaseModel):
+     """Custom tool choice for OpenAI-compatible chat completion requests."""
+
+     type: Literal["custom"] = Field(default="custom", description="Must be 'custom' to indicate custom tool choice.")
+     custom: CustomToolConfig = Field(..., description="Custom tool configuration.")
+
+     def __init__(self, name: str):
+         super().__init__(type="custom", custom=CustomToolConfig(name=name))
+
+
+ @json_schema_type
+ class AllowedToolsConfig(BaseModel):
+     tools: list[dict[str, Any]] = Field(..., description="List of allowed tools.")
+     mode: Literal["auto", "required"] = Field(..., description="Mode for allowed tools.")
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceAllowedTools(BaseModel):
+     """Allowed tools choice for OpenAI-compatible chat completion requests."""
+
+     type: Literal["allowed_tools"] = Field(
+         default="allowed_tools", description="Must be 'allowed_tools' to indicate the allowed tools choice."
+     )
+     allowed_tools: AllowedToolsConfig = Field(..., description="Allowed tools configuration.")
+
+     def __init__(self, tools: list[dict[str, Any]], mode: Literal["auto", "required"]):
+         super().__init__(type="allowed_tools", allowed_tools=AllowedToolsConfig(tools=tools, mode=mode))
+
+
+ # Define the object-level union with discriminator
+ OpenAIChatCompletionToolChoice = Annotated[
+     OpenAIChatCompletionToolChoiceAllowedTools
+     | OpenAIChatCompletionToolChoiceFunctionTool
+     | OpenAIChatCompletionToolChoiceCustomTool,
+     Field(discriminator="type"),
+ ]
+
+ register_schema(OpenAIChatCompletionToolChoice, name="OpenAIChatCompletionToolChoice")
+
+
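Note that the tool-choice wrappers override __init__, so they can be built from just a name (or tools plus mode) rather than the nested wire shape. A sketch (illustrative only; the tool names are hypothetical):

from llama_stack_api.inference.models import (
    OpenAIChatCompletionToolChoiceAllowedTools,
    OpenAIChatCompletionToolChoiceFunctionTool,
)

choice = OpenAIChatCompletionToolChoiceFunctionTool(name="get_weather")
print(choice.model_dump())  # {'type': 'function', 'function': {'name': 'get_weather'}}

allowed = OpenAIChatCompletionToolChoiceAllowedTools(
    tools=[{"type": "function", "function": {"name": "get_weather"}}], mode="auto"
)
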
+ @json_schema_type
+ class OpenAITopLogProb(BaseModel):
+     """The top log probability for a token from an OpenAI-compatible chat completion response."""
+
+     token: str = Field(..., description="The token.")
+     bytes: list[int] | None = Field(default=None, description="The bytes for the token.")
+     logprob: float = Field(..., description="The log probability of the token.")
+
+
+ @json_schema_type
+ class OpenAITokenLogProb(BaseModel):
+     """The log probability for a token from an OpenAI-compatible chat completion response."""
+
+     token: str = Field(..., description="The token.")
+     bytes: list[int] | None = Field(default=None, description="The bytes for the token.")
+     logprob: float = Field(..., description="The log probability of the token.")
+     top_logprobs: list[OpenAITopLogProb] | None = Field(
+         default=None, description="The top log probabilities for the token."
+     )
+
+
+ @json_schema_type
+ class OpenAIChoiceLogprobs(BaseModel):
+     """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."""
+
+     content: list[OpenAITokenLogProb] | None = Field(
+         default=None, description="The log probabilities for the tokens in the message."
+     )
+     refusal: list[OpenAITokenLogProb] | None = Field(
+         default=None, description="The log probabilities for the refusal tokens."
+     )
+
+
+ @json_schema_type
+ class OpenAIChoiceDelta(BaseModel):
+     """A delta from an OpenAI-compatible chat completion streaming response."""
+
+     content: str | None = Field(default=None, description="The content of the delta.")
+     refusal: str | None = Field(default=None, description="The refusal of the delta.")
+     role: str | None = Field(default=None, description="The role of the delta.")
+     tool_calls: list[OpenAIChatCompletionToolCall] | None = Field(
+         default=None, description="The tool calls of the delta."
+     )
+     reasoning_content: str | None = Field(
+         default=None, description="The reasoning content from the model (for o1/o3 models)."
+     )
+
+
+ # OpenAI finish_reason enum values
+ OpenAIFinishReason = Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
+ register_schema(OpenAIFinishReason, name="OpenAIFinishReason")
+
+
+ @json_schema_type
+ class OpenAIChunkChoice(BaseModel):
+     """A chunk choice from an OpenAI-compatible chat completion streaming response."""
+
+     delta: OpenAIChoiceDelta = Field(..., description="The delta from the chunk.")
+     finish_reason: OpenAIFinishReason | None = Field(
+         default=None, json_schema_extra=nullable_openai_style, description="The reason the model stopped generating."
+     )
+     index: int = Field(..., ge=0, description="The index of the choice.")
+     logprobs: OpenAIChoiceLogprobs | None = Field(
+         default=None, description="The log probabilities for the tokens in the message."
+     )
+
+
+ @json_schema_type
+ class OpenAIChoice(BaseModel):
+     """A choice from an OpenAI-compatible chat completion response."""
+
+     message: OpenAIMessageParam = Field(..., description="The message from the model.")
+     finish_reason: OpenAIFinishReason = Field(..., description="The reason the model stopped generating.")
+     index: int = Field(..., ge=0, description="The index of the choice.")
+     logprobs: OpenAIChoiceLogprobs | None = Field(
+         default=None, description="The log probabilities for the tokens in the message."
+     )
+
+
+ class OpenAIChatCompletionUsageCompletionTokensDetails(BaseModel):
+     """Token details for output tokens in OpenAI chat completion usage."""
+
+     reasoning_tokens: int | None = Field(
+         default=None, ge=0, description="Number of tokens used for reasoning (o1/o3 models)."
+     )
+
+
+ class OpenAIChatCompletionUsagePromptTokensDetails(BaseModel):
+     """Token details for prompt tokens in OpenAI chat completion usage."""
+
+     cached_tokens: int | None = Field(default=None, ge=0, description="Number of tokens retrieved from cache.")
+
+
+ @json_schema_type
+ class OpenAIChatCompletionUsage(BaseModel):
+     """Usage information for OpenAI chat completion."""
+
+     prompt_tokens: int = Field(..., ge=0, description="Number of tokens in the prompt.")
+     completion_tokens: int = Field(..., ge=0, description="Number of tokens in the completion.")
+     total_tokens: int = Field(..., ge=0, description="Total tokens used (prompt + completion).")
+     prompt_tokens_details: OpenAIChatCompletionUsagePromptTokensDetails | None = Field(
+         default=None, description="Detailed breakdown of input token usage."
+     )
+     completion_tokens_details: OpenAIChatCompletionUsageCompletionTokensDetails | None = Field(
+         default=None, description="Detailed breakdown of output token usage."
+     )
+
+
+ @json_schema_type
+ class OpenAIChatCompletion(BaseModel):
+     """Response from an OpenAI-compatible chat completion request."""
+
+     id: str = Field(..., description="The ID of the chat completion.")
+     choices: list[OpenAIChoice] = Field(..., min_length=1, description="List of choices.")
+     object: Literal["chat.completion"] = Field(default="chat.completion", description="The object type.")
+     created: int = Field(..., ge=0, description="The Unix timestamp in seconds when the chat completion was created.")
+     model: str = Field(..., description="The model that was used to generate the chat completion.")
+     usage: OpenAIChatCompletionUsage | None = Field(
+         default=None, description="Token usage information for the completion."
+     )
+
+
+ @json_schema_type
+ class OpenAIChatCompletionChunk(BaseModel):
+     """Chunk from a streaming response to an OpenAI-compatible chat completion request."""
+
+     id: str = Field(..., description="The ID of the chat completion.")
+     choices: list[OpenAIChunkChoice] = Field(..., description="List of choices.")
+     object: Literal["chat.completion.chunk"] = Field(default="chat.completion.chunk", description="The object type.")
+     created: int = Field(..., ge=0, description="The Unix timestamp in seconds when the chat completion was created.")
+     model: str = Field(..., description="The model that was used to generate the chat completion.")
+     usage: OpenAIChatCompletionUsage | None = Field(
+         default=None, description="Token usage information (typically included in final chunk with stream_options)."
+     )
+
+
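A sketch of consuming chunks from a stream (illustrative only; "stream" stands in for any iterable of chunks, which this module does not itself provide):

from collections.abc import Iterable

from llama_stack_api.inference.models import OpenAIChatCompletionChunk

def collect_text(stream: Iterable[OpenAIChatCompletionChunk]) -> str:
    # Deltas carry incremental content; concatenate the non-empty pieces.
    parts: list[str] = []
    for chunk in stream:
        for choice in chunk.choices:
            if choice.delta.content:
                parts.append(choice.delta.content)
    return "".join(parts)
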
+ @json_schema_type
+ class OpenAICompletionLogprobs(BaseModel):
+     """The log probabilities for the tokens from an OpenAI-compatible completion response."""
+
+     text_offset: list[int] | None = Field(default=None, description="The offset of the token in the text.")
+     token_logprobs: list[float] | None = Field(default=None, description="The log probabilities for the tokens.")
+     tokens: list[str] | None = Field(default=None, description="The tokens.")
+     top_logprobs: list[dict[str, float]] | None = Field(
+         default=None, description="The top log probabilities for the tokens."
+     )
+
+
+ @json_schema_type
+ class OpenAICompletionChoice(BaseModel):
+     """A choice from an OpenAI-compatible completion response."""
+
+     finish_reason: OpenAIFinishReason = Field(..., description="The reason the model stopped generating.")
+     text: str = Field(..., description="The text of the choice.")
+     index: int = Field(..., ge=0, description="The index of the choice.")
+     logprobs: OpenAIChoiceLogprobs | None = Field(
+         default=None, description="The log probabilities for the tokens in the choice."
+     )
+
+
+ @json_schema_type
+ class OpenAICompletion(BaseModel):
+     """Response from an OpenAI-compatible completion request."""
+
+     id: str = Field(..., description="The ID of the completion.")
+     choices: list[OpenAICompletionChoice] = Field(..., min_length=1, description="List of choices.")
+     created: int = Field(..., ge=0, description="The Unix timestamp in seconds when the completion was created.")
+     model: str = Field(..., description="The model that was used to generate the completion.")
+     object: Literal["text_completion"] = Field(default="text_completion", description="The object type.")
+
+
+ @json_schema_type
+ class OpenAIEmbeddingData(BaseModel):
+     """A single embedding data object from an OpenAI-compatible embeddings response."""
+
+     object: Literal["embedding"] = Field(default="embedding", description="The object type.")
+     # TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData
+     embedding: list[float] | str = Field(
+         ...,
+         description="The embedding vector as a list of floats (when encoding_format='float') or as a base64-encoded string.",
+     )
+     index: int = Field(..., ge=0, description="The index of the embedding in the input list.")
+
+
+ @json_schema_type
+ class OpenAIEmbeddingUsage(BaseModel):
+     """Usage information for an OpenAI-compatible embeddings response."""
+
+     prompt_tokens: int = Field(..., description="The number of tokens in the input.")
+     total_tokens: int = Field(..., description="The total number of tokens used.")
+
+
+ @json_schema_type
+ class OpenAIEmbeddingsResponse(BaseModel):
+     """Response from an OpenAI-compatible embeddings request."""
+
+     object: Literal["list"] = Field(default="list", description="The object type.")
+     data: list[OpenAIEmbeddingData] = Field(..., min_length=1, description="List of embedding data objects.")
+     model: str = Field(..., description="The model that was used to generate the embeddings.")
+     usage: OpenAIEmbeddingUsage = Field(..., description="Usage information.")
+
+
+ class TextTruncation(Enum):
+     """Config for how to truncate text for embedding when text is longer than the model's max sequence length."""
+
+     none = "none"
+     start = "start"
+     end = "end"
+
+
+ class EmbeddingTaskType(Enum):
+     """How is the embedding being used? This is only supported by asymmetric embedding models."""
+
+     query = "query"
+     document = "document"
+
+
+ class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
+     input_messages: list[OpenAIMessageParam] = Field(
+         ..., description="The input messages used to generate this completion."
+     )
+
+
+ @json_schema_type
+ class ListOpenAIChatCompletionResponse(BaseModel):
+     """Response from listing OpenAI-compatible chat completions."""
+
+     data: list[OpenAICompletionWithInputMessages] = Field(
+         ..., description="List of chat completion objects with their input messages."
+     )
+     has_more: bool = Field(..., description="Whether there are more completions available beyond this list.")
+     first_id: str = Field(..., description="ID of the first completion in this list.")
+     last_id: str = Field(..., description="ID of the last completion in this list.")
+     object: Literal["list"] = Field(default="list", description="Must be 'list' to identify this as a list response.")
+
+
+ # extra_body can be accessed via .model_extra
+ @json_schema_type
+ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
+     """Request parameters for OpenAI-compatible completion endpoint."""
+
+     # Standard OpenAI completion parameters
+     model: str = Field(..., description="The identifier of the model to use.")
+     prompt: str | list[str] | list[int] | list[list[int]] = Field(
+         ..., description="The prompt to generate a completion for."
+     )
+     best_of: int | None = Field(default=None, ge=1, description="The number of candidate completions to generate server-side, returning the best one.")
+     echo: bool | None = Field(default=None, description="Whether to echo the prompt.")
+     frequency_penalty: float | None = Field(
+         default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens."
+     )
+     logit_bias: dict[str, float] | None = Field(default=None, description="The logit bias to use.")
+     logprobs: bool | None = Field(default=None, description="Whether to return log probabilities of the output tokens.")
+     max_tokens: int | None = Field(default=None, ge=1, description="The maximum number of tokens to generate.")
+     n: int | None = Field(default=None, ge=1, description="The number of completions to generate.")
+     presence_penalty: float | None = Field(
+         default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens."
+     )
+     seed: int | None = Field(default=None, description="The seed to use.")
+     stop: str | list[str] | None = Field(default=None, description="The stop tokens to use.")
+     stream: bool | None = Field(default=None, description="Whether to stream the response.")
+     stream_options: dict[str, Any] | None = Field(default=None, description="The stream options to use.")
+     temperature: float | None = Field(default=None, ge=0.0, le=2.0, description="The temperature to use.")
+     top_p: float | None = Field(default=None, ge=0.0, le=1.0, description="The top p to use.")
+     user: str | None = Field(default=None, description="The user to use.")
+     suffix: str | None = Field(default=None, description="The suffix that should be appended to the completion.")
+
+
+ # extra_body can be accessed via .model_extra
+ @json_schema_type
+ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
+     """Request parameters for OpenAI-compatible chat completion endpoint."""
+
+     # Standard OpenAI chat completion parameters
+     model: str = Field(..., description="The identifier of the model to use.")
+     messages: Annotated[
+         list[OpenAIMessageParam], Field(..., min_length=1, description="List of messages in the conversation.")
+     ]
+     frequency_penalty: float | None = Field(
+         default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens."
+     )
+     function_call: str | dict[str, Any] | None = Field(default=None, description="The function call to use.")
+     functions: list[dict[str, Any]] | None = Field(default=None, description="List of functions to use.")
+     logit_bias: dict[str, float] | None = Field(default=None, description="The logit bias to use.")
+     logprobs: bool | None = Field(default=None, description="Whether to return log probabilities of the output tokens.")
+     max_completion_tokens: int | None = Field(
+         default=None, ge=1, description="The maximum number of tokens to generate."
+     )
+     max_tokens: int | None = Field(default=None, ge=1, description="The maximum number of tokens to generate.")
+     n: int | None = Field(default=None, ge=1, description="The number of completions to generate.")
+     parallel_tool_calls: bool | None = Field(default=None, description="Whether to parallelize tool calls.")
+     presence_penalty: float | None = Field(
+         default=None, ge=-2.0, le=2.0, description="The penalty for repeated tokens."
+     )
+     response_format: OpenAIResponseFormatParam | None = Field(default=None, description="The response format to use.")
+     seed: int | None = Field(default=None, description="The seed to use.")
+     stop: str | list[str] | None = Field(default=None, description="The stop tokens to use.")
+     stream: bool | None = Field(default=None, description="Whether to stream the response.")
+     stream_options: dict[str, Any] | None = Field(default=None, description="The stream options to use.")
+     temperature: float | None = Field(default=None, ge=0.0, le=2.0, description="The temperature to use.")
+     tool_choice: str | dict[str, Any] | None = Field(default=None, description="The tool choice to use.")
+     tools: list[dict[str, Any]] | None = Field(default=None, description="The tools to use.")
+     top_logprobs: int | None = Field(default=None, ge=0, description="The number of top log probabilities to return per output token.")
+     top_p: float | None = Field(default=None, ge=0.0, le=1.0, description="The top p to use.")
+     user: str | None = Field(default=None, description="The user to use.")
+     reasoning_effort: Literal["none", "minimal", "low", "medium", "high", "xhigh"] | None = Field(
+         default=None, description="The effort level for reasoning models."
+     )
+
+
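Since the request models declare extra="allow", provider-specific parameters pass through and surface on .model_extra, as the comment above the class notes. A sketch (illustrative only; the model id and the guided_choice key are hypothetical):

from llama_stack_api.inference.models import (
    OpenAIChatCompletionRequestWithExtraBody,
    OpenAIUserMessageParam,
)

req = OpenAIChatCompletionRequestWithExtraBody(
    model="example-chat-model",  # hypothetical model identifier
    messages=[OpenAIUserMessageParam(content="Hello!")],
    temperature=0.2,
    guided_choice=["yes", "no"],  # hypothetical provider-specific extra-body field
)
print(req.model_extra)  # {'guided_choice': ['yes', 'no']}
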
+ def _remove_null_from_anyof(schema: dict) -> None:
+     """Remove null type from anyOf if present in JSON schema."""
+     if "anyOf" in schema:
+         schema["anyOf"] = [s for s in schema["anyOf"] if s.get("type") != "null"]
+         if len(schema["anyOf"]) == 1:
+             only_schema = schema["anyOf"][0]
+             del schema["anyOf"]
+             schema.update(only_schema)
+
+
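A sketch of the hook's effect on a typical "anyOf with null" fragment that Pydantic emits for optional fields (illustrative only):

# Given the generated fragment for an optional int field...
schema = {"anyOf": [{"type": "integer", "minimum": 1}, {"type": "null"}]}
_remove_null_from_anyof(schema)
# ...the null branch is dropped and the single remaining schema is inlined:
print(schema)  # {'type': 'integer', 'minimum': 1}
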
+ @json_schema_type
+ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
+     """Request parameters for OpenAI-compatible embeddings endpoint."""
+
+     model: str = Field(..., description="The identifier of the model to use.")
+     input: (
+         Annotated[str, Field(title="string")]
+         | Annotated[list[str], Field(title="Array of strings", min_length=1, max_length=2048)]
+         | Annotated[list[int], Field(title="Array of tokens", min_length=1, max_length=2048)]
+         | Annotated[
+             list[Annotated[list[int], Field(min_length=1)]],
+             Field(title="Array of token arrays", min_length=1, max_length=2048),
+         ]
+     ) = Field(..., description="Input text to embed, encoded as a string or array of tokens.")
+     encoding_format: Literal["float", "base64"] = Field(
+         default="float", description="The format to return the embeddings in."
+     )
+     dimensions: int | None = Field(
+         default=None,
+         ge=1,
+         description="The number of dimensions for output embeddings.",
+         json_schema_extra=_remove_null_from_anyof,
+     )
+     user: str | None = Field(
+         default=None,
+         description="A unique identifier representing your end-user.",
+         json_schema_extra=_remove_null_from_anyof,
+     )
+
+     @field_validator("dimensions", "user", mode="before")
+     @classmethod
+     def _reject_explicit_null(cls, v: Any, info: Any) -> Any:
+         """Reject explicit null values to match OpenAI API behavior."""
+         if v is None:
+             raise ValueError(f"{info.field_name} cannot be null")
+         return v
+
+
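The mode="before" validator only runs on values actually supplied, so omitting dimensions or user is fine while an explicit null is rejected, matching OpenAI behavior. A sketch (illustrative only; the model id is hypothetical):

from pydantic import ValidationError

from llama_stack_api.inference.models import OpenAIEmbeddingsRequestWithExtraBody

# Omitting "dimensions" validates normally.
OpenAIEmbeddingsRequestWithExtraBody(model="example-embedding-model", input="hello")

# An explicit null is rejected by _reject_explicit_null.
try:
    OpenAIEmbeddingsRequestWithExtraBody.model_validate(
        {"model": "example-embedding-model", "input": "hello", "dimensions": None}
    )
except ValidationError as exc:
    print(exc.errors()[0]["msg"])  # "Value error, dimensions cannot be null"
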
+ # New Request Models for Inference Endpoints
+ @json_schema_type
+ class ListChatCompletionsRequest(BaseModel):
+     """Request model for listing chat completions."""
+
+     after: str | None = Field(default=None, description="Identifier of the last chat completion from the previous page (pagination cursor).")
+     limit: int | None = Field(default=20, ge=1, description="The maximum number of chat completions to return.")
+     model: str | None = Field(default=None, description="The model to filter by.")
+     order: Order | None = Field(
+         default=Order.desc,
+         description='The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".',
+     )
+
+
+ @json_schema_type
+ class GetChatCompletionRequest(BaseModel):
+     """Request model for getting a chat completion."""
+
+     completion_id: str = Field(..., description="ID of the chat completion.")
+
+
+ @json_schema_type
+ class RerankRequest(BaseModel):
+     """Request model for reranking documents."""
+
+     model: str = Field(..., description="The identifier of the reranking model to use.")
+     query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam = Field(
+         ...,
+         description="The search query to rank items against. Can be a string, text content part, or image content part.",
+     )
+     items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam] = Field(
+         ...,
+         min_length=1,
+         description="List of items to rerank. Each item can be a string, text content part, or image content part.",
+     )
+     max_num_results: int | None = Field(
+         default=None, ge=1, description="Maximum number of results to return. Default: returns all."
+     )
+
+
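A sketch of a rerank request built from these models (illustrative only; the model id and documents are hypothetical):

from llama_stack_api.inference.models import RerankRequest

req = RerankRequest(
    model="example-reranker",  # hypothetical model identifier
    query="What is the capital of France?",
    items=["Paris is the capital of France.", "Berlin is in Germany."],
    max_num_results=1,
)
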
+ __all__ = [
+     # Sampling
+     "GreedySamplingStrategy",
+     "TopPSamplingStrategy",
+     "TopKSamplingStrategy",
+     "SamplingStrategy",
+     "SamplingParams",
+     "LogProbConfig",
+     # Quantization
+     "QuantizationType",
+     "Fp8QuantizationConfig",
+     "Bf16QuantizationConfig",
+     "Int4QuantizationConfig",
+     "QuantizationConfig",
+     # Messages
+     "UserMessage",
+     "SystemMessage",
+     "ToolResponseMessage",
+     "ToolChoice",
+     "TokenLogProbs",
+     # Response
+     "ChatCompletionResponseEventType",
+     "ResponseFormatType",
+     "JsonSchemaResponseFormat",
+     "GrammarResponseFormat",
+     "ResponseFormat",
+     "CompletionRequest",
+     "SystemMessageBehavior",
+     "EmbeddingsResponse",
+     "RerankData",
+     "RerankResponse",
+     # OpenAI Compatibility
+     "OpenAIChatCompletionContentPartTextParam",
+     "OpenAIImageURL",
+     "OpenAIChatCompletionContentPartImageParam",
+     "OpenAIFileFile",
+     "OpenAIFile",
+     "OpenAIChatCompletionContentPartParam",
+     "OpenAIChatCompletionMessageContent",
+     "OpenAIChatCompletionTextOnlyMessageContent",
+     "OpenAIUserMessageParam",
+     "OpenAISystemMessageParam",
+     "OpenAIChatCompletionToolCallFunction",
+     "OpenAIChatCompletionToolCall",
+     "OpenAIAssistantMessageParam",
+     "OpenAIToolMessageParam",
+     "OpenAIDeveloperMessageParam",
+     "OpenAIMessageParam",
+     "OpenAIResponseFormatText",
+     "OpenAIJSONSchema",
+     "OpenAIResponseFormatJSONSchema",
+     "OpenAIResponseFormatJSONObject",
+     "OpenAIResponseFormatParam",
+     "FunctionToolConfig",
+     "OpenAIChatCompletionToolChoiceFunctionTool",
+     "CustomToolConfig",
+     "OpenAIChatCompletionToolChoiceCustomTool",
+     "AllowedToolsConfig",
+     "OpenAIChatCompletionToolChoiceAllowedTools",
+     "OpenAIChatCompletionToolChoice",
+     "OpenAITopLogProb",
+     "OpenAITokenLogProb",
+     "OpenAIChoiceLogprobs",
+     "OpenAIChoiceDelta",
+     "OpenAIChunkChoice",
+     "OpenAIChoice",
+     "OpenAIChatCompletionUsageCompletionTokensDetails",
+     "OpenAIChatCompletionUsagePromptTokensDetails",
+     "OpenAIChatCompletionUsage",
+     "OpenAIChatCompletion",
+     "OpenAIChatCompletionChunk",
+     "OpenAICompletionLogprobs",
+     "OpenAICompletionChoice",
+     "OpenAICompletion",
+     "OpenAIFinishReason",
+     "OpenAIEmbeddingData",
+     "OpenAIEmbeddingUsage",
+     "OpenAIEmbeddingsResponse",
+     "TextTruncation",
+     "EmbeddingTaskType",
+     "OpenAICompletionWithInputMessages",
+     "ListOpenAIChatCompletionResponse",
+     "OpenAICompletionRequestWithExtraBody",
+     "OpenAIChatCompletionRequestWithExtraBody",
+     "OpenAIEmbeddingsRequestWithExtraBody",
+     # Request Models
+     "ListChatCompletionsRequest",
+     "GetChatCompletionRequest",
+     "RerankRequest",
+ ]