llama-stack-api 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. llama_stack_api/__init__.py +175 -20
  2. llama_stack_api/agents/__init__.py +38 -0
  3. llama_stack_api/agents/api.py +52 -0
  4. llama_stack_api/agents/fastapi_routes.py +268 -0
  5. llama_stack_api/agents/models.py +181 -0
  6. llama_stack_api/common/errors.py +15 -0
  7. llama_stack_api/connectors/__init__.py +38 -0
  8. llama_stack_api/connectors/api.py +50 -0
  9. llama_stack_api/connectors/fastapi_routes.py +103 -0
  10. llama_stack_api/connectors/models.py +103 -0
  11. llama_stack_api/conversations/__init__.py +61 -0
  12. llama_stack_api/conversations/api.py +44 -0
  13. llama_stack_api/conversations/fastapi_routes.py +177 -0
  14. llama_stack_api/conversations/models.py +245 -0
  15. llama_stack_api/datasetio/__init__.py +34 -0
  16. llama_stack_api/datasetio/api.py +42 -0
  17. llama_stack_api/datasetio/fastapi_routes.py +94 -0
  18. llama_stack_api/datasetio/models.py +48 -0
  19. llama_stack_api/eval/__init__.py +55 -0
  20. llama_stack_api/eval/api.py +51 -0
  21. llama_stack_api/eval/compat.py +300 -0
  22. llama_stack_api/eval/fastapi_routes.py +126 -0
  23. llama_stack_api/eval/models.py +141 -0
  24. llama_stack_api/inference/__init__.py +207 -0
  25. llama_stack_api/inference/api.py +93 -0
  26. llama_stack_api/inference/fastapi_routes.py +243 -0
  27. llama_stack_api/inference/models.py +1035 -0
  28. llama_stack_api/models/__init__.py +47 -0
  29. llama_stack_api/models/api.py +38 -0
  30. llama_stack_api/models/fastapi_routes.py +104 -0
  31. llama_stack_api/{models.py → models/models.py} +65 -79
  32. llama_stack_api/openai_responses.py +32 -6
  33. llama_stack_api/post_training/__init__.py +73 -0
  34. llama_stack_api/post_training/api.py +36 -0
  35. llama_stack_api/post_training/fastapi_routes.py +116 -0
  36. llama_stack_api/{post_training.py → post_training/models.py} +55 -86
  37. llama_stack_api/prompts/__init__.py +47 -0
  38. llama_stack_api/prompts/api.py +44 -0
  39. llama_stack_api/prompts/fastapi_routes.py +163 -0
  40. llama_stack_api/prompts/models.py +177 -0
  41. llama_stack_api/resource.py +0 -1
  42. llama_stack_api/safety/__init__.py +37 -0
  43. llama_stack_api/safety/api.py +29 -0
  44. llama_stack_api/safety/datatypes.py +83 -0
  45. llama_stack_api/safety/fastapi_routes.py +55 -0
  46. llama_stack_api/safety/models.py +38 -0
  47. llama_stack_api/schema_utils.py +47 -4
  48. llama_stack_api/scoring/__init__.py +66 -0
  49. llama_stack_api/scoring/api.py +35 -0
  50. llama_stack_api/scoring/fastapi_routes.py +67 -0
  51. llama_stack_api/scoring/models.py +81 -0
  52. llama_stack_api/scoring_functions/__init__.py +50 -0
  53. llama_stack_api/scoring_functions/api.py +39 -0
  54. llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
  55. llama_stack_api/{scoring_functions.py → scoring_functions/models.py} +67 -64
  56. llama_stack_api/shields/__init__.py +41 -0
  57. llama_stack_api/shields/api.py +39 -0
  58. llama_stack_api/shields/fastapi_routes.py +104 -0
  59. llama_stack_api/shields/models.py +74 -0
  60. llama_stack_api/validators.py +46 -0
  61. llama_stack_api/vector_io/__init__.py +88 -0
  62. llama_stack_api/vector_io/api.py +234 -0
  63. llama_stack_api/vector_io/fastapi_routes.py +447 -0
  64. llama_stack_api/{vector_io.py → vector_io/models.py} +99 -377
  65. {llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
  66. llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
  67. llama_stack_api/agents.py +0 -173
  68. llama_stack_api/connectors.py +0 -146
  69. llama_stack_api/conversations.py +0 -270
  70. llama_stack_api/datasetio.py +0 -55
  71. llama_stack_api/eval.py +0 -137
  72. llama_stack_api/inference.py +0 -1169
  73. llama_stack_api/prompts.py +0 -203
  74. llama_stack_api/safety.py +0 -132
  75. llama_stack_api/scoring.py +0 -93
  76. llama_stack_api/shields.py +0 -93
  77. llama_stack_api-0.4.4.dist-info/RECORD +0 -70
  78. {llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0
  79. {llama_stack_api-0.4.4.dist-info → llama_stack_api-0.5.0rc1.dist-info}/top_level.txt +0 -0
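The restructuring above replaces the flat top-level modules (inference.py, agents.py, safety.py, and so on) with per-API subpackages, each split into api.py, models.py, and fastapi_routes.py. A minimal sketch of what this likely means for imports follows; the exact re-export surface is an assumption based on the large additions to llama_stack_api/__init__.py and the new per-package __init__.py files:

    # 0.4.4 (flat module, deleted in this release):
    from llama_stack_api.inference import OpenAIChatCompletion

    # 0.5.0rc1 (hypothetical paths implied by the file list):
    from llama_stack_api.inference.models import OpenAIChatCompletion
    # or, if the package re-exports its models at the top level:
    from llama_stack_api import OpenAIChatCompletion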
llama_stack_api/inference.py (deleted)
@@ -1,1169 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.
-
- from collections.abc import AsyncIterator
- from enum import Enum, StrEnum
- from typing import (
-     Annotated,
-     Any,
-     Literal,
-     Protocol,
-     runtime_checkable,
- )
-
- from fastapi import Body
- from pydantic import BaseModel, Field
- from typing_extensions import TypedDict
-
- from llama_stack_api.common.content_types import InterleavedContent
- from llama_stack_api.common.responses import (
-     Order,
- )
- from llama_stack_api.models import Model
- from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
- from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-
-
- @json_schema_type
- class GreedySamplingStrategy(BaseModel):
-     """Greedy sampling strategy that selects the highest probability token at each step.
-
-     :param type: Must be "greedy" to identify this sampling strategy
-     """
-
-     type: Literal["greedy"] = "greedy"
-
-
- @json_schema_type
- class TopPSamplingStrategy(BaseModel):
-     """Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p.
-
-     :param type: Must be "top_p" to identify this sampling strategy
-     :param temperature: Controls randomness in sampling. Higher values increase randomness
-     :param top_p: Cumulative probability threshold for nucleus sampling. Defaults to 0.95
-     """
-
-     type: Literal["top_p"] = "top_p"
-     temperature: float | None = Field(..., gt=0.0)
-     top_p: float | None = 0.95
-
-
- @json_schema_type
- class TopKSamplingStrategy(BaseModel):
-     """Top-k sampling strategy that restricts sampling to the k most likely tokens.
-
-     :param type: Must be "top_k" to identify this sampling strategy
-     :param top_k: Number of top tokens to consider for sampling. Must be at least 1
-     """
-
-     type: Literal["top_k"] = "top_k"
-     top_k: int = Field(..., ge=1)
-
-
- SamplingStrategy = Annotated[
-     GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy,
-     Field(discriminator="type"),
- ]
- register_schema(SamplingStrategy, name="SamplingStrategy")
-
-
- @json_schema_type
- class SamplingParams(BaseModel):
-     """Sampling parameters.
-
-     :param strategy: The sampling strategy.
-     :param max_tokens: The maximum number of tokens that can be generated in the completion. The token count of
-         your prompt plus max_tokens cannot exceed the model's context length.
-     :param repetition_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens
-         based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
-     :param stop: Up to 4 sequences where the API will stop generating further tokens.
-         The returned text will not contain the stop sequence.
-     """
-
-     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
-
-     max_tokens: int | None = None
-     repetition_penalty: float | None = 1.0
-     stop: list[str] | None = None
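# --- Illustrative aside (not part of the diff): using the models above. ---
# SamplingStrategy is a discriminated union keyed on "type", so pydantic
# dispatches plain dicts to the right strategy class during validation.
example_params = SamplingParams(
    strategy=TopPSamplingStrategy(temperature=0.7, top_p=0.9),
    max_tokens=256,
    stop=["</answer>"],
)
# Round-tripping through a dict: {"type": "top_k"} selects TopKSamplingStrategy.
example_params = SamplingParams.model_validate(
    {"strategy": {"type": "top_k", "top_k": 40}, "max_tokens": 128}
)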
-
-
- class LogProbConfig(BaseModel):
-     """Configuration for requesting token log probabilities.
-
-     :param top_k: How many tokens (for each position) to return log probabilities for.
-     """
-
-     top_k: int | None = 0
-
-
- class QuantizationType(Enum):
-     """Type of model quantization to run inference with.
-
-     :cvar bf16: BFloat16; typically this means _no_ quantization
-     :cvar fp8_mixed: 8-bit floating point quantization with mixed precision
-     :cvar int4_mixed: 4-bit integer quantization with mixed precision
-     """
-
-     bf16 = "bf16"
-     fp8_mixed = "fp8_mixed"
-     int4_mixed = "int4_mixed"
-
-
- @json_schema_type
- class Fp8QuantizationConfig(BaseModel):
-     """Configuration for 8-bit floating point quantization.
-
-     :param type: Must be "fp8_mixed" to identify this quantization type
-     """
-
-     type: Literal["fp8_mixed"] = "fp8_mixed"
-
-
- @json_schema_type
- class Bf16QuantizationConfig(BaseModel):
-     """Configuration for BFloat16 precision (typically no quantization).
-
-     :param type: Must be "bf16" to identify this quantization type
-     """
-
-     type: Literal["bf16"] = "bf16"
-
-
- @json_schema_type
- class Int4QuantizationConfig(BaseModel):
-     """Configuration for 4-bit integer quantization.
-
-     :param type: Must be "int4_mixed" to identify this quantization type
-     :param scheme: Quantization scheme to use. Defaults to "int4_weight_int8_dynamic_activation"
-     """
-
-     type: Literal["int4_mixed"] = "int4_mixed"
-     scheme: str | None = "int4_weight_int8_dynamic_activation"
-
-
- QuantizationConfig = Annotated[
-     Bf16QuantizationConfig | Fp8QuantizationConfig | Int4QuantizationConfig,
-     Field(discriminator="type"),
- ]
-
-
- @json_schema_type
- class UserMessage(BaseModel):
-     """A message from the user in a chat conversation.
-
-     :param role: Must be "user" to identify this as a user message
-     :param content: The content of the message, which can include text and other media
-     :param context: (Optional) This field is used internally by Llama Stack to pass RAG context. This field may be removed in the API in the future.
-     """
-
-     role: Literal["user"] = "user"
-     content: InterleavedContent
-     context: InterleavedContent | None = None
-
-
- @json_schema_type
- class SystemMessage(BaseModel):
-     """A system message providing instructions or context to the model.
-
-     :param role: Must be "system" to identify this as a system message
-     :param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions).
-     """
-
-     role: Literal["system"] = "system"
-     content: InterleavedContent
-
-
- @json_schema_type
- class ToolResponseMessage(BaseModel):
-     """A message representing the result of a tool invocation.
-
-     :param role: Must be "tool" to identify this as a tool response
-     :param call_id: Unique identifier for the tool call this response is for
-     :param content: The response content from the tool
-     """
-
-     role: Literal["tool"] = "tool"
-     call_id: str
-     content: InterleavedContent
-
-
- class ToolChoice(Enum):
-     """Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.
-
-     :cvar auto: The model may use tools if it determines that is appropriate.
-     :cvar required: The model must use tools.
-     :cvar none: The model must not use tools.
-     """
-
-     auto = "auto"
-     required = "required"
-     none = "none"
-
-
- @json_schema_type
- class TokenLogProbs(BaseModel):
-     """Log probabilities for generated tokens.
-
-     :param logprobs_by_token: Dictionary mapping tokens to their log probabilities
-     """
-
-     logprobs_by_token: dict[str, float]
-
-
- class ChatCompletionResponseEventType(Enum):
-     """Types of events that can occur during chat completion.
-
-     :cvar start: Inference has started
-     :cvar complete: Inference is complete and a full response is available
-     :cvar progress: Inference is in progress and a partial response is available
-     """
-
-     start = "start"
-     complete = "complete"
-     progress = "progress"
-
-
- class ResponseFormatType(StrEnum):
-     """Types of formats for structured (guided) decoding.
-
-     :cvar json_schema: Response should conform to a JSON schema. In a Python SDK, this is often a `pydantic` model.
-     :cvar grammar: Response should conform to a BNF grammar
-     """
-
-     json_schema = "json_schema"
-     grammar = "grammar"
-
-
- @json_schema_type
- class JsonSchemaResponseFormat(BaseModel):
-     """Configuration for JSON schema-guided response generation.
-
-     :param type: Must be "json_schema" to identify this format type
-     :param json_schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model.
-     """
-
-     type: Literal[ResponseFormatType.json_schema] = ResponseFormatType.json_schema
-     json_schema: dict[str, Any]
-
-
- @json_schema_type
- class GrammarResponseFormat(BaseModel):
-     """Configuration for grammar-guided response generation.
-
-     :param type: Must be "grammar" to identify this format type
-     :param bnf: The BNF grammar specification the response should conform to
-     """
-
-     type: Literal[ResponseFormatType.grammar] = ResponseFormatType.grammar
-     bnf: dict[str, Any]
-
-
- ResponseFormat = Annotated[
-     JsonSchemaResponseFormat | GrammarResponseFormat,
-     Field(discriminator="type"),
- ]
- register_schema(ResponseFormat, name="ResponseFormat")
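# --- Illustrative aside (not part of the diff): JSON-schema guided decoding. ---
# As the docstrings note, the schema often comes from a pydantic model.
class CityAnswer(BaseModel):  # hypothetical example model
    city: str
    confidence: float

example_format = JsonSchemaResponseFormat(json_schema=CityAnswer.model_json_schema())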
-
-
- # This is an internally used class
- class CompletionRequest(BaseModel):
-     model: str
-     content: InterleavedContent
-     sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
-     response_format: ResponseFormat | None = None
-     stream: bool | None = False
-     logprobs: LogProbConfig | None = None
-
-
- class SystemMessageBehavior(Enum):
-     """Config for how to override the default system prompt.
-
-     :cvar append: Appends the provided system message to the default system prompt:
-         https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2/#-function-definitions-in-the-system-prompt-
-     :cvar replace: Replaces the default system prompt with the provided system message. The system message can include the string
-         '{{function_definitions}}' to indicate where the function definitions should be inserted.
-     """
-
-     append = "append"
-     replace = "replace"
-
-
- @json_schema_type
- class EmbeddingsResponse(BaseModel):
-     """Response containing generated embeddings.
-
-     :param embeddings: List of embedding vectors, one per input content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}
-     """
-
-     embeddings: list[list[float]]
-
-
- @json_schema_type
- class RerankData(BaseModel):
-     """A single rerank result from a reranking response.
-
-     :param index: The original index of the document in the input list
-     :param relevance_score: The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance.
-     """
-
-     index: int
-     relevance_score: float
-
-
- @json_schema_type
- class RerankResponse(BaseModel):
-     """Response from a reranking request.
-
-     :param data: List of rerank result objects, sorted by relevance score (descending)
-     """
-
-     data: list[RerankData]
-
-
- @json_schema_type
- class OpenAIChatCompletionContentPartTextParam(BaseModel):
-     """Text content part for OpenAI-compatible chat completion messages.
-
-     :param type: Must be "text" to identify this as text content
-     :param text: The text content of the message
-     """
-
-     type: Literal["text"] = "text"
-     text: str
-
-
- @json_schema_type
- class OpenAIImageURL(BaseModel):
-     """Image URL specification for OpenAI-compatible chat completion messages.
-
-     :param url: URL of the image to include in the message
-     :param detail: (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
-     """
-
-     url: str
-     detail: str | None = None
-
-
- @json_schema_type
- class OpenAIChatCompletionContentPartImageParam(BaseModel):
-     """Image content part for OpenAI-compatible chat completion messages.
-
-     :param type: Must be "image_url" to identify this as image content
-     :param image_url: Image URL specification and processing details
-     """
-
-     type: Literal["image_url"] = "image_url"
-     image_url: OpenAIImageURL
-
-
- @json_schema_type
- class OpenAIFileFile(BaseModel):
-     file_data: str | None = None
-     file_id: str | None = None
-     filename: str | None = None
-
-
- @json_schema_type
- class OpenAIFile(BaseModel):
-     type: Literal["file"] = "file"
-     file: OpenAIFileFile
-
-
- OpenAIChatCompletionContentPartParam = Annotated[
-     OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam | OpenAIFile,
-     Field(discriminator="type"),
- ]
- register_schema(OpenAIChatCompletionContentPartParam, name="OpenAIChatCompletionContentPartParam")
-
-
- OpenAIChatCompletionMessageContent = str | list[OpenAIChatCompletionContentPartParam]
-
- OpenAIChatCompletionTextOnlyMessageContent = str | list[OpenAIChatCompletionContentPartTextParam]
-
-
- @json_schema_type
- class OpenAIUserMessageParam(BaseModel):
-     """A message from the user in an OpenAI-compatible chat completion request.
-
-     :param role: Must be "user" to identify this as a user message
-     :param content: The content of the message, which can include text and other media
-     :param name: (Optional) The name of the user message participant.
-     """
-
-     role: Literal["user"] = "user"
-     content: OpenAIChatCompletionMessageContent
-     name: str | None = None
-
-
- @json_schema_type
- class OpenAISystemMessageParam(BaseModel):
-     """A system message providing instructions or context to the model.
-
-     :param role: Must be "system" to identify this as a system message
-     :param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions).
-     :param name: (Optional) The name of the system message participant.
-     """
-
-     role: Literal["system"] = "system"
-     content: OpenAIChatCompletionTextOnlyMessageContent
-     name: str | None = None
-
-
- @json_schema_type
- class OpenAIChatCompletionToolCallFunction(BaseModel):
-     """Function call details for OpenAI-compatible tool calls.
-
-     :param name: (Optional) Name of the function to call
-     :param arguments: (Optional) Arguments to pass to the function as a JSON string
-     """
-
-     name: str | None = None
-     arguments: str | None = None
-
-
- @json_schema_type
- class OpenAIChatCompletionToolCall(BaseModel):
-     """Tool call specification for OpenAI-compatible chat completion responses.
-
-     :param index: (Optional) Index of the tool call in the list
-     :param id: (Optional) Unique identifier for the tool call
-     :param type: Must be "function" to identify this as a function call
-     :param function: (Optional) Function call details
-     """
-
-     index: int | None = None
-     id: str | None = None
-     type: Literal["function"] = "function"
-     function: OpenAIChatCompletionToolCallFunction | None = None
-
-
- @json_schema_type
- class OpenAIAssistantMessageParam(BaseModel):
-     """A message containing the model's (assistant) response in an OpenAI-compatible chat completion request.
-
-     :param role: Must be "assistant" to identify this as the model's response
-     :param content: The content of the model's response
-     :param name: (Optional) The name of the assistant message participant.
-     :param tool_calls: List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object.
-     """
-
-     role: Literal["assistant"] = "assistant"
-     content: OpenAIChatCompletionTextOnlyMessageContent | None = None
-     name: str | None = None
-     tool_calls: list[OpenAIChatCompletionToolCall] | None = None
-
-
- @json_schema_type
- class OpenAIToolMessageParam(BaseModel):
-     """A message representing the result of a tool invocation in an OpenAI-compatible chat completion request.
-
-     :param role: Must be "tool" to identify this as a tool response
-     :param tool_call_id: Unique identifier for the tool call this response is for
-     :param content: The response content from the tool
-     """
-
-     role: Literal["tool"] = "tool"
-     tool_call_id: str
-     content: OpenAIChatCompletionTextOnlyMessageContent
-
-
- @json_schema_type
- class OpenAIDeveloperMessageParam(BaseModel):
-     """A message from the developer in an OpenAI-compatible chat completion request.
-
-     :param role: Must be "developer" to identify this as a developer message
-     :param content: The content of the developer message
-     :param name: (Optional) The name of the developer message participant.
-     """
-
-     role: Literal["developer"] = "developer"
-     content: OpenAIChatCompletionTextOnlyMessageContent
-     name: str | None = None
-
-
- OpenAIMessageParam = Annotated[
-     OpenAIUserMessageParam
-     | OpenAISystemMessageParam
-     | OpenAIAssistantMessageParam
-     | OpenAIToolMessageParam
-     | OpenAIDeveloperMessageParam,
-     Field(discriminator="role"),
- ]
- register_schema(OpenAIMessageParam, name="OpenAIMessageParam")
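# --- Illustrative aside (not part of the diff): the "role" discriminator. ---
# A plain list of dicts validates into the correct message classes.
from pydantic import TypeAdapter

example_messages = TypeAdapter(list[OpenAIMessageParam]).validate_python([
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Name one ocean."},
    {"role": "assistant", "content": "The Pacific."},
])
assert isinstance(example_messages[0], OpenAISystemMessageParam)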
-
-
- @json_schema_type
- class OpenAIResponseFormatText(BaseModel):
-     """Text response format for OpenAI-compatible chat completion requests.
-
-     :param type: Must be "text" to indicate plain text response format
-     """
-
-     type: Literal["text"] = "text"
-
-
- @json_schema_type
- class OpenAIJSONSchema(TypedDict, total=False):
-     """JSON schema specification for OpenAI-compatible structured response format.
-
-     :param name: Name of the schema
-     :param description: (Optional) Description of the schema
-     :param strict: (Optional) Whether to enforce strict adherence to the schema
-     :param schema: (Optional) The JSON schema definition
-     """
-
-     name: str
-     description: str | None
-     strict: bool | None
-
-     # Pydantic BaseModel cannot be used with a schema param, since it already
-     # has one. And we don't want to alias here, because we would then have to
-     # handle that alias when converting to OpenAI params. So, to support
-     # schema, we use a TypedDict.
-     schema: dict[str, Any] | None
-
-
- @json_schema_type
- class OpenAIResponseFormatJSONSchema(BaseModel):
-     """JSON schema response format for OpenAI-compatible chat completion requests.
-
-     :param type: Must be "json_schema" to indicate structured JSON response format
-     :param json_schema: The JSON schema specification for the response
-     """
-
-     type: Literal["json_schema"] = "json_schema"
-     json_schema: OpenAIJSONSchema
-
-
- @json_schema_type
- class OpenAIResponseFormatJSONObject(BaseModel):
-     """JSON object response format for OpenAI-compatible chat completion requests.
-
-     :param type: Must be "json_object" to indicate generic JSON object response format
-     """
-
-     type: Literal["json_object"] = "json_object"
-
-
- OpenAIResponseFormatParam = Annotated[
-     OpenAIResponseFormatText | OpenAIResponseFormatJSONSchema | OpenAIResponseFormatJSONObject,
-     Field(discriminator="type"),
- ]
- register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam")
-
-
- @json_schema_type
- class FunctionToolConfig(BaseModel):
-     name: str
-
-
- @json_schema_type
- class OpenAIChatCompletionToolChoiceFunctionTool(BaseModel):
-     """Function tool choice for OpenAI-compatible chat completion requests.
-
-     :param type: Must be "function" to indicate function tool choice
-     :param function: The function tool configuration
-     """
-
-     type: Literal["function"] = "function"
-     function: FunctionToolConfig
-
-     def __init__(self, name: str):
-         super().__init__(type="function", function=FunctionToolConfig(name=name))
-
-
- @json_schema_type
- class CustomToolConfig(BaseModel):
-     """Custom tool configuration for OpenAI-compatible chat completion requests.
-
-     :param name: Name of the custom tool
-     """
-
-     name: str
-
-
- @json_schema_type
- class OpenAIChatCompletionToolChoiceCustomTool(BaseModel):
-     """Custom tool choice for OpenAI-compatible chat completion requests.
-
-     :param type: Must be "custom" to indicate custom tool choice
-     :param custom: The custom tool configuration
-     """
-
-     type: Literal["custom"] = "custom"
-     custom: CustomToolConfig
-
-     def __init__(self, name: str):
-         super().__init__(type="custom", custom=CustomToolConfig(name=name))
-
-
- @json_schema_type
- class AllowedToolsConfig(BaseModel):
-     tools: list[dict[str, Any]]
-     mode: Literal["auto", "required"]
-
-
- @json_schema_type
- class OpenAIChatCompletionToolChoiceAllowedTools(BaseModel):
-     """Allowed-tools tool choice for OpenAI-compatible chat completion requests.
-
-     :param type: Must be "allowed_tools" to indicate an allowed-tools tool choice
-     :param allowed_tools: The allowed tools configuration
-     """
-
-     type: Literal["allowed_tools"] = "allowed_tools"
-     allowed_tools: AllowedToolsConfig
-
-     def __init__(self, tools: list[dict[str, Any]], mode: Literal["auto", "required"]):
-         super().__init__(type="allowed_tools", allowed_tools=AllowedToolsConfig(tools=tools, mode=mode))
-
-
- # Define the object-level union with discriminator
- OpenAIChatCompletionToolChoice = Annotated[
-     OpenAIChatCompletionToolChoiceAllowedTools
-     | OpenAIChatCompletionToolChoiceFunctionTool
-     | OpenAIChatCompletionToolChoiceCustomTool,
-     Field(discriminator="type"),
- ]
-
- register_schema(OpenAIChatCompletionToolChoice, name="OpenAIChatCompletionToolChoice")
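# --- Illustrative aside (not part of the diff): the convenience constructors. ---
# Each tool-choice wrapper above overrides __init__ so callers can pass just
# the tool name (or the tools/mode pair) instead of the nested config object.
example_choice = OpenAIChatCompletionToolChoiceFunctionTool(name="get_weather")
assert example_choice.function.name == "get_weather"
example_allowed = OpenAIChatCompletionToolChoiceAllowedTools(
    tools=[{"type": "function", "function": {"name": "get_weather"}}],
    mode="auto",
)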
-
-
- @json_schema_type
- class OpenAITopLogProb(BaseModel):
-     """The top log probability for a token from an OpenAI-compatible chat completion response.
-
-     :param token: The token
-     :param bytes: (Optional) The bytes for the token
-     :param logprob: The log probability of the token
-     """
-
-     token: str
-     bytes: list[int] | None = None
-     logprob: float
-
-
- @json_schema_type
- class OpenAITokenLogProb(BaseModel):
-     """The log probability for a token from an OpenAI-compatible chat completion response.
-
-     :param token: The token
-     :param bytes: (Optional) The bytes for the token
-     :param logprob: The log probability of the token
-     :param top_logprobs: The top log probabilities for the token
-     """
-
-     token: str
-     bytes: list[int] | None = None
-     logprob: float
-     top_logprobs: list[OpenAITopLogProb] | None = None
-
-
- @json_schema_type
- class OpenAIChoiceLogprobs(BaseModel):
-     """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
-
-     :param content: (Optional) The log probabilities for the tokens in the message
-     :param refusal: (Optional) The log probabilities for the tokens in the refusal message
-     """
-
-     content: list[OpenAITokenLogProb] | None = None
-     refusal: list[OpenAITokenLogProb] | None = None
-
-
- @json_schema_type
- class OpenAIChoiceDelta(BaseModel):
-     """A delta from an OpenAI-compatible chat completion streaming response.
-
-     :param content: (Optional) The content of the delta
-     :param refusal: (Optional) The refusal of the delta
-     :param role: (Optional) The role of the delta
-     :param tool_calls: (Optional) The tool calls of the delta
-     :param reasoning_content: (Optional) The reasoning content from the model (non-standard, for o1/o3 models)
-     """
-
-     content: str | None = None
-     refusal: str | None = None
-     role: str | None = None
-     tool_calls: list[OpenAIChatCompletionToolCall] | None = None
-     reasoning_content: str | None = None
-
-
- @json_schema_type
- class OpenAIChunkChoice(BaseModel):
-     """A chunk choice from an OpenAI-compatible chat completion streaming response.
-
-     :param delta: The delta from the chunk
-     :param finish_reason: The reason the model stopped generating
-     :param index: The index of the choice
-     :param logprobs: (Optional) The log probabilities for the tokens in the message
-     """
-
-     delta: OpenAIChoiceDelta
-     finish_reason: str
-     index: int
-     logprobs: OpenAIChoiceLogprobs | None = None
-
-
- @json_schema_type
- class OpenAIChoice(BaseModel):
-     """A choice from an OpenAI-compatible chat completion response.
-
-     :param message: The message from the model
-     :param finish_reason: The reason the model stopped generating
-     :param index: The index of the choice
-     :param logprobs: (Optional) The log probabilities for the tokens in the message
-     """
-
-     message: OpenAIMessageParam
-     finish_reason: str
-     index: int
-     logprobs: OpenAIChoiceLogprobs | None = None
-
-
- class OpenAIChatCompletionUsageCompletionTokensDetails(BaseModel):
-     """Token details for output tokens in OpenAI chat completion usage.
-
-     :param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)
-     """
-
-     reasoning_tokens: int | None = None
-
-
- class OpenAIChatCompletionUsagePromptTokensDetails(BaseModel):
-     """Token details for prompt tokens in OpenAI chat completion usage.
-
-     :param cached_tokens: Number of tokens retrieved from cache
-     """
-
-     cached_tokens: int | None = None
-
-
- @json_schema_type
- class OpenAIChatCompletionUsage(BaseModel):
-     """Usage information for OpenAI chat completion.
-
-     :param prompt_tokens: Number of tokens in the prompt
-     :param completion_tokens: Number of tokens in the completion
-     :param total_tokens: Total tokens used (prompt + completion)
-     :param prompt_tokens_details: Detailed breakdown of prompt token usage
-     :param completion_tokens_details: Detailed breakdown of completion token usage
-     """
-
-     prompt_tokens: int
-     completion_tokens: int
-     total_tokens: int
-     prompt_tokens_details: OpenAIChatCompletionUsagePromptTokensDetails | None = None
-     completion_tokens_details: OpenAIChatCompletionUsageCompletionTokensDetails | None = None
-
-
- @json_schema_type
- class OpenAIChatCompletion(BaseModel):
-     """Response from an OpenAI-compatible chat completion request.
-
-     :param id: The ID of the chat completion
-     :param choices: List of choices
-     :param object: The object type, which will be "chat.completion"
-     :param created: The Unix timestamp in seconds when the chat completion was created
-     :param model: The model that was used to generate the chat completion
-     :param usage: Token usage information for the completion
-     """
-
-     id: str
-     choices: list[OpenAIChoice]
-     object: Literal["chat.completion"] = "chat.completion"
-     created: int
-     model: str
-     usage: OpenAIChatCompletionUsage | None = None
-
-
- @json_schema_type
- class OpenAIChatCompletionChunk(BaseModel):
-     """Chunk from a streaming response to an OpenAI-compatible chat completion request.
-
-     :param id: The ID of the chat completion
-     :param choices: List of choices
-     :param object: The object type, which will be "chat.completion.chunk"
-     :param created: The Unix timestamp in seconds when the chat completion was created
-     :param model: The model that was used to generate the chat completion
-     :param usage: Token usage information (typically included in final chunk with stream_options)
-     """
-
-     id: str
-     choices: list[OpenAIChunkChoice]
-     object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
-     created: int
-     model: str
-     usage: OpenAIChatCompletionUsage | None = None
-
-
- @json_schema_type
- class OpenAICompletionLogprobs(BaseModel):
-     """The log probabilities for the tokens in the message from an OpenAI-compatible completion response.
-
-     :param text_offset: (Optional) The offset of the token in the text
-     :param token_logprobs: (Optional) The log probabilities for the tokens
-     :param tokens: (Optional) The tokens
-     :param top_logprobs: (Optional) The top log probabilities for the tokens
-     """
-
-     text_offset: list[int] | None = None
-     token_logprobs: list[float] | None = None
-     tokens: list[str] | None = None
-     top_logprobs: list[dict[str, float]] | None = None
-
-
- @json_schema_type
- class OpenAICompletionChoice(BaseModel):
-     """A choice from an OpenAI-compatible completion response.
-
-     :param finish_reason: The reason the model stopped generating
-     :param text: The text of the choice
-     :param index: The index of the choice
-     :param logprobs: (Optional) The log probabilities for the tokens in the choice
-     """
-
-     finish_reason: str
-     text: str
-     index: int
-     logprobs: OpenAIChoiceLogprobs | None = None
-
-
- @json_schema_type
- class OpenAICompletion(BaseModel):
-     """Response from an OpenAI-compatible completion request.
-
-     :param id: The ID of the completion
-     :param choices: List of choices
-     :param created: The Unix timestamp in seconds when the completion was created
-     :param model: The model that was used to generate the completion
-     :param object: The object type, which will be "text_completion"
-     """
-
-     id: str
-     choices: list[OpenAICompletionChoice]
-     created: int
-     model: str
-     object: Literal["text_completion"] = "text_completion"
-
-
- @json_schema_type
- class OpenAIEmbeddingData(BaseModel):
-     """A single embedding data object from an OpenAI-compatible embeddings response.
-
-     :param object: The object type, which will be "embedding"
-     :param embedding: The embedding vector as a list of floats (when encoding_format="float") or as a base64-encoded string (when encoding_format="base64")
-     :param index: The index of the embedding in the input list
-     """
-
-     object: Literal["embedding"] = "embedding"
-     # TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData
-     embedding: list[float] | str
-     index: int
-
-
- @json_schema_type
- class OpenAIEmbeddingUsage(BaseModel):
-     """Usage information for an OpenAI-compatible embeddings response.
-
-     :param prompt_tokens: The number of tokens in the input
-     :param total_tokens: The total number of tokens used
-     """
-
-     prompt_tokens: int
-     total_tokens: int
-
-
- @json_schema_type
- class OpenAIEmbeddingsResponse(BaseModel):
-     """Response from an OpenAI-compatible embeddings request.
-
-     :param object: The object type, which will be "list"
-     :param data: List of embedding data objects
-     :param model: The model that was used to generate the embeddings
-     :param usage: Usage information
-     """
-
-     object: Literal["list"] = "list"
-     data: list[OpenAIEmbeddingData]
-     model: str
-     usage: OpenAIEmbeddingUsage
-
-
- class ModelStore(Protocol):
-     async def get_model(self, identifier: str) -> Model: ...
-
-
- class TextTruncation(Enum):
-     """Config for how to truncate text for embedding when text is longer than the model's max sequence length. Start and End semantics depend on whether the language is left-to-right or right-to-left.
-
-     :cvar none: No truncation (default). If the text is longer than the model's max sequence length, you will get an error.
-     :cvar start: Truncate from the start
-     :cvar end: Truncate from the end
-     """
-
-     none = "none"
-     start = "start"
-     end = "end"
-
-
- class EmbeddingTaskType(Enum):
-     """How is the embedding being used? This is only supported by asymmetric embedding models.
-
-     :cvar query: Used for a query for semantic search.
-     :cvar document: Used at indexing time when ingesting documents.
-     """
-
-     query = "query"
-     document = "document"
-
-
- class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
-     input_messages: list[OpenAIMessageParam]
-
-
- @json_schema_type
- class ListOpenAIChatCompletionResponse(BaseModel):
-     """Response from listing OpenAI-compatible chat completions.
-
-     :param data: List of chat completion objects with their input messages
-     :param has_more: Whether there are more completions available beyond this list
-     :param first_id: ID of the first completion in this list
-     :param last_id: ID of the last completion in this list
-     :param object: Must be "list" to identify this as a list response
-     """
-
-     data: list[OpenAICompletionWithInputMessages]
-     has_more: bool
-     first_id: str
-     last_id: str
-     object: Literal["list"] = "list"
-
-
- # extra_body can be accessed via .model_extra
- @json_schema_type
- class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
-     """Request parameters for OpenAI-compatible completion endpoint.
-
-     :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
-     :param prompt: The prompt to generate a completion for.
-     :param best_of: (Optional) Number of completions to generate server-side, of which the best is returned.
-     :param echo: (Optional) Whether to echo the prompt.
-     :param frequency_penalty: (Optional) Penalty applied to tokens based on how frequently they have appeared in the text so far.
-     :param logit_bias: (Optional) The logit bias to use.
-     :param logprobs: (Optional) The log probabilities to use.
-     :param max_tokens: (Optional) The maximum number of tokens to generate.
-     :param n: (Optional) The number of completions to generate.
-     :param presence_penalty: (Optional) Penalty applied to tokens based on whether they have already appeared in the text so far.
-     :param seed: (Optional) The seed to use.
-     :param stop: (Optional) The stop tokens to use.
-     :param stream: (Optional) Whether to stream the response.
-     :param stream_options: (Optional) The stream options to use.
-     :param temperature: (Optional) The temperature to use.
-     :param top_p: (Optional) The top p to use.
-     :param user: (Optional) The user to use.
-     :param suffix: (Optional) The suffix that should be appended to the completion.
-     """
-
-     # Standard OpenAI completion parameters
-     model: str
-     prompt: str | list[str] | list[int] | list[list[int]]
-     best_of: int | None = None
-     echo: bool | None = None
-     frequency_penalty: float | None = None
-     logit_bias: dict[str, float] | None = None
-     logprobs: bool | None = None
-     max_tokens: int | None = None
-     n: int | None = None
-     presence_penalty: float | None = None
-     seed: int | None = None
-     stop: str | list[str] | None = None
-     stream: bool | None = None
-     stream_options: dict[str, Any] | None = None
-     temperature: float | None = None
-     top_p: float | None = None
-     user: str | None = None
-     suffix: str | None = None
-
-
- # extra_body can be accessed via .model_extra
- @json_schema_type
- class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
-     """Request parameters for OpenAI-compatible chat completion endpoint.
-
-     :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
-     :param messages: List of messages in the conversation.
-     :param frequency_penalty: (Optional) Penalty applied to tokens based on how frequently they have appeared in the text so far.
-     :param function_call: (Optional) The function call to use.
-     :param functions: (Optional) List of functions to use.
-     :param logit_bias: (Optional) The logit bias to use.
-     :param logprobs: (Optional) The log probabilities to use.
-     :param max_completion_tokens: (Optional) The maximum number of tokens to generate.
-     :param max_tokens: (Optional) The maximum number of tokens to generate.
-     :param n: (Optional) The number of completions to generate.
-     :param parallel_tool_calls: (Optional) Whether to parallelize tool calls.
-     :param presence_penalty: (Optional) Penalty applied to tokens based on whether they have already appeared in the text so far.
-     :param response_format: (Optional) The response format to use.
-     :param seed: (Optional) The seed to use.
-     :param stop: (Optional) The stop tokens to use.
-     :param stream: (Optional) Whether to stream the response.
-     :param stream_options: (Optional) The stream options to use.
-     :param temperature: (Optional) The temperature to use.
-     :param tool_choice: (Optional) The tool choice to use.
-     :param tools: (Optional) The tools to use.
-     :param top_logprobs: (Optional) The top log probabilities to use.
-     :param top_p: (Optional) The top p to use.
-     :param user: (Optional) The user to use.
-     """
-
-     # Standard OpenAI chat completion parameters
-     model: str
-     messages: Annotated[list[OpenAIMessageParam], Field(..., min_length=1)]
-     frequency_penalty: float | None = None
-     function_call: str | dict[str, Any] | None = None
-     functions: list[dict[str, Any]] | None = None
-     logit_bias: dict[str, float] | None = None
-     logprobs: bool | None = None
-     max_completion_tokens: int | None = None
-     max_tokens: int | None = None
-     n: int | None = None
-     parallel_tool_calls: bool | None = None
-     presence_penalty: float | None = None
-     response_format: OpenAIResponseFormatParam | None = None
-     seed: int | None = None
-     stop: str | list[str] | None = None
-     stream: bool | None = None
-     stream_options: dict[str, Any] | None = None
-     temperature: float | None = None
-     tool_choice: str | dict[str, Any] | None = None
-     tools: list[dict[str, Any]] | None = None
-     top_logprobs: int | None = None
-     top_p: float | None = None
-     user: str | None = None
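# --- Illustrative aside (not part of the diff): extra="allow" pass-through. ---
# Unknown fields survive validation and land in .model_extra, which is how
# provider-specific extra_body parameters (the names below are hypothetical)
# reach the backend.
example_request = OpenAIChatCompletionRequestWithExtraBody(
    model="example-model-id",
    messages=[{"role": "user", "content": "hi"}],
    temperature=0.2,
    guided_choice=["yes", "no"],  # undeclared field, kept in model_extra
)
assert example_request.model_extra == {"guided_choice": ["yes", "no"]}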
-
-
- # extra_body can be accessed via .model_extra
- @json_schema_type
- class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
-     """Request parameters for OpenAI-compatible embeddings endpoint.
-
-     :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
-     :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
-     :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
-     :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
-     :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
-     """
-
-     model: str
-     input: str | list[str]
-     encoding_format: str | None = "float"
-     dimensions: int | None = None
-     user: str | None = None
-
-
- @runtime_checkable
- class InferenceProvider(Protocol):
-     """
-     This protocol defines the interface that should be implemented by all inference providers.
-     """
-
-     API_NAMESPACE: str = "Inference"
-
-     model_store: ModelStore | None = None
-
-     @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
-     async def rerank(
-         self,
-         model: str,
-         query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
-         items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
-         max_num_results: int | None = None,
-     ) -> RerankResponse:
-         """Rerank a list of documents based on their relevance to a query.
-
-         :param model: The identifier of the reranking model to use.
-         :param query: The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length.
-         :param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length.
-         :param max_num_results: (Optional) Maximum number of results to return. Default: returns all.
-         :returns: RerankResponse with indices sorted by relevance score (descending).
-         """
-         raise NotImplementedError("Reranking is not implemented")
-         return  # this is so mypy's safe-super rule will consider the method concrete
-
-     @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
-     async def openai_completion(
-         self,
-         params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
-     ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
-         """Create completion.
-
-         Generate an OpenAI-compatible completion for the given prompt using the specified model.
-         :returns: An OpenAICompletion. When streaming, returns Server-Sent Events (SSE) with OpenAICompletion chunks.
-         """
-         ...
-
-     @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
-     async def openai_chat_completion(
-         self,
-         params: Annotated[OpenAIChatCompletionRequestWithExtraBody, Body(...)],
-     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-         """Create chat completions.
-
-         Generate an OpenAI-compatible chat completion for the given messages using the specified model.
-         :returns: An OpenAIChatCompletion. When streaming, returns Server-Sent Events (SSE) with OpenAIChatCompletionChunk objects.
-         """
-         ...
-
-     @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
-     async def openai_embeddings(
-         self,
-         params: Annotated[OpenAIEmbeddingsRequestWithExtraBody, Body(...)],
-     ) -> OpenAIEmbeddingsResponse:
-         """Create embeddings.
-
-         Generate OpenAI-compatible embeddings for the given input using the specified model.
-         :returns: An OpenAIEmbeddingsResponse containing the embeddings.
-         """
-         ...
-
-
- class Inference(InferenceProvider):
-     """Inference
-
-     Llama Stack Inference API for generating completions, chat completions, and embeddings.
-
-     This API provides the raw interface to the underlying models. Three kinds of models are supported:
-     - LLM models: these models generate "raw" and "chat" (conversational) completions.
-     - Embedding models: these models generate embeddings to be used for semantic search.
-     - Rerank models: these models reorder the documents based on their relevance to a query.
-     """
-
-     @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
-     async def list_chat_completions(
-         self,
-         after: str | None = None,
-         limit: int | None = 20,
-         model: str | None = None,
-         order: Order | None = Order.desc,
-     ) -> ListOpenAIChatCompletionResponse:
-         """List chat completions.
-
-         :param after: The ID of the last chat completion from the previous page (pagination cursor).
-         :param limit: The maximum number of chat completions to return.
-         :param model: The model to filter by.
-         :param order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".
-         :returns: A ListOpenAIChatCompletionResponse.
-         """
-         raise NotImplementedError("List chat completions is not implemented")
-
-     @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
-     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
-         """Get chat completion.
-
-         Describe a chat completion by its ID.
-
-         :param completion_id: ID of the chat completion.
-         :returns: An OpenAICompletionWithInputMessages.
-         """
-         raise NotImplementedError("Get chat completion is not implemented")