c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python 0.1.0a7__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python
3
- Version: 0.1.0a7
3
+ Version: 0.1.0a8
4
4
  Summary: The official Python library for GradientAI
5
5
  Project-URL: Homepage, https://github.com/digitalocean/gradientai-python
6
6
  Project-URL: Repository, https://github.com/digitalocean/gradientai-python
@@ -1,6 +1,6 @@
1
1
  gradientai/__init__.py,sha256=yqy3yZnX0JVUS-H01MAvroChzqS67Qf072OrPfNckjI,2655
2
2
  gradientai/_base_client.py,sha256=TADFnPHK7WpsNuJUY76SxMFf2IVoOdz_tlloQEXkutk,66719
3
- gradientai/_client.py,sha256=eKRTIA1SXYoAA28P6LyDc49IOxCB4hUI2YWrszXqXdY,27334
3
+ gradientai/_client.py,sha256=SwygmkQnjfqCEKgMA7rovhfWiEOk3BjyenunyiDS0F8,27425
4
4
  gradientai/_compat.py,sha256=VWemUKbj6DDkQ-O4baSpHVLJafotzeXmCQGJugfVTIw,6580
5
5
  gradientai/_constants.py,sha256=S14PFzyN9-I31wiV7SmIlL5Ga0MLHxdvegInGdXH7tM,462
6
6
  gradientai/_exceptions.py,sha256=o1GvaW36c7_LMj5WasVKUBOpae8tzETBJsfbVphb3Vk,3228
@@ -9,9 +9,9 @@ gradientai/_models.py,sha256=G1vczEodX0vUySeVKbF-mbzlaObNL1oVAYH4c65agRk,29131
9
9
  gradientai/_qs.py,sha256=AOkSz4rHtK4YI3ZU_kzea-zpwBUgEY8WniGmTPyEimc,4846
10
10
  gradientai/_resource.py,sha256=4NZbH2h8dQ-t-DQPida4VANJ_oZJNA7qxV84mwJT8oM,1124
11
11
  gradientai/_response.py,sha256=RhlDdupxTcKNyDDj045MZD3-a_lsEc3yjiOzxWg0cDc,28842
12
- gradientai/_streaming.py,sha256=AWqY4cmmmTplZperXnkkMkeQ11gmpqYbt6TIXByqCv8,10116
12
+ gradientai/_streaming.py,sha256=3KH-GBmqhoS1KAOhecADOsbW9WuzhIi8wSdmrEj5PPA,11404
13
13
  gradientai/_types.py,sha256=22gBoIuoGJ1R6l5nPwquWCRzJodKhO-3e7k22-h37JQ,6201
14
- gradientai/_version.py,sha256=3OiJuEV2eKbtW9ToIU93cLhPFrPrdnFJrulaUojrGfE,170
14
+ gradientai/_version.py,sha256=vl2rc2vAkezojVTxFct2GJxzipzCGzHOxgS9Ld5ASmM,170
15
15
  gradientai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  gradientai/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
17
17
  gradientai/_utils/_logs.py,sha256=1QuZcxzSKHcqNFbPoz_pvfLD5eDfOMBzGMyanAm_2gw,787
@@ -39,7 +39,7 @@ gradientai/resources/agents/routes.py,sha256=pDoK5hUgluhz5awJ9FR_krF8DIbkRzLX__M
39
39
  gradientai/resources/agents/versions.py,sha256=hmHDaawFkpqoUEYI4Vi5jckH3mamkPKupXO3cZccyKE,11546
40
40
  gradientai/resources/agents/chat/__init__.py,sha256=BVAfz9TM3DT5W9f_mt0P9YRxL_MsUxKCWAH6u1iogmA,1041
41
41
  gradientai/resources/agents/chat/chat.py,sha256=nt97777qa-xM71JQBKDFG_x7fUDJRvy6rF5LoBiMOKE,3698
42
- gradientai/resources/agents/chat/completions.py,sha256=bkaLCAXB9RkTg_D56XbnCIsJl_gKwwaoFhCDDDYT040,18230
42
+ gradientai/resources/agents/chat/completions.py,sha256=jq62v8gN3hJ7POxBCHMcUJUi6Zj4IgvNGlr7D36W1M8,46188
43
43
  gradientai/resources/agents/evaluation_metrics/__init__.py,sha256=qUCsT_vI2TrZrUBPe8h-VMNBE4ytcoe0RXshDctV0g0,1198
44
44
  gradientai/resources/agents/evaluation_metrics/evaluation_metrics.py,sha256=BueqcWT0lqbElo-lgntkr1s7J0-qi2KqEZV2qzc7dcI,7089
45
45
  gradientai/resources/agents/evaluation_metrics/workspaces/__init__.py,sha256=Zf3wa7WSVOKyG1x9Fppny38_ewQCbdghxbptqGFVfOM,1054
@@ -150,8 +150,9 @@ gradientai/types/agents/version_list_params.py,sha256=0_3DhUbFDRyjUnn2G4saFOvuot
150
150
  gradientai/types/agents/version_list_response.py,sha256=Y0Y8CSPUPVHKRA3zTRRfQ8gC2aFyBacSCaGz-D5k8nk,2725
151
151
  gradientai/types/agents/version_update_params.py,sha256=j9tOda5wXmSOHsmcxQONo2mM-hEtrYi5-19HfGU_XnI,379
152
152
  gradientai/types/agents/version_update_response.py,sha256=nspPIkxQskT82tcW0JyG7bBVlXq_KU6CZzodTd9jfkQ,709
153
- gradientai/types/agents/chat/__init__.py,sha256=A5VCUPqJZydjjOqEXC01GXmcDkKM3bq6zuCu9lmi5Es,303
154
- gradientai/types/agents/chat/completion_create_params.py,sha256=F4Dcrt5aYC_GEWUSN2OA3Zm5ImevZ7tFuTxBH7RyooM,6635
153
+ gradientai/types/agents/chat/__init__.py,sha256=c-PmEwuvWZQ4CRBTs9gzbKAq2sxL7V7JlVxddeoaGl0,381
154
+ gradientai/types/agents/chat/chat_completion_chunk.py,sha256=1K-F0JdUmQ_4idDk3oUGUB_mhxAxCzjq0C8hMhGtDuY,3048
155
+ gradientai/types/agents/chat/completion_create_params.py,sha256=ADEJ0N3MMsouT9AqBLE1-rho4FVVhlp9U9E-buqIAYs,7165
155
156
  gradientai/types/agents/chat/completion_create_response.py,sha256=9uKS3memEoV0_Xd1CZwI0jQGsQyfVkhXRPGlRO3rUIc,2415
156
157
  gradientai/types/agents/evaluation_metrics/__init__.py,sha256=XWH_utxMx-JwArRpr-rHQfmoxQRGK6GciKOllbkqg40,894
157
158
  gradientai/types/agents/evaluation_metrics/workspace_create_params.py,sha256=LrccBST52BUMlIidiba8K_7vU9HLZW8TTQ2E227UF8Y,343
@@ -231,7 +232,7 @@ gradientai/types/shared/__init__.py,sha256=YA2_qLkZLySOac1HrqOfCTEz6GeipnjIJh1mK
231
232
  gradientai/types/shared/api_links.py,sha256=Iq5iQwOkRYuwLcuDLk54dUfrq0f2ZVEOXSpF744gYgA,403
232
233
  gradientai/types/shared/api_meta.py,sha256=-KyinzQqM5GSjD7E5xm7A4UALXAvLOyVNR1SYVOUFJM,297
233
234
  gradientai/types/shared/chat_completion_token_logprob.py,sha256=6-ipUFfsXMf5L7FDFi127NaVkDtmEooVgGBF6Ts965A,1769
234
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info/METADATA,sha256=g-NQEA3bos3YcRy4U0sil0jrvchjE0S8ufrQpio2jv4,15049
235
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
236
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info/licenses/LICENSE,sha256=AzxEF8mEks6hu5V_87CXF8gLdL875WeO8FmQtEZTFok,11341
237
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info/RECORD,,
235
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/METADATA,sha256=xwI-Z-rvG_Z_-L5Xu3x_NW2F7CauiUQmYG8wtunHk-Q,15049
236
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
237
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/licenses/LICENSE,sha256=AzxEF8mEks6hu5V_87CXF8gLdL875WeO8FmQtEZTFok,11341
238
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/RECORD,,
gradientai/_client.py CHANGED
@@ -117,6 +117,8 @@ class GradientAI(SyncAPIClient):
117
117
  _strict_response_validation=_strict_response_validation,
118
118
  )
119
119
 
120
+ self._default_stream_cls = Stream
121
+
120
122
  @cached_property
121
123
  def agents(self) -> AgentsResource:
122
124
  from .resources.agents import AgentsResource
@@ -355,6 +357,8 @@ class AsyncGradientAI(AsyncAPIClient):
355
357
  _strict_response_validation=_strict_response_validation,
356
358
  )
357
359
 
360
+ self._default_stream_cls = AsyncStream
361
+
358
362
  @cached_property
359
363
  def agents(self) -> AsyncAgentsResource:
360
364
  from .resources.agents import AsyncAgentsResource
gradientai/_streaming.py CHANGED
@@ -9,7 +9,8 @@ from typing_extensions import Self, Protocol, TypeGuard, override, get_origin, r
9
9
 
10
10
  import httpx
11
11
 
12
- from ._utils import extract_type_var_from_base
12
+ from ._utils import is_mapping, extract_type_var_from_base
13
+ from ._exceptions import APIError
13
14
 
14
15
  if TYPE_CHECKING:
15
16
  from ._client import GradientAI, AsyncGradientAI
@@ -55,7 +56,25 @@ class Stream(Generic[_T]):
55
56
  iterator = self._iter_events()
56
57
 
57
58
  for sse in iterator:
58
- yield process_data(data=sse.json(), cast_to=cast_to, response=response)
59
+ if sse.data.startswith("[DONE]"):
60
+ break
61
+
62
+ data = sse.json()
63
+ if is_mapping(data) and data.get("error"):
64
+ message = None
65
+ error = data.get("error")
66
+ if is_mapping(error):
67
+ message = error.get("message")
68
+ if not message or not isinstance(message, str):
69
+ message = "An error occurred during streaming"
70
+
71
+ raise APIError(
72
+ message=message,
73
+ request=self.response.request,
74
+ body=data["error"],
75
+ )
76
+
77
+ yield process_data(data=data, cast_to=cast_to, response=response)
59
78
 
60
79
  # Ensure the entire stream is consumed
61
80
  for _sse in iterator:
@@ -119,7 +138,25 @@ class AsyncStream(Generic[_T]):
119
138
  iterator = self._iter_events()
120
139
 
121
140
  async for sse in iterator:
122
- yield process_data(data=sse.json(), cast_to=cast_to, response=response)
141
+ if sse.data.startswith("[DONE]"):
142
+ break
143
+
144
+ data = sse.json()
145
+ if is_mapping(data) and data.get("error"):
146
+ message = None
147
+ error = data.get("error")
148
+ if is_mapping(error):
149
+ message = error.get("message")
150
+ if not message or not isinstance(message, str):
151
+ message = "An error occurred during streaming"
152
+
153
+ raise APIError(
154
+ message=message,
155
+ request=self.response.request,
156
+ body=data["error"],
157
+ )
158
+
159
+ yield process_data(data=data, cast_to=cast_to, response=response)
123
160
 
124
161
  # Ensure the entire stream is consumed
125
162
  async for _sse in iterator:
gradientai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "gradientai"
4
- __version__ = "0.1.0-alpha.7" # x-release-please-version
4
+ __version__ = "0.1.0-alpha.8" # x-release-please-version
@@ -3,11 +3,12 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List, Union, Iterable, Optional
6
+ from typing_extensions import Literal, overload
6
7
 
7
8
  import httpx
8
9
 
9
10
  from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
10
- from ...._utils import maybe_transform, async_maybe_transform
11
+ from ...._utils import required_args, maybe_transform, async_maybe_transform
11
12
  from ...._compat import cached_property
12
13
  from ...._resource import SyncAPIResource, AsyncAPIResource
13
14
  from ...._response import (
@@ -16,8 +17,10 @@ from ...._response import (
16
17
  async_to_raw_response_wrapper,
17
18
  async_to_streamed_response_wrapper,
18
19
  )
20
+ from ...._streaming import Stream, AsyncStream
19
21
  from ...._base_client import make_request_options
20
22
  from ....types.agents.chat import completion_create_params
23
+ from ....types.agents.chat.chat_completion_chunk import ChatCompletionChunk
21
24
  from ....types.agents.chat.completion_create_response import CompletionCreateResponse
22
25
 
23
26
  __all__ = ["CompletionsResource", "AsyncCompletionsResource"]
@@ -43,6 +46,7 @@ class CompletionsResource(SyncAPIResource):
43
46
  """
44
47
  return CompletionsResourceWithStreamingResponse(self)
45
48
 
49
+ @overload
46
50
  def create(
47
51
  self,
48
52
  *,
@@ -57,7 +61,7 @@ class CompletionsResource(SyncAPIResource):
57
61
  n: Optional[int] | NotGiven = NOT_GIVEN,
58
62
  presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
59
63
  stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
60
- stream: Optional[bool] | NotGiven = NOT_GIVEN,
64
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
61
65
  stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
62
66
  temperature: Optional[float] | NotGiven = NOT_GIVEN,
63
67
  top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
@@ -153,6 +157,262 @@ class CompletionsResource(SyncAPIResource):
153
157
 
154
158
  timeout: Override the client-level default timeout for this request, in seconds
155
159
  """
160
+ ...
161
+
162
+ @overload
163
+ def create(
164
+ self,
165
+ *,
166
+ messages: Iterable[completion_create_params.Message],
167
+ model: str,
168
+ stream: Literal[True],
169
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
170
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
171
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
172
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
173
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
174
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
175
+ n: Optional[int] | NotGiven = NOT_GIVEN,
176
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
177
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
178
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
179
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
180
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
181
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
182
+ user: str | NotGiven = NOT_GIVEN,
183
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
184
+ # The extra values given here take precedence over values defined on the client or passed to this method.
185
+ extra_headers: Headers | None = None,
186
+ extra_query: Query | None = None,
187
+ extra_body: Body | None = None,
188
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
189
+ ) -> Stream[ChatCompletionChunk]:
190
+ """
191
+ Creates a model response for the given chat conversation.
192
+
193
+ Args:
194
+ messages: A list of messages comprising the conversation so far.
195
+
196
+ model: Model ID used to generate the response.
197
+
198
+ stream: If set to true, the model response data will be streamed to the client as it is
199
+ generated using server-sent events.
200
+
201
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
202
+ existing frequency in the text so far, decreasing the model's likelihood to
203
+ repeat the same line verbatim.
204
+
205
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
206
+
207
+ Accepts a JSON object that maps tokens (specified by their token ID in the
208
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
209
+ bias is added to the logits generated by the model prior to sampling. The exact
210
+ effect will vary per model, but values between -1 and 1 should decrease or
211
+ increase likelihood of selection; values like -100 or 100 should result in a ban
212
+ or exclusive selection of the relevant token.
213
+
214
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
215
+ returns the log probabilities of each output token returned in the `content` of
216
+ `message`.
217
+
218
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
219
+ run. The run will make a best effort to use only the number of completion tokens
220
+ specified, across multiple turns of the run.
221
+
222
+ max_tokens: The maximum number of tokens that can be generated in the completion.
223
+
224
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
225
+ context length.
226
+
227
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
228
+ for storing additional information about the object in a structured format, and
229
+ querying for objects via API or the dashboard.
230
+
231
+ Keys are strings with a maximum length of 64 characters. Values are strings with
232
+ a maximum length of 512 characters.
233
+
234
+ n: How many chat completion choices to generate for each input message. Note that
235
+ you will be charged based on the number of generated tokens across all of the
236
+ choices. Keep `n` as `1` to minimize costs.
237
+
238
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
239
+ whether they appear in the text so far, increasing the model's likelihood to
240
+ talk about new topics.
241
+
242
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
243
+ returned text will not contain the stop sequence.
244
+
245
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
246
+
247
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
248
+ make the output more random, while lower values like 0.2 will make it more
249
+ focused and deterministic. We generally recommend altering this or `top_p` but
250
+ not both.
251
+
252
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
253
+ return at each token position, each with an associated log probability.
254
+ `logprobs` must be set to `true` if this parameter is used.
255
+
256
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
257
+ model considers the results of the tokens with top_p probability mass. So 0.1
258
+ means only the tokens comprising the top 10% probability mass are considered.
259
+
260
+ We generally recommend altering this or `temperature` but not both.
261
+
262
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
263
+ monitor and detect abuse.
264
+
265
+ extra_headers: Send extra headers
266
+
267
+ extra_query: Add additional query parameters to the request
268
+
269
+ extra_body: Add additional JSON properties to the request
270
+
271
+ timeout: Override the client-level default timeout for this request, in seconds
272
+ """
273
+ ...
274
+
275
+ @overload
276
+ def create(
277
+ self,
278
+ *,
279
+ messages: Iterable[completion_create_params.Message],
280
+ model: str,
281
+ stream: bool,
282
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
283
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
284
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
285
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
286
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
287
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
288
+ n: Optional[int] | NotGiven = NOT_GIVEN,
289
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
290
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
291
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
292
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
293
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
294
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
295
+ user: str | NotGiven = NOT_GIVEN,
296
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
297
+ # The extra values given here take precedence over values defined on the client or passed to this method.
298
+ extra_headers: Headers | None = None,
299
+ extra_query: Query | None = None,
300
+ extra_body: Body | None = None,
301
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
302
+ ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
303
+ """
304
+ Creates a model response for the given chat conversation.
305
+
306
+ Args:
307
+ messages: A list of messages comprising the conversation so far.
308
+
309
+ model: Model ID used to generate the response.
310
+
311
+ stream: If set to true, the model response data will be streamed to the client as it is
312
+ generated using server-sent events.
313
+
314
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
315
+ existing frequency in the text so far, decreasing the model's likelihood to
316
+ repeat the same line verbatim.
317
+
318
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
319
+
320
+ Accepts a JSON object that maps tokens (specified by their token ID in the
321
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
322
+ bias is added to the logits generated by the model prior to sampling. The exact
323
+ effect will vary per model, but values between -1 and 1 should decrease or
324
+ increase likelihood of selection; values like -100 or 100 should result in a ban
325
+ or exclusive selection of the relevant token.
326
+
327
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
328
+ returns the log probabilities of each output token returned in the `content` of
329
+ `message`.
330
+
331
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
332
+ run. The run will make a best effort to use only the number of completion tokens
333
+ specified, across multiple turns of the run.
334
+
335
+ max_tokens: The maximum number of tokens that can be generated in the completion.
336
+
337
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
338
+ context length.
339
+
340
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
341
+ for storing additional information about the object in a structured format, and
342
+ querying for objects via API or the dashboard.
343
+
344
+ Keys are strings with a maximum length of 64 characters. Values are strings with
345
+ a maximum length of 512 characters.
346
+
347
+ n: How many chat completion choices to generate for each input message. Note that
348
+ you will be charged based on the number of generated tokens across all of the
349
+ choices. Keep `n` as `1` to minimize costs.
350
+
351
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
352
+ whether they appear in the text so far, increasing the model's likelihood to
353
+ talk about new topics.
354
+
355
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
356
+ returned text will not contain the stop sequence.
357
+
358
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
359
+
360
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
361
+ make the output more random, while lower values like 0.2 will make it more
362
+ focused and deterministic. We generally recommend altering this or `top_p` but
363
+ not both.
364
+
365
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
366
+ return at each token position, each with an associated log probability.
367
+ `logprobs` must be set to `true` if this parameter is used.
368
+
369
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
370
+ model considers the results of the tokens with top_p probability mass. So 0.1
371
+ means only the tokens comprising the top 10% probability mass are considered.
372
+
373
+ We generally recommend altering this or `temperature` but not both.
374
+
375
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
376
+ monitor and detect abuse.
377
+
378
+ extra_headers: Send extra headers
379
+
380
+ extra_query: Add additional query parameters to the request
381
+
382
+ extra_body: Add additional JSON properties to the request
383
+
384
+ timeout: Override the client-level default timeout for this request, in seconds
385
+ """
386
+ ...
387
+
388
+ @required_args(["messages", "model"], ["messages", "model", "stream"])
389
+ def create(
390
+ self,
391
+ *,
392
+ messages: Iterable[completion_create_params.Message],
393
+ model: str,
394
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
395
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
396
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
397
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
398
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
399
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
400
+ n: Optional[int] | NotGiven = NOT_GIVEN,
401
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
402
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
403
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
404
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
405
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
406
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
407
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
408
+ user: str | NotGiven = NOT_GIVEN,
409
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
410
+ # The extra values given here take precedence over values defined on the client or passed to this method.
411
+ extra_headers: Headers | None = None,
412
+ extra_query: Query | None = None,
413
+ extra_body: Body | None = None,
414
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
415
+ ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
156
416
  return self._post(
157
417
  "/chat/completions"
158
418
  if self._client._base_url_overridden
@@ -177,12 +437,16 @@ class CompletionsResource(SyncAPIResource):
177
437
  "top_p": top_p,
178
438
  "user": user,
179
439
  },
180
- completion_create_params.CompletionCreateParams,
440
+ completion_create_params.CompletionCreateParamsStreaming
441
+ if stream
442
+ else completion_create_params.CompletionCreateParamsNonStreaming,
181
443
  ),
182
444
  options=make_request_options(
183
445
  extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
184
446
  ),
185
447
  cast_to=CompletionCreateResponse,
448
+ stream=stream or False,
449
+ stream_cls=Stream[ChatCompletionChunk],
186
450
  )
187
451
 
188
452
 
@@ -206,6 +470,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
206
470
  """
207
471
  return AsyncCompletionsResourceWithStreamingResponse(self)
208
472
 
473
+ @overload
209
474
  async def create(
210
475
  self,
211
476
  *,
@@ -220,7 +485,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
220
485
  n: Optional[int] | NotGiven = NOT_GIVEN,
221
486
  presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
222
487
  stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
223
- stream: Optional[bool] | NotGiven = NOT_GIVEN,
488
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
224
489
  stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
225
490
  temperature: Optional[float] | NotGiven = NOT_GIVEN,
226
491
  top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
@@ -316,6 +581,262 @@ class AsyncCompletionsResource(AsyncAPIResource):
316
581
 
317
582
  timeout: Override the client-level default timeout for this request, in seconds
318
583
  """
584
+ ...
585
+
586
+ @overload
587
+ async def create(
588
+ self,
589
+ *,
590
+ messages: Iterable[completion_create_params.Message],
591
+ model: str,
592
+ stream: Literal[True],
593
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
594
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
595
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
596
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
597
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
598
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
599
+ n: Optional[int] | NotGiven = NOT_GIVEN,
600
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
601
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
602
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
603
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
604
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
605
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
606
+ user: str | NotGiven = NOT_GIVEN,
607
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
608
+ # The extra values given here take precedence over values defined on the client or passed to this method.
609
+ extra_headers: Headers | None = None,
610
+ extra_query: Query | None = None,
611
+ extra_body: Body | None = None,
612
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
613
+ ) -> AsyncStream[ChatCompletionChunk]:
614
+ """
615
+ Creates a model response for the given chat conversation.
616
+
617
+ Args:
618
+ messages: A list of messages comprising the conversation so far.
619
+
620
+ model: Model ID used to generate the response.
621
+
622
+ stream: If set to true, the model response data will be streamed to the client as it is
623
+ generated using server-sent events.
624
+
625
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
626
+ existing frequency in the text so far, decreasing the model's likelihood to
627
+ repeat the same line verbatim.
628
+
629
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
630
+
631
+ Accepts a JSON object that maps tokens (specified by their token ID in the
632
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
633
+ bias is added to the logits generated by the model prior to sampling. The exact
634
+ effect will vary per model, but values between -1 and 1 should decrease or
635
+ increase likelihood of selection; values like -100 or 100 should result in a ban
636
+ or exclusive selection of the relevant token.
637
+
638
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
639
+ returns the log probabilities of each output token returned in the `content` of
640
+ `message`.
641
+
642
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
643
+ run. The run will make a best effort to use only the number of completion tokens
644
+ specified, across multiple turns of the run.
645
+
646
+ max_tokens: The maximum number of tokens that can be generated in the completion.
647
+
648
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
649
+ context length.
650
+
651
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
652
+ for storing additional information about the object in a structured format, and
653
+ querying for objects via API or the dashboard.
654
+
655
+ Keys are strings with a maximum length of 64 characters. Values are strings with
656
+ a maximum length of 512 characters.
657
+
658
+ n: How many chat completion choices to generate for each input message. Note that
659
+ you will be charged based on the number of generated tokens across all of the
660
+ choices. Keep `n` as `1` to minimize costs.
661
+
662
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
663
+ whether they appear in the text so far, increasing the model's likelihood to
664
+ talk about new topics.
665
+
666
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
667
+ returned text will not contain the stop sequence.
668
+
669
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
670
+
671
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
672
+ make the output more random, while lower values like 0.2 will make it more
673
+ focused and deterministic. We generally recommend altering this or `top_p` but
674
+ not both.
675
+
676
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
677
+ return at each token position, each with an associated log probability.
678
+ `logprobs` must be set to `true` if this parameter is used.
679
+
680
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
681
+ model considers the results of the tokens with top_p probability mass. So 0.1
682
+ means only the tokens comprising the top 10% probability mass are considered.
683
+
684
+ We generally recommend altering this or `temperature` but not both.
685
+
686
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
687
+ monitor and detect abuse.
688
+
689
+ extra_headers: Send extra headers
690
+
691
+ extra_query: Add additional query parameters to the request
692
+
693
+ extra_body: Add additional JSON properties to the request
694
+
695
+ timeout: Override the client-level default timeout for this request, in seconds
696
+ """
697
+ ...
698
+
699
+ @overload
700
+ async def create(
701
+ self,
702
+ *,
703
+ messages: Iterable[completion_create_params.Message],
704
+ model: str,
705
+ stream: bool,
706
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
707
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
708
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
709
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
710
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
711
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
712
+ n: Optional[int] | NotGiven = NOT_GIVEN,
713
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
714
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
715
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
716
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
717
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
718
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
719
+ user: str | NotGiven = NOT_GIVEN,
720
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
721
+ # The extra values given here take precedence over values defined on the client or passed to this method.
722
+ extra_headers: Headers | None = None,
723
+ extra_query: Query | None = None,
724
+ extra_body: Body | None = None,
725
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
726
+ ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
727
+ """
728
+ Creates a model response for the given chat conversation.
729
+
730
+ Args:
731
+ messages: A list of messages comprising the conversation so far.
732
+
733
+ model: Model ID used to generate the response.
734
+
735
+ stream: If set to true, the model response data will be streamed to the client as it is
736
+ generated using server-sent events.
737
+
738
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
739
+ existing frequency in the text so far, decreasing the model's likelihood to
740
+ repeat the same line verbatim.
741
+
742
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
743
+
744
+ Accepts a JSON object that maps tokens (specified by their token ID in the
745
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
746
+ bias is added to the logits generated by the model prior to sampling. The exact
747
+ effect will vary per model, but values between -1 and 1 should decrease or
748
+ increase likelihood of selection; values like -100 or 100 should result in a ban
749
+ or exclusive selection of the relevant token.
750
+
751
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
752
+ returns the log probabilities of each output token returned in the `content` of
753
+ `message`.
754
+
755
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
756
+ run. The run will make a best effort to use only the number of completion tokens
757
+ specified, across multiple turns of the run.
758
+
759
+ max_tokens: The maximum number of tokens that can be generated in the completion.
760
+
761
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
762
+ context length.
763
+
764
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
765
+ for storing additional information about the object in a structured format, and
766
+ querying for objects via API or the dashboard.
767
+
768
+ Keys are strings with a maximum length of 64 characters. Values are strings with
769
+ a maximum length of 512 characters.
770
+
771
+ n: How many chat completion choices to generate for each input message. Note that
772
+ you will be charged based on the number of generated tokens across all of the
773
+ choices. Keep `n` as `1` to minimize costs.
774
+
775
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
776
+ whether they appear in the text so far, increasing the model's likelihood to
777
+ talk about new topics.
778
+
779
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
780
+ returned text will not contain the stop sequence.
781
+
782
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
783
+
784
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
785
+ make the output more random, while lower values like 0.2 will make it more
786
+ focused and deterministic. We generally recommend altering this or `top_p` but
787
+ not both.
788
+
789
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
790
+ return at each token position, each with an associated log probability.
791
+ `logprobs` must be set to `true` if this parameter is used.
792
+
793
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
794
+ model considers the results of the tokens with top_p probability mass. So 0.1
795
+ means only the tokens comprising the top 10% probability mass are considered.
796
+
797
+ We generally recommend altering this or `temperature` but not both.
798
+
799
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
800
+ monitor and detect abuse.
801
+
802
+ extra_headers: Send extra headers
803
+
804
+ extra_query: Add additional query parameters to the request
805
+
806
+ extra_body: Add additional JSON properties to the request
807
+
808
+ timeout: Override the client-level default timeout for this request, in seconds
809
+ """
810
+ ...
811
+
812
+ @required_args(["messages", "model"], ["messages", "model", "stream"])
813
+ async def create(
814
+ self,
815
+ *,
816
+ messages: Iterable[completion_create_params.Message],
817
+ model: str,
818
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
819
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
820
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
821
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
822
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
823
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
824
+ n: Optional[int] | NotGiven = NOT_GIVEN,
825
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
826
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
827
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
828
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
829
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
830
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
831
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
832
+ user: str | NotGiven = NOT_GIVEN,
833
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
834
+ # The extra values given here take precedence over values defined on the client or passed to this method.
835
+ extra_headers: Headers | None = None,
836
+ extra_query: Query | None = None,
837
+ extra_body: Body | None = None,
838
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
839
+ ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
319
840
  return await self._post(
320
841
  "/chat/completions"
321
842
  if self._client._base_url_overridden
@@ -340,12 +861,16 @@ class AsyncCompletionsResource(AsyncAPIResource):
340
861
  "top_p": top_p,
341
862
  "user": user,
342
863
  },
343
- completion_create_params.CompletionCreateParams,
864
+ completion_create_params.CompletionCreateParamsStreaming
865
+ if stream
866
+ else completion_create_params.CompletionCreateParamsNonStreaming,
344
867
  ),
345
868
  options=make_request_options(
346
869
  extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
347
870
  ),
348
871
  cast_to=CompletionCreateResponse,
872
+ stream=stream or False,
873
+ stream_cls=AsyncStream[ChatCompletionChunk],
349
874
  )
350
875
 
351
876
 
@@ -2,5 +2,6 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
5
6
  from .completion_create_params import CompletionCreateParams as CompletionCreateParams
6
7
  from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
@@ -0,0 +1,93 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List, Optional
4
+ from typing_extensions import Literal
5
+
6
+ from ...._models import BaseModel
7
+ from ...shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
8
+
9
+ __all__ = ["ChatCompletionChunk", "Choice", "ChoiceDelta", "ChoiceLogprobs", "Usage"]
10
+
11
+
12
+ class ChoiceDelta(BaseModel):
13
+ content: Optional[str] = None
14
+ """The contents of the chunk message."""
15
+
16
+ refusal: Optional[str] = None
17
+ """The refusal message generated by the model."""
18
+
19
+ role: Optional[Literal["developer", "user", "assistant"]] = None
20
+ """The role of the author of this message."""
21
+
22
+
23
+ class ChoiceLogprobs(BaseModel):
24
+ content: Optional[List[ChatCompletionTokenLogprob]] = None
25
+ """A list of message content tokens with log probability information."""
26
+
27
+ refusal: Optional[List[ChatCompletionTokenLogprob]] = None
28
+ """A list of message refusal tokens with log probability information."""
29
+
30
+
31
+ class Choice(BaseModel):
32
+ delta: ChoiceDelta
33
+ """A chat completion delta generated by streamed model responses."""
34
+
35
+ finish_reason: Optional[Literal["stop", "length"]] = None
36
+ """The reason the model stopped generating tokens.
37
+
38
+ This will be `stop` if the model hit a natural stop point or a provided stop
39
+ sequence, or `length` if the maximum number of tokens specified in the request
40
+ was reached.
41
+ """
42
+
43
+ index: int
44
+ """The index of the choice in the list of choices."""
45
+
46
+ logprobs: Optional[ChoiceLogprobs] = None
47
+ """Log probability information for the choice."""
48
+
49
+
50
+ class Usage(BaseModel):
51
+ completion_tokens: int
52
+ """Number of tokens in the generated completion."""
53
+
54
+ prompt_tokens: int
55
+ """Number of tokens in the prompt."""
56
+
57
+ total_tokens: int
58
+ """Total number of tokens used in the request (prompt + completion)."""
59
+
60
+
61
+ class ChatCompletionChunk(BaseModel):
62
+ id: str
63
+ """A unique identifier for the chat completion. Each chunk has the same ID."""
64
+
65
+ choices: List[Choice]
66
+ """A list of chat completion choices.
67
+
68
+ Can contain more than one element if `n` is greater than 1. Can also be empty
69
+ for the last chunk if you set `stream_options: {"include_usage": true}`.
70
+ """
71
+
72
+ created: int
73
+ """The Unix timestamp (in seconds) of when the chat completion was created.
74
+
75
+ Each chunk has the same timestamp.
76
+ """
77
+
78
+ model: str
79
+ """The model used to generate the completion."""
80
+
81
+ object: Literal["chat.completion.chunk"]
82
+ """The object type, which is always `chat.completion.chunk`."""
83
+
84
+ usage: Optional[Usage] = None
85
+ """
86
+ An optional field that will only be present when you set
87
+ `stream_options: {"include_usage": true}` in your request. When present, it
88
+ contains a null value **except for the last chunk** which contains the token
89
+ usage statistics for the entire request.
90
+
91
+ **NOTE:** If the stream is interrupted or cancelled, you may not receive the
92
+ final usage chunk which contains the total token usage for the request.
93
+ """
@@ -6,17 +6,19 @@ from typing import Dict, List, Union, Iterable, Optional
6
6
  from typing_extensions import Literal, Required, TypeAlias, TypedDict
7
7
 
8
8
  __all__ = [
9
- "CompletionCreateParams",
9
+ "CompletionCreateParamsBase",
10
10
  "Message",
11
11
  "MessageChatCompletionRequestSystemMessage",
12
12
  "MessageChatCompletionRequestDeveloperMessage",
13
13
  "MessageChatCompletionRequestUserMessage",
14
14
  "MessageChatCompletionRequestAssistantMessage",
15
15
  "StreamOptions",
16
+ "CompletionCreateParamsNonStreaming",
17
+ "CompletionCreateParamsStreaming",
16
18
  ]
17
19
 
18
20
 
19
- class CompletionCreateParams(TypedDict, total=False):
21
+ class CompletionCreateParamsBase(TypedDict, total=False):
20
22
  messages: Required[Iterable[Message]]
21
23
  """A list of messages comprising the conversation so far."""
22
24
 
@@ -92,12 +94,6 @@ class CompletionCreateParams(TypedDict, total=False):
92
94
  The returned text will not contain the stop sequence.
93
95
  """
94
96
 
95
- stream: Optional[bool]
96
- """
97
- If set to true, the model response data will be streamed to the client as it is
98
- generated using server-sent events.
99
- """
100
-
101
97
  stream_options: Optional[StreamOptions]
102
98
  """Options for streaming response. Only set this when you set `stream: true`."""
103
99
 
@@ -183,3 +179,22 @@ class StreamOptions(TypedDict, total=False):
183
179
  **NOTE:** If the stream is interrupted, you may not receive the final usage
184
180
  chunk which contains the total token usage for the request.
185
181
  """
182
+
183
+
184
+ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False):
185
+ stream: Optional[Literal[False]]
186
+ """
187
+ If set to true, the model response data will be streamed to the client as it is
188
+ generated using server-sent events.
189
+ """
190
+
191
+
192
+ class CompletionCreateParamsStreaming(CompletionCreateParamsBase):
193
+ stream: Required[Literal[True]]
194
+ """
195
+ If set to true, the model response data will be streamed to the client as it is
196
+ generated using server-sent events.
197
+ """
198
+
199
+
200
+ CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming]