c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python 0.1.0a8__py3-none-any.whl → 0.1.0a9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python
3
- Version: 0.1.0a8
3
+ Version: 0.1.0a9
4
4
  Summary: The official Python library for GradientAI
5
5
  Project-URL: Homepage, https://github.com/digitalocean/gradientai-python
6
6
  Project-URL: Repository, https://github.com/digitalocean/gradientai-python
@@ -11,7 +11,7 @@ gradientai/_resource.py,sha256=4NZbH2h8dQ-t-DQPida4VANJ_oZJNA7qxV84mwJT8oM,1124
11
11
  gradientai/_response.py,sha256=RhlDdupxTcKNyDDj045MZD3-a_lsEc3yjiOzxWg0cDc,28842
12
12
  gradientai/_streaming.py,sha256=3KH-GBmqhoS1KAOhecADOsbW9WuzhIi8wSdmrEj5PPA,11404
13
13
  gradientai/_types.py,sha256=22gBoIuoGJ1R6l5nPwquWCRzJodKhO-3e7k22-h37JQ,6201
14
- gradientai/_version.py,sha256=vl2rc2vAkezojVTxFct2GJxzipzCGzHOxgS9Ld5ASmM,170
14
+ gradientai/_version.py,sha256=aATDhXxfQvFSrVaXTkz4f895b3kBEp1bAJcYPj23INc,170
15
15
  gradientai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  gradientai/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
17
17
  gradientai/_utils/_logs.py,sha256=1QuZcxzSKHcqNFbPoz_pvfLD5eDfOMBzGMyanAm_2gw,787
@@ -39,7 +39,7 @@ gradientai/resources/agents/routes.py,sha256=pDoK5hUgluhz5awJ9FR_krF8DIbkRzLX__M
39
39
  gradientai/resources/agents/versions.py,sha256=hmHDaawFkpqoUEYI4Vi5jckH3mamkPKupXO3cZccyKE,11546
40
40
  gradientai/resources/agents/chat/__init__.py,sha256=BVAfz9TM3DT5W9f_mt0P9YRxL_MsUxKCWAH6u1iogmA,1041
41
41
  gradientai/resources/agents/chat/chat.py,sha256=nt97777qa-xM71JQBKDFG_x7fUDJRvy6rF5LoBiMOKE,3698
42
- gradientai/resources/agents/chat/completions.py,sha256=jq62v8gN3hJ7POxBCHMcUJUi6Zj4IgvNGlr7D36W1M8,46188
42
+ gradientai/resources/agents/chat/completions.py,sha256=uYlI68LprqXL-W1hVRjd0tb3jutXYosKKMOEecuzpV8,46239
43
43
  gradientai/resources/agents/evaluation_metrics/__init__.py,sha256=qUCsT_vI2TrZrUBPe8h-VMNBE4ytcoe0RXshDctV0g0,1198
44
44
  gradientai/resources/agents/evaluation_metrics/evaluation_metrics.py,sha256=BueqcWT0lqbElo-lgntkr1s7J0-qi2KqEZV2qzc7dcI,7089
45
45
  gradientai/resources/agents/evaluation_metrics/workspaces/__init__.py,sha256=Zf3wa7WSVOKyG1x9Fppny38_ewQCbdghxbptqGFVfOM,1054
@@ -47,7 +47,7 @@ gradientai/resources/agents/evaluation_metrics/workspaces/agents.py,sha256=AVgDN
47
47
  gradientai/resources/agents/evaluation_metrics/workspaces/workspaces.py,sha256=iSQS3IW5xjOX5EsyJraoS5enwzrBARUhm6Xl1EgiICE,27079
48
48
  gradientai/resources/chat/__init__.py,sha256=BVAfz9TM3DT5W9f_mt0P9YRxL_MsUxKCWAH6u1iogmA,1041
49
49
  gradientai/resources/chat/chat.py,sha256=ANOEzzDATXj-onkkdtvVad9O-Dfwe7Uza7yfA_3bU4U,3696
50
- gradientai/resources/chat/completions.py,sha256=KWtB8ro2ClciI4NhbXzgXL-bIE7fcInDdGR7EElA5nU,19108
50
+ gradientai/resources/chat/completions.py,sha256=u_TvPvwNU79e3I5-v1P384LE5ysVz9gjJpYOKxnkl7E,47059
51
51
  gradientai/resources/inference/__init__.py,sha256=5Yk9bdOpUJPTero0_CvA-GJvcU5_gVlN1jf5r2GGGPY,1055
52
52
  gradientai/resources/inference/api_keys.py,sha256=xgWTFTp8IVRkw2nvXnfjBZPOTJyLUDIKlE7fSmb1y2I,22021
53
53
  gradientai/resources/inference/inference.py,sha256=jBimuBx3kKsuwC3pgy-PPrWg1TryO_a108CC_xLS2-Y,3771
@@ -150,8 +150,8 @@ gradientai/types/agents/version_list_params.py,sha256=0_3DhUbFDRyjUnn2G4saFOvuot
150
150
  gradientai/types/agents/version_list_response.py,sha256=Y0Y8CSPUPVHKRA3zTRRfQ8gC2aFyBacSCaGz-D5k8nk,2725
151
151
  gradientai/types/agents/version_update_params.py,sha256=j9tOda5wXmSOHsmcxQONo2mM-hEtrYi5-19HfGU_XnI,379
152
152
  gradientai/types/agents/version_update_response.py,sha256=nspPIkxQskT82tcW0JyG7bBVlXq_KU6CZzodTd9jfkQ,709
153
- gradientai/types/agents/chat/__init__.py,sha256=c-PmEwuvWZQ4CRBTs9gzbKAq2sxL7V7JlVxddeoaGl0,381
154
- gradientai/types/agents/chat/chat_completion_chunk.py,sha256=1K-F0JdUmQ_4idDk3oUGUB_mhxAxCzjq0C8hMhGtDuY,3048
153
+ gradientai/types/agents/chat/__init__.py,sha256=VGhGOtQE4pcOIHKsOeLDYDpixc_5ExOfIJcR0q6AqvY,397
154
+ gradientai/types/agents/chat/agent_chat_completion_chunk.py,sha256=bktrs-Ao6uta3k4PVzBrreMKbArztBcsHIEa8uzIK9s,3058
155
155
  gradientai/types/agents/chat/completion_create_params.py,sha256=ADEJ0N3MMsouT9AqBLE1-rho4FVVhlp9U9E-buqIAYs,7165
156
156
  gradientai/types/agents/chat/completion_create_response.py,sha256=9uKS3memEoV0_Xd1CZwI0jQGsQyfVkhXRPGlRO3rUIc,2415
157
157
  gradientai/types/agents/evaluation_metrics/__init__.py,sha256=XWH_utxMx-JwArRpr-rHQfmoxQRGK6GciKOllbkqg40,894
@@ -168,8 +168,9 @@ gradientai/types/agents/evaluation_metrics/workspaces/agent_list_params.py,sha25
168
168
  gradientai/types/agents/evaluation_metrics/workspaces/agent_list_response.py,sha256=W4O5v1LoWh2hQJTpUm5SfQCYcv6Q9Yz1Id5Pm1sPQNA,503
169
169
  gradientai/types/agents/evaluation_metrics/workspaces/agent_move_params.py,sha256=4INiLEvgT9UDqFbrGwp3nuWOzFhwv7sX_YCr1Um1RaQ,422
170
170
  gradientai/types/agents/evaluation_metrics/workspaces/agent_move_response.py,sha256=j2uoTmFwnLNU7kGI5LZdPpMUxWmUk-HdQIVVDOwSy4Y,350
171
- gradientai/types/chat/__init__.py,sha256=A5VCUPqJZydjjOqEXC01GXmcDkKM3bq6zuCu9lmi5Es,303
172
- gradientai/types/chat/completion_create_params.py,sha256=F4Dcrt5aYC_GEWUSN2OA3Zm5ImevZ7tFuTxBH7RyooM,6635
171
+ gradientai/types/chat/__init__.py,sha256=c-PmEwuvWZQ4CRBTs9gzbKAq2sxL7V7JlVxddeoaGl0,381
172
+ gradientai/types/chat/chat_completion_chunk.py,sha256=o1gDgYtzM477RmKWg-q5CE0tP3p0J7YKlZWaoqjCJOU,3046
173
+ gradientai/types/chat/completion_create_params.py,sha256=ADEJ0N3MMsouT9AqBLE1-rho4FVVhlp9U9E-buqIAYs,7165
173
174
  gradientai/types/chat/completion_create_response.py,sha256=nNPWSXZYbyYLjT_ikVvDcjRw3f9eRGHFsUrLKtQHYGI,2413
174
175
  gradientai/types/inference/__init__.py,sha256=4Dt7-03NeP9ehdHLkLsZMiL_YLQwZsl92D0mMoDQ5g0,857
175
176
  gradientai/types/inference/api_key_create_params.py,sha256=MOy5Bdr1wNBqCvqzyZ0FLfFY2a97q6eXCzgCR1wcLAE,263
@@ -232,7 +233,7 @@ gradientai/types/shared/__init__.py,sha256=YA2_qLkZLySOac1HrqOfCTEz6GeipnjIJh1mK
232
233
  gradientai/types/shared/api_links.py,sha256=Iq5iQwOkRYuwLcuDLk54dUfrq0f2ZVEOXSpF744gYgA,403
233
234
  gradientai/types/shared/api_meta.py,sha256=-KyinzQqM5GSjD7E5xm7A4UALXAvLOyVNR1SYVOUFJM,297
234
235
  gradientai/types/shared/chat_completion_token_logprob.py,sha256=6-ipUFfsXMf5L7FDFi127NaVkDtmEooVgGBF6Ts965A,1769
235
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/METADATA,sha256=xwI-Z-rvG_Z_-L5Xu3x_NW2F7CauiUQmYG8wtunHk-Q,15049
236
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
237
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/licenses/LICENSE,sha256=AzxEF8mEks6hu5V_87CXF8gLdL875WeO8FmQtEZTFok,11341
238
- c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info/RECORD,,
236
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a9.dist-info/METADATA,sha256=knvT5XUviLOr5lmtTU4M4N1a1_opUZDVrfNKXZM1nqM,15049
237
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a9.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
238
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a9.dist-info/licenses/LICENSE,sha256=AzxEF8mEks6hu5V_87CXF8gLdL875WeO8FmQtEZTFok,11341
239
+ c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a9.dist-info/RECORD,,
gradientai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "gradientai"
4
- __version__ = "0.1.0-alpha.8" # x-release-please-version
4
+ __version__ = "0.1.0-alpha.9" # x-release-please-version
@@ -20,8 +20,8 @@ from ...._response import (
20
20
  from ...._streaming import Stream, AsyncStream
21
21
  from ...._base_client import make_request_options
22
22
  from ....types.agents.chat import completion_create_params
23
- from ....types.agents.chat.chat_completion_chunk import ChatCompletionChunk
24
23
  from ....types.agents.chat.completion_create_response import CompletionCreateResponse
24
+ from ....types.agents.chat.agent_chat_completion_chunk import AgentChatCompletionChunk
25
25
 
26
26
  __all__ = ["CompletionsResource", "AsyncCompletionsResource"]
27
27
 
@@ -186,7 +186,7 @@ class CompletionsResource(SyncAPIResource):
186
186
  extra_query: Query | None = None,
187
187
  extra_body: Body | None = None,
188
188
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
189
- ) -> Stream[ChatCompletionChunk]:
189
+ ) -> Stream[AgentChatCompletionChunk]:
190
190
  """
191
191
  Creates a model response for the given chat conversation.
192
192
 
@@ -299,7 +299,7 @@ class CompletionsResource(SyncAPIResource):
299
299
  extra_query: Query | None = None,
300
300
  extra_body: Body | None = None,
301
301
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
302
- ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
302
+ ) -> CompletionCreateResponse | Stream[AgentChatCompletionChunk]:
303
303
  """
304
304
  Creates a model response for the given chat conversation.
305
305
 
@@ -412,7 +412,7 @@ class CompletionsResource(SyncAPIResource):
412
412
  extra_query: Query | None = None,
413
413
  extra_body: Body | None = None,
414
414
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
415
- ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
415
+ ) -> CompletionCreateResponse | Stream[AgentChatCompletionChunk]:
416
416
  return self._post(
417
417
  "/chat/completions"
418
418
  if self._client._base_url_overridden
@@ -446,7 +446,7 @@ class CompletionsResource(SyncAPIResource):
446
446
  ),
447
447
  cast_to=CompletionCreateResponse,
448
448
  stream=stream or False,
449
- stream_cls=Stream[ChatCompletionChunk],
449
+ stream_cls=Stream[AgentChatCompletionChunk],
450
450
  )
451
451
 
452
452
 
@@ -610,7 +610,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
610
610
  extra_query: Query | None = None,
611
611
  extra_body: Body | None = None,
612
612
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
613
- ) -> AsyncStream[ChatCompletionChunk]:
613
+ ) -> AsyncStream[AgentChatCompletionChunk]:
614
614
  """
615
615
  Creates a model response for the given chat conversation.
616
616
 
@@ -723,7 +723,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
723
723
  extra_query: Query | None = None,
724
724
  extra_body: Body | None = None,
725
725
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
726
- ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
726
+ ) -> CompletionCreateResponse | AsyncStream[AgentChatCompletionChunk]:
727
727
  """
728
728
  Creates a model response for the given chat conversation.
729
729
 
@@ -836,7 +836,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
836
836
  extra_query: Query | None = None,
837
837
  extra_body: Body | None = None,
838
838
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
839
- ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
839
+ ) -> CompletionCreateResponse | AsyncStream[AgentChatCompletionChunk]:
840
840
  return await self._post(
841
841
  "/chat/completions"
842
842
  if self._client._base_url_overridden
@@ -870,7 +870,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
870
870
  ),
871
871
  cast_to=CompletionCreateResponse,
872
872
  stream=stream or False,
873
- stream_cls=AsyncStream[ChatCompletionChunk],
873
+ stream_cls=AsyncStream[AgentChatCompletionChunk],
874
874
  )
875
875
 
876
876
 
@@ -3,11 +3,12 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from typing import Dict, List, Union, Iterable, Optional
6
+ from typing_extensions import Literal, overload
6
7
 
7
8
  import httpx
8
9
 
9
10
  from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
10
- from ..._utils import maybe_transform, async_maybe_transform
11
+ from ..._utils import required_args, maybe_transform, async_maybe_transform
11
12
  from ..._compat import cached_property
12
13
  from ..._resource import SyncAPIResource, AsyncAPIResource
13
14
  from ..._response import (
@@ -16,8 +17,10 @@ from ..._response import (
16
17
  async_to_raw_response_wrapper,
17
18
  async_to_streamed_response_wrapper,
18
19
  )
20
+ from ..._streaming import Stream, AsyncStream
19
21
  from ...types.chat import completion_create_params
20
22
  from ..._base_client import make_request_options
23
+ from ...types.chat.chat_completion_chunk import ChatCompletionChunk
21
24
  from ...types.chat.completion_create_response import CompletionCreateResponse
22
25
 
23
26
  __all__ = ["CompletionsResource", "AsyncCompletionsResource"]
@@ -43,6 +46,7 @@ class CompletionsResource(SyncAPIResource):
43
46
  """
44
47
  return CompletionsResourceWithStreamingResponse(self)
45
48
 
49
+ @overload
46
50
  def create(
47
51
  self,
48
52
  *,
@@ -57,7 +61,7 @@ class CompletionsResource(SyncAPIResource):
57
61
  n: Optional[int] | NotGiven = NOT_GIVEN,
58
62
  presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
59
63
  stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
60
- stream: Optional[bool] | NotGiven = NOT_GIVEN,
64
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
61
65
  stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
62
66
  temperature: Optional[float] | NotGiven = NOT_GIVEN,
63
67
  top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
@@ -154,6 +158,263 @@ class CompletionsResource(SyncAPIResource):
154
158
  timeout: Override the client-level default timeout for this request, in seconds
155
159
  """
156
160
 
161
+ ...
162
+
163
+ @overload
164
+ def create(
165
+ self,
166
+ *,
167
+ messages: Iterable[completion_create_params.Message],
168
+ model: str,
169
+ stream: Literal[True],
170
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
171
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
172
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
173
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
174
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
175
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
176
+ n: Optional[int] | NotGiven = NOT_GIVEN,
177
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
178
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
179
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
180
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
181
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
182
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
183
+ user: str | NotGiven = NOT_GIVEN,
184
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
185
+ # The extra values given here take precedence over values defined on the client or passed to this method.
186
+ extra_headers: Headers | None = None,
187
+ extra_query: Query | None = None,
188
+ extra_body: Body | None = None,
189
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
190
+ ) -> Stream[ChatCompletionChunk]:
191
+ """
192
+ Creates a model response for the given chat conversation.
193
+
194
+ Args:
195
+ messages: A list of messages comprising the conversation so far.
196
+
197
+ model: Model ID used to generate the response.
198
+
199
+ stream: If set to true, the model response data will be streamed to the client as it is
200
+ generated using server-sent events.
201
+
202
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
203
+ existing frequency in the text so far, decreasing the model's likelihood to
204
+ repeat the same line verbatim.
205
+
206
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
207
+
208
+ Accepts a JSON object that maps tokens (specified by their token ID in the
209
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
210
+ bias is added to the logits generated by the model prior to sampling. The exact
211
+ effect will vary per model, but values between -1 and 1 should decrease or
212
+ increase likelihood of selection; values like -100 or 100 should result in a ban
213
+ or exclusive selection of the relevant token.
214
+
215
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
216
+ returns the log probabilities of each output token returned in the `content` of
217
+ `message`.
218
+
219
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
220
+ run. The run will make a best effort to use only the number of completion tokens
221
+ specified, across multiple turns of the run.
222
+
223
+ max_tokens: The maximum number of tokens that can be generated in the completion.
224
+
225
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
226
+ context length.
227
+
228
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
229
+ for storing additional information about the object in a structured format, and
230
+ querying for objects via API or the dashboard.
231
+
232
+ Keys are strings with a maximum length of 64 characters. Values are strings with
233
+ a maximum length of 512 characters.
234
+
235
+ n: How many chat completion choices to generate for each input message. Note that
236
+ you will be charged based on the number of generated tokens across all of the
237
+ choices. Keep `n` as `1` to minimize costs.
238
+
239
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
240
+ whether they appear in the text so far, increasing the model's likelihood to
241
+ talk about new topics.
242
+
243
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
244
+ returned text will not contain the stop sequence.
245
+
246
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
247
+
248
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
249
+ make the output more random, while lower values like 0.2 will make it more
250
+ focused and deterministic. We generally recommend altering this or `top_p` but
251
+ not both.
252
+
253
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
254
+ return at each token position, each with an associated log probability.
255
+ `logprobs` must be set to `true` if this parameter is used.
256
+
257
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
258
+ model considers the results of the tokens with top_p probability mass. So 0.1
259
+ means only the tokens comprising the top 10% probability mass are considered.
260
+
261
+ We generally recommend altering this or `temperature` but not both.
262
+
263
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
264
+ monitor and detect abuse.
265
+
266
+ extra_headers: Send extra headers
267
+
268
+ extra_query: Add additional query parameters to the request
269
+
270
+ extra_body: Add additional JSON properties to the request
271
+
272
+ timeout: Override the client-level default timeout for this request, in seconds
273
+ """
274
+ ...
275
+
276
+ @overload
277
+ def create(
278
+ self,
279
+ *,
280
+ messages: Iterable[completion_create_params.Message],
281
+ model: str,
282
+ stream: bool,
283
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
284
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
285
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
286
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
287
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
288
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
289
+ n: Optional[int] | NotGiven = NOT_GIVEN,
290
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
291
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
292
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
293
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
294
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
295
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
296
+ user: str | NotGiven = NOT_GIVEN,
297
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
298
+ # The extra values given here take precedence over values defined on the client or passed to this method.
299
+ extra_headers: Headers | None = None,
300
+ extra_query: Query | None = None,
301
+ extra_body: Body | None = None,
302
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
303
+ ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
304
+ """
305
+ Creates a model response for the given chat conversation.
306
+
307
+ Args:
308
+ messages: A list of messages comprising the conversation so far.
309
+
310
+ model: Model ID used to generate the response.
311
+
312
+ stream: If set to true, the model response data will be streamed to the client as it is
313
+ generated using server-sent events.
314
+
315
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
316
+ existing frequency in the text so far, decreasing the model's likelihood to
317
+ repeat the same line verbatim.
318
+
319
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
320
+
321
+ Accepts a JSON object that maps tokens (specified by their token ID in the
322
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
323
+ bias is added to the logits generated by the model prior to sampling. The exact
324
+ effect will vary per model, but values between -1 and 1 should decrease or
325
+ increase likelihood of selection; values like -100 or 100 should result in a ban
326
+ or exclusive selection of the relevant token.
327
+
328
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
329
+ returns the log probabilities of each output token returned in the `content` of
330
+ `message`.
331
+
332
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
333
+ run. The run will make a best effort to use only the number of completion tokens
334
+ specified, across multiple turns of the run.
335
+
336
+ max_tokens: The maximum number of tokens that can be generated in the completion.
337
+
338
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
339
+ context length.
340
+
341
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
342
+ for storing additional information about the object in a structured format, and
343
+ querying for objects via API or the dashboard.
344
+
345
+ Keys are strings with a maximum length of 64 characters. Values are strings with
346
+ a maximum length of 512 characters.
347
+
348
+ n: How many chat completion choices to generate for each input message. Note that
349
+ you will be charged based on the number of generated tokens across all of the
350
+ choices. Keep `n` as `1` to minimize costs.
351
+
352
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
353
+ whether they appear in the text so far, increasing the model's likelihood to
354
+ talk about new topics.
355
+
356
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
357
+ returned text will not contain the stop sequence.
358
+
359
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
360
+
361
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
362
+ make the output more random, while lower values like 0.2 will make it more
363
+ focused and deterministic. We generally recommend altering this or `top_p` but
364
+ not both.
365
+
366
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
367
+ return at each token position, each with an associated log probability.
368
+ `logprobs` must be set to `true` if this parameter is used.
369
+
370
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
371
+ model considers the results of the tokens with top_p probability mass. So 0.1
372
+ means only the tokens comprising the top 10% probability mass are considered.
373
+
374
+ We generally recommend altering this or `temperature` but not both.
375
+
376
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
377
+ monitor and detect abuse.
378
+
379
+ extra_headers: Send extra headers
380
+
381
+ extra_query: Add additional query parameters to the request
382
+
383
+ extra_body: Add additional JSON properties to the request
384
+
385
+ timeout: Override the client-level default timeout for this request, in seconds
386
+ """
387
+ ...
388
+
389
+ @required_args(["messages", "model"], ["messages", "model", "stream"])
390
+ def create(
391
+ self,
392
+ *,
393
+ messages: Iterable[completion_create_params.Message],
394
+ model: str,
395
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
396
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
397
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
398
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
399
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
400
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
401
+ n: Optional[int] | NotGiven = NOT_GIVEN,
402
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
403
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
404
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
405
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
406
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
407
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
408
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
409
+ user: str | NotGiven = NOT_GIVEN,
410
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
411
+ # The extra values given here take precedence over values defined on the client or passed to this method.
412
+ extra_headers: Headers | None = None,
413
+ extra_query: Query | None = None,
414
+ extra_body: Body | None = None,
415
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
416
+ ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
417
+
157
418
  # This method requires an inference_key to be set via client argument or environment variable
158
419
  if not self._client.inference_key:
159
420
  raise TypeError(
@@ -186,12 +447,16 @@ class CompletionsResource(SyncAPIResource):
186
447
  "top_p": top_p,
187
448
  "user": user,
188
449
  },
189
- completion_create_params.CompletionCreateParams,
450
+ completion_create_params.CompletionCreateParamsStreaming
451
+ if stream
452
+ else completion_create_params.CompletionCreateParamsNonStreaming,
190
453
  ),
191
454
  options=make_request_options(
192
455
  extra_headers=headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
193
456
  ),
194
457
  cast_to=CompletionCreateResponse,
458
+ stream=stream or False,
459
+ stream_cls=Stream[ChatCompletionChunk],
195
460
  )
196
461
 
197
462
 
@@ -215,6 +480,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
215
480
  """
216
481
  return AsyncCompletionsResourceWithStreamingResponse(self)
217
482
 
483
+ @overload
218
484
  async def create(
219
485
  self,
220
486
  *,
@@ -229,7 +495,7 @@ class AsyncCompletionsResource(AsyncAPIResource):
229
495
  n: Optional[int] | NotGiven = NOT_GIVEN,
230
496
  presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
231
497
  stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
232
- stream: Optional[bool] | NotGiven = NOT_GIVEN,
498
+ stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
233
499
  stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
234
500
  temperature: Optional[float] | NotGiven = NOT_GIVEN,
235
501
  top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
@@ -326,6 +592,263 @@ class AsyncCompletionsResource(AsyncAPIResource):
326
592
  timeout: Override the client-level default timeout for this request, in seconds
327
593
  """
328
594
 
595
+ ...
596
+
597
+ @overload
598
+ async def create(
599
+ self,
600
+ *,
601
+ messages: Iterable[completion_create_params.Message],
602
+ model: str,
603
+ stream: Literal[True],
604
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
605
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
606
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
607
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
608
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
609
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
610
+ n: Optional[int] | NotGiven = NOT_GIVEN,
611
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
612
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
613
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
614
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
615
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
616
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
617
+ user: str | NotGiven = NOT_GIVEN,
618
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
619
+ # The extra values given here take precedence over values defined on the client or passed to this method.
620
+ extra_headers: Headers | None = None,
621
+ extra_query: Query | None = None,
622
+ extra_body: Body | None = None,
623
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
624
+ ) -> AsyncStream[ChatCompletionChunk]:
625
+ """
626
+ Creates a model response for the given chat conversation.
627
+
628
+ Args:
629
+ messages: A list of messages comprising the conversation so far.
630
+
631
+ model: Model ID used to generate the response.
632
+
633
+ stream: If set to true, the model response data will be streamed to the client as it is
634
+ generated using server-sent events.
635
+
636
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
637
+ existing frequency in the text so far, decreasing the model's likelihood to
638
+ repeat the same line verbatim.
639
+
640
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
641
+
642
+ Accepts a JSON object that maps tokens (specified by their token ID in the
643
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
644
+ bias is added to the logits generated by the model prior to sampling. The exact
645
+ effect will vary per model, but values between -1 and 1 should decrease or
646
+ increase likelihood of selection; values like -100 or 100 should result in a ban
647
+ or exclusive selection of the relevant token.
648
+
649
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
650
+ returns the log probabilities of each output token returned in the `content` of
651
+ `message`.
652
+
653
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
654
+ run. The run will make a best effort to use only the number of completion tokens
655
+ specified, across multiple turns of the run.
656
+
657
+ max_tokens: The maximum number of tokens that can be generated in the completion.
658
+
659
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
660
+ context length.
661
+
662
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
663
+ for storing additional information about the object in a structured format, and
664
+ querying for objects via API or the dashboard.
665
+
666
+ Keys are strings with a maximum length of 64 characters. Values are strings with
667
+ a maximum length of 512 characters.
668
+
669
+ n: How many chat completion choices to generate for each input message. Note that
670
+ you will be charged based on the number of generated tokens across all of the
671
+ choices. Keep `n` as `1` to minimize costs.
672
+
673
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
674
+ whether they appear in the text so far, increasing the model's likelihood to
675
+ talk about new topics.
676
+
677
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
678
+ returned text will not contain the stop sequence.
679
+
680
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
681
+
682
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
683
+ make the output more random, while lower values like 0.2 will make it more
684
+ focused and deterministic. We generally recommend altering this or `top_p` but
685
+ not both.
686
+
687
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
688
+ return at each token position, each with an associated log probability.
689
+ `logprobs` must be set to `true` if this parameter is used.
690
+
691
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
692
+ model considers the results of the tokens with top_p probability mass. So 0.1
693
+ means only the tokens comprising the top 10% probability mass are considered.
694
+
695
+ We generally recommend altering this or `temperature` but not both.
696
+
697
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
698
+ monitor and detect abuse.
699
+
700
+ extra_headers: Send extra headers
701
+
702
+ extra_query: Add additional query parameters to the request
703
+
704
+ extra_body: Add additional JSON properties to the request
705
+
706
+ timeout: Override the client-level default timeout for this request, in seconds
707
+ """
708
+ ...
709
+
710
+ @overload
711
+ async def create(
712
+ self,
713
+ *,
714
+ messages: Iterable[completion_create_params.Message],
715
+ model: str,
716
+ stream: bool,
717
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
718
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
719
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
720
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
721
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
722
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
723
+ n: Optional[int] | NotGiven = NOT_GIVEN,
724
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
725
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
726
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
727
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
728
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
729
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
730
+ user: str | NotGiven = NOT_GIVEN,
731
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
732
+ # The extra values given here take precedence over values defined on the client or passed to this method.
733
+ extra_headers: Headers | None = None,
734
+ extra_query: Query | None = None,
735
+ extra_body: Body | None = None,
736
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
737
+ ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
738
+ """
739
+ Creates a model response for the given chat conversation.
740
+
741
+ Args:
742
+ messages: A list of messages comprising the conversation so far.
743
+
744
+ model: Model ID used to generate the response.
745
+
746
+ stream: If set to true, the model response data will be streamed to the client as it is
747
+ generated using server-sent events.
748
+
749
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
750
+ existing frequency in the text so far, decreasing the model's likelihood to
751
+ repeat the same line verbatim.
752
+
753
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion.
754
+
755
+ Accepts a JSON object that maps tokens (specified by their token ID in the
756
+ tokenizer) to an associated bias value from -100 to 100. Mathematically, the
757
+ bias is added to the logits generated by the model prior to sampling. The exact
758
+ effect will vary per model, but values between -1 and 1 should decrease or
759
+ increase likelihood of selection; values like -100 or 100 should result in a ban
760
+ or exclusive selection of the relevant token.
761
+
762
+ logprobs: Whether to return log probabilities of the output tokens or not. If true,
763
+ returns the log probabilities of each output token returned in the `content` of
764
+ `message`.
765
+
766
+ max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
767
+ run. The run will make a best effort to use only the number of completion tokens
768
+ specified, across multiple turns of the run.
769
+
770
+ max_tokens: The maximum number of tokens that can be generated in the completion.
771
+
772
+ The token count of your prompt plus `max_tokens` cannot exceed the model's
773
+ context length.
774
+
775
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
776
+ for storing additional information about the object in a structured format, and
777
+ querying for objects via API or the dashboard.
778
+
779
+ Keys are strings with a maximum length of 64 characters. Values are strings with
780
+ a maximum length of 512 characters.
781
+
782
+ n: How many chat completion choices to generate for each input message. Note that
783
+ you will be charged based on the number of generated tokens across all of the
784
+ choices. Keep `n` as `1` to minimize costs.
785
+
786
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
787
+ whether they appear in the text so far, increasing the model's likelihood to
788
+ talk about new topics.
789
+
790
+ stop: Up to 4 sequences where the API will stop generating further tokens. The
791
+ returned text will not contain the stop sequence.
792
+
793
+ stream_options: Options for streaming response. Only set this when you set `stream: true`.
794
+
795
+ temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
796
+ make the output more random, while lower values like 0.2 will make it more
797
+ focused and deterministic. We generally recommend altering this or `top_p` but
798
+ not both.
799
+
800
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
801
+ return at each token position, each with an associated log probability.
802
+ `logprobs` must be set to `true` if this parameter is used.
803
+
804
+ top_p: An alternative to sampling with temperature, called nucleus sampling, where the
805
+ model considers the results of the tokens with top_p probability mass. So 0.1
806
+ means only the tokens comprising the top 10% probability mass are considered.
807
+
808
+ We generally recommend altering this or `temperature` but not both.
809
+
810
+ user: A unique identifier representing your end-user, which can help DigitalOcean to
811
+ monitor and detect abuse.
812
+
813
+ extra_headers: Send extra headers
814
+
815
+ extra_query: Add additional query parameters to the request
816
+
817
+ extra_body: Add additional JSON properties to the request
818
+
819
+ timeout: Override the client-level default timeout for this request, in seconds
820
+ """
821
+ ...
822
+
823
+ @required_args(["messages", "model"], ["messages", "model", "stream"])
824
+ async def create(
825
+ self,
826
+ *,
827
+ messages: Iterable[completion_create_params.Message],
828
+ model: str,
829
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
830
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
831
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
832
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
833
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
834
+ metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
835
+ n: Optional[int] | NotGiven = NOT_GIVEN,
836
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
837
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
838
+ stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
839
+ stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
840
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
841
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
842
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
843
+ user: str | NotGiven = NOT_GIVEN,
844
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
845
+ # The extra values given here take precedence over values defined on the client or passed to this method.
846
+ extra_headers: Headers | None = None,
847
+ extra_query: Query | None = None,
848
+ extra_body: Body | None = None,
849
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
850
+ ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
851
+
329
852
  # This method requires an inference_key to be set via client argument or environment variable
330
853
  if not hasattr(self._client, "inference_key") or not self._client.inference_key:
331
854
  raise TypeError(
@@ -358,12 +881,16 @@ class AsyncCompletionsResource(AsyncAPIResource):
358
881
  "top_p": top_p,
359
882
  "user": user,
360
883
  },
361
- completion_create_params.CompletionCreateParams,
884
+ completion_create_params.CompletionCreateParamsStreaming
885
+ if stream
886
+ else completion_create_params.CompletionCreateParamsNonStreaming,
362
887
  ),
363
888
  options=make_request_options(
364
889
  extra_headers=headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
365
890
  ),
366
891
  cast_to=CompletionCreateResponse,
892
+ stream=stream or False,
893
+ stream_cls=AsyncStream[ChatCompletionChunk],
367
894
  )
368
895
 
369
896
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
6
5
  from .completion_create_params import CompletionCreateParams as CompletionCreateParams
7
6
  from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
7
+ from .agent_chat_completion_chunk import AgentChatCompletionChunk as AgentChatCompletionChunk
@@ -0,0 +1,93 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List, Optional
4
+ from typing_extensions import Literal
5
+
6
+ from ...._models import BaseModel
7
+ from ...shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
8
+
9
+ __all__ = ["AgentChatCompletionChunk", "Choice", "ChoiceDelta", "ChoiceLogprobs", "Usage"]
10
+
11
+
12
+ class ChoiceDelta(BaseModel):
13
+ content: Optional[str] = None
14
+ """The contents of the chunk message."""
15
+
16
+ refusal: Optional[str] = None
17
+ """The refusal message generated by the model."""
18
+
19
+ role: Optional[Literal["developer", "user", "assistant"]] = None
20
+ """The role of the author of this message."""
21
+
22
+
23
+ class ChoiceLogprobs(BaseModel):
24
+ content: Optional[List[ChatCompletionTokenLogprob]] = None
25
+ """A list of message content tokens with log probability information."""
26
+
27
+ refusal: Optional[List[ChatCompletionTokenLogprob]] = None
28
+ """A list of message refusal tokens with log probability information."""
29
+
30
+
31
+ class Choice(BaseModel):
32
+ delta: ChoiceDelta
33
+ """A chat completion delta generated by streamed model responses."""
34
+
35
+ finish_reason: Optional[Literal["stop", "length"]] = None
36
+ """The reason the model stopped generating tokens.
37
+
38
+ This will be `stop` if the model hit a natural stop point or a provided stop
39
+ sequence, or `length` if the maximum number of tokens specified in the request
40
+ was reached.
41
+ """
42
+
43
+ index: int
44
+ """The index of the choice in the list of choices."""
45
+
46
+ logprobs: Optional[ChoiceLogprobs] = None
47
+ """Log probability information for the choice."""
48
+
49
+
50
+ class Usage(BaseModel):
51
+ completion_tokens: int
52
+ """Number of tokens in the generated completion."""
53
+
54
+ prompt_tokens: int
55
+ """Number of tokens in the prompt."""
56
+
57
+ total_tokens: int
58
+ """Total number of tokens used in the request (prompt + completion)."""
59
+
60
+
61
+ class AgentChatCompletionChunk(BaseModel):
62
+ id: str
63
+ """A unique identifier for the chat completion. Each chunk has the same ID."""
64
+
65
+ choices: List[Choice]
66
+ """A list of chat completion choices.
67
+
68
+ Can contain more than one element if `n` is greater than 1. Can also be empty
69
+ for the last chunk if you set `stream_options: {"include_usage": true}`.
70
+ """
71
+
72
+ created: int
73
+ """The Unix timestamp (in seconds) of when the chat completion was created.
74
+
75
+ Each chunk has the same timestamp.
76
+ """
77
+
78
+ model: str
79
+ """The model to generate the completion."""
80
+
81
+ object: Literal["chat.completion.chunk"]
82
+ """The object type, which is always `chat.completion.chunk`."""
83
+
84
+ usage: Optional[Usage] = None
85
+ """
86
+ An optional field that will only be present when you set
87
+ `stream_options: {"include_usage": true}` in your request. When present, it
88
+ contains a null value **except for the last chunk** which contains the token
89
+ usage statistics for the entire request.
90
+
91
+ **NOTE:** If the stream is interrupted or cancelled, you may not receive the
92
+ final usage chunk which contains the total token usage for the request.
93
+ """
@@ -2,5 +2,6 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
5
6
  from .completion_create_params import CompletionCreateParams as CompletionCreateParams
6
7
  from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
@@ -3,8 +3,8 @@
3
3
  from typing import List, Optional
4
4
  from typing_extensions import Literal
5
5
 
6
- from ...._models import BaseModel
7
- from ...shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
6
+ from ..._models import BaseModel
7
+ from ..shared.chat_completion_token_logprob import ChatCompletionTokenLogprob
8
8
 
9
9
  __all__ = ["ChatCompletionChunk", "Choice", "ChoiceDelta", "ChoiceLogprobs", "Usage"]
10
10
 
@@ -6,17 +6,19 @@ from typing import Dict, List, Union, Iterable, Optional
6
6
  from typing_extensions import Literal, Required, TypeAlias, TypedDict
7
7
 
8
8
  __all__ = [
9
- "CompletionCreateParams",
9
+ "CompletionCreateParamsBase",
10
10
  "Message",
11
11
  "MessageChatCompletionRequestSystemMessage",
12
12
  "MessageChatCompletionRequestDeveloperMessage",
13
13
  "MessageChatCompletionRequestUserMessage",
14
14
  "MessageChatCompletionRequestAssistantMessage",
15
15
  "StreamOptions",
16
+ "CompletionCreateParamsNonStreaming",
17
+ "CompletionCreateParamsStreaming",
16
18
  ]
17
19
 
18
20
 
19
- class CompletionCreateParams(TypedDict, total=False):
21
+ class CompletionCreateParamsBase(TypedDict, total=False):
20
22
  messages: Required[Iterable[Message]]
21
23
  """A list of messages comprising the conversation so far."""
22
24
 
@@ -92,12 +94,6 @@ class CompletionCreateParams(TypedDict, total=False):
92
94
  The returned text will not contain the stop sequence.
93
95
  """
94
96
 
95
- stream: Optional[bool]
96
- """
97
- If set to true, the model response data will be streamed to the client as it is
98
- generated using server-sent events.
99
- """
100
-
101
97
  stream_options: Optional[StreamOptions]
102
98
  """Options for streaming response. Only set this when you set `stream: true`."""
103
99
 
@@ -183,3 +179,22 @@ class StreamOptions(TypedDict, total=False):
183
179
  **NOTE:** If the stream is interrupted, you may not receive the final usage
184
180
  chunk which contains the total token usage for the request.
185
181
  """
182
+
183
+
184
+ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False):
185
+ stream: Optional[Literal[False]]
186
+ """
187
+ If set to true, the model response data will be streamed to the client as it is
188
+ generated using server-sent events.
189
+ """
190
+
191
+
192
+ class CompletionCreateParamsStreaming(CompletionCreateParamsBase):
193
+ stream: Required[Literal[True]]
194
+ """
195
+ If set to true, the model response data will be streamed to the client as it is
196
+ generated using server-sent events.
197
+ """
198
+
199
+
200
+ CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming]