c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info}/METADATA +5 -5
  2. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info}/RECORD +25 -17
  3. gradientai/_client.py +16 -0
  4. gradientai/_streaming.py +40 -3
  5. gradientai/_version.py +1 -1
  6. gradientai/resources/agents/__init__.py +14 -0
  7. gradientai/resources/agents/agents.py +32 -0
  8. gradientai/resources/agents/chat/__init__.py +33 -0
  9. gradientai/resources/agents/chat/chat.py +102 -0
  10. gradientai/resources/agents/chat/completions.py +910 -0
  11. gradientai/resources/models.py +105 -77
  12. gradientai/types/__init__.py +3 -2
  13. gradientai/types/agents/chat/__init__.py +7 -0
  14. gradientai/types/agents/chat/chat_completion_chunk.py +93 -0
  15. gradientai/types/agents/chat/completion_create_params.py +200 -0
  16. gradientai/types/agents/chat/completion_create_response.py +81 -0
  17. gradientai/types/api_model.py +32 -0
  18. gradientai/types/chat/__init__.py +0 -1
  19. gradientai/types/chat/completion_create_response.py +1 -1
  20. gradientai/types/model_list_params.py +42 -0
  21. gradientai/types/model_list_response.py +8 -5
  22. gradientai/types/shared/__init__.py +1 -0
  23. gradientai/types/model.py +0 -21
  24. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info}/WHEEL +0 -0
  25. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a8.dist-info}/licenses/LICENSE +0 -0
  26. /gradientai/types/{chat → shared}/chat_completion_token_logprob.py +0 -0
@@ -0,0 +1,910 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List, Union, Iterable, Optional
6
+ from typing_extensions import Literal, overload
7
+
8
+ import httpx
9
+
10
+ from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
11
+ from ...._utils import required_args, maybe_transform, async_maybe_transform
12
+ from ...._compat import cached_property
13
+ from ...._resource import SyncAPIResource, AsyncAPIResource
14
+ from ...._response import (
15
+ to_raw_response_wrapper,
16
+ to_streamed_response_wrapper,
17
+ async_to_raw_response_wrapper,
18
+ async_to_streamed_response_wrapper,
19
+ )
20
+ from ...._streaming import Stream, AsyncStream
21
+ from ...._base_client import make_request_options
22
+ from ....types.agents.chat import completion_create_params
23
+ from ....types.agents.chat.chat_completion_chunk import ChatCompletionChunk
24
+ from ....types.agents.chat.completion_create_response import CompletionCreateResponse
25
+
26
+ __all__ = ["CompletionsResource", "AsyncCompletionsResource"]
27
+
28
+
29
+ class CompletionsResource(SyncAPIResource):
30
@cached_property
def with_raw_response(self) -> CompletionsResourceWithRawResponse:
    """
    Prefix for any HTTP method call that makes it return the raw response
    object rather than the parsed content.

    For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
    """
    # Wrap this resource in its raw-response counterpart.
    raw_view = CompletionsResourceWithRawResponse(self)
    return raw_view
39
+
40
@cached_property
def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse:
    """
    Alternative to `.with_raw_response` that does not eagerly read the response body.

    For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
    """
    # Wrap this resource in its streaming-response counterpart.
    streaming_view = CompletionsResourceWithStreamingResponse(self)
    return streaming_view
48
+
49
@overload
def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below pass additional parameters to the API that are not exposed as keyword arguments.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> CompletionCreateResponse:
    """
    Create a model response for the supplied chat conversation.

    This overload covers the non-streaming call (`stream` omitted, `None` or
    `False`) and is typed to return a `CompletionCreateResponse`.

    Args:
      messages: The messages that make up the conversation so far.

      model: ID of the model used to generate the response.

      frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          how often they already occur in the text so far, making the model less
          likely to repeat the same line verbatim.

      logit_bias: Adjusts the likelihood of specified tokens appearing in the completion.

          Takes a JSON object mapping tokens (given by their token ID in the
          tokenizer) to a bias value from -100 to 100. Mathematically, the bias is
          added to the logits the model produces before sampling. The exact effect
          varies per model, but values between -1 and 1 should decrease or increase
          the likelihood of selection, while values like -100 or 100 should result
          in a ban or exclusive selection of the relevant token.

      logprobs: Whether to return log probabilities of the output tokens. If true, the log
          probabilities of each output token returned in the `content` of `message`
          are returned.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of
          the run. The run makes a best effort to use only the number of completion
          tokens specified, across multiple turns of the run.

      max_tokens: The maximum number of tokens that can be generated in the completion.

          The token count of your prompt plus `max_tokens` cannot exceed the model's
          context length.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format and
          for querying objects via the API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings
          with a maximum length of 512 characters.

      n: How many chat completion choices to generate for each input message. You are
          charged based on the number of generated tokens across all of the choices,
          so keep `n` as `1` to minimize costs.

      presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          whether they already appear in the text so far, making the model more
          likely to talk about new topics.

      stop: Up to 4 sequences at which the API stops generating further tokens. The
          returned text will not contain the stop sequence.

      stream: If set to true, the model response data is streamed to the client as it is
          generated, using server-sent events.

      stream_options: Options for the streaming response. Only set this when you set `stream: true`.

      temperature: The sampling temperature to use, between 0 and 2. Higher values such as 0.8
          make the output more random, while lower values such as 0.2 make it more
          focused and deterministic. Altering this or `top_p`, but not both, is
          generally recommended.

      top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to
          return at each token position, each with an associated log probability.
          `logprobs` must be set to `true` when this parameter is used.

      top_p: An alternative to sampling with temperature, called nucleus sampling, in
          which the model considers the results of the tokens with top_p probability
          mass. So 0.1 means only the tokens comprising the top 10% probability mass
          are considered.

          Altering this or `temperature`, but not both, is generally recommended.

      user: A unique identifier representing your end-user, which can help DigitalOcean
          to monitor and detect abuse.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
161
+
162
@overload
def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    stream: Literal[True],
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below pass additional parameters to the API that are not exposed as keyword arguments.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Stream[ChatCompletionChunk]:
    """
    Create a model response for the supplied chat conversation.

    This overload covers the streaming call (`stream=True`) and is typed to
    return a `Stream` of `ChatCompletionChunk` items.

    Args:
      messages: The messages that make up the conversation so far.

      model: ID of the model used to generate the response.

      stream: If set to true, the model response data is streamed to the client as it is
          generated, using server-sent events.

      frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          how often they already occur in the text so far, making the model less
          likely to repeat the same line verbatim.

      logit_bias: Adjusts the likelihood of specified tokens appearing in the completion.

          Takes a JSON object mapping tokens (given by their token ID in the
          tokenizer) to a bias value from -100 to 100. Mathematically, the bias is
          added to the logits the model produces before sampling. The exact effect
          varies per model, but values between -1 and 1 should decrease or increase
          the likelihood of selection, while values like -100 or 100 should result
          in a ban or exclusive selection of the relevant token.

      logprobs: Whether to return log probabilities of the output tokens. If true, the log
          probabilities of each output token returned in the `content` of `message`
          are returned.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of
          the run. The run makes a best effort to use only the number of completion
          tokens specified, across multiple turns of the run.

      max_tokens: The maximum number of tokens that can be generated in the completion.

          The token count of your prompt plus `max_tokens` cannot exceed the model's
          context length.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format and
          for querying objects via the API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings
          with a maximum length of 512 characters.

      n: How many chat completion choices to generate for each input message. You are
          charged based on the number of generated tokens across all of the choices,
          so keep `n` as `1` to minimize costs.

      presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          whether they already appear in the text so far, making the model more
          likely to talk about new topics.

      stop: Up to 4 sequences at which the API stops generating further tokens. The
          returned text will not contain the stop sequence.

      stream_options: Options for the streaming response. Only set this when you set `stream: true`.

      temperature: The sampling temperature to use, between 0 and 2. Higher values such as 0.8
          make the output more random, while lower values such as 0.2 make it more
          focused and deterministic. Altering this or `top_p`, but not both, is
          generally recommended.

      top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to
          return at each token position, each with an associated log probability.
          `logprobs` must be set to `true` when this parameter is used.

      top_p: An alternative to sampling with temperature, called nucleus sampling, in
          which the model considers the results of the tokens with top_p probability
          mass. So 0.1 means only the tokens comprising the top 10% probability mass
          are considered.

          Altering this or `temperature`, but not both, is generally recommended.

      user: A unique identifier representing your end-user, which can help DigitalOcean
          to monitor and detect abuse.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
274
+
275
@overload
def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    stream: bool,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below pass additional parameters to the API that are not exposed as keyword arguments.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
    """
    Create a model response for the supplied chat conversation.

    This overload covers a `stream` argument typed as a plain `bool`, so the
    declared return type is either a `CompletionCreateResponse` or a `Stream`
    of `ChatCompletionChunk` items.

    Args:
      messages: The messages that make up the conversation so far.

      model: ID of the model used to generate the response.

      stream: If set to true, the model response data is streamed to the client as it is
          generated, using server-sent events.

      frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          how often they already occur in the text so far, making the model less
          likely to repeat the same line verbatim.

      logit_bias: Adjusts the likelihood of specified tokens appearing in the completion.

          Takes a JSON object mapping tokens (given by their token ID in the
          tokenizer) to a bias value from -100 to 100. Mathematically, the bias is
          added to the logits the model produces before sampling. The exact effect
          varies per model, but values between -1 and 1 should decrease or increase
          the likelihood of selection, while values like -100 or 100 should result
          in a ban or exclusive selection of the relevant token.

      logprobs: Whether to return log probabilities of the output tokens. If true, the log
          probabilities of each output token returned in the `content` of `message`
          are returned.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of
          the run. The run makes a best effort to use only the number of completion
          tokens specified, across multiple turns of the run.

      max_tokens: The maximum number of tokens that can be generated in the completion.

          The token count of your prompt plus `max_tokens` cannot exceed the model's
          context length.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format and
          for querying objects via the API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings
          with a maximum length of 512 characters.

      n: How many chat completion choices to generate for each input message. You are
          charged based on the number of generated tokens across all of the choices,
          so keep `n` as `1` to minimize costs.

      presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          whether they already appear in the text so far, making the model more
          likely to talk about new topics.

      stop: Up to 4 sequences at which the API stops generating further tokens. The
          returned text will not contain the stop sequence.

      stream_options: Options for the streaming response. Only set this when you set `stream: true`.

      temperature: The sampling temperature to use, between 0 and 2. Higher values such as 0.8
          make the output more random, while lower values such as 0.2 make it more
          focused and deterministic. Altering this or `top_p`, but not both, is
          generally recommended.

      top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to
          return at each token position, each with an associated log probability.
          `logprobs` must be set to `true` when this parameter is used.

      top_p: An alternative to sampling with temperature, called nucleus sampling, in
          which the model considers the results of the tokens with top_p probability
          mass. So 0.1 means only the tokens comprising the top 10% probability mass
          are considered.

          Altering this or `temperature`, but not both, is generally recommended.

      user: A unique identifier representing your end-user, which can help DigitalOcean
          to monitor and detect abuse.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
387
+
388
@required_args(["messages", "model"], ["messages", "model", "stream"])
def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below pass additional parameters to the API that are not exposed as keyword arguments.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
    """
    Create a model response for the supplied chat conversation.

    Runtime implementation behind the typed `create` overloads: it collects
    the keyword arguments into a request body and issues a POST through
    `self._post`. See the overloads for per-parameter documentation.
    """
    # Use the relative path when the client's base URL has been overridden;
    # otherwise target the absolute inference endpoint.
    if self._client._base_url_overridden:
        endpoint = "/chat/completions"
    else:
        endpoint = "https://inference.do-ai.run/v1/chat/completions"

    payload = {
        "messages": messages,
        "model": model,
        "frequency_penalty": frequency_penalty,
        "logit_bias": logit_bias,
        "logprobs": logprobs,
        "max_completion_tokens": max_completion_tokens,
        "max_tokens": max_tokens,
        "metadata": metadata,
        "n": n,
        "presence_penalty": presence_penalty,
        "stop": stop,
        "stream": stream,
        "stream_options": stream_options,
        "temperature": temperature,
        "top_logprobs": top_logprobs,
        "top_p": top_p,
        "user": user,
    }

    # Pick the params schema that matches the requested streaming mode.
    if stream:
        params_schema = completion_create_params.CompletionCreateParamsStreaming
    else:
        params_schema = completion_create_params.CompletionCreateParamsNonStreaming

    request_body = maybe_transform(payload, params_schema)
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )

    return self._post(
        endpoint,
        body=request_body,
        options=request_options,
        cast_to=CompletionCreateResponse,
        # A falsy `stream` value is passed on as a plain `False`.
        stream=stream or False,
        stream_cls=Stream[ChatCompletionChunk],
    )
451
+
452
+
453
+ class AsyncCompletionsResource(AsyncAPIResource):
454
@cached_property
def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse:
    """
    Prefix for any HTTP method call that makes it return the raw response
    object rather than the parsed content.

    For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
    """
    # Wrap this resource in its raw-response counterpart.
    raw_view = AsyncCompletionsResourceWithRawResponse(self)
    return raw_view
463
+
464
@cached_property
def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse:
    """
    Alternative to `.with_raw_response` that does not eagerly read the response body.

    For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
    """
    # Wrap this resource in its streaming-response counterpart.
    streaming_view = AsyncCompletionsResourceWithStreamingResponse(self)
    return streaming_view
472
+
473
@overload
async def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below pass additional parameters to the API that are not exposed as keyword arguments.
    # Extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> CompletionCreateResponse:
    """
    Create a model response for the supplied chat conversation.

    This overload covers the non-streaming call (`stream` omitted, `None` or
    `False`) and is typed to return a `CompletionCreateResponse`.

    Args:
      messages: The messages that make up the conversation so far.

      model: ID of the model used to generate the response.

      frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          how often they already occur in the text so far, making the model less
          likely to repeat the same line verbatim.

      logit_bias: Adjusts the likelihood of specified tokens appearing in the completion.

          Takes a JSON object mapping tokens (given by their token ID in the
          tokenizer) to a bias value from -100 to 100. Mathematically, the bias is
          added to the logits the model produces before sampling. The exact effect
          varies per model, but values between -1 and 1 should decrease or increase
          the likelihood of selection, while values like -100 or 100 should result
          in a ban or exclusive selection of the relevant token.

      logprobs: Whether to return log probabilities of the output tokens. If true, the log
          probabilities of each output token returned in the `content` of `message`
          are returned.

      max_completion_tokens: The maximum number of completion tokens that may be used over the course of
          the run. The run makes a best effort to use only the number of completion
          tokens specified, across multiple turns of the run.

      max_tokens: The maximum number of tokens that can be generated in the completion.

          The token count of your prompt plus `max_tokens` cannot exceed the model's
          context length.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
          storing additional information about the object in a structured format and
          for querying objects via the API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings
          with a maximum length of 512 characters.

      n: How many chat completion choices to generate for each input message. You are
          charged based on the number of generated tokens across all of the choices,
          so keep `n` as `1` to minimize costs.

      presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          whether they already appear in the text so far, making the model more
          likely to talk about new topics.

      stop: Up to 4 sequences at which the API stops generating further tokens. The
          returned text will not contain the stop sequence.

      stream: If set to true, the model response data is streamed to the client as it is
          generated, using server-sent events.

      stream_options: Options for the streaming response. Only set this when you set `stream: true`.

      temperature: The sampling temperature to use, between 0 and 2. Higher values such as 0.8
          make the output more random, while lower values such as 0.2 make it more
          focused and deterministic. Altering this or `top_p`, but not both, is
          generally recommended.

      top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to
          return at each token position, each with an associated log probability.
          `logprobs` must be set to `true` when this parameter is used.

      top_p: An alternative to sampling with temperature, called nucleus sampling, in
          which the model considers the results of the tokens with top_p probability
          mass. So 0.1 means only the tokens comprising the top 10% probability mass
          are considered.

          Altering this or `temperature`, but not both, is generally recommended.

      user: A unique identifier representing your end-user, which can help DigitalOcean
          to monitor and detect abuse.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    ...
585
+
586
@overload
async def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    stream: Literal[True],
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below carry additional parameters to the API that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncStream[ChatCompletionChunk]:
    """
    Generate a model response for the supplied chat conversation.

    This overload applies when `stream` is the literal `True`; it is annotated as
    returning an `AsyncStream` of `ChatCompletionChunk` items.

    Args:
      messages: The messages that make up the conversation so far.

      model: ID of the model used to generate the response.

      stream: When true, the model response data is streamed to the client as it is
          generated, using server-sent events.

      frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          how frequently they already occur in the text so far, which lowers the model's
          likelihood of repeating the same line verbatim.

      logit_bias: Adjusts the likelihood of specified tokens appearing in the completion.

          Takes a JSON object mapping tokens (given by their token ID in the tokenizer)
          to a bias value from -100 to 100. Mathematically, the bias is added to the
          logits the model generates before sampling. The exact effect varies per model,
          but values between -1 and 1 should decrease or increase the likelihood of
          selection, while values like -100 or 100 should result in a ban or exclusive
          selection of the relevant token.

      logprobs: Whether log probabilities of the output tokens are returned. If true, the log
          probabilities of each output token returned in the `content` of `message` are
          returned.

      max_completion_tokens: Maximum number of completion tokens that may be used over the course of the
          run. The run makes a best effort to use only the number of completion tokens
          specified, across multiple turns of the run.

      max_tokens: Maximum number of tokens that can be generated in the completion.

          The token count of your prompt plus `max_tokens` cannot exceed the model's
          context length.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for storing
          additional information about the object in a structured format, and for
          querying for objects via API or the dashboard.

          Keys are strings of at most 64 characters. Values are strings of at most 512
          characters.

      n: Number of chat completion choices to generate for each input message. You are
          charged based on the number of generated tokens across all of the choices, so
          keep `n` as `1` to minimize costs.

      presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens based on whether
          they already appear in the text so far, which raises the model's likelihood of
          talking about new topics.

      stop: Up to 4 sequences at which the API stops generating further tokens. The
          returned text does not contain the stop sequence.

      stream_options: Options for the streaming response. Only set this when you set `stream: true`.

      temperature: Sampling temperature to use, between 0 and 2. Higher values such as 0.8 make
          the output more random, while lower values such as 0.2 make it more focused
          and deterministic. We generally recommend altering this or `top_p` but not
          both.

      top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to return
          at each token position, each with an associated log probability. `logprobs`
          must be set to `true` if this parameter is used.

      top_p: Nucleus sampling, an alternative to sampling with temperature: the model
          considers the results of the tokens with top_p probability mass. So 0.1 means
          only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or `temperature` but not both.

      user: A unique identifier for your end-user, which can help DigitalOcean to monitor
          and detect abuse.

      extra_headers: Extra headers to send

      extra_query: Additional query parameters to add to the request

      extra_body: Additional JSON properties to add to the request

      timeout: Overrides the client-level default timeout for this request, in seconds
    """
    ...
698
+
699
@overload
async def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    stream: bool,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below carry additional parameters to the API that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
    """
    Generate a model response for the supplied chat conversation.

    This overload applies when `stream` is a plain `bool`; it is annotated as
    returning either a `CompletionCreateResponse` or an `AsyncStream` of
    `ChatCompletionChunk` items.

    Args:
      messages: The messages that make up the conversation so far.

      model: ID of the model used to generate the response.

      stream: When true, the model response data is streamed to the client as it is
          generated, using server-sent events.

      frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
          how frequently they already occur in the text so far, which lowers the model's
          likelihood of repeating the same line verbatim.

      logit_bias: Adjusts the likelihood of specified tokens appearing in the completion.

          Takes a JSON object mapping tokens (given by their token ID in the tokenizer)
          to a bias value from -100 to 100. Mathematically, the bias is added to the
          logits the model generates before sampling. The exact effect varies per model,
          but values between -1 and 1 should decrease or increase the likelihood of
          selection, while values like -100 or 100 should result in a ban or exclusive
          selection of the relevant token.

      logprobs: Whether log probabilities of the output tokens are returned. If true, the log
          probabilities of each output token returned in the `content` of `message` are
          returned.

      max_completion_tokens: Maximum number of completion tokens that may be used over the course of the
          run. The run makes a best effort to use only the number of completion tokens
          specified, across multiple turns of the run.

      max_tokens: Maximum number of tokens that can be generated in the completion.

          The token count of your prompt plus `max_tokens` cannot exceed the model's
          context length.

      metadata: Set of 16 key-value pairs that can be attached to an object. Useful for storing
          additional information about the object in a structured format, and for
          querying for objects via API or the dashboard.

          Keys are strings of at most 64 characters. Values are strings of at most 512
          characters.

      n: Number of chat completion choices to generate for each input message. You are
          charged based on the number of generated tokens across all of the choices, so
          keep `n` as `1` to minimize costs.

      presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens based on whether
          they already appear in the text so far, which raises the model's likelihood of
          talking about new topics.

      stop: Up to 4 sequences at which the API stops generating further tokens. The
          returned text does not contain the stop sequence.

      stream_options: Options for the streaming response. Only set this when you set `stream: true`.

      temperature: Sampling temperature to use, between 0 and 2. Higher values such as 0.8 make
          the output more random, while lower values such as 0.2 make it more focused
          and deterministic. We generally recommend altering this or `top_p` but not
          both.

      top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to return
          at each token position, each with an associated log probability. `logprobs`
          must be set to `true` if this parameter is used.

      top_p: Nucleus sampling, an alternative to sampling with temperature: the model
          considers the results of the tokens with top_p probability mass. So 0.1 means
          only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or `temperature` but not both.

      user: A unique identifier for your end-user, which can help DigitalOcean to monitor
          and detect abuse.

      extra_headers: Extra headers to send

      extra_query: Additional query parameters to add to the request

      extra_body: Additional JSON properties to add to the request

      timeout: Overrides the client-level default timeout for this request, in seconds
    """
    ...
811
+
812
@required_args(["messages", "model"], ["messages", "model", "stream"])
async def create(
    self,
    *,
    messages: Iterable[completion_create_params.Message],
    model: str,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
    max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    # The arguments below carry additional parameters to the API that have no dedicated keyword argument.
    # Values supplied here win over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
    """
    Shared implementation behind the `create` overloads.

    Builds the chat-completion request from the keyword arguments and awaits
    `self._post`. The path is the relative `/chat/completions` when the client
    reports an overridden base URL, otherwise the absolute inference URL. The
    body is passed through `async_maybe_transform` with the streaming or
    non-streaming params type, chosen by the truthiness of `stream`.
    """
    # Pick the request target first, exactly as the client flag dictates.
    if self._client._base_url_overridden:
        endpoint = "/chat/completions"
    else:
        endpoint = "https://inference.do-ai.run/v1/chat/completions"

    payload = {
        "messages": messages,
        "model": model,
        "frequency_penalty": frequency_penalty,
        "logit_bias": logit_bias,
        "logprobs": logprobs,
        "max_completion_tokens": max_completion_tokens,
        "max_tokens": max_tokens,
        "metadata": metadata,
        "n": n,
        "presence_penalty": presence_penalty,
        "stop": stop,
        "stream": stream,
        "stream_options": stream_options,
        "temperature": temperature,
        "top_logprobs": top_logprobs,
        "top_p": top_p,
        "user": user,
    }

    # A truthy `stream` selects the streaming params type; anything else the non-streaming one.
    if stream:
        params_type = completion_create_params.CompletionCreateParamsStreaming
    else:
        params_type = completion_create_params.CompletionCreateParamsNonStreaming

    request_body = await async_maybe_transform(payload, params_type)
    request_options = make_request_options(
        extra_headers=extra_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )

    return await self._post(
        endpoint,
        body=request_body,
        options=request_options,
        cast_to=CompletionCreateResponse,
        # Falsy `stream` values are passed on as the literal False.
        stream=stream or False,
        stream_cls=AsyncStream[ChatCompletionChunk],
    )
875
+
876
+
877
class CompletionsResourceWithRawResponse:
    """Holds a `CompletionsResource` and exposes its `create` passed through `to_raw_response_wrapper`."""

    def __init__(self, completions: CompletionsResource) -> None:
        # Keep a reference to the underlying resource.
        self._completions = completions

        wrapped_create = to_raw_response_wrapper(completions.create)
        self.create = wrapped_create
884
+
885
+
886
class AsyncCompletionsResourceWithRawResponse:
    """Holds an `AsyncCompletionsResource` and exposes its `create` passed through `async_to_raw_response_wrapper`."""

    def __init__(self, completions: AsyncCompletionsResource) -> None:
        # Keep a reference to the underlying resource.
        self._completions = completions

        wrapped_create = async_to_raw_response_wrapper(completions.create)
        self.create = wrapped_create
893
+
894
+
895
class CompletionsResourceWithStreamingResponse:
    """Holds a `CompletionsResource` and exposes its `create` passed through `to_streamed_response_wrapper`."""

    def __init__(self, completions: CompletionsResource) -> None:
        # Keep a reference to the underlying resource.
        self._completions = completions

        wrapped_create = to_streamed_response_wrapper(completions.create)
        self.create = wrapped_create
902
+
903
+
904
class AsyncCompletionsResourceWithStreamingResponse:
    """Holds an `AsyncCompletionsResource` and exposes its `create` passed through `async_to_streamed_response_wrapper`."""

    def __init__(self, completions: AsyncCompletionsResource) -> None:
        # Keep a reference to the underlying resource.
        self._completions = completions

        wrapped_create = async_to_streamed_response_wrapper(completions.create)
        self.create = wrapped_create