c63a5cfe-b235-4fbe-8bbb-82a9e02a482a-python 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info}/METADATA +5 -5
  2. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info}/RECORD +23 -16
  3. gradientai/_client.py +12 -0
  4. gradientai/_version.py +1 -1
  5. gradientai/resources/agents/__init__.py +14 -0
  6. gradientai/resources/agents/agents.py +32 -0
  7. gradientai/resources/agents/chat/__init__.py +33 -0
  8. gradientai/resources/agents/chat/chat.py +102 -0
  9. gradientai/resources/agents/chat/completions.py +385 -0
  10. gradientai/resources/models.py +105 -77
  11. gradientai/types/__init__.py +3 -2
  12. gradientai/types/agents/chat/__init__.py +6 -0
  13. gradientai/types/agents/chat/completion_create_params.py +185 -0
  14. gradientai/types/agents/chat/completion_create_response.py +81 -0
  15. gradientai/types/api_model.py +32 -0
  16. gradientai/types/chat/__init__.py +0 -1
  17. gradientai/types/chat/completion_create_response.py +1 -1
  18. gradientai/types/model_list_params.py +42 -0
  19. gradientai/types/model_list_response.py +8 -5
  20. gradientai/types/shared/__init__.py +1 -0
  21. gradientai/types/model.py +0 -21
  22. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info}/WHEEL +0 -0
  23. {c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a6.dist-info → c63a5cfe_b235_4fbe_8bbb_82a9e02a482a_python-0.1.0a7.dist-info}/licenses/LICENSE +0 -0
  24. /gradientai/types/{chat → shared}/chat_completion_token_logprob.py +0 -0
@@ -0,0 +1,385 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List, Union, Iterable, Optional
6
+
7
+ import httpx
8
+
9
+ from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
10
+ from ...._utils import maybe_transform, async_maybe_transform
11
+ from ...._compat import cached_property
12
+ from ...._resource import SyncAPIResource, AsyncAPIResource
13
+ from ...._response import (
14
+ to_raw_response_wrapper,
15
+ to_streamed_response_wrapper,
16
+ async_to_raw_response_wrapper,
17
+ async_to_streamed_response_wrapper,
18
+ )
19
+ from ...._base_client import make_request_options
20
+ from ....types.agents.chat import completion_create_params
21
+ from ....types.agents.chat.completion_create_response import CompletionCreateResponse
22
+
23
+ __all__ = ["CompletionsResource", "AsyncCompletionsResource"]
24
+
25
+
26
class CompletionsResource(SyncAPIResource):
    """Resource exposing chat-completion creation through the client's `_post` helper."""

    @cached_property
    def with_raw_response(self) -> CompletionsResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
        """
        return CompletionsResourceWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
        """
        return CompletionsResourceWithStreamingResponse(self)

    def create(
        self,
        *,
        messages: Iterable[completion_create_params.Message],
        model: str,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        stream: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        # The arguments below carry extra request data that has no dedicated keyword.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> CompletionCreateResponse:
        """
        Generate a model response for the supplied chat conversation.

        Args:
          messages: The messages that make up the conversation so far.

          model: ID of the model used to generate the response.

          frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
              how frequently they already occur in the text so far, which lowers the
              model's likelihood of repeating the same line verbatim.

          logit_bias: Adjusts how likely the specified tokens are to appear in the completion.

              Takes a JSON object mapping tokens (identified by their token ID in the
              tokenizer) to a bias value from -100 to 100. Mathematically, the bias is
              added to the logits the model generates before sampling. The exact effect
              differs per model, but values between -1 and 1 should decrease or increase
              the likelihood of selection, while values like -100 or 100 should result in
              a ban or exclusive selection of the relevant token.

          logprobs: Whether log probabilities of the output tokens are returned. If true, the
              log probabilities of each output token returned in the `content` of
              `message` are returned.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of
              the run. The run makes a best effort to use only the specified number of
              completion tokens, across multiple turns of the run.

          max_tokens: The maximum number of tokens that can be generated in the completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings
              with a maximum length of 512 characters.

          n: Number of chat completion choices to generate for each input message. You
              are charged based on the number of generated tokens across all of the
              choices; keep `n` as `1` to minimize costs.

          presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
              whether they already appear in the text so far, which raises the model's
              likelihood of talking about new topics.

          stop: Up to 4 sequences at which the API stops generating further tokens. The
              returned text does not contain the stop sequence.

          stream: If set to true, the model response data is streamed to the client as it is
              generated, using server-sent events.

          stream_options: Options for the streaming response. Only set this when you set
              `stream: true`.

          temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8 make
              the output more random, while lower values like 0.2 make it more focused
              and deterministic. Altering this or `top_p`, but not both, is generally
              recommended.

          top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, in
              which the model considers the results of the tokens with top_p probability
              mass. So 0.1 means only the tokens comprising the top 10% probability mass
              are considered.

              Altering this or `temperature`, but not both, is generally recommended.

          user: A unique identifier representing your end-user, which can help DigitalOcean
              to monitor and detect abuse.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # A client with an overridden base URL gets the relative route; otherwise the
        # request targets the fixed inference host.
        if self._client._base_url_overridden:
            endpoint = "/chat/completions"
        else:
            endpoint = "https://inference.do-ai.run/v1/chat/completions"

        payload = {
            "messages": messages,
            "model": model,
            "frequency_penalty": frequency_penalty,
            "logit_bias": logit_bias,
            "logprobs": logprobs,
            "max_completion_tokens": max_completion_tokens,
            "max_tokens": max_tokens,
            "metadata": metadata,
            "n": n,
            "presence_penalty": presence_penalty,
            "stop": stop,
            "stream": stream,
            "stream_options": stream_options,
            "temperature": temperature,
            "top_logprobs": top_logprobs,
            "top_p": top_p,
            "user": user,
        }
        request_body = maybe_transform(payload, completion_create_params.CompletionCreateParams)
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            endpoint,
            body=request_body,
            options=request_options,
            cast_to=CompletionCreateResponse,
        )
187
+
188
+
189
class AsyncCompletionsResource(AsyncAPIResource):
    """Awaitable resource exposing chat-completion creation through the client's `_post` helper."""

    @cached_property
    def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/digitalocean/gradientai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncCompletionsResourceWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/digitalocean/gradientai-python#with_streaming_response
        """
        return AsyncCompletionsResourceWithStreamingResponse(self)

    async def create(
        self,
        *,
        messages: Iterable[completion_create_params.Message],
        model: str,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        stream: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        # The arguments below carry extra request data that has no dedicated keyword.
        # Values supplied here win over values set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> CompletionCreateResponse:
        """
        Generate a model response for the supplied chat conversation.

        Args:
          messages: The messages that make up the conversation so far.

          model: ID of the model used to generate the response.

          frequency_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
              how frequently they already occur in the text so far, which lowers the
              model's likelihood of repeating the same line verbatim.

          logit_bias: Adjusts how likely the specified tokens are to appear in the completion.

              Takes a JSON object mapping tokens (identified by their token ID in the
              tokenizer) to a bias value from -100 to 100. Mathematically, the bias is
              added to the logits the model generates before sampling. The exact effect
              differs per model, but values between -1 and 1 should decrease or increase
              the likelihood of selection, while values like -100 or 100 should result in
              a ban or exclusive selection of the relevant token.

          logprobs: Whether log probabilities of the output tokens are returned. If true, the
              log probabilities of each output token returned in the `content` of
              `message` are returned.

          max_completion_tokens: The maximum number of completion tokens that may be used over the course of
              the run. The run makes a best effort to use only the specified number of
              completion tokens, across multiple turns of the run.

          max_tokens: The maximum number of tokens that can be generated in the completion.

              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.

          metadata: Set of 16 key-value pairs that can be attached to an object. Useful for
              storing additional information about the object in a structured format, and
              for querying objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings
              with a maximum length of 512 characters.

          n: Number of chat completion choices to generate for each input message. You
              are charged based on the number of generated tokens across all of the
              choices; keep `n` as `1` to minimize costs.

          presence_penalty: A number from -2.0 to 2.0. Positive values penalize new tokens according to
              whether they already appear in the text so far, which raises the model's
              likelihood of talking about new topics.

          stop: Up to 4 sequences at which the API stops generating further tokens. The
              returned text does not contain the stop sequence.

          stream: If set to true, the model response data is streamed to the client as it is
              generated, using server-sent events.

          stream_options: Options for the streaming response. Only set this when you set
              `stream: true`.

          temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8 make
              the output more random, while lower values like 0.2 make it more focused
              and deterministic. Altering this or `top_p`, but not both, is generally
              recommended.

          top_logprobs: An integer between 0 and 20 giving the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, in
              which the model considers the results of the tokens with top_p probability
              mass. So 0.1 means only the tokens comprising the top 10% probability mass
              are considered.

              Altering this or `temperature`, but not both, is generally recommended.

          user: A unique identifier representing your end-user, which can help DigitalOcean
              to monitor and detect abuse.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # A client with an overridden base URL gets the relative route; otherwise the
        # request targets the fixed inference host.
        if self._client._base_url_overridden:
            endpoint = "/chat/completions"
        else:
            endpoint = "https://inference.do-ai.run/v1/chat/completions"

        payload = {
            "messages": messages,
            "model": model,
            "frequency_penalty": frequency_penalty,
            "logit_bias": logit_bias,
            "logprobs": logprobs,
            "max_completion_tokens": max_completion_tokens,
            "max_tokens": max_tokens,
            "metadata": metadata,
            "n": n,
            "presence_penalty": presence_penalty,
            "stop": stop,
            "stream": stream,
            "stream_options": stream_options,
            "temperature": temperature,
            "top_logprobs": top_logprobs,
            "top_p": top_p,
            "user": user,
        }
        request_body = await async_maybe_transform(payload, completion_create_params.CompletionCreateParams)
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            endpoint,
            body=request_body,
            options=request_options,
            cast_to=CompletionCreateResponse,
        )
350
+
351
+
352
class CompletionsResourceWithRawResponse:
    """Holds a `CompletionsResource` and exposes its `create` passed through `to_raw_response_wrapper`."""

    def __init__(self, completions: CompletionsResource) -> None:
        # Keep the underlying resource reachable from this view.
        self._completions = completions

        wrapped_create = to_raw_response_wrapper(completions.create)
        self.create = wrapped_create
359
+
360
+
361
class AsyncCompletionsResourceWithRawResponse:
    """Holds an `AsyncCompletionsResource` and exposes its `create` passed through `async_to_raw_response_wrapper`."""

    def __init__(self, completions: AsyncCompletionsResource) -> None:
        # Keep the underlying resource reachable from this view.
        self._completions = completions

        wrapped_create = async_to_raw_response_wrapper(completions.create)
        self.create = wrapped_create
368
+
369
+
370
class CompletionsResourceWithStreamingResponse:
    """Holds a `CompletionsResource` and exposes its `create` passed through `to_streamed_response_wrapper`."""

    def __init__(self, completions: CompletionsResource) -> None:
        # Keep the underlying resource reachable from this view.
        self._completions = completions

        wrapped_create = to_streamed_response_wrapper(completions.create)
        self.create = wrapped_create
377
+
378
+
379
class AsyncCompletionsResourceWithStreamingResponse:
    """Holds an `AsyncCompletionsResource` and exposes its `create` passed through `async_to_streamed_response_wrapper`."""

    def __init__(self, completions: AsyncCompletionsResource) -> None:
        # Keep the underlying resource reachable from this view.
        self._completions = completions

        wrapped_create = async_to_streamed_response_wrapper(completions.create)
        self.create = wrapped_create
@@ -2,9 +2,14 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from typing import List
6
+ from typing_extensions import Literal
7
+
5
8
  import httpx
6
9
 
10
+ from ..types import model_list_params
7
11
  from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
12
+ from .._utils import maybe_transform, async_maybe_transform
8
13
  from .._compat import cached_property
9
14
  from .._resource import SyncAPIResource, AsyncAPIResource
10
15
  from .._response import (
@@ -13,7 +18,6 @@ from .._response import (
13
18
  async_to_raw_response_wrapper,
14
19
  async_to_streamed_response_wrapper,
15
20
  )
16
- from ..types.model import Model
17
21
  from .._base_client import make_request_options
18
22
  from ..types.model_list_response import ModelListResponse
19
23
 
@@ -40,22 +44,52 @@ class ModelsResource(SyncAPIResource):
40
44
  """
41
45
  return ModelsResourceWithStreamingResponse(self)
42
46
 
43
- def retrieve(
47
+ def list(
44
48
  self,
45
- model: str,
46
49
  *,
50
+ page: int | NotGiven = NOT_GIVEN,
51
+ per_page: int | NotGiven = NOT_GIVEN,
52
+ public_only: bool | NotGiven = NOT_GIVEN,
53
+ usecases: List[
54
+ Literal[
55
+ "MODEL_USECASE_UNKNOWN",
56
+ "MODEL_USECASE_AGENT",
57
+ "MODEL_USECASE_FINETUNED",
58
+ "MODEL_USECASE_KNOWLEDGEBASE",
59
+ "MODEL_USECASE_GUARDRAIL",
60
+ "MODEL_USECASE_REASONING",
61
+ "MODEL_USECASE_SERVERLESS",
62
+ ]
63
+ ]
64
+ | NotGiven = NOT_GIVEN,
47
65
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
48
66
  # The extra values given here take precedence over values defined on the client or passed to this method.
49
67
  extra_headers: Headers | None = None,
50
68
  extra_query: Query | None = None,
51
69
  extra_body: Body | None = None,
52
70
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
53
- ) -> Model:
71
+ ) -> ModelListResponse:
54
72
  """
55
- Retrieves a model instance, providing basic information about the model such as
56
- the owner and permissioning.
73
+ To list all models, send a GET request to `/v2/gen-ai/models`.
57
74
 
58
75
  Args:
76
+ page: page number.
77
+
78
+ per_page: items per page.
79
+
80
+ public_only: only include models that are publicly available.
81
+
82
+ usecases: include only models defined for the listed usecases.
83
+
84
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
85
+ - MODEL_USECASE_AGENT: The model maybe used in an agent
86
+ - MODEL_USECASE_FINETUNED: The model maybe used for fine tuning
87
+ - MODEL_USECASE_KNOWLEDGEBASE: The model maybe used for knowledge bases
88
+ (embedding models)
89
+ - MODEL_USECASE_GUARDRAIL: The model maybe used for guardrails
90
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
91
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
92
+
59
93
  extra_headers: Send extra headers
60
94
 
61
95
  extra_query: Add additional query parameters to the request
@@ -64,36 +98,24 @@ class ModelsResource(SyncAPIResource):
64
98
 
65
99
  timeout: Override the client-level default timeout for this request, in seconds
66
100
  """
67
- if not model:
68
- raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
69
101
  return self._get(
70
- f"/models/{model}"
102
+ "/v2/gen-ai/models"
71
103
  if self._client._base_url_overridden
72
- else f"https://inference.do-ai.run/v1/models/{model}",
104
+ else "https://api.digitalocean.com/v2/gen-ai/models",
73
105
  options=make_request_options(
74
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
75
- ),
76
- cast_to=Model,
77
- )
78
-
79
- def list(
80
- self,
81
- *,
82
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
83
- # The extra values given here take precedence over values defined on the client or passed to this method.
84
- extra_headers: Headers | None = None,
85
- extra_query: Query | None = None,
86
- extra_body: Body | None = None,
87
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
88
- ) -> ModelListResponse:
89
- """
90
- Lists the currently available models, and provides basic information about each
91
- one such as the owner and availability.
92
- """
93
- return self._get(
94
- "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models",
95
- options=make_request_options(
96
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
106
+ extra_headers=extra_headers,
107
+ extra_query=extra_query,
108
+ extra_body=extra_body,
109
+ timeout=timeout,
110
+ query=maybe_transform(
111
+ {
112
+ "page": page,
113
+ "per_page": per_page,
114
+ "public_only": public_only,
115
+ "usecases": usecases,
116
+ },
117
+ model_list_params.ModelListParams,
118
+ ),
97
119
  ),
98
120
  cast_to=ModelListResponse,
99
121
  )
@@ -119,22 +141,52 @@ class AsyncModelsResource(AsyncAPIResource):
119
141
  """
120
142
  return AsyncModelsResourceWithStreamingResponse(self)
121
143
 
122
- async def retrieve(
144
+ async def list(
123
145
  self,
124
- model: str,
125
146
  *,
147
+ page: int | NotGiven = NOT_GIVEN,
148
+ per_page: int | NotGiven = NOT_GIVEN,
149
+ public_only: bool | NotGiven = NOT_GIVEN,
150
+ usecases: List[
151
+ Literal[
152
+ "MODEL_USECASE_UNKNOWN",
153
+ "MODEL_USECASE_AGENT",
154
+ "MODEL_USECASE_FINETUNED",
155
+ "MODEL_USECASE_KNOWLEDGEBASE",
156
+ "MODEL_USECASE_GUARDRAIL",
157
+ "MODEL_USECASE_REASONING",
158
+ "MODEL_USECASE_SERVERLESS",
159
+ ]
160
+ ]
161
+ | NotGiven = NOT_GIVEN,
126
162
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
127
163
  # The extra values given here take precedence over values defined on the client or passed to this method.
128
164
  extra_headers: Headers | None = None,
129
165
  extra_query: Query | None = None,
130
166
  extra_body: Body | None = None,
131
167
  timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
132
- ) -> Model:
168
+ ) -> ModelListResponse:
133
169
  """
134
- Retrieves a model instance, providing basic information about the model such as
135
- the owner and permissioning.
170
+ To list all models, send a GET request to `/v2/gen-ai/models`.
136
171
 
137
172
  Args:
173
+ page: page number.
174
+
175
+ per_page: items per page.
176
+
177
+ public_only: only include models that are publicly available.
178
+
179
+ usecases: include only models defined for the listed usecases.
180
+
181
+ - MODEL_USECASE_UNKNOWN: The use case of the model is unknown
182
+ - MODEL_USECASE_AGENT: The model maybe used in an agent
183
+ - MODEL_USECASE_FINETUNED: The model maybe used for fine tuning
184
+ - MODEL_USECASE_KNOWLEDGEBASE: The model maybe used for knowledge bases
185
+ (embedding models)
186
+ - MODEL_USECASE_GUARDRAIL: The model maybe used for guardrails
187
+ - MODEL_USECASE_REASONING: The model usecase for reasoning
188
+ - MODEL_USECASE_SERVERLESS: The model usecase for serverless inference
189
+
138
190
  extra_headers: Send extra headers
139
191
 
140
192
  extra_query: Add additional query parameters to the request
@@ -143,36 +195,24 @@ class AsyncModelsResource(AsyncAPIResource):
143
195
 
144
196
  timeout: Override the client-level default timeout for this request, in seconds
145
197
  """
146
- if not model:
147
- raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
148
198
  return await self._get(
149
- f"/models/{model}"
199
+ "/v2/gen-ai/models"
150
200
  if self._client._base_url_overridden
151
- else f"https://inference.do-ai.run/v1/models/{model}",
152
- options=make_request_options(
153
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
154
- ),
155
- cast_to=Model,
156
- )
157
-
158
- async def list(
159
- self,
160
- *,
161
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
162
- # The extra values given here take precedence over values defined on the client or passed to this method.
163
- extra_headers: Headers | None = None,
164
- extra_query: Query | None = None,
165
- extra_body: Body | None = None,
166
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
167
- ) -> ModelListResponse:
168
- """
169
- Lists the currently available models, and provides basic information about each
170
- one such as the owner and availability.
171
- """
172
- return await self._get(
173
- "/models" if self._client._base_url_overridden else "https://inference.do-ai.run/v1/models",
201
+ else "https://api.digitalocean.com/v2/gen-ai/models",
174
202
  options=make_request_options(
175
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
203
+ extra_headers=extra_headers,
204
+ extra_query=extra_query,
205
+ extra_body=extra_body,
206
+ timeout=timeout,
207
+ query=await async_maybe_transform(
208
+ {
209
+ "page": page,
210
+ "per_page": per_page,
211
+ "public_only": public_only,
212
+ "usecases": usecases,
213
+ },
214
+ model_list_params.ModelListParams,
215
+ ),
176
216
  ),
177
217
  cast_to=ModelListResponse,
178
218
  )
@@ -182,9 +222,6 @@ class ModelsResourceWithRawResponse:
182
222
  def __init__(self, models: ModelsResource) -> None:
183
223
  self._models = models
184
224
 
185
- self.retrieve = to_raw_response_wrapper(
186
- models.retrieve,
187
- )
188
225
  self.list = to_raw_response_wrapper(
189
226
  models.list,
190
227
  )
@@ -194,9 +231,6 @@ class AsyncModelsResourceWithRawResponse:
194
231
  def __init__(self, models: AsyncModelsResource) -> None:
195
232
  self._models = models
196
233
 
197
- self.retrieve = async_to_raw_response_wrapper(
198
- models.retrieve,
199
- )
200
234
  self.list = async_to_raw_response_wrapper(
201
235
  models.list,
202
236
  )
@@ -206,9 +240,6 @@ class ModelsResourceWithStreamingResponse:
206
240
  def __init__(self, models: ModelsResource) -> None:
207
241
  self._models = models
208
242
 
209
- self.retrieve = to_streamed_response_wrapper(
210
- models.retrieve,
211
- )
212
243
  self.list = to_streamed_response_wrapper(
213
244
  models.list,
214
245
  )
@@ -218,9 +249,6 @@ class AsyncModelsResourceWithStreamingResponse:
218
249
  def __init__(self, models: AsyncModelsResource) -> None:
219
250
  self._models = models
220
251
 
221
- self.retrieve = async_to_streamed_response_wrapper(
222
- models.retrieve,
223
- )
224
252
  self.list = async_to_streamed_response_wrapper(
225
253
  models.list,
226
254
  )
@@ -2,14 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from .model import Model as Model
6
- from .shared import APIMeta as APIMeta, APILinks as APILinks
5
+ from .shared import APIMeta as APIMeta, APILinks as APILinks, ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
7
6
  from .api_agent import APIAgent as APIAgent
7
+ from .api_model import APIModel as APIModel
8
8
  from .api_agreement import APIAgreement as APIAgreement
9
9
  from .api_workspace import APIWorkspace as APIWorkspace
10
10
  from .api_agent_model import APIAgentModel as APIAgentModel
11
11
  from .agent_list_params import AgentListParams as AgentListParams
12
12
  from .api_model_version import APIModelVersion as APIModelVersion
13
+ from .model_list_params import ModelListParams as ModelListParams
13
14
  from .api_knowledge_base import APIKnowledgeBase as APIKnowledgeBase
14
15
  from .region_list_params import RegionListParams as RegionListParams
15
16
  from .agent_create_params import AgentCreateParams as AgentCreateParams