scale-gp-beta 0.1.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78):
  1. scale_gp/__init__.py +96 -0
  2. scale_gp/_base_client.py +2058 -0
  3. scale_gp/_client.py +544 -0
  4. scale_gp/_compat.py +219 -0
  5. scale_gp/_constants.py +14 -0
  6. scale_gp/_exceptions.py +108 -0
  7. scale_gp/_files.py +123 -0
  8. scale_gp/_models.py +801 -0
  9. scale_gp/_qs.py +150 -0
  10. scale_gp/_resource.py +43 -0
  11. scale_gp/_response.py +830 -0
  12. scale_gp/_streaming.py +333 -0
  13. scale_gp/_types.py +217 -0
  14. scale_gp/_utils/__init__.py +57 -0
  15. scale_gp/_utils/_logs.py +25 -0
  16. scale_gp/_utils/_proxy.py +62 -0
  17. scale_gp/_utils/_reflection.py +42 -0
  18. scale_gp/_utils/_streams.py +12 -0
  19. scale_gp/_utils/_sync.py +86 -0
  20. scale_gp/_utils/_transform.py +402 -0
  21. scale_gp/_utils/_typing.py +149 -0
  22. scale_gp/_utils/_utils.py +414 -0
  23. scale_gp/_version.py +4 -0
  24. scale_gp/lib/.keep +4 -0
  25. scale_gp/pagination.py +83 -0
  26. scale_gp/py.typed +0 -0
  27. scale_gp/resources/__init__.py +103 -0
  28. scale_gp/resources/chat/__init__.py +33 -0
  29. scale_gp/resources/chat/chat.py +102 -0
  30. scale_gp/resources/chat/completions.py +1054 -0
  31. scale_gp/resources/completions.py +765 -0
  32. scale_gp/resources/files/__init__.py +33 -0
  33. scale_gp/resources/files/content.py +162 -0
  34. scale_gp/resources/files/files.py +558 -0
  35. scale_gp/resources/inference.py +210 -0
  36. scale_gp/resources/models.py +834 -0
  37. scale_gp/resources/question_sets.py +680 -0
  38. scale_gp/resources/questions.py +396 -0
  39. scale_gp/types/__init__.py +33 -0
  40. scale_gp/types/chat/__init__.py +8 -0
  41. scale_gp/types/chat/chat_completion.py +257 -0
  42. scale_gp/types/chat/chat_completion_chunk.py +240 -0
  43. scale_gp/types/chat/completion_create_params.py +156 -0
  44. scale_gp/types/chat/completion_create_response.py +11 -0
  45. scale_gp/types/completion.py +116 -0
  46. scale_gp/types/completion_create_params.py +108 -0
  47. scale_gp/types/file.py +30 -0
  48. scale_gp/types/file_create_params.py +13 -0
  49. scale_gp/types/file_delete_response.py +16 -0
  50. scale_gp/types/file_list.py +27 -0
  51. scale_gp/types/file_list_params.py +16 -0
  52. scale_gp/types/file_update_params.py +12 -0
  53. scale_gp/types/files/__init__.py +3 -0
  54. scale_gp/types/inference_create_params.py +25 -0
  55. scale_gp/types/inference_create_response.py +11 -0
  56. scale_gp/types/inference_model.py +167 -0
  57. scale_gp/types/inference_model_list.py +27 -0
  58. scale_gp/types/inference_response.py +14 -0
  59. scale_gp/types/inference_response_chunk.py +14 -0
  60. scale_gp/types/model_create_params.py +165 -0
  61. scale_gp/types/model_delete_response.py +16 -0
  62. scale_gp/types/model_list_params.py +20 -0
  63. scale_gp/types/model_update_params.py +161 -0
  64. scale_gp/types/question.py +68 -0
  65. scale_gp/types/question_create_params.py +59 -0
  66. scale_gp/types/question_list.py +27 -0
  67. scale_gp/types/question_list_params.py +16 -0
  68. scale_gp/types/question_set.py +106 -0
  69. scale_gp/types/question_set_create_params.py +115 -0
  70. scale_gp/types/question_set_delete_response.py +16 -0
  71. scale_gp/types/question_set_list.py +27 -0
  72. scale_gp/types/question_set_list_params.py +20 -0
  73. scale_gp/types/question_set_retrieve_params.py +12 -0
  74. scale_gp/types/question_set_update_params.py +23 -0
  75. scale_gp_beta-0.1.0a2.dist-info/METADATA +440 -0
  76. scale_gp_beta-0.1.0a2.dist-info/RECORD +78 -0
  77. scale_gp_beta-0.1.0a2.dist-info/WHEEL +4 -0
  78. scale_gp_beta-0.1.0a2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1054 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Union, Iterable, cast
6
+ from typing_extensions import Literal, overload
7
+
8
+ import httpx
9
+
10
+ from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
11
+ from ..._utils import (
12
+ required_args,
13
+ maybe_transform,
14
+ async_maybe_transform,
15
+ )
16
+ from ..._compat import cached_property
17
+ from ..._resource import SyncAPIResource, AsyncAPIResource
18
+ from ..._response import (
19
+ to_raw_response_wrapper,
20
+ to_streamed_response_wrapper,
21
+ async_to_raw_response_wrapper,
22
+ async_to_streamed_response_wrapper,
23
+ )
24
+ from ..._streaming import Stream, AsyncStream
25
+ from ...types.chat import completion_create_params
26
+ from ..._base_client import make_request_options
27
+ from ...types.chat.chat_completion_chunk import ChatCompletionChunk
28
+ from ...types.chat.completion_create_response import CompletionCreateResponse
29
+
30
+ __all__ = ["CompletionsResource", "AsyncCompletionsResource"]
31
+
32
+
33
+ class CompletionsResource(SyncAPIResource):
34
+ @cached_property
35
+ def with_raw_response(self) -> CompletionsResourceWithRawResponse:
36
+ """
37
+ This property can be used as a prefix for any HTTP method call to return
38
+ the raw response object instead of the parsed content.
39
+
40
+ For more information, see https://www.github.com/scaleapi/sgp-python-beta#accessing-raw-response-data-eg-headers
41
+ """
42
+ return CompletionsResourceWithRawResponse(self)
43
+
44
+ @cached_property
45
+ def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse:
46
+ """
47
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
48
+
49
+ For more information, see https://www.github.com/scaleapi/sgp-python-beta#with_streaming_response
50
+ """
51
+ return CompletionsResourceWithStreamingResponse(self)
52
+
53
+ @overload
54
+ def create(
55
+ self,
56
+ *,
57
+ messages: Iterable[Dict[str, object]],
58
+ model: str,
59
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
60
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
61
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
62
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
63
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
64
+ logprobs: bool | NotGiven = NOT_GIVEN,
65
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
66
+ max_tokens: int | NotGiven = NOT_GIVEN,
67
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
68
+ modalities: List[str] | NotGiven = NOT_GIVEN,
69
+ n: int | NotGiven = NOT_GIVEN,
70
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
71
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
72
+ presence_penalty: float | NotGiven = NOT_GIVEN,
73
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
74
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
75
+ seed: int | NotGiven = NOT_GIVEN,
76
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
77
+ store: bool | NotGiven = NOT_GIVEN,
78
+ stream: Literal[False] | NotGiven = NOT_GIVEN,
79
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
80
+ temperature: float | NotGiven = NOT_GIVEN,
81
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
82
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
83
+ top_k: int | NotGiven = NOT_GIVEN,
84
+ top_logprobs: int | NotGiven = NOT_GIVEN,
85
+ top_p: float | NotGiven = NOT_GIVEN,
86
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
87
+ # The extra values given here take precedence over values defined on the client or passed to this method.
88
+ extra_headers: Headers | None = None,
89
+ extra_query: Query | None = None,
90
+ extra_body: Body | None = None,
91
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
92
+ ) -> CompletionCreateResponse:
93
+ """
94
+ Chat Completions
95
+
96
+ Args:
97
+ messages: openai standard message format
98
+
99
+ model: model specified as `model_vendor/model`, for example `openai/gpt-4o`
100
+
101
+ audio: Parameters for audio output. Required when audio output is requested with
102
+ modalities: ['audio'].
103
+
104
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
105
+ existing frequency in the text so far.
106
+
107
+ function_call: Deprecated in favor of tool_choice. Controls which function is called by the
108
+ model.
109
+
110
+ functions: Deprecated in favor of tools. A list of functions the model may generate JSON
111
+ inputs for.
112
+
113
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion. Maps
114
+ tokens to bias values from -100 to 100.
115
+
116
+ logprobs: Whether to return log probabilities of the output tokens or not.
117
+
118
+ max_completion_tokens: An upper bound for the number of tokens that can be generated, including visible
119
+ output tokens and reasoning tokens.
120
+
121
+ max_tokens: Deprecated in favor of max_completion_tokens. The maximum number of tokens to
122
+ generate.
123
+
124
+ metadata: Developer-defined tags and values used for filtering completions in the
125
+ dashboard.
126
+
127
+ modalities: Output types that you would like the model to generate for this request.
128
+
129
+ n: How many chat completion choices to generate for each input message.
130
+
131
+ parallel_tool_calls: Whether to enable parallel function calling during tool use.
132
+
133
+ prediction: Static predicted output content, such as the content of a text file being
134
+ regenerated.
135
+
136
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens based on whether
137
+ they appear in the text so far.
138
+
139
+ reasoning_effort: For o1 models only. Constrains effort on reasoning. Values: low, medium, high.
140
+
141
+ response_format: An object specifying the format that the model must output.
142
+
143
+ seed: If specified, system will attempt to sample deterministically for repeated
144
+ requests with same seed.
145
+
146
+ stop: Up to 4 sequences where the API will stop generating further tokens.
147
+
148
+ store: Whether to store the output for use in model distillation or evals products.
149
+
150
+ stream: If true, partial message deltas will be sent as server-sent events.
151
+
152
+ stream_options: Options for streaming response. Only set this when stream is true.
153
+
154
+ temperature: What sampling temperature to use. Higher values make output more random, lower
155
+ more focused.
156
+
157
+ tool_choice: Controls which tool is called by the model. Values: none, auto, required, or
158
+ specific tool.
159
+
160
+ tools: A list of tools the model may call. Currently, only functions are supported. Max
161
+ 128 functions.
162
+
163
+ top_k: Only sample from the top K options for each subsequent token
164
+
165
+ top_logprobs: Number of most likely tokens to return at each position, with associated log
166
+ probability.
167
+
168
+ top_p: Alternative to temperature. Only tokens comprising top_p probability mass are
169
+ considered.
170
+
171
+ extra_headers: Send extra headers
172
+
173
+ extra_query: Add additional query parameters to the request
174
+
175
+ extra_body: Add additional JSON properties to the request
176
+
177
+ timeout: Override the client-level default timeout for this request, in seconds
178
+ """
179
+ ...
180
+
181
+ @overload
182
+ def create(
183
+ self,
184
+ *,
185
+ messages: Iterable[Dict[str, object]],
186
+ model: str,
187
+ stream: Literal[True],
188
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
189
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
190
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
191
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
192
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
193
+ logprobs: bool | NotGiven = NOT_GIVEN,
194
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
195
+ max_tokens: int | NotGiven = NOT_GIVEN,
196
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
197
+ modalities: List[str] | NotGiven = NOT_GIVEN,
198
+ n: int | NotGiven = NOT_GIVEN,
199
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
200
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
201
+ presence_penalty: float | NotGiven = NOT_GIVEN,
202
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
203
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
204
+ seed: int | NotGiven = NOT_GIVEN,
205
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
206
+ store: bool | NotGiven = NOT_GIVEN,
207
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
208
+ temperature: float | NotGiven = NOT_GIVEN,
209
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
210
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
211
+ top_k: int | NotGiven = NOT_GIVEN,
212
+ top_logprobs: int | NotGiven = NOT_GIVEN,
213
+ top_p: float | NotGiven = NOT_GIVEN,
214
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
215
+ # The extra values given here take precedence over values defined on the client or passed to this method.
216
+ extra_headers: Headers | None = None,
217
+ extra_query: Query | None = None,
218
+ extra_body: Body | None = None,
219
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
220
+ ) -> Stream[ChatCompletionChunk]:
221
+ """
222
+ Chat Completions
223
+
224
+ Args:
225
+ messages: openai standard message format
226
+
227
+ model: model specified as `model_vendor/model`, for example `openai/gpt-4o`
228
+
229
+ stream: If true, partial message deltas will be sent as server-sent events.
230
+
231
+ audio: Parameters for audio output. Required when audio output is requested with
232
+ modalities: ['audio'].
233
+
234
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
235
+ existing frequency in the text so far.
236
+
237
+ function_call: Deprecated in favor of tool_choice. Controls which function is called by the
238
+ model.
239
+
240
+ functions: Deprecated in favor of tools. A list of functions the model may generate JSON
241
+ inputs for.
242
+
243
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion. Maps
244
+ tokens to bias values from -100 to 100.
245
+
246
+ logprobs: Whether to return log probabilities of the output tokens or not.
247
+
248
+ max_completion_tokens: An upper bound for the number of tokens that can be generated, including visible
249
+ output tokens and reasoning tokens.
250
+
251
+ max_tokens: Deprecated in favor of max_completion_tokens. The maximum number of tokens to
252
+ generate.
253
+
254
+ metadata: Developer-defined tags and values used for filtering completions in the
255
+ dashboard.
256
+
257
+ modalities: Output types that you would like the model to generate for this request.
258
+
259
+ n: How many chat completion choices to generate for each input message.
260
+
261
+ parallel_tool_calls: Whether to enable parallel function calling during tool use.
262
+
263
+ prediction: Static predicted output content, such as the content of a text file being
264
+ regenerated.
265
+
266
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens based on whether
267
+ they appear in the text so far.
268
+
269
+ reasoning_effort: For o1 models only. Constrains effort on reasoning. Values: low, medium, high.
270
+
271
+ response_format: An object specifying the format that the model must output.
272
+
273
+ seed: If specified, system will attempt to sample deterministically for repeated
274
+ requests with same seed.
275
+
276
+ stop: Up to 4 sequences where the API will stop generating further tokens.
277
+
278
+ store: Whether to store the output for use in model distillation or evals products.
279
+
280
+ stream_options: Options for streaming response. Only set this when stream is true.
281
+
282
+ temperature: What sampling temperature to use. Higher values make output more random, lower
283
+ more focused.
284
+
285
+ tool_choice: Controls which tool is called by the model. Values: none, auto, required, or
286
+ specific tool.
287
+
288
+ tools: A list of tools the model may call. Currently, only functions are supported. Max
289
+ 128 functions.
290
+
291
+ top_k: Only sample from the top K options for each subsequent token
292
+
293
+ top_logprobs: Number of most likely tokens to return at each position, with associated log
294
+ probability.
295
+
296
+ top_p: Alternative to temperature. Only tokens comprising top_p probability mass are
297
+ considered.
298
+
299
+ extra_headers: Send extra headers
300
+
301
+ extra_query: Add additional query parameters to the request
302
+
303
+ extra_body: Add additional JSON properties to the request
304
+
305
+ timeout: Override the client-level default timeout for this request, in seconds
306
+ """
307
+ ...
308
+
309
+ @overload
310
+ def create(
311
+ self,
312
+ *,
313
+ messages: Iterable[Dict[str, object]],
314
+ model: str,
315
+ stream: bool,
316
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
317
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
318
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
319
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
320
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
321
+ logprobs: bool | NotGiven = NOT_GIVEN,
322
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
323
+ max_tokens: int | NotGiven = NOT_GIVEN,
324
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
325
+ modalities: List[str] | NotGiven = NOT_GIVEN,
326
+ n: int | NotGiven = NOT_GIVEN,
327
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
328
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
329
+ presence_penalty: float | NotGiven = NOT_GIVEN,
330
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
331
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
332
+ seed: int | NotGiven = NOT_GIVEN,
333
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
334
+ store: bool | NotGiven = NOT_GIVEN,
335
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
336
+ temperature: float | NotGiven = NOT_GIVEN,
337
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
338
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
339
+ top_k: int | NotGiven = NOT_GIVEN,
340
+ top_logprobs: int | NotGiven = NOT_GIVEN,
341
+ top_p: float | NotGiven = NOT_GIVEN,
342
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
343
+ # The extra values given here take precedence over values defined on the client or passed to this method.
344
+ extra_headers: Headers | None = None,
345
+ extra_query: Query | None = None,
346
+ extra_body: Body | None = None,
347
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
348
+ ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
349
+ """
350
+ Chat Completions
351
+
352
+ Args:
353
+ messages: openai standard message format
354
+
355
+ model: model specified as `model_vendor/model`, for example `openai/gpt-4o`
356
+
357
+ stream: If true, partial message deltas will be sent as server-sent events.
358
+
359
+ audio: Parameters for audio output. Required when audio output is requested with
360
+ modalities: ['audio'].
361
+
362
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
363
+ existing frequency in the text so far.
364
+
365
+ function_call: Deprecated in favor of tool_choice. Controls which function is called by the
366
+ model.
367
+
368
+ functions: Deprecated in favor of tools. A list of functions the model may generate JSON
369
+ inputs for.
370
+
371
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion. Maps
372
+ tokens to bias values from -100 to 100.
373
+
374
+ logprobs: Whether to return log probabilities of the output tokens or not.
375
+
376
+ max_completion_tokens: An upper bound for the number of tokens that can be generated, including visible
377
+ output tokens and reasoning tokens.
378
+
379
+ max_tokens: Deprecated in favor of max_completion_tokens. The maximum number of tokens to
380
+ generate.
381
+
382
+ metadata: Developer-defined tags and values used for filtering completions in the
383
+ dashboard.
384
+
385
+ modalities: Output types that you would like the model to generate for this request.
386
+
387
+ n: How many chat completion choices to generate for each input message.
388
+
389
+ parallel_tool_calls: Whether to enable parallel function calling during tool use.
390
+
391
+ prediction: Static predicted output content, such as the content of a text file being
392
+ regenerated.
393
+
394
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens based on whether
395
+ they appear in the text so far.
396
+
397
+ reasoning_effort: For o1 models only. Constrains effort on reasoning. Values: low, medium, high.
398
+
399
+ response_format: An object specifying the format that the model must output.
400
+
401
+ seed: If specified, system will attempt to sample deterministically for repeated
402
+ requests with same seed.
403
+
404
+ stop: Up to 4 sequences where the API will stop generating further tokens.
405
+
406
+ store: Whether to store the output for use in model distillation or evals products.
407
+
408
+ stream_options: Options for streaming response. Only set this when stream is true.
409
+
410
+ temperature: What sampling temperature to use. Higher values make output more random, lower
411
+ more focused.
412
+
413
+ tool_choice: Controls which tool is called by the model. Values: none, auto, required, or
414
+ specific tool.
415
+
416
+ tools: A list of tools the model may call. Currently, only functions are supported. Max
417
+ 128 functions.
418
+
419
+ top_k: Only sample from the top K options for each subsequent token
420
+
421
+ top_logprobs: Number of most likely tokens to return at each position, with associated log
422
+ probability.
423
+
424
+ top_p: Alternative to temperature. Only tokens comprising top_p probability mass are
425
+ considered.
426
+
427
+ extra_headers: Send extra headers
428
+
429
+ extra_query: Add additional query parameters to the request
430
+
431
+ extra_body: Add additional JSON properties to the request
432
+
433
+ timeout: Override the client-level default timeout for this request, in seconds
434
+ """
435
+ ...
436
+
437
+ @required_args(["messages", "model"], ["messages", "model", "stream"])
438
+ def create(
439
+ self,
440
+ *,
441
+ messages: Iterable[Dict[str, object]],
442
+ model: str,
443
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
444
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
445
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
446
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
447
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
448
+ logprobs: bool | NotGiven = NOT_GIVEN,
449
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
450
+ max_tokens: int | NotGiven = NOT_GIVEN,
451
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
452
+ modalities: List[str] | NotGiven = NOT_GIVEN,
453
+ n: int | NotGiven = NOT_GIVEN,
454
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
455
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
456
+ presence_penalty: float | NotGiven = NOT_GIVEN,
457
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
458
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
459
+ seed: int | NotGiven = NOT_GIVEN,
460
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
461
+ store: bool | NotGiven = NOT_GIVEN,
462
+ stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
463
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
464
+ temperature: float | NotGiven = NOT_GIVEN,
465
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
466
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
467
+ top_k: int | NotGiven = NOT_GIVEN,
468
+ top_logprobs: int | NotGiven = NOT_GIVEN,
469
+ top_p: float | NotGiven = NOT_GIVEN,
470
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
471
+ # The extra values given here take precedence over values defined on the client or passed to this method.
472
+ extra_headers: Headers | None = None,
473
+ extra_query: Query | None = None,
474
+ extra_body: Body | None = None,
475
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
476
+ ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]:
477
+ return cast(
478
+ CompletionCreateResponse,
479
+ self._post(
480
+ "/v5/chat/completions",
481
+ body=maybe_transform(
482
+ {
483
+ "messages": messages,
484
+ "model": model,
485
+ "audio": audio,
486
+ "frequency_penalty": frequency_penalty,
487
+ "function_call": function_call,
488
+ "functions": functions,
489
+ "logit_bias": logit_bias,
490
+ "logprobs": logprobs,
491
+ "max_completion_tokens": max_completion_tokens,
492
+ "max_tokens": max_tokens,
493
+ "metadata": metadata,
494
+ "modalities": modalities,
495
+ "n": n,
496
+ "parallel_tool_calls": parallel_tool_calls,
497
+ "prediction": prediction,
498
+ "presence_penalty": presence_penalty,
499
+ "reasoning_effort": reasoning_effort,
500
+ "response_format": response_format,
501
+ "seed": seed,
502
+ "stop": stop,
503
+ "store": store,
504
+ "stream": stream,
505
+ "stream_options": stream_options,
506
+ "temperature": temperature,
507
+ "tool_choice": tool_choice,
508
+ "tools": tools,
509
+ "top_k": top_k,
510
+ "top_logprobs": top_logprobs,
511
+ "top_p": top_p,
512
+ },
513
+ completion_create_params.CompletionCreateParams,
514
+ ),
515
+ options=make_request_options(
516
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
517
+ ),
518
+ cast_to=cast(
519
+ Any, CompletionCreateResponse
520
+ ), # Union types cannot be passed in as arguments in the type system
521
+ stream=stream or False,
522
+ stream_cls=Stream[ChatCompletionChunk],
523
+ ),
524
+ )
525
+
526
+
527
+ class AsyncCompletionsResource(AsyncAPIResource):
528
+ @cached_property
529
+ def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse:
530
+ """
531
+ This property can be used as a prefix for any HTTP method call to return
532
+ the raw response object instead of the parsed content.
533
+
534
+ For more information, see https://www.github.com/scaleapi/sgp-python-beta#accessing-raw-response-data-eg-headers
535
+ """
536
+ return AsyncCompletionsResourceWithRawResponse(self)
537
+
538
+ @cached_property
539
+ def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse:
540
+ """
541
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
542
+
543
+ For more information, see https://www.github.com/scaleapi/sgp-python-beta#with_streaming_response
544
+ """
545
+ return AsyncCompletionsResourceWithStreamingResponse(self)
546
+
547
+ @overload
548
+ async def create(
549
+ self,
550
+ *,
551
+ messages: Iterable[Dict[str, object]],
552
+ model: str,
553
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
554
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
555
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
556
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
557
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
558
+ logprobs: bool | NotGiven = NOT_GIVEN,
559
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
560
+ max_tokens: int | NotGiven = NOT_GIVEN,
561
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
562
+ modalities: List[str] | NotGiven = NOT_GIVEN,
563
+ n: int | NotGiven = NOT_GIVEN,
564
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
565
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
566
+ presence_penalty: float | NotGiven = NOT_GIVEN,
567
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
568
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
569
+ seed: int | NotGiven = NOT_GIVEN,
570
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
571
+ store: bool | NotGiven = NOT_GIVEN,
572
+ stream: Literal[False] | NotGiven = NOT_GIVEN,
573
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
574
+ temperature: float | NotGiven = NOT_GIVEN,
575
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
576
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
577
+ top_k: int | NotGiven = NOT_GIVEN,
578
+ top_logprobs: int | NotGiven = NOT_GIVEN,
579
+ top_p: float | NotGiven = NOT_GIVEN,
580
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
581
+ # The extra values given here take precedence over values defined on the client or passed to this method.
582
+ extra_headers: Headers | None = None,
583
+ extra_query: Query | None = None,
584
+ extra_body: Body | None = None,
585
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
586
+ ) -> CompletionCreateResponse:
587
+ """
588
+ Chat Completions
589
+
590
+ Args:
591
+ messages: openai standard message format
592
+
593
+ model: model specified as `model_vendor/model`, for example `openai/gpt-4o`
594
+
595
+ audio: Parameters for audio output. Required when audio output is requested with
596
+ modalities: ['audio'].
597
+
598
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
599
+ existing frequency in the text so far.
600
+
601
+ function_call: Deprecated in favor of tool_choice. Controls which function is called by the
602
+ model.
603
+
604
+ functions: Deprecated in favor of tools. A list of functions the model may generate JSON
605
+ inputs for.
606
+
607
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion. Maps
608
+ tokens to bias values from -100 to 100.
609
+
610
+ logprobs: Whether to return log probabilities of the output tokens or not.
611
+
612
+ max_completion_tokens: An upper bound for the number of tokens that can be generated, including visible
613
+ output tokens and reasoning tokens.
614
+
615
+ max_tokens: Deprecated in favor of max_completion_tokens. The maximum number of tokens to
616
+ generate.
617
+
618
+ metadata: Developer-defined tags and values used for filtering completions in the
619
+ dashboard.
620
+
621
+ modalities: Output types that you would like the model to generate for this request.
622
+
623
+ n: How many chat completion choices to generate for each input message.
624
+
625
+ parallel_tool_calls: Whether to enable parallel function calling during tool use.
626
+
627
+ prediction: Static predicted output content, such as the content of a text file being
628
+ regenerated.
629
+
630
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens based on whether
631
+ they appear in the text so far.
632
+
633
+ reasoning_effort: For o1 models only. Constrains effort on reasoning. Values: low, medium, high.
634
+
635
+ response_format: An object specifying the format that the model must output.
636
+
637
+ seed: If specified, system will attempt to sample deterministically for repeated
638
+ requests with same seed.
639
+
640
+ stop: Up to 4 sequences where the API will stop generating further tokens.
641
+
642
+ store: Whether to store the output for use in model distillation or evals products.
643
+
644
+ stream: If true, partial message deltas will be sent as server-sent events.
645
+
646
+ stream_options: Options for streaming response. Only set this when stream is true.
647
+
648
+ temperature: What sampling temperature to use. Higher values make output more random, lower
649
+ more focused.
650
+
651
+ tool_choice: Controls which tool is called by the model. Values: none, auto, required, or
652
+ specific tool.
653
+
654
+ tools: A list of tools the model may call. Currently, only functions are supported. Max
655
+ 128 functions.
656
+
657
+ top_k: Only sample from the top K options for each subsequent token
658
+
659
+ top_logprobs: Number of most likely tokens to return at each position, with associated log
660
+ probability.
661
+
662
+ top_p: Alternative to temperature. Only tokens comprising top_p probability mass are
663
+ considered.
664
+
665
+ extra_headers: Send extra headers
666
+
667
+ extra_query: Add additional query parameters to the request
668
+
669
+ extra_body: Add additional JSON properties to the request
670
+
671
+ timeout: Override the client-level default timeout for this request, in seconds
672
+ """
673
+ ...
674
+
675
+ @overload
676
+ async def create(
677
+ self,
678
+ *,
679
+ messages: Iterable[Dict[str, object]],
680
+ model: str,
681
+ stream: Literal[True],
682
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
683
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
684
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
685
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
686
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
687
+ logprobs: bool | NotGiven = NOT_GIVEN,
688
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
689
+ max_tokens: int | NotGiven = NOT_GIVEN,
690
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
691
+ modalities: List[str] | NotGiven = NOT_GIVEN,
692
+ n: int | NotGiven = NOT_GIVEN,
693
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
694
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
695
+ presence_penalty: float | NotGiven = NOT_GIVEN,
696
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
697
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
698
+ seed: int | NotGiven = NOT_GIVEN,
699
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
700
+ store: bool | NotGiven = NOT_GIVEN,
701
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
702
+ temperature: float | NotGiven = NOT_GIVEN,
703
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
704
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
705
+ top_k: int | NotGiven = NOT_GIVEN,
706
+ top_logprobs: int | NotGiven = NOT_GIVEN,
707
+ top_p: float | NotGiven = NOT_GIVEN,
708
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
709
+ # The extra values given here take precedence over values defined on the client or passed to this method.
710
+ extra_headers: Headers | None = None,
711
+ extra_query: Query | None = None,
712
+ extra_body: Body | None = None,
713
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
714
+ ) -> AsyncStream[ChatCompletionChunk]:
715
+ """
716
+ Chat Completions
717
+
718
+ Args:
719
+ messages: openai standard message format
720
+
721
+ model: model specified as `model_vendor/model`, for example `openai/gpt-4o`
722
+
723
+ stream: If true, partial message deltas will be sent as server-sent events.
724
+
725
+ audio: Parameters for audio output. Required when audio output is requested with
726
+ modalities: ['audio'].
727
+
728
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
729
+ existing frequency in the text so far.
730
+
731
+ function_call: Deprecated in favor of tool_choice. Controls which function is called by the
732
+ model.
733
+
734
+ functions: Deprecated in favor of tools. A list of functions the model may generate JSON
735
+ inputs for.
736
+
737
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion. Maps
738
+ tokens to bias values from -100 to 100.
739
+
740
+ logprobs: Whether to return log probabilities of the output tokens or not.
741
+
742
+ max_completion_tokens: An upper bound for the number of tokens that can be generated, including visible
743
+ output tokens and reasoning tokens.
744
+
745
+ max_tokens: Deprecated in favor of max_completion_tokens. The maximum number of tokens to
746
+ generate.
747
+
748
+ metadata: Developer-defined tags and values used for filtering completions in the
749
+ dashboard.
750
+
751
+ modalities: Output types that you would like the model to generate for this request.
752
+
753
+ n: How many chat completion choices to generate for each input message.
754
+
755
+ parallel_tool_calls: Whether to enable parallel function calling during tool use.
756
+
757
+ prediction: Static predicted output content, such as the content of a text file being
758
+ regenerated.
759
+
760
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens based on whether
761
+ they appear in the text so far.
762
+
763
+ reasoning_effort: For o1 models only. Constrains effort on reasoning. Values: low, medium, high.
764
+
765
+ response_format: An object specifying the format that the model must output.
766
+
767
+ seed: If specified, system will attempt to sample deterministically for repeated
768
+ requests with same seed.
769
+
770
+ stop: Up to 4 sequences where the API will stop generating further tokens.
771
+
772
+ store: Whether to store the output for use in model distillation or evals products.
773
+
774
+ stream_options: Options for streaming response. Only set this when stream is true.
775
+
776
+ temperature: What sampling temperature to use. Higher values make output more random, lower
777
+ more focused.
778
+
779
+ tool_choice: Controls which tool is called by the model. Values: none, auto, required, or
780
+ specific tool.
781
+
782
+ tools: A list of tools the model may call. Currently, only functions are supported. Max
783
+ 128 functions.
784
+
785
+ top_k: Only sample from the top K options for each subsequent token
786
+
787
+ top_logprobs: Number of most likely tokens to return at each position, with associated log
788
+ probability.
789
+
790
+ top_p: Alternative to temperature. Only tokens comprising top_p probability mass are
791
+ considered.
792
+
793
+ extra_headers: Send extra headers
794
+
795
+ extra_query: Add additional query parameters to the request
796
+
797
+ extra_body: Add additional JSON properties to the request
798
+
799
+ timeout: Override the client-level default timeout for this request, in seconds
800
+ """
801
+ ...
802
+
803
+ @overload
804
+ async def create(
805
+ self,
806
+ *,
807
+ messages: Iterable[Dict[str, object]],
808
+ model: str,
809
+ stream: bool,
810
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
811
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
812
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
813
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
814
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
815
+ logprobs: bool | NotGiven = NOT_GIVEN,
816
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
817
+ max_tokens: int | NotGiven = NOT_GIVEN,
818
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
819
+ modalities: List[str] | NotGiven = NOT_GIVEN,
820
+ n: int | NotGiven = NOT_GIVEN,
821
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
822
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
823
+ presence_penalty: float | NotGiven = NOT_GIVEN,
824
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
825
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
826
+ seed: int | NotGiven = NOT_GIVEN,
827
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
828
+ store: bool | NotGiven = NOT_GIVEN,
829
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
830
+ temperature: float | NotGiven = NOT_GIVEN,
831
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
832
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
833
+ top_k: int | NotGiven = NOT_GIVEN,
834
+ top_logprobs: int | NotGiven = NOT_GIVEN,
835
+ top_p: float | NotGiven = NOT_GIVEN,
836
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
837
+ # The extra values given here take precedence over values defined on the client or passed to this method.
838
+ extra_headers: Headers | None = None,
839
+ extra_query: Query | None = None,
840
+ extra_body: Body | None = None,
841
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
842
+ ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
843
+ """
844
+ Chat Completions
845
+
846
+ Args:
847
+ messages: openai standard message format
848
+
849
+ model: model specified as `model_vendor/model`, for example `openai/gpt-4o`
850
+
851
+ stream: If true, partial message deltas will be sent as server-sent events.
852
+
853
+ audio: Parameters for audio output. Required when audio output is requested with
854
+ modalities: ['audio'].
855
+
856
+ frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
857
+ existing frequency in the text so far.
858
+
859
+ function_call: Deprecated in favor of tool_choice. Controls which function is called by the
860
+ model.
861
+
862
+ functions: Deprecated in favor of tools. A list of functions the model may generate JSON
863
+ inputs for.
864
+
865
+ logit_bias: Modify the likelihood of specified tokens appearing in the completion. Maps
866
+ tokens to bias values from -100 to 100.
867
+
868
+ logprobs: Whether to return log probabilities of the output tokens or not.
869
+
870
+ max_completion_tokens: An upper bound for the number of tokens that can be generated, including visible
871
+ output tokens and reasoning tokens.
872
+
873
+ max_tokens: Deprecated in favor of max_completion_tokens. The maximum number of tokens to
874
+ generate.
875
+
876
+ metadata: Developer-defined tags and values used for filtering completions in the
877
+ dashboard.
878
+
879
+ modalities: Output types that you would like the model to generate for this request.
880
+
881
+ n: How many chat completion choices to generate for each input message.
882
+
883
+ parallel_tool_calls: Whether to enable parallel function calling during tool use.
884
+
885
+ prediction: Static predicted output content, such as the content of a text file being
886
+ regenerated.
887
+
888
+ presence_penalty: Number between -2.0 and 2.0. Positive values penalize tokens based on whether
889
+ they appear in the text so far.
890
+
891
+ reasoning_effort: For o1 models only. Constrains effort on reasoning. Values: low, medium, high.
892
+
893
+ response_format: An object specifying the format that the model must output.
894
+
895
+ seed: If specified, system will attempt to sample deterministically for repeated
896
+ requests with same seed.
897
+
898
+ stop: Up to 4 sequences where the API will stop generating further tokens.
899
+
900
+ store: Whether to store the output for use in model distillation or evals products.
901
+
902
+ stream_options: Options for streaming response. Only set this when stream is true.
903
+
904
+ temperature: What sampling temperature to use. Higher values make output more random, lower
905
+ more focused.
906
+
907
+ tool_choice: Controls which tool is called by the model. Values: none, auto, required, or
908
+ specific tool.
909
+
910
+ tools: A list of tools the model may call. Currently, only functions are supported. Max
911
+ 128 functions.
912
+
913
+ top_k: Only sample from the top K options for each subsequent token
914
+
915
+ top_logprobs: Number of most likely tokens to return at each position, with associated log
916
+ probability.
917
+
918
+ top_p: Alternative to temperature. Only tokens comprising top_p probability mass are
919
+ considered.
920
+
921
+ extra_headers: Send extra headers
922
+
923
+ extra_query: Add additional query parameters to the request
924
+
925
+ extra_body: Add additional JSON properties to the request
926
+
927
+ timeout: Override the client-level default timeout for this request, in seconds
928
+ """
929
+ ...
930
+
931
+ @required_args(["messages", "model"], ["messages", "model", "stream"])
932
+ async def create(
933
+ self,
934
+ *,
935
+ messages: Iterable[Dict[str, object]],
936
+ model: str,
937
+ audio: Dict[str, object] | NotGiven = NOT_GIVEN,
938
+ frequency_penalty: float | NotGiven = NOT_GIVEN,
939
+ function_call: Dict[str, object] | NotGiven = NOT_GIVEN,
940
+ functions: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
941
+ logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
942
+ logprobs: bool | NotGiven = NOT_GIVEN,
943
+ max_completion_tokens: int | NotGiven = NOT_GIVEN,
944
+ max_tokens: int | NotGiven = NOT_GIVEN,
945
+ metadata: Dict[str, str] | NotGiven = NOT_GIVEN,
946
+ modalities: List[str] | NotGiven = NOT_GIVEN,
947
+ n: int | NotGiven = NOT_GIVEN,
948
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
949
+ prediction: Dict[str, object] | NotGiven = NOT_GIVEN,
950
+ presence_penalty: float | NotGiven = NOT_GIVEN,
951
+ reasoning_effort: str | NotGiven = NOT_GIVEN,
952
+ response_format: Dict[str, object] | NotGiven = NOT_GIVEN,
953
+ seed: int | NotGiven = NOT_GIVEN,
954
+ stop: Union[str, List[str]] | NotGiven = NOT_GIVEN,
955
+ store: bool | NotGiven = NOT_GIVEN,
956
+ stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
957
+ stream_options: Dict[str, object] | NotGiven = NOT_GIVEN,
958
+ temperature: float | NotGiven = NOT_GIVEN,
959
+ tool_choice: Union[str, Dict[str, object]] | NotGiven = NOT_GIVEN,
960
+ tools: Iterable[Dict[str, object]] | NotGiven = NOT_GIVEN,
961
+ top_k: int | NotGiven = NOT_GIVEN,
962
+ top_logprobs: int | NotGiven = NOT_GIVEN,
963
+ top_p: float | NotGiven = NOT_GIVEN,
964
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
965
+ # The extra values given here take precedence over values defined on the client or passed to this method.
966
+ extra_headers: Headers | None = None,
967
+ extra_query: Query | None = None,
968
+ extra_body: Body | None = None,
969
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
970
+ ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]:
971
+ return cast(
972
+ CompletionCreateResponse,
973
+ await self._post(
974
+ "/v5/chat/completions",
975
+ body=await async_maybe_transform(
976
+ {
977
+ "messages": messages,
978
+ "model": model,
979
+ "audio": audio,
980
+ "frequency_penalty": frequency_penalty,
981
+ "function_call": function_call,
982
+ "functions": functions,
983
+ "logit_bias": logit_bias,
984
+ "logprobs": logprobs,
985
+ "max_completion_tokens": max_completion_tokens,
986
+ "max_tokens": max_tokens,
987
+ "metadata": metadata,
988
+ "modalities": modalities,
989
+ "n": n,
990
+ "parallel_tool_calls": parallel_tool_calls,
991
+ "prediction": prediction,
992
+ "presence_penalty": presence_penalty,
993
+ "reasoning_effort": reasoning_effort,
994
+ "response_format": response_format,
995
+ "seed": seed,
996
+ "stop": stop,
997
+ "store": store,
998
+ "stream": stream,
999
+ "stream_options": stream_options,
1000
+ "temperature": temperature,
1001
+ "tool_choice": tool_choice,
1002
+ "tools": tools,
1003
+ "top_k": top_k,
1004
+ "top_logprobs": top_logprobs,
1005
+ "top_p": top_p,
1006
+ },
1007
+ completion_create_params.CompletionCreateParams,
1008
+ ),
1009
+ options=make_request_options(
1010
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
1011
+ ),
1012
+ cast_to=cast(
1013
+ Any, CompletionCreateResponse
1014
+ ), # Union types cannot be passed in as arguments in the type system
1015
+ stream=stream or False,
1016
+ stream_cls=AsyncStream[ChatCompletionChunk],
1017
+ ),
1018
+ )
1019
+
1020
+
1021
class CompletionsResourceWithRawResponse:
    """View over a `CompletionsResource` whose methods are wrapped with
    `to_raw_response_wrapper`."""

    def __init__(self, completions: CompletionsResource) -> None:
        self._completions = completions
        self.create = to_raw_response_wrapper(completions.create)
1028
+
1029
+
1030
class AsyncCompletionsResourceWithRawResponse:
    """View over an `AsyncCompletionsResource` whose methods are wrapped with
    `async_to_raw_response_wrapper`."""

    def __init__(self, completions: AsyncCompletionsResource) -> None:
        self._completions = completions
        self.create = async_to_raw_response_wrapper(completions.create)
1037
+
1038
+
1039
class CompletionsResourceWithStreamingResponse:
    """View over a `CompletionsResource` whose methods are wrapped with
    `to_streamed_response_wrapper`."""

    def __init__(self, completions: CompletionsResource) -> None:
        self._completions = completions
        self.create = to_streamed_response_wrapper(completions.create)
1046
+
1047
+
1048
class AsyncCompletionsResourceWithStreamingResponse:
    """View over an `AsyncCompletionsResource` whose methods are wrapped with
    `async_to_streamed_response_wrapper`."""

    def __init__(self, completions: AsyncCompletionsResource) -> None:
        self._completions = completions
        self.create = async_to_streamed_response_wrapper(completions.create)