mistralai 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- mistralai/__init__.py +4 -0
- mistralai/_version.py +12 -0
- mistralai/agents.py +56 -22
- mistralai/batch.py +17 -0
- mistralai/chat.py +64 -30
- mistralai/classifiers.py +396 -0
- mistralai/embeddings.py +10 -6
- mistralai/files.py +252 -19
- mistralai/fim.py +40 -30
- mistralai/jobs.py +40 -20
- mistralai/mistral_jobs.py +733 -0
- mistralai/models/__init__.py +108 -18
- mistralai/models/agentscompletionrequest.py +27 -10
- mistralai/models/agentscompletionstreamrequest.py +27 -10
- mistralai/models/apiendpoint.py +9 -0
- mistralai/models/archiveftmodelout.py +11 -5
- mistralai/models/assistantmessage.py +11 -6
- mistralai/models/basemodelcard.py +22 -6
- mistralai/models/batcherror.py +17 -0
- mistralai/models/batchjobin.py +58 -0
- mistralai/models/batchjobout.py +117 -0
- mistralai/models/batchjobsout.py +30 -0
- mistralai/models/batchjobstatus.py +15 -0
- mistralai/models/chatclassificationrequest.py +104 -0
- mistralai/models/chatcompletionchoice.py +9 -4
- mistralai/models/chatcompletionrequest.py +32 -13
- mistralai/models/chatcompletionresponse.py +2 -2
- mistralai/models/chatcompletionstreamrequest.py +32 -13
- mistralai/models/checkpointout.py +1 -1
- mistralai/models/classificationobject.py +21 -0
- mistralai/models/classificationrequest.py +59 -0
- mistralai/models/classificationresponse.py +21 -0
- mistralai/models/completionchunk.py +2 -2
- mistralai/models/completionevent.py +1 -1
- mistralai/models/completionresponsestreamchoice.py +11 -5
- mistralai/models/delete_model_v1_models_model_id_deleteop.py +1 -2
- mistralai/models/deletefileout.py +1 -1
- mistralai/models/deletemodelout.py +2 -2
- mistralai/models/deltamessage.py +14 -7
- mistralai/models/detailedjobout.py +11 -5
- mistralai/models/embeddingrequest.py +5 -5
- mistralai/models/embeddingresponse.py +2 -1
- mistralai/models/embeddingresponsedata.py +2 -2
- mistralai/models/eventout.py +2 -2
- mistralai/models/filepurpose.py +8 -0
- mistralai/models/files_api_routes_delete_fileop.py +1 -2
- mistralai/models/files_api_routes_download_fileop.py +16 -0
- mistralai/models/files_api_routes_list_filesop.py +96 -0
- mistralai/models/files_api_routes_retrieve_fileop.py +1 -2
- mistralai/models/files_api_routes_upload_fileop.py +9 -9
- mistralai/models/fileschema.py +7 -21
- mistralai/models/fimcompletionrequest.py +20 -13
- mistralai/models/fimcompletionresponse.py +2 -2
- mistralai/models/fimcompletionstreamrequest.py +20 -13
- mistralai/models/ftmodelcapabilitiesout.py +2 -2
- mistralai/models/ftmodelcard.py +24 -6
- mistralai/models/ftmodelout.py +9 -5
- mistralai/models/function.py +2 -2
- mistralai/models/functioncall.py +2 -1
- mistralai/models/functionname.py +1 -1
- mistralai/models/githubrepositoryin.py +11 -5
- mistralai/models/githubrepositoryout.py +11 -5
- mistralai/models/httpvalidationerror.py +0 -2
- mistralai/models/imageurl.py +1 -2
- mistralai/models/imageurlchunk.py +11 -5
- mistralai/models/jobin.py +2 -2
- mistralai/models/jobmetadataout.py +1 -2
- mistralai/models/jobout.py +10 -5
- mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
- mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +2 -2
- mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +1 -2
- mistralai/models/jobsout.py +9 -5
- mistralai/models/legacyjobmetadataout.py +12 -5
- mistralai/models/listfilesout.py +5 -1
- mistralai/models/metricout.py +1 -2
- mistralai/models/modelcapabilities.py +2 -2
- mistralai/models/modellist.py +2 -2
- mistralai/models/responseformat.py +2 -2
- mistralai/models/retrieve_model_v1_models_model_id_getop.py +2 -2
- mistralai/models/retrievefileout.py +10 -21
- mistralai/models/sampletype.py +6 -2
- mistralai/models/security.py +2 -2
- mistralai/models/source.py +3 -2
- mistralai/models/systemmessage.py +6 -6
- mistralai/models/textchunk.py +9 -5
- mistralai/models/tool.py +2 -2
- mistralai/models/toolcall.py +2 -2
- mistralai/models/toolchoice.py +2 -2
- mistralai/models/toolmessage.py +2 -2
- mistralai/models/trainingfile.py +2 -2
- mistralai/models/trainingparameters.py +7 -2
- mistralai/models/trainingparametersin.py +7 -2
- mistralai/models/unarchiveftmodelout.py +11 -5
- mistralai/models/updateftmodelin.py +1 -2
- mistralai/models/uploadfileout.py +7 -21
- mistralai/models/usageinfo.py +1 -1
- mistralai/models/usermessage.py +36 -5
- mistralai/models/validationerror.py +2 -1
- mistralai/models/wandbintegration.py +11 -5
- mistralai/models/wandbintegrationout.py +12 -6
- mistralai/models_.py +48 -24
- mistralai/sdk.py +7 -0
- mistralai/sdkconfiguration.py +7 -7
- mistralai/utils/__init__.py +8 -0
- mistralai/utils/annotations.py +13 -2
- mistralai/utils/serializers.py +25 -0
- {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/METADATA +90 -14
- mistralai-1.2.1.dist-info/RECORD +276 -0
- {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/WHEEL +1 -1
- mistralai_azure/__init__.py +4 -0
- mistralai_azure/_version.py +12 -0
- mistralai_azure/chat.py +64 -30
- mistralai_azure/models/__init__.py +9 -3
- mistralai_azure/models/assistantmessage.py +11 -6
- mistralai_azure/models/chatcompletionchoice.py +10 -5
- mistralai_azure/models/chatcompletionrequest.py +32 -13
- mistralai_azure/models/chatcompletionresponse.py +2 -2
- mistralai_azure/models/chatcompletionstreamrequest.py +32 -13
- mistralai_azure/models/completionchunk.py +2 -2
- mistralai_azure/models/completionevent.py +1 -1
- mistralai_azure/models/completionresponsestreamchoice.py +9 -4
- mistralai_azure/models/deltamessage.py +14 -7
- mistralai_azure/models/function.py +2 -2
- mistralai_azure/models/functioncall.py +2 -1
- mistralai_azure/models/functionname.py +1 -1
- mistralai_azure/models/httpvalidationerror.py +0 -2
- mistralai_azure/models/responseformat.py +2 -2
- mistralai_azure/models/security.py +1 -2
- mistralai_azure/models/systemmessage.py +6 -6
- mistralai_azure/models/textchunk.py +9 -5
- mistralai_azure/models/tool.py +2 -2
- mistralai_azure/models/toolcall.py +2 -2
- mistralai_azure/models/toolchoice.py +2 -2
- mistralai_azure/models/toolmessage.py +2 -2
- mistralai_azure/models/usageinfo.py +1 -1
- mistralai_azure/models/usermessage.py +36 -5
- mistralai_azure/models/validationerror.py +2 -1
- mistralai_azure/sdkconfiguration.py +7 -7
- mistralai_azure/utils/__init__.py +8 -0
- mistralai_azure/utils/annotations.py +13 -2
- mistralai_azure/utils/serializers.py +25 -0
- mistralai_gcp/__init__.py +4 -0
- mistralai_gcp/_version.py +12 -0
- mistralai_gcp/chat.py +64 -30
- mistralai_gcp/fim.py +40 -30
- mistralai_gcp/models/__init__.py +9 -3
- mistralai_gcp/models/assistantmessage.py +11 -6
- mistralai_gcp/models/chatcompletionchoice.py +10 -5
- mistralai_gcp/models/chatcompletionrequest.py +32 -13
- mistralai_gcp/models/chatcompletionresponse.py +2 -2
- mistralai_gcp/models/chatcompletionstreamrequest.py +32 -13
- mistralai_gcp/models/completionchunk.py +2 -2
- mistralai_gcp/models/completionevent.py +1 -1
- mistralai_gcp/models/completionresponsestreamchoice.py +9 -4
- mistralai_gcp/models/deltamessage.py +14 -7
- mistralai_gcp/models/fimcompletionrequest.py +20 -13
- mistralai_gcp/models/fimcompletionresponse.py +2 -2
- mistralai_gcp/models/fimcompletionstreamrequest.py +20 -13
- mistralai_gcp/models/function.py +2 -2
- mistralai_gcp/models/functioncall.py +2 -1
- mistralai_gcp/models/functionname.py +1 -1
- mistralai_gcp/models/httpvalidationerror.py +0 -2
- mistralai_gcp/models/responseformat.py +2 -2
- mistralai_gcp/models/security.py +1 -2
- mistralai_gcp/models/systemmessage.py +6 -6
- mistralai_gcp/models/textchunk.py +9 -5
- mistralai_gcp/models/tool.py +2 -2
- mistralai_gcp/models/toolcall.py +2 -2
- mistralai_gcp/models/toolchoice.py +2 -2
- mistralai_gcp/models/toolmessage.py +2 -2
- mistralai_gcp/models/usageinfo.py +1 -1
- mistralai_gcp/models/usermessage.py +36 -5
- mistralai_gcp/models/validationerror.py +2 -1
- mistralai_gcp/sdk.py +20 -11
- mistralai_gcp/sdkconfiguration.py +7 -7
- mistralai_gcp/utils/__init__.py +8 -0
- mistralai_gcp/utils/annotations.py +13 -2
- mistralai_gcp/utils/serializers.py +25 -0
- mistralai-1.1.0.dist-info/RECORD +0 -254
- {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/LICENSE +0 -0
mistralai/chat.py
CHANGED
@@ -16,10 +16,9 @@ class Chat(BaseSDK):
         *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
@@ -35,6 +34,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -44,16 +46,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -72,7 +76,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -84,6 +87,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
 
@@ -135,15 +141,17 @@ class Chat(BaseSDK):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )
 
         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )
 
@@ -152,10 +160,9 @@ class Chat(BaseSDK):
         *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
@@ -171,6 +178,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -180,16 +190,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -208,7 +220,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -220,6 +231,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
 
@@ -271,15 +285,17 @@ class Chat(BaseSDK):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )
 
         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )
 
@@ -291,10 +307,9 @@ class Chat(BaseSDK):
             List[models.ChatCompletionStreamRequestMessages],
             List[models.ChatCompletionStreamRequestMessagesTypedDict],
         ],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[
             Union[
@@ -315,6 +330,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionStreamRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -326,16 +344,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
        :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -354,7 +374,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -368,6 +387,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
 
@@ -419,18 +441,21 @@ class Chat(BaseSDK):
                 sentinel="[DONE]",
             )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )
 
         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )
 
@@ -442,10 +467,9 @@ class Chat(BaseSDK):
             List[models.ChatCompletionStreamRequestMessages],
             List[models.ChatCompletionStreamRequestMessagesTypedDict],
         ],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[
             Union[
@@ -466,6 +490,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionStreamRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -477,16 +504,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -505,7 +534,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -519,6 +547,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
 
@@ -570,17 +601,20 @@ class Chat(BaseSDK):
                 sentinel="[DONE]",
             )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
            raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )
 
         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )
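Taken together, the chat.py hunks above remove `min_tokens`, switch `temperature` to an `OptionalNullable` defaulting to `UNSET` (so the server applies a model-specific default unless you pass a value), add `presence_penalty`, `frequency_penalty`, and `n`, and make `SDKError` carry the response body text alongside the status code. Below is a minimal usage sketch, not an official example: it assumes the top-level `Mistral` client exposes the method as `client.chat.complete`, that `MISTRAL_API_KEY` is set, and that the model name is only an illustration.

```python
# Sketch only: exercises the 1.2.1 chat parameters shown in the diff above.
import os

from mistralai import Mistral, models

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

try:
    response = client.chat.complete(
        model="mistral-small-latest",  # example model name; use one available to you
        messages=[{"role": "user", "content": "Name three uses for a paperclip."}],
        temperature=0.3,        # now OptionalNullable; omit to use the model's default
        presence_penalty=0.2,   # new in 1.2.1: discourages reusing words and phrases
        frequency_penalty=0.2,  # new in 1.2.1: penalizes frequently repeated tokens
        n=2,                    # new in 1.2.1: completions per request, input billed once
    )
    for choice in response.choices:
        print(choice.message.content)
except models.SDKError as err:
    # As of 1.2.1 the error includes the raw response text, not just the status code.
    print(err)
```

The same three parameters appear in the `stream`, `complete_async`, and `stream_async` variants of this class, and in the `mistralai_azure` and `mistralai_gcp` sub-packages whose chat.py files are listed above with matching changes.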