mistralai 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff compares the contents of publicly available package versions as published to their public registries. It is provided for informational purposes only.
- mistralai/__init__.py +4 -0
- mistralai/_version.py +12 -0
- mistralai/agents.py +56 -22
- mistralai/batch.py +17 -0
- mistralai/chat.py +64 -30
- mistralai/classifiers.py +396 -0
- mistralai/embeddings.py +10 -6
- mistralai/files.py +252 -19
- mistralai/fim.py +40 -30
- mistralai/jobs.py +40 -20
- mistralai/mistral_jobs.py +733 -0
- mistralai/models/__init__.py +108 -18
- mistralai/models/agentscompletionrequest.py +27 -10
- mistralai/models/agentscompletionstreamrequest.py +27 -10
- mistralai/models/apiendpoint.py +9 -0
- mistralai/models/archiveftmodelout.py +11 -5
- mistralai/models/assistantmessage.py +11 -6
- mistralai/models/basemodelcard.py +22 -6
- mistralai/models/batcherror.py +17 -0
- mistralai/models/batchjobin.py +58 -0
- mistralai/models/batchjobout.py +117 -0
- mistralai/models/batchjobsout.py +30 -0
- mistralai/models/batchjobstatus.py +15 -0
- mistralai/models/chatclassificationrequest.py +104 -0
- mistralai/models/chatcompletionchoice.py +9 -4
- mistralai/models/chatcompletionrequest.py +32 -13
- mistralai/models/chatcompletionresponse.py +2 -2
- mistralai/models/chatcompletionstreamrequest.py +32 -13
- mistralai/models/checkpointout.py +1 -1
- mistralai/models/classificationobject.py +21 -0
- mistralai/models/classificationrequest.py +59 -0
- mistralai/models/classificationresponse.py +21 -0
- mistralai/models/completionchunk.py +2 -2
- mistralai/models/completionevent.py +1 -1
- mistralai/models/completionresponsestreamchoice.py +11 -5
- mistralai/models/delete_model_v1_models_model_id_deleteop.py +1 -2
- mistralai/models/deletefileout.py +1 -1
- mistralai/models/deletemodelout.py +2 -2
- mistralai/models/deltamessage.py +14 -7
- mistralai/models/detailedjobout.py +11 -5
- mistralai/models/embeddingrequest.py +5 -5
- mistralai/models/embeddingresponse.py +2 -1
- mistralai/models/embeddingresponsedata.py +2 -2
- mistralai/models/eventout.py +2 -2
- mistralai/models/filepurpose.py +8 -0
- mistralai/models/files_api_routes_delete_fileop.py +1 -2
- mistralai/models/files_api_routes_download_fileop.py +16 -0
- mistralai/models/files_api_routes_list_filesop.py +96 -0
- mistralai/models/files_api_routes_retrieve_fileop.py +1 -2
- mistralai/models/files_api_routes_upload_fileop.py +9 -9
- mistralai/models/fileschema.py +7 -21
- mistralai/models/fimcompletionrequest.py +20 -13
- mistralai/models/fimcompletionresponse.py +2 -2
- mistralai/models/fimcompletionstreamrequest.py +20 -13
- mistralai/models/ftmodelcapabilitiesout.py +2 -2
- mistralai/models/ftmodelcard.py +24 -6
- mistralai/models/ftmodelout.py +9 -5
- mistralai/models/function.py +2 -2
- mistralai/models/functioncall.py +2 -1
- mistralai/models/functionname.py +1 -1
- mistralai/models/githubrepositoryin.py +11 -5
- mistralai/models/githubrepositoryout.py +11 -5
- mistralai/models/httpvalidationerror.py +0 -2
- mistralai/models/imageurl.py +1 -2
- mistralai/models/imageurlchunk.py +11 -5
- mistralai/models/jobin.py +2 -2
- mistralai/models/jobmetadataout.py +1 -2
- mistralai/models/jobout.py +10 -5
- mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
- mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +2 -2
- mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +1 -2
- mistralai/models/jobsout.py +9 -5
- mistralai/models/legacyjobmetadataout.py +12 -5
- mistralai/models/listfilesout.py +5 -1
- mistralai/models/metricout.py +1 -2
- mistralai/models/modelcapabilities.py +2 -2
- mistralai/models/modellist.py +2 -2
- mistralai/models/responseformat.py +2 -2
- mistralai/models/retrieve_model_v1_models_model_id_getop.py +2 -2
- mistralai/models/retrievefileout.py +10 -21
- mistralai/models/sampletype.py +6 -2
- mistralai/models/security.py +2 -2
- mistralai/models/source.py +3 -2
- mistralai/models/systemmessage.py +6 -6
- mistralai/models/textchunk.py +9 -5
- mistralai/models/tool.py +2 -2
- mistralai/models/toolcall.py +2 -2
- mistralai/models/toolchoice.py +2 -2
- mistralai/models/toolmessage.py +2 -2
- mistralai/models/trainingfile.py +2 -2
- mistralai/models/trainingparameters.py +7 -2
- mistralai/models/trainingparametersin.py +7 -2
- mistralai/models/unarchiveftmodelout.py +11 -5
- mistralai/models/updateftmodelin.py +1 -2
- mistralai/models/uploadfileout.py +7 -21
- mistralai/models/usageinfo.py +1 -1
- mistralai/models/usermessage.py +36 -5
- mistralai/models/validationerror.py +2 -1
- mistralai/models/wandbintegration.py +11 -5
- mistralai/models/wandbintegrationout.py +12 -6
- mistralai/models_.py +48 -24
- mistralai/sdk.py +7 -0
- mistralai/sdkconfiguration.py +7 -7
- mistralai/utils/__init__.py +8 -0
- mistralai/utils/annotations.py +13 -2
- mistralai/utils/serializers.py +25 -0
- {mistralai-1.1.0.dist-info → mistralai-1.2.0.dist-info}/METADATA +90 -14
- mistralai-1.2.0.dist-info/RECORD +276 -0
- {mistralai-1.1.0.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
- mistralai_azure/__init__.py +4 -0
- mistralai_azure/_version.py +12 -0
- mistralai_azure/chat.py +64 -30
- mistralai_azure/models/__init__.py +9 -3
- mistralai_azure/models/assistantmessage.py +11 -6
- mistralai_azure/models/chatcompletionchoice.py +10 -5
- mistralai_azure/models/chatcompletionrequest.py +32 -13
- mistralai_azure/models/chatcompletionresponse.py +2 -2
- mistralai_azure/models/chatcompletionstreamrequest.py +32 -13
- mistralai_azure/models/completionchunk.py +2 -2
- mistralai_azure/models/completionevent.py +1 -1
- mistralai_azure/models/completionresponsestreamchoice.py +9 -4
- mistralai_azure/models/deltamessage.py +14 -7
- mistralai_azure/models/function.py +2 -2
- mistralai_azure/models/functioncall.py +2 -1
- mistralai_azure/models/functionname.py +1 -1
- mistralai_azure/models/httpvalidationerror.py +0 -2
- mistralai_azure/models/responseformat.py +2 -2
- mistralai_azure/models/security.py +1 -2
- mistralai_azure/models/systemmessage.py +6 -6
- mistralai_azure/models/textchunk.py +9 -5
- mistralai_azure/models/tool.py +2 -2
- mistralai_azure/models/toolcall.py +2 -2
- mistralai_azure/models/toolchoice.py +2 -2
- mistralai_azure/models/toolmessage.py +2 -2
- mistralai_azure/models/usageinfo.py +1 -1
- mistralai_azure/models/usermessage.py +36 -5
- mistralai_azure/models/validationerror.py +2 -1
- mistralai_azure/sdkconfiguration.py +7 -7
- mistralai_azure/utils/__init__.py +8 -0
- mistralai_azure/utils/annotations.py +13 -2
- mistralai_azure/utils/serializers.py +25 -0
- mistralai_gcp/__init__.py +4 -0
- mistralai_gcp/_version.py +12 -0
- mistralai_gcp/chat.py +64 -30
- mistralai_gcp/fim.py +40 -30
- mistralai_gcp/models/__init__.py +9 -3
- mistralai_gcp/models/assistantmessage.py +11 -6
- mistralai_gcp/models/chatcompletionchoice.py +10 -5
- mistralai_gcp/models/chatcompletionrequest.py +32 -13
- mistralai_gcp/models/chatcompletionresponse.py +2 -2
- mistralai_gcp/models/chatcompletionstreamrequest.py +32 -13
- mistralai_gcp/models/completionchunk.py +2 -2
- mistralai_gcp/models/completionevent.py +1 -1
- mistralai_gcp/models/completionresponsestreamchoice.py +9 -4
- mistralai_gcp/models/deltamessage.py +14 -7
- mistralai_gcp/models/fimcompletionrequest.py +20 -13
- mistralai_gcp/models/fimcompletionresponse.py +2 -2
- mistralai_gcp/models/fimcompletionstreamrequest.py +20 -13
- mistralai_gcp/models/function.py +2 -2
- mistralai_gcp/models/functioncall.py +2 -1
- mistralai_gcp/models/functionname.py +1 -1
- mistralai_gcp/models/httpvalidationerror.py +0 -2
- mistralai_gcp/models/responseformat.py +2 -2
- mistralai_gcp/models/security.py +1 -2
- mistralai_gcp/models/systemmessage.py +6 -6
- mistralai_gcp/models/textchunk.py +9 -5
- mistralai_gcp/models/tool.py +2 -2
- mistralai_gcp/models/toolcall.py +2 -2
- mistralai_gcp/models/toolchoice.py +2 -2
- mistralai_gcp/models/toolmessage.py +2 -2
- mistralai_gcp/models/usageinfo.py +1 -1
- mistralai_gcp/models/usermessage.py +36 -5
- mistralai_gcp/models/validationerror.py +2 -1
- mistralai_gcp/sdkconfiguration.py +7 -7
- mistralai_gcp/utils/__init__.py +8 -0
- mistralai_gcp/utils/annotations.py +13 -2
- mistralai_gcp/utils/serializers.py +25 -0
- mistralai-1.1.0.dist-info/RECORD +0 -254
- {mistralai-1.1.0.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
mistralai_azure/chat.py
CHANGED
@@ -16,10 +16,9 @@ class Chat(BaseSDK):
         *,
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
@@ -35,6 +34,9 @@
                 models.ChatCompletionStreamRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -46,16 +48,18 @@

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -74,7 +78,6 @@
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -86,6 +89,9 @@
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )

@@ -135,18 +141,21 @@
             sentinel="[DONE]",
         )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -155,10 +164,9 @@
         *,
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
@@ -174,6 +182,9 @@
                 models.ChatCompletionStreamRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -185,16 +196,18 @@

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -213,7 +226,6 @@
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -225,6 +237,9 @@
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )

@@ -274,18 +289,21 @@
             sentinel="[DONE]",
         )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -297,10 +315,9 @@
             List[models.ChatCompletionRequestMessagesTypedDict],
         ],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[
             Union[
@@ -321,6 +338,9 @@
                 models.ChatCompletionRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -330,16 +350,18 @@

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -358,7 +380,6 @@
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -372,6 +393,9 @@
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )

@@ -421,15 +445,17 @@
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -441,10 +467,9 @@
             List[models.ChatCompletionRequestMessagesTypedDict],
         ],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[
             Union[
@@ -465,6 +490,9 @@
                 models.ChatCompletionRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -474,16 +502,18 @@

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -502,7 +532,6 @@
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -516,6 +545,9 @@
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )

@@ -565,14 +597,16 @@
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )
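Across all four chat methods (`stream`, `stream_async`, `complete`, `complete_async`), 1.2.0 drops `min_tokens`, leaves `temperature` unset by default, adds `presence_penalty`, `frequency_penalty`, and `n`, and attaches the response body text to raised `SDKError`s. A minimal sketch of a non-streaming call against the new signature, assuming the package's `MistralAzure` entry point with `azure_api_key`/`azure_endpoint` arguments; the environment variable names are placeholders, not part of the SDK:

```python
import os

from mistralai_azure import MistralAzure

# Placeholder environment variable names; adjust to your deployment.
client = MistralAzure(
    azure_api_key=os.environ["AZURE_API_KEY"],
    azure_endpoint=os.environ["AZURE_ENDPOINT"],
)

# temperature now defaults to UNSET (server-side default); the new
# penalty/n parameters arrive alongside the removed min_tokens argument.
res = client.chat.complete(
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    temperature=0.3,
    presence_penalty=0.1,
    frequency_penalty=0.1,
    n=1,
)
if res is not None:
    print(res.choices[0].message.content)
```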
mistralai_azure/models/__init__.py
CHANGED
@@ -2,6 +2,8 @@

 from .assistantmessage import (
     AssistantMessage,
+    AssistantMessageContent,
+    AssistantMessageContentTypedDict,
     AssistantMessageRole,
     AssistantMessageTypedDict,
 )
@@ -42,7 +44,7 @@ from .completionresponsestreamchoice import (
     FinishReason,
 )
 from .contentchunk import ContentChunk, ContentChunkTypedDict
-from .deltamessage import DeltaMessage, DeltaMessageTypedDict
+from .deltamessage import Content, ContentTypedDict, DeltaMessage, DeltaMessageTypedDict
 from .function import Function, FunctionTypedDict
 from .functioncall import (
     Arguments,
@@ -57,10 +59,10 @@ from .responseformats import ResponseFormats
 from .sdkerror import SDKError
 from .security import Security, SecurityTypedDict
 from .systemmessage import (
-    Content,
-    ContentTypedDict,
     Role,
     SystemMessage,
+    SystemMessageContent,
+    SystemMessageContentTypedDict,
     SystemMessageTypedDict,
 )
 from .textchunk import TextChunk, TextChunkTypedDict, Type
@@ -89,6 +91,8 @@ __all__ = [
     "Arguments",
     "ArgumentsTypedDict",
     "AssistantMessage",
+    "AssistantMessageContent",
+    "AssistantMessageContentTypedDict",
     "AssistantMessageRole",
     "AssistantMessageTypedDict",
     "ChatCompletionChoice",
@@ -143,6 +147,8 @@ __all__ = [
     "Stop",
     "StopTypedDict",
     "SystemMessage",
+    "SystemMessageContent",
+    "SystemMessageContentTypedDict",
     "SystemMessageTypedDict",
     "TextChunk",
     "TextChunkTypedDict",
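The re-exports follow the content-type renames in the message models: the old `Content`/`ContentTypedDict` aliases from `systemmessage` become `SystemMessageContent`/`SystemMessageContentTypedDict`, `Content` is now sourced from `deltamessage`, and new `AssistantMessageContent` aliases are exported. A quick import check against the new surface (import targets taken from the diff above):

```python
# Names as re-exported by mistralai_azure.models in 1.2.0, per the diff above.
from mistralai_azure.models import (
    AssistantMessageContent,   # new: a string or a list of content chunks
    Content,                   # now re-exported from .deltamessage
    SystemMessageContent,      # replaces the old systemmessage Content alias
)

print(AssistantMessageContent, Content, SystemMessageContent)
```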
mistralai_azure/models/assistantmessage.py
CHANGED
@@ -1,6 +1,7 @@
 """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""

 from __future__ import annotations
+from .contentchunk import ContentChunk, ContentChunkTypedDict
 from .toolcall import ToolCall, ToolCallTypedDict
 from mistralai_azure.types import (
     BaseModel,
@@ -10,28 +11,32 @@ from mistralai_azure.types import (
     UNSET_SENTINEL,
 )
 from pydantic import model_serializer
-from typing import List, Literal, Optional,
-from typing_extensions import NotRequired
+from typing import List, Literal, Optional, Union
+from typing_extensions import NotRequired, TypedDict
+
+
+AssistantMessageContentTypedDict = Union[str, List[ContentChunkTypedDict]]
+
+
+AssistantMessageContent = Union[str, List[ContentChunk]]


 AssistantMessageRole = Literal["assistant"]


 class AssistantMessageTypedDict(TypedDict):
-    content: NotRequired[Nullable[
+    content: NotRequired[Nullable[AssistantMessageContentTypedDict]]
     tool_calls: NotRequired[Nullable[List[ToolCallTypedDict]]]
     prefix: NotRequired[bool]
-    r"""Set this to `true` when adding an assistant message as prefix to condition the model response. The role of the prefix message is to force the model to start its answer by the content of the message."""
     role: NotRequired[AssistantMessageRole]


 class AssistantMessage(BaseModel):
-    content: OptionalNullable[
+    content: OptionalNullable[AssistantMessageContent] = UNSET

     tool_calls: OptionalNullable[List[ToolCall]] = UNSET

     prefix: Optional[bool] = False
-    r"""Set this to `true` when adding an assistant message as prefix to condition the model response. The role of the prefix message is to force the model to start its answer by the content of the message."""

     role: Optional[AssistantMessageRole] = "assistant"

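With `AssistantMessageContent = Union[str, List[ContentChunk]]`, an assistant turn (for example a prefix message) can now carry either a plain string or chunked content. A small sketch, assuming the `AssistantMessage` and `TextChunk` models re-exported by `mistralai_azure.models`:

```python
from mistralai_azure.models import AssistantMessage, TextChunk

# Plain-string content, as before.
prefix_plain = AssistantMessage(content="Here is the answer:", prefix=True)

# New in 1.2.0: content may also be a list of content chunks.
prefix_chunks = AssistantMessage(
    content=[TextChunk(text="Here is the answer:")],
    prefix=True,
)

print(prefix_plain.content, prefix_chunks.content)
```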
mistralai_azure/models/chatcompletionchoice.py
CHANGED
@@ -2,12 +2,15 @@

 from __future__ import annotations
 from .assistantmessage import AssistantMessage, AssistantMessageTypedDict
-from mistralai_azure.types import BaseModel
-from
+from mistralai_azure.types import BaseModel, UnrecognizedStr
+from mistralai_azure.utils import validate_open_enum
+from pydantic.functional_validators import PlainValidator
+from typing import Literal, Union
+from typing_extensions import Annotated, TypedDict


-ChatCompletionChoiceFinishReason =
-    "stop", "length", "model_length", "error", "tool_calls"
+ChatCompletionChoiceFinishReason = Union[
+    Literal["stop", "length", "model_length", "error", "tool_calls"], UnrecognizedStr
 ]


@@ -22,4 +25,6 @@ class ChatCompletionChoice(BaseModel):

     message: AssistantMessage

-    finish_reason:
+    finish_reason: Annotated[
+        ChatCompletionChoiceFinishReason, PlainValidator(validate_open_enum(False))
+    ]
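`finish_reason` is now an open enum: the known literals still validate, while values the SDK does not recognize are accepted as `UnrecognizedStr` through `validate_open_enum` instead of failing Pydantic validation. A hedged sketch of what that buys; the `index` field and the exact payload shape are assumptions, only `message` and `finish_reason` appear in the hunk above:

```python
from mistralai_azure.models import ChatCompletionChoice

payload = {"index": 0, "message": {"role": "assistant", "content": "hi"}}

# A known finish reason parses as before.
known = ChatCompletionChoice.model_validate({**payload, "finish_reason": "stop"})

# A finish reason introduced server-side after this SDK release should no
# longer raise a validation error; it is kept as an unrecognized string.
future = ChatCompletionChoice.model_validate(
    {**payload, "finish_reason": "brand_new_reason"}
)

print(known.finish_reason, future.finish_reason)
```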
mistralai_azure/models/chatcompletionrequest.py
CHANGED
@@ -18,8 +18,8 @@ from mistralai_azure.types import (
 )
 from mistralai_azure.utils import get_discriminator
 from pydantic import Discriminator, Tag, model_serializer
-from typing import List, Optional,
-from typing_extensions import Annotated, NotRequired
+from typing import List, Optional, Union
+from typing_extensions import Annotated, NotRequired, TypedDict


 ChatCompletionRequestStopTypedDict = Union[str, List[str]]
@@ -60,14 +60,12 @@ class ChatCompletionRequestTypedDict(TypedDict):
     r"""The prompt(s) to generate completions for, encoded as a list of dict with role and content."""
     model: NotRequired[Nullable[str]]
     r"""The ID of the model to use for this request."""
-    temperature: NotRequired[float]
-    r"""What sampling temperature to use, between 0.0 and
+    temperature: NotRequired[Nullable[float]]
+    r"""What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value."""
     top_p: NotRequired[float]
     r"""Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both."""
     max_tokens: NotRequired[Nullable[int]]
     r"""The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."""
-    min_tokens: NotRequired[Nullable[int]]
-    r"""The minimum number of tokens to generate in the completion."""
     stream: NotRequired[bool]
     r"""Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON."""
     stop: NotRequired[ChatCompletionRequestStopTypedDict]
@@ -77,6 +75,12 @@ class ChatCompletionRequestTypedDict(TypedDict):
     response_format: NotRequired[ResponseFormatTypedDict]
     tools: NotRequired[Nullable[List[ToolTypedDict]]]
     tool_choice: NotRequired[ChatCompletionRequestToolChoiceTypedDict]
+    presence_penalty: NotRequired[float]
+    r"""presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative."""
+    frequency_penalty: NotRequired[float]
+    r"""frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition."""
+    n: NotRequired[Nullable[int]]
+    r"""Number of completions to return for each request, input tokens are only billed once."""
     safe_prompt: NotRequired[bool]
     r"""Whether to inject a safety prompt before all conversations."""

@@ -88,8 +92,8 @@ class ChatCompletionRequest(BaseModel):
     model: OptionalNullable[str] = "azureai"
     r"""The ID of the model to use for this request."""

-    temperature:
-    r"""What sampling temperature to use, between 0.0 and
+    temperature: OptionalNullable[float] = UNSET
+    r"""What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value."""

     top_p: Optional[float] = 1
     r"""Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both."""
@@ -97,9 +101,6 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: OptionalNullable[int] = UNSET
     r"""The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."""

-    min_tokens: OptionalNullable[int] = UNSET
-    r"""The minimum number of tokens to generate in the completion."""
-
     stream: Optional[bool] = False
     r"""Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON."""

@@ -115,6 +116,15 @@ class ChatCompletionRequest(BaseModel):

     tool_choice: Optional[ChatCompletionRequestToolChoice] = None

+    presence_penalty: Optional[float] = 0
+    r"""presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative."""
+
+    frequency_penalty: Optional[float] = 0
+    r"""frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition."""
+
+    n: OptionalNullable[int] = UNSET
+    r"""Number of completions to return for each request, input tokens are only billed once."""
+
     safe_prompt: Optional[bool] = False
     r"""Whether to inject a safety prompt before all conversations."""

@@ -125,16 +135,25 @@ class ChatCompletionRequest(BaseModel):
             "temperature",
             "top_p",
             "max_tokens",
-            "min_tokens",
             "stream",
             "stop",
             "random_seed",
             "response_format",
             "tools",
             "tool_choice",
+            "presence_penalty",
+            "frequency_penalty",
+            "n",
             "safe_prompt",
         ]
-        nullable_fields = [
+        nullable_fields = [
+            "model",
+            "temperature",
+            "max_tokens",
+            "random_seed",
+            "tools",
+            "n",
+        ]
         null_default_fields = []

         serialized = handler(self)
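In the serializer hunk, the new fields join `optional_fields`, and `temperature` and `n` are also listed in `nullable_fields`, so leaving them unset should omit them from the request payload while an explicit `None` is sent as a JSON null. A rough sketch of the difference; the wire behaviour is inferred from the field lists above, not verified against the generated serializer:

```python
from mistralai_azure.models import ChatCompletionRequest

messages = [{"role": "user", "content": "hi"}]

# temperature/n left at their UNSET defaults: expected to be omitted entirely.
implicit = ChatCompletionRequest(messages=messages)

# Explicit None: expected to serialize as null, since both fields are nullable.
explicit = ChatCompletionRequest(messages=messages, temperature=None, n=None)

print(implicit.model_dump(by_alias=True))
print(explicit.model_dump(by_alias=True))
```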
mistralai_azure/models/chatcompletionresponse.py
CHANGED
@@ -4,8 +4,8 @@ from __future__ import annotations
 from .chatcompletionchoice import ChatCompletionChoice, ChatCompletionChoiceTypedDict
 from .usageinfo import UsageInfo, UsageInfoTypedDict
 from mistralai_azure.types import BaseModel
-from typing import List, Optional
-from typing_extensions import NotRequired
+from typing import List, Optional
+from typing_extensions import NotRequired, TypedDict


 class ChatCompletionResponseTypedDict(TypedDict):