mistralai 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. mistralai/__init__.py +4 -0
  2. mistralai/_version.py +12 -0
  3. mistralai/agents.py +56 -22
  4. mistralai/batch.py +17 -0
  5. mistralai/chat.py +64 -30
  6. mistralai/classifiers.py +396 -0
  7. mistralai/embeddings.py +10 -6
  8. mistralai/files.py +252 -19
  9. mistralai/fim.py +40 -30
  10. mistralai/jobs.py +40 -20
  11. mistralai/mistral_jobs.py +733 -0
  12. mistralai/models/__init__.py +108 -18
  13. mistralai/models/agentscompletionrequest.py +27 -10
  14. mistralai/models/agentscompletionstreamrequest.py +27 -10
  15. mistralai/models/apiendpoint.py +9 -0
  16. mistralai/models/archiveftmodelout.py +11 -5
  17. mistralai/models/assistantmessage.py +11 -6
  18. mistralai/models/basemodelcard.py +22 -6
  19. mistralai/models/batcherror.py +17 -0
  20. mistralai/models/batchjobin.py +58 -0
  21. mistralai/models/batchjobout.py +117 -0
  22. mistralai/models/batchjobsout.py +30 -0
  23. mistralai/models/batchjobstatus.py +15 -0
  24. mistralai/models/chatclassificationrequest.py +104 -0
  25. mistralai/models/chatcompletionchoice.py +9 -4
  26. mistralai/models/chatcompletionrequest.py +32 -13
  27. mistralai/models/chatcompletionresponse.py +2 -2
  28. mistralai/models/chatcompletionstreamrequest.py +32 -13
  29. mistralai/models/checkpointout.py +1 -1
  30. mistralai/models/classificationobject.py +21 -0
  31. mistralai/models/classificationrequest.py +59 -0
  32. mistralai/models/classificationresponse.py +21 -0
  33. mistralai/models/completionchunk.py +2 -2
  34. mistralai/models/completionevent.py +1 -1
  35. mistralai/models/completionresponsestreamchoice.py +11 -5
  36. mistralai/models/delete_model_v1_models_model_id_deleteop.py +1 -2
  37. mistralai/models/deletefileout.py +1 -1
  38. mistralai/models/deletemodelout.py +2 -2
  39. mistralai/models/deltamessage.py +14 -7
  40. mistralai/models/detailedjobout.py +11 -5
  41. mistralai/models/embeddingrequest.py +5 -5
  42. mistralai/models/embeddingresponse.py +2 -1
  43. mistralai/models/embeddingresponsedata.py +2 -2
  44. mistralai/models/eventout.py +2 -2
  45. mistralai/models/filepurpose.py +8 -0
  46. mistralai/models/files_api_routes_delete_fileop.py +1 -2
  47. mistralai/models/files_api_routes_download_fileop.py +16 -0
  48. mistralai/models/files_api_routes_list_filesop.py +96 -0
  49. mistralai/models/files_api_routes_retrieve_fileop.py +1 -2
  50. mistralai/models/files_api_routes_upload_fileop.py +9 -9
  51. mistralai/models/fileschema.py +7 -21
  52. mistralai/models/fimcompletionrequest.py +20 -13
  53. mistralai/models/fimcompletionresponse.py +2 -2
  54. mistralai/models/fimcompletionstreamrequest.py +20 -13
  55. mistralai/models/ftmodelcapabilitiesout.py +2 -2
  56. mistralai/models/ftmodelcard.py +24 -6
  57. mistralai/models/ftmodelout.py +9 -5
  58. mistralai/models/function.py +2 -2
  59. mistralai/models/functioncall.py +2 -1
  60. mistralai/models/functionname.py +1 -1
  61. mistralai/models/githubrepositoryin.py +11 -5
  62. mistralai/models/githubrepositoryout.py +11 -5
  63. mistralai/models/httpvalidationerror.py +0 -2
  64. mistralai/models/imageurl.py +1 -2
  65. mistralai/models/imageurlchunk.py +11 -5
  66. mistralai/models/jobin.py +2 -2
  67. mistralai/models/jobmetadataout.py +1 -2
  68. mistralai/models/jobout.py +10 -5
  69. mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
  70. mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
  71. mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
  72. mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +1 -2
  73. mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +1 -2
  74. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +1 -2
  75. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +2 -2
  76. mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +1 -2
  77. mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +1 -2
  78. mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +1 -2
  79. mistralai/models/jobsout.py +9 -5
  80. mistralai/models/legacyjobmetadataout.py +12 -5
  81. mistralai/models/listfilesout.py +5 -1
  82. mistralai/models/metricout.py +1 -2
  83. mistralai/models/modelcapabilities.py +2 -2
  84. mistralai/models/modellist.py +2 -2
  85. mistralai/models/responseformat.py +2 -2
  86. mistralai/models/retrieve_model_v1_models_model_id_getop.py +2 -2
  87. mistralai/models/retrievefileout.py +10 -21
  88. mistralai/models/sampletype.py +6 -2
  89. mistralai/models/security.py +2 -2
  90. mistralai/models/source.py +3 -2
  91. mistralai/models/systemmessage.py +6 -6
  92. mistralai/models/textchunk.py +9 -5
  93. mistralai/models/tool.py +2 -2
  94. mistralai/models/toolcall.py +2 -2
  95. mistralai/models/toolchoice.py +2 -2
  96. mistralai/models/toolmessage.py +2 -2
  97. mistralai/models/trainingfile.py +2 -2
  98. mistralai/models/trainingparameters.py +7 -2
  99. mistralai/models/trainingparametersin.py +7 -2
  100. mistralai/models/unarchiveftmodelout.py +11 -5
  101. mistralai/models/updateftmodelin.py +1 -2
  102. mistralai/models/uploadfileout.py +7 -21
  103. mistralai/models/usageinfo.py +1 -1
  104. mistralai/models/usermessage.py +36 -5
  105. mistralai/models/validationerror.py +2 -1
  106. mistralai/models/wandbintegration.py +11 -5
  107. mistralai/models/wandbintegrationout.py +12 -6
  108. mistralai/models_.py +48 -24
  109. mistralai/sdk.py +7 -0
  110. mistralai/sdkconfiguration.py +7 -7
  111. mistralai/utils/__init__.py +8 -0
  112. mistralai/utils/annotations.py +13 -2
  113. mistralai/utils/serializers.py +25 -0
  114. {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/METADATA +90 -14
  115. mistralai-1.2.1.dist-info/RECORD +276 -0
  116. {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/WHEEL +1 -1
  117. mistralai_azure/__init__.py +4 -0
  118. mistralai_azure/_version.py +12 -0
  119. mistralai_azure/chat.py +64 -30
  120. mistralai_azure/models/__init__.py +9 -3
  121. mistralai_azure/models/assistantmessage.py +11 -6
  122. mistralai_azure/models/chatcompletionchoice.py +10 -5
  123. mistralai_azure/models/chatcompletionrequest.py +32 -13
  124. mistralai_azure/models/chatcompletionresponse.py +2 -2
  125. mistralai_azure/models/chatcompletionstreamrequest.py +32 -13
  126. mistralai_azure/models/completionchunk.py +2 -2
  127. mistralai_azure/models/completionevent.py +1 -1
  128. mistralai_azure/models/completionresponsestreamchoice.py +9 -4
  129. mistralai_azure/models/deltamessage.py +14 -7
  130. mistralai_azure/models/function.py +2 -2
  131. mistralai_azure/models/functioncall.py +2 -1
  132. mistralai_azure/models/functionname.py +1 -1
  133. mistralai_azure/models/httpvalidationerror.py +0 -2
  134. mistralai_azure/models/responseformat.py +2 -2
  135. mistralai_azure/models/security.py +1 -2
  136. mistralai_azure/models/systemmessage.py +6 -6
  137. mistralai_azure/models/textchunk.py +9 -5
  138. mistralai_azure/models/tool.py +2 -2
  139. mistralai_azure/models/toolcall.py +2 -2
  140. mistralai_azure/models/toolchoice.py +2 -2
  141. mistralai_azure/models/toolmessage.py +2 -2
  142. mistralai_azure/models/usageinfo.py +1 -1
  143. mistralai_azure/models/usermessage.py +36 -5
  144. mistralai_azure/models/validationerror.py +2 -1
  145. mistralai_azure/sdkconfiguration.py +7 -7
  146. mistralai_azure/utils/__init__.py +8 -0
  147. mistralai_azure/utils/annotations.py +13 -2
  148. mistralai_azure/utils/serializers.py +25 -0
  149. mistralai_gcp/__init__.py +4 -0
  150. mistralai_gcp/_version.py +12 -0
  151. mistralai_gcp/chat.py +64 -30
  152. mistralai_gcp/fim.py +40 -30
  153. mistralai_gcp/models/__init__.py +9 -3
  154. mistralai_gcp/models/assistantmessage.py +11 -6
  155. mistralai_gcp/models/chatcompletionchoice.py +10 -5
  156. mistralai_gcp/models/chatcompletionrequest.py +32 -13
  157. mistralai_gcp/models/chatcompletionresponse.py +2 -2
  158. mistralai_gcp/models/chatcompletionstreamrequest.py +32 -13
  159. mistralai_gcp/models/completionchunk.py +2 -2
  160. mistralai_gcp/models/completionevent.py +1 -1
  161. mistralai_gcp/models/completionresponsestreamchoice.py +9 -4
  162. mistralai_gcp/models/deltamessage.py +14 -7
  163. mistralai_gcp/models/fimcompletionrequest.py +20 -13
  164. mistralai_gcp/models/fimcompletionresponse.py +2 -2
  165. mistralai_gcp/models/fimcompletionstreamrequest.py +20 -13
  166. mistralai_gcp/models/function.py +2 -2
  167. mistralai_gcp/models/functioncall.py +2 -1
  168. mistralai_gcp/models/functionname.py +1 -1
  169. mistralai_gcp/models/httpvalidationerror.py +0 -2
  170. mistralai_gcp/models/responseformat.py +2 -2
  171. mistralai_gcp/models/security.py +1 -2
  172. mistralai_gcp/models/systemmessage.py +6 -6
  173. mistralai_gcp/models/textchunk.py +9 -5
  174. mistralai_gcp/models/tool.py +2 -2
  175. mistralai_gcp/models/toolcall.py +2 -2
  176. mistralai_gcp/models/toolchoice.py +2 -2
  177. mistralai_gcp/models/toolmessage.py +2 -2
  178. mistralai_gcp/models/usageinfo.py +1 -1
  179. mistralai_gcp/models/usermessage.py +36 -5
  180. mistralai_gcp/models/validationerror.py +2 -1
  181. mistralai_gcp/sdk.py +20 -11
  182. mistralai_gcp/sdkconfiguration.py +7 -7
  183. mistralai_gcp/utils/__init__.py +8 -0
  184. mistralai_gcp/utils/annotations.py +13 -2
  185. mistralai_gcp/utils/serializers.py +25 -0
  186. mistralai-1.1.0.dist-info/RECORD +0 -254
  187. {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/LICENSE +0 -0
mistralai_azure/chat.py CHANGED
@@ -16,10 +16,9 @@ class Chat(BaseSDK):
  *,
  messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
  model: OptionalNullable[str] = "azureai",
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = True,
  stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
  random_seed: OptionalNullable[int] = UNSET,
@@ -35,6 +34,9 @@ class Chat(BaseSDK):
  models.ChatCompletionStreamRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  safe_prompt: Optional[bool] = False,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
@@ -46,16 +48,18 @@ class Chat(BaseSDK):

  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
  :param model: The ID of the model to use for this request.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream:
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param safe_prompt: Whether to inject a safety prompt before all conversations.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
@@ -74,7 +78,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -86,6 +89,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  safe_prompt=safe_prompt,
  )

@@ -135,18 +141,21 @@ class Chat(BaseSDK):
  sentinel="[DONE]",
  )
  if utils.match_response(http_res, "422", "application/json"):
- data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+ http_res_text = utils.stream_to_text(http_res)
+ data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )

  content_type = http_res.headers.get("Content-Type")
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )

@@ -155,10 +164,9 @@ class Chat(BaseSDK):
  *,
  messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
  model: OptionalNullable[str] = "azureai",
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = True,
  stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
  random_seed: OptionalNullable[int] = UNSET,
@@ -174,6 +182,9 @@ class Chat(BaseSDK):
  models.ChatCompletionStreamRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  safe_prompt: Optional[bool] = False,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
@@ -185,16 +196,18 @@ class Chat(BaseSDK):

  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
  :param model: The ID of the model to use for this request.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream:
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param safe_prompt: Whether to inject a safety prompt before all conversations.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
@@ -213,7 +226,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -225,6 +237,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  safe_prompt=safe_prompt,
  )

@@ -274,18 +289,21 @@ class Chat(BaseSDK):
  sentinel="[DONE]",
  )
  if utils.match_response(http_res, "422", "application/json"):
- data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+ http_res_text = await utils.stream_to_text_async(http_res)
+ data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )

  content_type = http_res.headers.get("Content-Type")
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )

@@ -297,10 +315,9 @@ class Chat(BaseSDK):
  List[models.ChatCompletionRequestMessagesTypedDict],
  ],
  model: OptionalNullable[str] = "azureai",
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = False,
  stop: Optional[
  Union[
@@ -321,6 +338,9 @@ class Chat(BaseSDK):
  models.ChatCompletionRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  safe_prompt: Optional[bool] = False,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
@@ -330,16 +350,18 @@ class Chat(BaseSDK):

  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
  :param model: The ID of the model to use for this request.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param safe_prompt: Whether to inject a safety prompt before all conversations.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
@@ -358,7 +380,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -372,6 +393,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  safe_prompt=safe_prompt,
  )

@@ -421,15 +445,17 @@ class Chat(BaseSDK):
  data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )

  content_type = http_res.headers.get("Content-Type")
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )

@@ -441,10 +467,9 @@ class Chat(BaseSDK):
  List[models.ChatCompletionRequestMessagesTypedDict],
  ],
  model: OptionalNullable[str] = "azureai",
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = False,
  stop: Optional[
  Union[
@@ -465,6 +490,9 @@ class Chat(BaseSDK):
  models.ChatCompletionRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  safe_prompt: Optional[bool] = False,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
@@ -474,16 +502,18 @@ class Chat(BaseSDK):

  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
  :param model: The ID of the model to use for this request.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param safe_prompt: Whether to inject a safety prompt before all conversations.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
@@ -502,7 +532,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -516,6 +545,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  safe_prompt=safe_prompt,
  )

@@ -565,14 +597,16 @@ class Chat(BaseSDK):
  data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )

  content_type = http_res.headers.get("Content-Type")
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
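
Taken together, the chat.py changes above alter the public call signature in three ways: min_tokens is removed, temperature now defaults to UNSET so the server applies a model-specific default, and presence_penalty, frequency_penalty, and n are new optional parameters. A minimal usage sketch against the updated signature follows; the MistralAzure client name and its constructor arguments are assumptions based on the package's usual entry point and are not part of this diff.

# Illustrative sketch only: the MistralAzure entry point and its constructor
# arguments are assumed, not shown in this diff.
import os

from mistralai_azure import MistralAzure

client = MistralAzure(
    azure_api_key=os.getenv("AZURE_API_KEY", ""),
    azure_endpoint=os.getenv("AZURE_ENDPOINT", ""),
)

res = client.chat.complete(
    messages=[{"role": "user", "content": "Summarize this diff in one sentence."}],
    # temperature is omitted: as of 1.2.x it defaults to UNSET and the server
    # picks a model-specific default; pass a float to override.
    presence_penalty=0.2,   # new: discourages reusing words already present
    frequency_penalty=0.2,  # new: penalizes tokens by how often they occurred
    n=1,                    # new: number of completions to return
)
if res is not None:
    print(res.choices[0].message.content)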

mistralai_azure/models/__init__.py CHANGED
@@ -2,6 +2,8 @@

  from .assistantmessage import (
  AssistantMessage,
+ AssistantMessageContent,
+ AssistantMessageContentTypedDict,
  AssistantMessageRole,
  AssistantMessageTypedDict,
  )
@@ -42,7 +44,7 @@ from .completionresponsestreamchoice import (
  FinishReason,
  )
  from .contentchunk import ContentChunk, ContentChunkTypedDict
- from .deltamessage import DeltaMessage, DeltaMessageTypedDict
+ from .deltamessage import Content, ContentTypedDict, DeltaMessage, DeltaMessageTypedDict
  from .function import Function, FunctionTypedDict
  from .functioncall import (
  Arguments,
@@ -57,10 +59,10 @@ from .responseformats import ResponseFormats
  from .sdkerror import SDKError
  from .security import Security, SecurityTypedDict
  from .systemmessage import (
- Content,
- ContentTypedDict,
  Role,
  SystemMessage,
+ SystemMessageContent,
+ SystemMessageContentTypedDict,
  SystemMessageTypedDict,
  )
  from .textchunk import TextChunk, TextChunkTypedDict, Type
@@ -89,6 +91,8 @@ __all__ = [
  "Arguments",
  "ArgumentsTypedDict",
  "AssistantMessage",
+ "AssistantMessageContent",
+ "AssistantMessageContentTypedDict",
  "AssistantMessageRole",
  "AssistantMessageTypedDict",
  "ChatCompletionChoice",
@@ -143,6 +147,8 @@ __all__ = [
  "Stop",
  "StopTypedDict",
  "SystemMessage",
+ "SystemMessageContent",
+ "SystemMessageContentTypedDict",
  "SystemMessageTypedDict",
  "TextChunk",
  "TextChunkTypedDict",

mistralai_azure/models/assistantmessage.py CHANGED
@@ -1,6 +1,7 @@
  """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""

  from __future__ import annotations
+ from .contentchunk import ContentChunk, ContentChunkTypedDict
  from .toolcall import ToolCall, ToolCallTypedDict
  from mistralai_azure.types import (
  BaseModel,
@@ -10,28 +11,32 @@ from mistralai_azure.types import (
  UNSET_SENTINEL,
  )
  from pydantic import model_serializer
- from typing import List, Literal, Optional, TypedDict
- from typing_extensions import NotRequired
+ from typing import List, Literal, Optional, Union
+ from typing_extensions import NotRequired, TypedDict
+
+
+ AssistantMessageContentTypedDict = Union[str, List[ContentChunkTypedDict]]
+
+
+ AssistantMessageContent = Union[str, List[ContentChunk]]


  AssistantMessageRole = Literal["assistant"]


  class AssistantMessageTypedDict(TypedDict):
- content: NotRequired[Nullable[str]]
+ content: NotRequired[Nullable[AssistantMessageContentTypedDict]]
  tool_calls: NotRequired[Nullable[List[ToolCallTypedDict]]]
  prefix: NotRequired[bool]
- r"""Set this to `true` when adding an assistant message as prefix to condition the model response. The role of the prefix message is to force the model to start its answer by the content of the message."""
  role: NotRequired[AssistantMessageRole]


  class AssistantMessage(BaseModel):
- content: OptionalNullable[str] = UNSET
+ content: OptionalNullable[AssistantMessageContent] = UNSET

  tool_calls: OptionalNullable[List[ToolCall]] = UNSET

  prefix: Optional[bool] = False
- r"""Set this to `true` when adding an assistant message as prefix to condition the model response. The role of the prefix message is to force the model to start its answer by the content of the message."""

  role: Optional[AssistantMessageRole] = "assistant"
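
The re-exports in models/__init__.py and the model change above go together: Content/ContentTypedDict now name the delta-message content union, the system-message aliases gained explicit SystemMessageContent* names, and AssistantMessage.content accepts either a plain string or a list of content chunks. A small sketch of the new shapes; the values are illustrative, and only names exported by this package version are used.

# Sketch of the renamed exports and the widened AssistantMessage.content type.
from mistralai_azure.models import (
    AssistantMessage,
    AssistantMessageContent,  # new export: Union[str, List[ContentChunk]]
    SystemMessageContent,     # replaces the old `Content` alias for system messages
    TextChunk,
)

# content may still be a plain string...
prefix_msg = AssistantMessage(content="The answer is", prefix=True)

# ...or a list of content chunks, matching the new union.
chunked_msg = AssistantMessage(content=[TextChunk(text="The answer is")], prefix=True)

print(prefix_msg.content, chunked_msg.content)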
 
mistralai_azure/models/chatcompletionchoice.py CHANGED
@@ -2,12 +2,15 @@

  from __future__ import annotations
  from .assistantmessage import AssistantMessage, AssistantMessageTypedDict
- from mistralai_azure.types import BaseModel
- from typing import Literal, TypedDict
+ from mistralai_azure.types import BaseModel, UnrecognizedStr
+ from mistralai_azure.utils import validate_open_enum
+ from pydantic.functional_validators import PlainValidator
+ from typing import Literal, Union
+ from typing_extensions import Annotated, TypedDict


- ChatCompletionChoiceFinishReason = Literal[
- "stop", "length", "model_length", "error", "tool_calls"
+ ChatCompletionChoiceFinishReason = Union[
+ Literal["stop", "length", "model_length", "error", "tool_calls"], UnrecognizedStr
  ]


@@ -22,4 +25,6 @@ class ChatCompletionChoice(BaseModel):

  message: AssistantMessage

- finish_reason: ChatCompletionChoiceFinishReason
+ finish_reason: Annotated[
+ ChatCompletionChoiceFinishReason, PlainValidator(validate_open_enum(False))
+ ]
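
The practical effect of this change is that finish_reason becomes an open enum: values outside the known Literal set are accepted (as UnrecognizedStr) instead of failing validation, so responses from newer servers keep parsing. A hedged sketch of that behaviour using pydantic's standard model_validate; the field values are illustrative.

# Sketch: with the open enum, an unknown finish_reason should validate
# instead of raising, and is kept as a plain (unrecognized) string.
from mistralai_azure.models import ChatCompletionChoice

choice = ChatCompletionChoice.model_validate(
    {
        "index": 0,
        "message": {"role": "assistant", "content": "Hello."},
        "finish_reason": "some_future_reason",  # not in the Literal list above
    }
)
print(choice.finish_reason)  # expected: "some_future_reason"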

mistralai_azure/models/chatcompletionrequest.py CHANGED
@@ -18,8 +18,8 @@ from mistralai_azure.types import (
  )
  from mistralai_azure.utils import get_discriminator
  from pydantic import Discriminator, Tag, model_serializer
- from typing import List, Optional, TypedDict, Union
- from typing_extensions import Annotated, NotRequired
+ from typing import List, Optional, Union
+ from typing_extensions import Annotated, NotRequired, TypedDict


  ChatCompletionRequestStopTypedDict = Union[str, List[str]]
@@ -60,14 +60,12 @@ class ChatCompletionRequestTypedDict(TypedDict):
  r"""The prompt(s) to generate completions for, encoded as a list of dict with role and content."""
  model: NotRequired[Nullable[str]]
  r"""The ID of the model to use for this request."""
- temperature: NotRequired[float]
- r"""What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both."""
+ temperature: NotRequired[Nullable[float]]
+ r"""What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value."""
  top_p: NotRequired[float]
  r"""Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both."""
  max_tokens: NotRequired[Nullable[int]]
  r"""The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."""
- min_tokens: NotRequired[Nullable[int]]
- r"""The minimum number of tokens to generate in the completion."""
  stream: NotRequired[bool]
  r"""Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON."""
  stop: NotRequired[ChatCompletionRequestStopTypedDict]
@@ -77,6 +75,12 @@ class ChatCompletionRequestTypedDict(TypedDict):
  response_format: NotRequired[ResponseFormatTypedDict]
  tools: NotRequired[Nullable[List[ToolTypedDict]]]
  tool_choice: NotRequired[ChatCompletionRequestToolChoiceTypedDict]
+ presence_penalty: NotRequired[float]
+ r"""presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative."""
+ frequency_penalty: NotRequired[float]
+ r"""frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition."""
+ n: NotRequired[Nullable[int]]
+ r"""Number of completions to return for each request, input tokens are only billed once."""
  safe_prompt: NotRequired[bool]
  r"""Whether to inject a safety prompt before all conversations."""

@@ -88,8 +92,8 @@ class ChatCompletionRequest(BaseModel):
  model: OptionalNullable[str] = "azureai"
  r"""The ID of the model to use for this request."""

- temperature: Optional[float] = 0.7
- r"""What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both."""
+ temperature: OptionalNullable[float] = UNSET
+ r"""What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value."""

  top_p: Optional[float] = 1
  r"""Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both."""
@@ -97,9 +101,6 @@ class ChatCompletionRequest(BaseModel):
  max_tokens: OptionalNullable[int] = UNSET
  r"""The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length."""

- min_tokens: OptionalNullable[int] = UNSET
- r"""The minimum number of tokens to generate in the completion."""
-
  stream: Optional[bool] = False
  r"""Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON."""

@@ -115,6 +116,15 @@ class ChatCompletionRequest(BaseModel):

  tool_choice: Optional[ChatCompletionRequestToolChoice] = None

+ presence_penalty: Optional[float] = 0
+ r"""presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative."""
+
+ frequency_penalty: Optional[float] = 0
+ r"""frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition."""
+
+ n: OptionalNullable[int] = UNSET
+ r"""Number of completions to return for each request, input tokens are only billed once."""
+
  safe_prompt: Optional[bool] = False
  r"""Whether to inject a safety prompt before all conversations."""

@@ -125,16 +135,25 @@ class ChatCompletionRequest(BaseModel):
  "temperature",
  "top_p",
  "max_tokens",
- "min_tokens",
  "stream",
  "stop",
  "random_seed",
  "response_format",
  "tools",
  "tool_choice",
+ "presence_penalty",
+ "frequency_penalty",
+ "n",
  "safe_prompt",
  ]
- nullable_fields = ["model", "max_tokens", "min_tokens", "random_seed", "tools"]
+ nullable_fields = [
+ "model",
+ "temperature",
+ "max_tokens",
+ "random_seed",
+ "tools",
+ "n",
+ ]
  null_default_fields = []

  serialized = handler(self)
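
The expanded optional_fields/nullable_fields lists drive the generated serializer: fields left at UNSET are dropped from the payload, nullable fields explicitly set to None are emitted as JSON null, and temperature and n now participate in both behaviours. A hedged sketch of what that looks like when dumping a request; the field behaviour is inferred from the lists above rather than stated in the diff.

# Sketch of the optional/nullable serialization behaviour implied by the lists
# above (inferred, not asserted by the diff itself).
from mistralai_azure.models import ChatCompletionRequest

req = ChatCompletionRequest(
    messages=[{"role": "user", "content": "Hi"}],
    n=None,  # nullable field set explicitly: expected to serialize as null
    # temperature left unset (UNSET): expected to be omitted from the payload
)
payload = req.model_dump(by_alias=True)
print("temperature" in payload)          # expected: False
print("n" in payload, payload.get("n"))  # expected: True None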

mistralai_azure/models/chatcompletionresponse.py CHANGED
@@ -4,8 +4,8 @@ from __future__ import annotations
  from .chatcompletionchoice import ChatCompletionChoice, ChatCompletionChoiceTypedDict
  from .usageinfo import UsageInfo, UsageInfoTypedDict
  from mistralai_azure.types import BaseModel
- from typing import List, Optional, TypedDict
- from typing_extensions import NotRequired
+ from typing import List, Optional
+ from typing_extensions import NotRequired, TypedDict


  class ChatCompletionResponseTypedDict(TypedDict):