mistralai 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. mistralai/__init__.py +4 -0
  2. mistralai/_version.py +12 -0
  3. mistralai/agents.py +56 -22
  4. mistralai/batch.py +17 -0
  5. mistralai/chat.py +64 -30
  6. mistralai/classifiers.py +396 -0
  7. mistralai/embeddings.py +10 -6
  8. mistralai/files.py +252 -19
  9. mistralai/fim.py +40 -30
  10. mistralai/jobs.py +40 -20
  11. mistralai/mistral_jobs.py +733 -0
  12. mistralai/models/__init__.py +108 -18
  13. mistralai/models/agentscompletionrequest.py +27 -10
  14. mistralai/models/agentscompletionstreamrequest.py +27 -10
  15. mistralai/models/apiendpoint.py +9 -0
  16. mistralai/models/archiveftmodelout.py +11 -5
  17. mistralai/models/assistantmessage.py +11 -6
  18. mistralai/models/basemodelcard.py +22 -6
  19. mistralai/models/batcherror.py +17 -0
  20. mistralai/models/batchjobin.py +58 -0
  21. mistralai/models/batchjobout.py +117 -0
  22. mistralai/models/batchjobsout.py +30 -0
  23. mistralai/models/batchjobstatus.py +15 -0
  24. mistralai/models/chatclassificationrequest.py +104 -0
  25. mistralai/models/chatcompletionchoice.py +9 -4
  26. mistralai/models/chatcompletionrequest.py +32 -13
  27. mistralai/models/chatcompletionresponse.py +2 -2
  28. mistralai/models/chatcompletionstreamrequest.py +32 -13
  29. mistralai/models/checkpointout.py +1 -1
  30. mistralai/models/classificationobject.py +21 -0
  31. mistralai/models/classificationrequest.py +59 -0
  32. mistralai/models/classificationresponse.py +21 -0
  33. mistralai/models/completionchunk.py +2 -2
  34. mistralai/models/completionevent.py +1 -1
  35. mistralai/models/completionresponsestreamchoice.py +11 -5
  36. mistralai/models/delete_model_v1_models_model_id_deleteop.py +1 -2
  37. mistralai/models/deletefileout.py +1 -1
  38. mistralai/models/deletemodelout.py +2 -2
  39. mistralai/models/deltamessage.py +14 -7
  40. mistralai/models/detailedjobout.py +11 -5
  41. mistralai/models/embeddingrequest.py +5 -5
  42. mistralai/models/embeddingresponse.py +2 -1
  43. mistralai/models/embeddingresponsedata.py +2 -2
  44. mistralai/models/eventout.py +2 -2
  45. mistralai/models/filepurpose.py +8 -0
  46. mistralai/models/files_api_routes_delete_fileop.py +1 -2
  47. mistralai/models/files_api_routes_download_fileop.py +16 -0
  48. mistralai/models/files_api_routes_list_filesop.py +96 -0
  49. mistralai/models/files_api_routes_retrieve_fileop.py +1 -2
  50. mistralai/models/files_api_routes_upload_fileop.py +9 -9
  51. mistralai/models/fileschema.py +7 -21
  52. mistralai/models/fimcompletionrequest.py +20 -13
  53. mistralai/models/fimcompletionresponse.py +2 -2
  54. mistralai/models/fimcompletionstreamrequest.py +20 -13
  55. mistralai/models/ftmodelcapabilitiesout.py +2 -2
  56. mistralai/models/ftmodelcard.py +24 -6
  57. mistralai/models/ftmodelout.py +9 -5
  58. mistralai/models/function.py +2 -2
  59. mistralai/models/functioncall.py +2 -1
  60. mistralai/models/functionname.py +1 -1
  61. mistralai/models/githubrepositoryin.py +11 -5
  62. mistralai/models/githubrepositoryout.py +11 -5
  63. mistralai/models/httpvalidationerror.py +0 -2
  64. mistralai/models/imageurl.py +1 -2
  65. mistralai/models/imageurlchunk.py +11 -5
  66. mistralai/models/jobin.py +2 -2
  67. mistralai/models/jobmetadataout.py +1 -2
  68. mistralai/models/jobout.py +10 -5
  69. mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
  70. mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
  71. mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
  72. mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +1 -2
  73. mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +1 -2
  74. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +1 -2
  75. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +2 -2
  76. mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +1 -2
  77. mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +1 -2
  78. mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +1 -2
  79. mistralai/models/jobsout.py +9 -5
  80. mistralai/models/legacyjobmetadataout.py +12 -5
  81. mistralai/models/listfilesout.py +5 -1
  82. mistralai/models/metricout.py +1 -2
  83. mistralai/models/modelcapabilities.py +2 -2
  84. mistralai/models/modellist.py +2 -2
  85. mistralai/models/responseformat.py +2 -2
  86. mistralai/models/retrieve_model_v1_models_model_id_getop.py +2 -2
  87. mistralai/models/retrievefileout.py +10 -21
  88. mistralai/models/sampletype.py +6 -2
  89. mistralai/models/security.py +2 -2
  90. mistralai/models/source.py +3 -2
  91. mistralai/models/systemmessage.py +6 -6
  92. mistralai/models/textchunk.py +9 -5
  93. mistralai/models/tool.py +2 -2
  94. mistralai/models/toolcall.py +2 -2
  95. mistralai/models/toolchoice.py +2 -2
  96. mistralai/models/toolmessage.py +2 -2
  97. mistralai/models/trainingfile.py +2 -2
  98. mistralai/models/trainingparameters.py +7 -2
  99. mistralai/models/trainingparametersin.py +7 -2
  100. mistralai/models/unarchiveftmodelout.py +11 -5
  101. mistralai/models/updateftmodelin.py +1 -2
  102. mistralai/models/uploadfileout.py +7 -21
  103. mistralai/models/usageinfo.py +1 -1
  104. mistralai/models/usermessage.py +36 -5
  105. mistralai/models/validationerror.py +2 -1
  106. mistralai/models/wandbintegration.py +11 -5
  107. mistralai/models/wandbintegrationout.py +12 -6
  108. mistralai/models_.py +48 -24
  109. mistralai/sdk.py +7 -0
  110. mistralai/sdkconfiguration.py +7 -7
  111. mistralai/utils/__init__.py +8 -0
  112. mistralai/utils/annotations.py +13 -2
  113. mistralai/utils/serializers.py +25 -0
  114. {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/METADATA +90 -14
  115. mistralai-1.2.1.dist-info/RECORD +276 -0
  116. {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/WHEEL +1 -1
  117. mistralai_azure/__init__.py +4 -0
  118. mistralai_azure/_version.py +12 -0
  119. mistralai_azure/chat.py +64 -30
  120. mistralai_azure/models/__init__.py +9 -3
  121. mistralai_azure/models/assistantmessage.py +11 -6
  122. mistralai_azure/models/chatcompletionchoice.py +10 -5
  123. mistralai_azure/models/chatcompletionrequest.py +32 -13
  124. mistralai_azure/models/chatcompletionresponse.py +2 -2
  125. mistralai_azure/models/chatcompletionstreamrequest.py +32 -13
  126. mistralai_azure/models/completionchunk.py +2 -2
  127. mistralai_azure/models/completionevent.py +1 -1
  128. mistralai_azure/models/completionresponsestreamchoice.py +9 -4
  129. mistralai_azure/models/deltamessage.py +14 -7
  130. mistralai_azure/models/function.py +2 -2
  131. mistralai_azure/models/functioncall.py +2 -1
  132. mistralai_azure/models/functionname.py +1 -1
  133. mistralai_azure/models/httpvalidationerror.py +0 -2
  134. mistralai_azure/models/responseformat.py +2 -2
  135. mistralai_azure/models/security.py +1 -2
  136. mistralai_azure/models/systemmessage.py +6 -6
  137. mistralai_azure/models/textchunk.py +9 -5
  138. mistralai_azure/models/tool.py +2 -2
  139. mistralai_azure/models/toolcall.py +2 -2
  140. mistralai_azure/models/toolchoice.py +2 -2
  141. mistralai_azure/models/toolmessage.py +2 -2
  142. mistralai_azure/models/usageinfo.py +1 -1
  143. mistralai_azure/models/usermessage.py +36 -5
  144. mistralai_azure/models/validationerror.py +2 -1
  145. mistralai_azure/sdkconfiguration.py +7 -7
  146. mistralai_azure/utils/__init__.py +8 -0
  147. mistralai_azure/utils/annotations.py +13 -2
  148. mistralai_azure/utils/serializers.py +25 -0
  149. mistralai_gcp/__init__.py +4 -0
  150. mistralai_gcp/_version.py +12 -0
  151. mistralai_gcp/chat.py +64 -30
  152. mistralai_gcp/fim.py +40 -30
  153. mistralai_gcp/models/__init__.py +9 -3
  154. mistralai_gcp/models/assistantmessage.py +11 -6
  155. mistralai_gcp/models/chatcompletionchoice.py +10 -5
  156. mistralai_gcp/models/chatcompletionrequest.py +32 -13
  157. mistralai_gcp/models/chatcompletionresponse.py +2 -2
  158. mistralai_gcp/models/chatcompletionstreamrequest.py +32 -13
  159. mistralai_gcp/models/completionchunk.py +2 -2
  160. mistralai_gcp/models/completionevent.py +1 -1
  161. mistralai_gcp/models/completionresponsestreamchoice.py +9 -4
  162. mistralai_gcp/models/deltamessage.py +14 -7
  163. mistralai_gcp/models/fimcompletionrequest.py +20 -13
  164. mistralai_gcp/models/fimcompletionresponse.py +2 -2
  165. mistralai_gcp/models/fimcompletionstreamrequest.py +20 -13
  166. mistralai_gcp/models/function.py +2 -2
  167. mistralai_gcp/models/functioncall.py +2 -1
  168. mistralai_gcp/models/functionname.py +1 -1
  169. mistralai_gcp/models/httpvalidationerror.py +0 -2
  170. mistralai_gcp/models/responseformat.py +2 -2
  171. mistralai_gcp/models/security.py +1 -2
  172. mistralai_gcp/models/systemmessage.py +6 -6
  173. mistralai_gcp/models/textchunk.py +9 -5
  174. mistralai_gcp/models/tool.py +2 -2
  175. mistralai_gcp/models/toolcall.py +2 -2
  176. mistralai_gcp/models/toolchoice.py +2 -2
  177. mistralai_gcp/models/toolmessage.py +2 -2
  178. mistralai_gcp/models/usageinfo.py +1 -1
  179. mistralai_gcp/models/usermessage.py +36 -5
  180. mistralai_gcp/models/validationerror.py +2 -1
  181. mistralai_gcp/sdk.py +20 -11
  182. mistralai_gcp/sdkconfiguration.py +7 -7
  183. mistralai_gcp/utils/__init__.py +8 -0
  184. mistralai_gcp/utils/annotations.py +13 -2
  185. mistralai_gcp/utils/serializers.py +25 -0
  186. mistralai-1.1.0.dist-info/RECORD +0 -254
  187. {mistralai-1.1.0.dist-info → mistralai-1.2.1.dist-info}/LICENSE +0 -0
mistralai_gcp/chat.py CHANGED
@@ -16,10 +16,9 @@ class Chat(BaseSDK):
  *,
  model: Nullable[str],
  messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = True,
  stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
  random_seed: OptionalNullable[int] = UNSET,
@@ -35,6 +34,9 @@ class Chat(BaseSDK):
  models.ChatCompletionStreamRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -45,16 +47,18 @@ class Chat(BaseSDK):
 
  :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream:
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -72,7 +76,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -84,6 +87,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  )
 
  req = self.build_request(
@@ -132,18 +138,21 @@ class Chat(BaseSDK):
  sentinel="[DONE]",
  )
  if utils.match_response(http_res, "422", "application/json"):
- data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+ http_res_text = utils.stream_to_text(http_res)
+ data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
 
@@ -152,10 +161,9 @@ class Chat(BaseSDK):
  *,
  model: Nullable[str],
  messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = True,
  stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
  random_seed: OptionalNullable[int] = UNSET,
@@ -171,6 +179,9 @@ class Chat(BaseSDK):
  models.ChatCompletionStreamRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -181,16 +192,18 @@ class Chat(BaseSDK):
 
  :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream:
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -208,7 +221,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -220,6 +232,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  )
 
  req = self.build_request_async(
@@ -268,18 +283,21 @@ class Chat(BaseSDK):
  sentinel="[DONE]",
  )
  if utils.match_response(http_res, "422", "application/json"):
- data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+ http_res_text = await utils.stream_to_text_async(http_res)
+ data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
 
@@ -291,10 +309,9 @@ class Chat(BaseSDK):
  List[models.ChatCompletionRequestMessages],
  List[models.ChatCompletionRequestMessagesTypedDict],
  ],
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = False,
  stop: Optional[
  Union[
@@ -315,6 +332,9 @@ class Chat(BaseSDK):
  models.ChatCompletionRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -323,16 +343,18 @@ class Chat(BaseSDK):
 
  :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -350,7 +372,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -364,6 +385,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  )
 
  req = self.build_request(
@@ -412,15 +436,17 @@ class Chat(BaseSDK):
  data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
 
@@ -432,10 +458,9 @@ class Chat(BaseSDK):
  List[models.ChatCompletionRequestMessages],
  List[models.ChatCompletionRequestMessagesTypedDict],
  ],
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = False,
  stop: Optional[
  Union[
@@ -456,6 +481,9 @@ class Chat(BaseSDK):
  models.ChatCompletionRequestToolChoiceTypedDict,
  ]
  ] = None,
+ presence_penalty: Optional[float] = 0,
+ frequency_penalty: Optional[float] = 0,
+ n: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -464,16 +492,18 @@ class Chat(BaseSDK):
 
  :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
  :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param response_format:
  :param tools:
  :param tool_choice:
+ :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+ :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+ :param n: Number of completions to return for each request, input tokens are only billed once.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -491,7 +521,6 @@ class Chat(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
@@ -505,6 +534,9 @@ class Chat(BaseSDK):
  tool_choice=utils.get_pydantic_model(
  tool_choice, Optional[models.ChatCompletionRequestToolChoice]
  ),
+ presence_penalty=presence_penalty,
+ frequency_penalty=frequency_penalty,
+ n=n,
  )
 
  req = self.build_request_async(
@@ -553,14 +585,16 @@ class Chat(BaseSDK):
  data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
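
Taken together, the chat.py changes above mean that `min_tokens` is no longer accepted by the chat methods, `temperature` no longer defaults to 0.7 (it is UNSET and the server applies a model-specific default), streamed error bodies are read via `utils.stream_to_text` / `utils.stream_to_text_async` rather than `http_res.text`, and `presence_penalty`, `frequency_penalty`, and `n` are new keyword arguments. A minimal usage sketch against the updated `complete` signature follows; the `MistralGoogleCloud` client name and its `region`/`project_id` constructor arguments are assumptions based on the package README, and the model ID and penalty values are placeholders, not anything stated in this diff.

from mistralai_gcp import MistralGoogleCloud

# Hypothetical client setup; adjust region/project to your own environment.
client = MistralGoogleCloud(region="europe-west4", project_id="my-gcp-project")

res = client.chat.complete(
    model="mistral-large-2407",  # placeholder model ID
    messages=[{"role": "user", "content": "Summarize this release in one sentence."}],
    # temperature is omitted: it is now OptionalNullable and defaults to UNSET,
    # so the server picks the model-specific default.
    presence_penalty=0.5,   # new: discourages reusing words and phrases
    frequency_penalty=0.5,  # new: penalizes tokens by how often they already appeared
    n=2,                    # new: number of completions to return (input tokens billed once)
)

for choice in res.choices:
    print(choice.message.content)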
mistralai_gcp/fim.py CHANGED
@@ -16,10 +16,9 @@ class Fim(BaseSDK):
  *,
  model: Nullable[str],
  prompt: str,
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = True,
  stop: Optional[
  Union[
@@ -29,6 +28,7 @@ class Fim(BaseSDK):
  ] = None,
  random_seed: OptionalNullable[int] = UNSET,
  suffix: OptionalNullable[str] = UNSET,
+ min_tokens: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -39,14 +39,14 @@ class Fim(BaseSDK):
 
  :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
  :param prompt: The text/code to complete.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream:
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+ :param min_tokens: The minimum number of tokens to generate in the completion.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -64,12 +64,12 @@ class Fim(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
  prompt=prompt,
  suffix=suffix,
+ min_tokens=min_tokens,
  )
 
  req = self.build_request(
@@ -118,18 +118,21 @@ class Fim(BaseSDK):
  sentinel="[DONE]",
  )
  if utils.match_response(http_res, "422", "application/json"):
- data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+ http_res_text = utils.stream_to_text(http_res)
+ data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
 
@@ -138,10 +141,9 @@ class Fim(BaseSDK):
  *,
  model: Nullable[str],
  prompt: str,
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = True,
  stop: Optional[
  Union[
@@ -151,6 +153,7 @@ class Fim(BaseSDK):
  ] = None,
  random_seed: OptionalNullable[int] = UNSET,
  suffix: OptionalNullable[str] = UNSET,
+ min_tokens: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -161,14 +164,14 @@ class Fim(BaseSDK):
 
  :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
  :param prompt: The text/code to complete.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream:
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+ :param min_tokens: The minimum number of tokens to generate in the completion.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -186,12 +189,12 @@ class Fim(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
  prompt=prompt,
  suffix=suffix,
+ min_tokens=min_tokens,
  )
 
  req = self.build_request_async(
@@ -240,18 +243,21 @@ class Fim(BaseSDK):
  sentinel="[DONE]",
  )
  if utils.match_response(http_res, "422", "application/json"):
- data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+ http_res_text = await utils.stream_to_text_async(http_res)
+ data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
 
@@ -260,10 +266,9 @@ class Fim(BaseSDK):
  *,
  model: Nullable[str],
  prompt: str,
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = False,
  stop: Optional[
  Union[
@@ -273,6 +278,7 @@ class Fim(BaseSDK):
  ] = None,
  random_seed: OptionalNullable[int] = UNSET,
  suffix: OptionalNullable[str] = UNSET,
+ min_tokens: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -283,14 +289,14 @@ class Fim(BaseSDK):
 
  :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
  :param prompt: The text/code to complete.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+ :param min_tokens: The minimum number of tokens to generate in the completion.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -308,12 +314,12 @@ class Fim(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
  prompt=prompt,
  suffix=suffix,
+ min_tokens=min_tokens,
  )
 
  req = self.build_request(
@@ -362,15 +368,17 @@ class Fim(BaseSDK):
  data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = utils.stream_to_text(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
 
@@ -379,10 +387,9 @@ class Fim(BaseSDK):
  *,
  model: Nullable[str],
  prompt: str,
- temperature: Optional[float] = 0.7,
+ temperature: OptionalNullable[float] = UNSET,
  top_p: Optional[float] = 1,
  max_tokens: OptionalNullable[int] = UNSET,
- min_tokens: OptionalNullable[int] = UNSET,
  stream: Optional[bool] = False,
  stop: Optional[
  Union[
@@ -392,6 +399,7 @@ class Fim(BaseSDK):
  ] = None,
  random_seed: OptionalNullable[int] = UNSET,
  suffix: OptionalNullable[str] = UNSET,
+ min_tokens: OptionalNullable[int] = UNSET,
  retries: OptionalNullable[utils.RetryConfig] = UNSET,
  server_url: Optional[str] = None,
  timeout_ms: Optional[int] = None,
@@ -402,14 +410,14 @@ class Fim(BaseSDK):
 
  :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
  :param prompt: The text/code to complete.
- :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+ :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
  :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
  :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
- :param min_tokens: The minimum number of tokens to generate in the completion.
  :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
  :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
  :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
  :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+ :param min_tokens: The minimum number of tokens to generate in the completion.
  :param retries: Override the default retry configuration for this method
  :param server_url: Override the default server URL for this method
  :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -427,12 +435,12 @@ class Fim(BaseSDK):
  temperature=temperature,
  top_p=top_p,
  max_tokens=max_tokens,
- min_tokens=min_tokens,
  stream=stream,
  stop=stop,
  random_seed=random_seed,
  prompt=prompt,
  suffix=suffix,
+ min_tokens=min_tokens,
  )
 
  req = self.build_request_async(
@@ -481,14 +489,16 @@ class Fim(BaseSDK):
  data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
  raise models.HTTPValidationError(data=data)
  if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
- "API error occurred", http_res.status_code, http_res.text, http_res
+ "API error occurred", http_res.status_code, http_res_text, http_res
  )
 
  content_type = http_res.headers.get("Content-Type")
+ http_res_text = await utils.stream_to_text_async(http_res)
  raise models.SDKError(
  f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
  http_res.status_code,
- http_res.text,
+ http_res_text,
  http_res,
  )
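
The fim.py changes mirror the chat.py ones (nullable `temperature`, streamed error bodies read through `utils.stream_to_text` / `utils.stream_to_text_async`), except that `min_tokens` is kept: it only moves to the end of the keyword list, after `suffix`. A hedged sketch of a fill-in-the-middle call against the updated signature follows; as above, the client construction is an assumption based on the package README, and the prompt/suffix values are illustrative only.

from mistralai_gcp import MistralGoogleCloud

# Hypothetical client setup; adjust region/project to your own environment.
client = MistralGoogleCloud(region="europe-west4", project_id="my-gcp-project")

res = client.fim.complete(
    model="codestral-2405",  # one of the FIM-compatible models listed in the docstring
    prompt="def fibonacci(n: int) -> int:\n",                   # code before the gap
    suffix="    return fibonacci(n - 1) + fibonacci(n - 2)\n",  # code after the gap; the model fills what sits between
    min_tokens=8,   # still supported; only its position in the signature changed
    max_tokens=64,
)

print(res.choices[0].message.content)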