mistralai 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mistralai/__init__.py +4 -0
- mistralai/_version.py +12 -0
- mistralai/agents.py +56 -22
- mistralai/batch.py +17 -0
- mistralai/chat.py +64 -30
- mistralai/classifiers.py +396 -0
- mistralai/embeddings.py +10 -6
- mistralai/files.py +252 -19
- mistralai/fim.py +40 -30
- mistralai/jobs.py +40 -20
- mistralai/mistral_jobs.py +733 -0
- mistralai/models/__init__.py +108 -18
- mistralai/models/agentscompletionrequest.py +27 -10
- mistralai/models/agentscompletionstreamrequest.py +27 -10
- mistralai/models/apiendpoint.py +9 -0
- mistralai/models/archiveftmodelout.py +11 -5
- mistralai/models/assistantmessage.py +11 -6
- mistralai/models/basemodelcard.py +22 -6
- mistralai/models/batcherror.py +17 -0
- mistralai/models/batchjobin.py +58 -0
- mistralai/models/batchjobout.py +117 -0
- mistralai/models/batchjobsout.py +30 -0
- mistralai/models/batchjobstatus.py +15 -0
- mistralai/models/chatclassificationrequest.py +104 -0
- mistralai/models/chatcompletionchoice.py +9 -4
- mistralai/models/chatcompletionrequest.py +32 -13
- mistralai/models/chatcompletionresponse.py +2 -2
- mistralai/models/chatcompletionstreamrequest.py +32 -13
- mistralai/models/checkpointout.py +1 -1
- mistralai/models/classificationobject.py +21 -0
- mistralai/models/classificationrequest.py +59 -0
- mistralai/models/classificationresponse.py +21 -0
- mistralai/models/completionchunk.py +2 -2
- mistralai/models/completionevent.py +1 -1
- mistralai/models/completionresponsestreamchoice.py +11 -5
- mistralai/models/delete_model_v1_models_model_id_deleteop.py +1 -2
- mistralai/models/deletefileout.py +1 -1
- mistralai/models/deletemodelout.py +2 -2
- mistralai/models/deltamessage.py +14 -7
- mistralai/models/detailedjobout.py +11 -5
- mistralai/models/embeddingrequest.py +5 -5
- mistralai/models/embeddingresponse.py +2 -1
- mistralai/models/embeddingresponsedata.py +2 -2
- mistralai/models/eventout.py +2 -2
- mistralai/models/filepurpose.py +8 -0
- mistralai/models/files_api_routes_delete_fileop.py +1 -2
- mistralai/models/files_api_routes_download_fileop.py +16 -0
- mistralai/models/files_api_routes_list_filesop.py +96 -0
- mistralai/models/files_api_routes_retrieve_fileop.py +1 -2
- mistralai/models/files_api_routes_upload_fileop.py +9 -9
- mistralai/models/fileschema.py +7 -21
- mistralai/models/fimcompletionrequest.py +20 -13
- mistralai/models/fimcompletionresponse.py +2 -2
- mistralai/models/fimcompletionstreamrequest.py +20 -13
- mistralai/models/ftmodelcapabilitiesout.py +2 -2
- mistralai/models/ftmodelcard.py +24 -6
- mistralai/models/ftmodelout.py +9 -5
- mistralai/models/function.py +2 -2
- mistralai/models/functioncall.py +2 -1
- mistralai/models/functionname.py +1 -1
- mistralai/models/githubrepositoryin.py +11 -5
- mistralai/models/githubrepositoryout.py +11 -5
- mistralai/models/httpvalidationerror.py +0 -2
- mistralai/models/imageurl.py +1 -2
- mistralai/models/imageurlchunk.py +11 -5
- mistralai/models/jobin.py +2 -2
- mistralai/models/jobmetadataout.py +1 -2
- mistralai/models/jobout.py +10 -5
- mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
- mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +2 -2
- mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +1 -2
- mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +1 -2
- mistralai/models/jobsout.py +9 -5
- mistralai/models/legacyjobmetadataout.py +12 -5
- mistralai/models/listfilesout.py +5 -1
- mistralai/models/metricout.py +1 -2
- mistralai/models/modelcapabilities.py +2 -2
- mistralai/models/modellist.py +2 -2
- mistralai/models/responseformat.py +2 -2
- mistralai/models/retrieve_model_v1_models_model_id_getop.py +2 -2
- mistralai/models/retrievefileout.py +10 -21
- mistralai/models/sampletype.py +6 -2
- mistralai/models/security.py +2 -2
- mistralai/models/source.py +3 -2
- mistralai/models/systemmessage.py +6 -6
- mistralai/models/textchunk.py +9 -5
- mistralai/models/tool.py +2 -2
- mistralai/models/toolcall.py +2 -2
- mistralai/models/toolchoice.py +2 -2
- mistralai/models/toolmessage.py +2 -2
- mistralai/models/trainingfile.py +2 -2
- mistralai/models/trainingparameters.py +7 -2
- mistralai/models/trainingparametersin.py +7 -2
- mistralai/models/unarchiveftmodelout.py +11 -5
- mistralai/models/updateftmodelin.py +1 -2
- mistralai/models/uploadfileout.py +7 -21
- mistralai/models/usageinfo.py +1 -1
- mistralai/models/usermessage.py +36 -5
- mistralai/models/validationerror.py +2 -1
- mistralai/models/wandbintegration.py +11 -5
- mistralai/models/wandbintegrationout.py +12 -6
- mistralai/models_.py +48 -24
- mistralai/sdk.py +7 -0
- mistralai/sdkconfiguration.py +7 -7
- mistralai/utils/__init__.py +8 -0
- mistralai/utils/annotations.py +13 -2
- mistralai/utils/serializers.py +25 -0
- {mistralai-1.1.0.dist-info → mistralai-1.2.0.dist-info}/METADATA +90 -14
- mistralai-1.2.0.dist-info/RECORD +276 -0
- {mistralai-1.1.0.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
- mistralai_azure/__init__.py +4 -0
- mistralai_azure/_version.py +12 -0
- mistralai_azure/chat.py +64 -30
- mistralai_azure/models/__init__.py +9 -3
- mistralai_azure/models/assistantmessage.py +11 -6
- mistralai_azure/models/chatcompletionchoice.py +10 -5
- mistralai_azure/models/chatcompletionrequest.py +32 -13
- mistralai_azure/models/chatcompletionresponse.py +2 -2
- mistralai_azure/models/chatcompletionstreamrequest.py +32 -13
- mistralai_azure/models/completionchunk.py +2 -2
- mistralai_azure/models/completionevent.py +1 -1
- mistralai_azure/models/completionresponsestreamchoice.py +9 -4
- mistralai_azure/models/deltamessage.py +14 -7
- mistralai_azure/models/function.py +2 -2
- mistralai_azure/models/functioncall.py +2 -1
- mistralai_azure/models/functionname.py +1 -1
- mistralai_azure/models/httpvalidationerror.py +0 -2
- mistralai_azure/models/responseformat.py +2 -2
- mistralai_azure/models/security.py +1 -2
- mistralai_azure/models/systemmessage.py +6 -6
- mistralai_azure/models/textchunk.py +9 -5
- mistralai_azure/models/tool.py +2 -2
- mistralai_azure/models/toolcall.py +2 -2
- mistralai_azure/models/toolchoice.py +2 -2
- mistralai_azure/models/toolmessage.py +2 -2
- mistralai_azure/models/usageinfo.py +1 -1
- mistralai_azure/models/usermessage.py +36 -5
- mistralai_azure/models/validationerror.py +2 -1
- mistralai_azure/sdkconfiguration.py +7 -7
- mistralai_azure/utils/__init__.py +8 -0
- mistralai_azure/utils/annotations.py +13 -2
- mistralai_azure/utils/serializers.py +25 -0
- mistralai_gcp/__init__.py +4 -0
- mistralai_gcp/_version.py +12 -0
- mistralai_gcp/chat.py +64 -30
- mistralai_gcp/fim.py +40 -30
- mistralai_gcp/models/__init__.py +9 -3
- mistralai_gcp/models/assistantmessage.py +11 -6
- mistralai_gcp/models/chatcompletionchoice.py +10 -5
- mistralai_gcp/models/chatcompletionrequest.py +32 -13
- mistralai_gcp/models/chatcompletionresponse.py +2 -2
- mistralai_gcp/models/chatcompletionstreamrequest.py +32 -13
- mistralai_gcp/models/completionchunk.py +2 -2
- mistralai_gcp/models/completionevent.py +1 -1
- mistralai_gcp/models/completionresponsestreamchoice.py +9 -4
- mistralai_gcp/models/deltamessage.py +14 -7
- mistralai_gcp/models/fimcompletionrequest.py +20 -13
- mistralai_gcp/models/fimcompletionresponse.py +2 -2
- mistralai_gcp/models/fimcompletionstreamrequest.py +20 -13
- mistralai_gcp/models/function.py +2 -2
- mistralai_gcp/models/functioncall.py +2 -1
- mistralai_gcp/models/functionname.py +1 -1
- mistralai_gcp/models/httpvalidationerror.py +0 -2
- mistralai_gcp/models/responseformat.py +2 -2
- mistralai_gcp/models/security.py +1 -2
- mistralai_gcp/models/systemmessage.py +6 -6
- mistralai_gcp/models/textchunk.py +9 -5
- mistralai_gcp/models/tool.py +2 -2
- mistralai_gcp/models/toolcall.py +2 -2
- mistralai_gcp/models/toolchoice.py +2 -2
- mistralai_gcp/models/toolmessage.py +2 -2
- mistralai_gcp/models/usageinfo.py +1 -1
- mistralai_gcp/models/usermessage.py +36 -5
- mistralai_gcp/models/validationerror.py +2 -1
- mistralai_gcp/sdkconfiguration.py +7 -7
- mistralai_gcp/utils/__init__.py +8 -0
- mistralai_gcp/utils/annotations.py +13 -2
- mistralai_gcp/utils/serializers.py +25 -0
- mistralai-1.1.0.dist-info/RECORD +0 -254
- {mistralai-1.1.0.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
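Beyond the file-by-file deltas shown below, the listing above points at new surface area in 1.2.0: a `classifiers` module (with the new `classificationrequest`, `classificationresponse`, and `chatclassificationrequest` models), a batch-jobs layer (`batch.py`, `mistral_jobs.py`, and the `batchjob*` models), and expanded file operations (download, list filters). As a rough orientation, the sketch below shows how the new classifier endpoint might be called from the top-level client; the `moderate` method name, the `inputs` parameter, and the model ID are assumptions inferred from the new model files, not something this diff confirms.

```python
# Hypothetical sketch only: the classifiers module is new in 1.2.0, but this
# diff does not show its method signatures. `moderate`, `inputs`, and the
# model ID below are assumptions.
import os

from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

# Presumably maps onto the new ClassificationRequest/ClassificationResponse models.
response = client.classifiers.moderate(
    model="mistral-moderation-latest",  # assumed model ID
    inputs=["...text to classify..."],
)
print(response)
```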
mistralai_gcp/chat.py
CHANGED
@@ -16,10 +16,9 @@ class Chat(BaseSDK):
         *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
@@ -35,6 +34,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionStreamRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -45,16 +47,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -72,7 +76,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -84,6 +87,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )

         req = self.build_request(
@@ -132,18 +138,21 @@ class Chat(BaseSDK):
                 sentinel="[DONE]",
             )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -152,10 +161,9 @@ class Chat(BaseSDK):
         *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
@@ -171,6 +179,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionStreamRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -181,16 +192,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -208,7 +221,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -220,6 +232,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )

         req = self.build_request_async(
@@ -268,18 +283,21 @@ class Chat(BaseSDK):
                 sentinel="[DONE]",
             )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -291,10 +309,9 @@ class Chat(BaseSDK):
             List[models.ChatCompletionRequestMessages],
             List[models.ChatCompletionRequestMessagesTypedDict],
         ],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[
             Union[
@@ -315,6 +332,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -323,16 +343,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -350,7 +372,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -364,6 +385,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )

         req = self.build_request(
@@ -412,15 +436,17 @@ class Chat(BaseSDK):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -432,10 +458,9 @@ class Chat(BaseSDK):
             List[models.ChatCompletionRequestMessages],
             List[models.ChatCompletionRequestMessagesTypedDict],
         ],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[
             Union[
@@ -456,6 +481,9 @@ class Chat(BaseSDK):
                 models.ChatCompletionRequestToolChoiceTypedDict,
             ]
         ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -464,16 +492,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param response_format:
         :param tools:
         :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -491,7 +521,6 @@ class Chat(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
@@ -505,6 +534,9 @@ class Chat(BaseSDK):
             tool_choice=utils.get_pydantic_model(
                 tool_choice, Optional[models.ChatCompletionRequestToolChoice]
             ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )

         req = self.build_request_async(
@@ -553,14 +585,16 @@ class Chat(BaseSDK):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )
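Taken together, the chat changes are: `temperature` becomes `OptionalNullable[float]` defaulting to `UNSET`, `min_tokens` is dropped from the chat signatures, three sampling parameters (`presence_penalty`, `frequency_penalty`, `n`) are added, and `SDKError` is now raised with the response text and the raw response object. The same edits appear in `mistralai/chat.py` per the file list, so a minimal sketch against the core client looks like the following; parameter names come from the diff, while the `body` attribute on `SDKError` follows the usual Speakeasy convention and is an assumption here.

```python
# Minimal sketch of a 1.2.0 chat call using the newly added sampling
# parameters; values are illustrative.
import os

from mistralai import Mistral, models

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

try:
    res = client.chat.complete(
        model="mistral-small-latest",
        messages=[{"role": "user", "content": "Give me three rhymes for 'code'."}],
        # temperature is now optional/nullable; leaving it unset defers to the model default.
        presence_penalty=0.5,   # penalize reusing words or phrases that already appeared
        frequency_penalty=0.5,  # penalize repetition in proportion to observed frequency
        n=2,                    # two completions per request; input tokens billed once
    )
    for choice in res.choices:
        print(choice.message.content)
except models.SDKError as err:
    # As of 1.2.0 the error carries the response text (third constructor
    # argument in the diff); `body` as the attribute name is an assumption.
    print(err.status_code, err.body)
```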
mistralai_gcp/fim.py
CHANGED
@@ -16,10 +16,9 @@ class Fim(BaseSDK):
         *,
         model: Nullable[str],
         prompt: str,
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[
             Union[
@@ -29,6 +28,7 @@ class Fim(BaseSDK):
         ] = None,
         random_seed: OptionalNullable[int] = UNSET,
         suffix: OptionalNullable[str] = UNSET,
+        min_tokens: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -39,14 +39,14 @@ class Fim(BaseSDK):

         :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
         :param prompt: The text/code to complete.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -64,12 +64,12 @@ class Fim(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             prompt=prompt,
             suffix=suffix,
+            min_tokens=min_tokens,
         )

         req = self.build_request(
@@ -118,18 +118,21 @@ class Fim(BaseSDK):
                 sentinel="[DONE]",
             )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -138,10 +141,9 @@ class Fim(BaseSDK):
         *,
         model: Nullable[str],
         prompt: str,
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[
             Union[
@@ -151,6 +153,7 @@ class Fim(BaseSDK):
         ] = None,
         random_seed: OptionalNullable[int] = UNSET,
         suffix: OptionalNullable[str] = UNSET,
+        min_tokens: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -161,14 +164,14 @@ class Fim(BaseSDK):

         :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
         :param prompt: The text/code to complete.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -186,12 +189,12 @@ class Fim(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             prompt=prompt,
             suffix=suffix,
+            min_tokens=min_tokens,
         )

         req = self.build_request_async(
@@ -240,18 +243,21 @@ class Fim(BaseSDK):
                 sentinel="[DONE]",
             )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -260,10 +266,9 @@ class Fim(BaseSDK):
         *,
         model: Nullable[str],
         prompt: str,
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[
             Union[
@@ -273,6 +278,7 @@ class Fim(BaseSDK):
         ] = None,
         random_seed: OptionalNullable[int] = UNSET,
         suffix: OptionalNullable[str] = UNSET,
+        min_tokens: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -283,14 +289,14 @@ class Fim(BaseSDK):

         :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
         :param prompt: The text/code to complete.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -308,12 +314,12 @@ class Fim(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             prompt=prompt,
             suffix=suffix,
+            min_tokens=min_tokens,
         )

         req = self.build_request(
@@ -362,15 +368,17 @@ class Fim(BaseSDK):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = utils.stream_to_text(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )

@@ -379,10 +387,9 @@ class Fim(BaseSDK):
         *,
         model: Nullable[str],
         prompt: str,
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[
             Union[
@@ -392,6 +399,7 @@ class Fim(BaseSDK):
         ] = None,
         random_seed: OptionalNullable[int] = UNSET,
         suffix: OptionalNullable[str] = UNSET,
+        min_tokens: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -402,14 +410,14 @@ class Fim(BaseSDK):

         :param model: ID of the model to use. Only compatible for now with: - `codestral-2405` - `codestral-latest`
         :param prompt: The text/code to complete.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
         :param suffix: Optional text/code that adds more context for the model. When given a `prompt` and a `suffix` the model will fill what is between them. When `suffix` is not provided, the model will simply execute completion starting with `prompt`.
+        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -427,12 +435,12 @@ class Fim(BaseSDK):
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             prompt=prompt,
             suffix=suffix,
+            min_tokens=min_tokens,
         )

         req = self.build_request_async(
@@ -481,14 +489,16 @@ class Fim(BaseSDK):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
         if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
             raise models.SDKError(
-                "API error occurred", http_res.status_code,
+                "API error occurred", http_res.status_code, http_res_text, http_res
             )

         content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
         raise models.SDKError(
             f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
             http_res.status_code,
-
+            http_res_text,
             http_res,
         )
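The FIM changes mirror the chat ones minus the new sampling parameters: `temperature` now defaults to `UNSET`, and `min_tokens` is reordered to the end of the signature and request body (after `suffix`) rather than removed. Since `mistralai/fim.py` changed the same way per the file list, a small fill-in-the-middle sketch against the core client follows; the model IDs come from the docstring above, and the response shape is assumed to mirror chat completions.

```python
# Minimal fill-in-the-middle sketch against the reordered 1.2.0 fim signature.
import os

from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

res = client.fim.complete(
    model="codestral-latest",          # or "codestral-2405", per the docstring
    prompt="def fibonacci(n: int) -> int:\n",
    suffix="\nprint(fibonacci(10))",   # the model fills in between prompt and suffix
    max_tokens=64,
    min_tokens=1,                      # still supported; now ordered after suffix
)
print(res.choices[0].message.content)  # assumed to mirror the chat response shape
```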