mistralai 1.0.3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mistralai/__init__.py +4 -0
- mistralai/_hooks/sdkhooks.py +23 -4
- mistralai/_hooks/types.py +27 -9
- mistralai/_version.py +12 -0
- mistralai/agents.py +334 -164
- mistralai/basesdk.py +90 -5
- mistralai/batch.py +17 -0
- mistralai/chat.py +316 -166
- mistralai/classifiers.py +396 -0
- mistralai/embeddings.py +79 -55
- mistralai/files.py +487 -194
- mistralai/fim.py +206 -132
- mistralai/fine_tuning.py +3 -2
- mistralai/jobs.py +392 -263
- mistralai/mistral_jobs.py +733 -0
- mistralai/models/__init__.py +593 -50
- mistralai/models/agentscompletionrequest.py +70 -17
- mistralai/models/agentscompletionstreamrequest.py +72 -17
- mistralai/models/apiendpoint.py +9 -0
- mistralai/models/archiveftmodelout.py +15 -5
- mistralai/models/assistantmessage.py +22 -10
- mistralai/models/{modelcard.py → basemodelcard.py} +53 -14
- mistralai/models/batcherror.py +17 -0
- mistralai/models/batchjobin.py +58 -0
- mistralai/models/batchjobout.py +117 -0
- mistralai/models/batchjobsout.py +30 -0
- mistralai/models/batchjobstatus.py +15 -0
- mistralai/models/chatclassificationrequest.py +104 -0
- mistralai/models/chatcompletionchoice.py +13 -6
- mistralai/models/chatcompletionrequest.py +86 -21
- mistralai/models/chatcompletionresponse.py +8 -4
- mistralai/models/chatcompletionstreamrequest.py +88 -21
- mistralai/models/checkpointout.py +4 -3
- mistralai/models/classificationobject.py +21 -0
- mistralai/models/classificationrequest.py +59 -0
- mistralai/models/classificationresponse.py +21 -0
- mistralai/models/completionchunk.py +12 -5
- mistralai/models/completionevent.py +2 -3
- mistralai/models/completionresponsestreamchoice.py +22 -8
- mistralai/models/contentchunk.py +13 -10
- mistralai/models/delete_model_v1_models_model_id_deleteop.py +5 -5
- mistralai/models/deletefileout.py +4 -3
- mistralai/models/deletemodelout.py +5 -4
- mistralai/models/deltamessage.py +23 -11
- mistralai/models/detailedjobout.py +70 -12
- mistralai/models/embeddingrequest.py +14 -9
- mistralai/models/embeddingresponse.py +7 -3
- mistralai/models/embeddingresponsedata.py +5 -4
- mistralai/models/eventout.py +11 -6
- mistralai/models/filepurpose.py +8 -0
- mistralai/models/files_api_routes_delete_fileop.py +5 -5
- mistralai/models/files_api_routes_download_fileop.py +16 -0
- mistralai/models/files_api_routes_list_filesop.py +96 -0
- mistralai/models/files_api_routes_retrieve_fileop.py +5 -5
- mistralai/models/files_api_routes_upload_fileop.py +33 -14
- mistralai/models/fileschema.py +22 -15
- mistralai/models/fimcompletionrequest.py +44 -16
- mistralai/models/fimcompletionresponse.py +8 -4
- mistralai/models/fimcompletionstreamrequest.py +44 -16
- mistralai/models/finetuneablemodel.py +7 -1
- mistralai/models/ftmodelcapabilitiesout.py +6 -4
- mistralai/models/ftmodelcard.py +121 -0
- mistralai/models/ftmodelout.py +39 -9
- mistralai/models/function.py +5 -4
- mistralai/models/functioncall.py +4 -3
- mistralai/models/functionname.py +17 -0
- mistralai/models/githubrepositoryin.py +24 -7
- mistralai/models/githubrepositoryout.py +24 -7
- mistralai/models/httpvalidationerror.py +1 -3
- mistralai/models/imageurl.py +47 -0
- mistralai/models/imageurlchunk.py +38 -0
- mistralai/models/jobin.py +24 -7
- mistralai/models/jobmetadataout.py +32 -8
- mistralai/models/jobout.py +65 -12
- mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
- mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_create_fine_tuning_jobop.py +3 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +85 -18
- mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +10 -6
- mistralai/models/jobsout.py +13 -5
- mistralai/models/legacyjobmetadataout.py +55 -9
- mistralai/models/listfilesout.py +7 -3
- mistralai/models/metricout.py +12 -8
- mistralai/models/modelcapabilities.py +9 -4
- mistralai/models/modellist.py +21 -7
- mistralai/models/responseformat.py +7 -8
- mistralai/models/responseformats.py +8 -0
- mistralai/models/retrieve_model_v1_models_model_id_getop.py +25 -6
- mistralai/models/retrievefileout.py +25 -15
- mistralai/models/sampletype.py +6 -2
- mistralai/models/security.py +14 -5
- mistralai/models/source.py +3 -2
- mistralai/models/systemmessage.py +10 -9
- mistralai/models/textchunk.py +14 -5
- mistralai/models/tool.py +10 -9
- mistralai/models/toolcall.py +10 -8
- mistralai/models/toolchoice.py +29 -0
- mistralai/models/toolchoiceenum.py +7 -0
- mistralai/models/toolmessage.py +13 -6
- mistralai/models/tooltypes.py +8 -0
- mistralai/models/trainingfile.py +4 -4
- mistralai/models/trainingparameters.py +34 -8
- mistralai/models/trainingparametersin.py +36 -10
- mistralai/models/unarchiveftmodelout.py +15 -5
- mistralai/models/updateftmodelin.py +9 -6
- mistralai/models/uploadfileout.py +22 -15
- mistralai/models/usageinfo.py +4 -3
- mistralai/models/usermessage.py +42 -10
- mistralai/models/validationerror.py +5 -3
- mistralai/models/wandbintegration.py +23 -7
- mistralai/models/wandbintegrationout.py +23 -8
- mistralai/models_.py +416 -294
- mistralai/sdk.py +31 -19
- mistralai/sdkconfiguration.py +9 -11
- mistralai/utils/__init__.py +14 -1
- mistralai/utils/annotations.py +13 -2
- mistralai/utils/logger.py +4 -1
- mistralai/utils/retries.py +2 -1
- mistralai/utils/security.py +13 -6
- mistralai/utils/serializers.py +25 -0
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/METADATA +171 -66
- mistralai-1.2.0.dist-info/RECORD +276 -0
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
- mistralai_azure/__init__.py +4 -0
- mistralai_azure/_hooks/sdkhooks.py +23 -4
- mistralai_azure/_hooks/types.py +27 -9
- mistralai_azure/_version.py +12 -0
- mistralai_azure/basesdk.py +91 -6
- mistralai_azure/chat.py +308 -166
- mistralai_azure/models/__init__.py +164 -16
- mistralai_azure/models/assistantmessage.py +29 -11
- mistralai_azure/models/chatcompletionchoice.py +15 -6
- mistralai_azure/models/chatcompletionrequest.py +94 -22
- mistralai_azure/models/chatcompletionresponse.py +8 -4
- mistralai_azure/models/chatcompletionstreamrequest.py +96 -22
- mistralai_azure/models/completionchunk.py +12 -5
- mistralai_azure/models/completionevent.py +2 -3
- mistralai_azure/models/completionresponsestreamchoice.py +19 -8
- mistralai_azure/models/contentchunk.py +4 -11
- mistralai_azure/models/deltamessage.py +30 -12
- mistralai_azure/models/function.py +5 -4
- mistralai_azure/models/functioncall.py +4 -3
- mistralai_azure/models/functionname.py +17 -0
- mistralai_azure/models/httpvalidationerror.py +1 -3
- mistralai_azure/models/responseformat.py +7 -8
- mistralai_azure/models/responseformats.py +8 -0
- mistralai_azure/models/security.py +13 -5
- mistralai_azure/models/systemmessage.py +10 -9
- mistralai_azure/models/textchunk.py +14 -5
- mistralai_azure/models/tool.py +10 -9
- mistralai_azure/models/toolcall.py +10 -8
- mistralai_azure/models/toolchoice.py +29 -0
- mistralai_azure/models/toolchoiceenum.py +7 -0
- mistralai_azure/models/toolmessage.py +20 -7
- mistralai_azure/models/tooltypes.py +8 -0
- mistralai_azure/models/usageinfo.py +4 -3
- mistralai_azure/models/usermessage.py +42 -10
- mistralai_azure/models/validationerror.py +5 -3
- mistralai_azure/sdkconfiguration.py +9 -11
- mistralai_azure/utils/__init__.py +16 -3
- mistralai_azure/utils/annotations.py +13 -2
- mistralai_azure/utils/forms.py +10 -9
- mistralai_azure/utils/headers.py +8 -8
- mistralai_azure/utils/logger.py +6 -0
- mistralai_azure/utils/queryparams.py +16 -14
- mistralai_azure/utils/retries.py +2 -1
- mistralai_azure/utils/security.py +12 -6
- mistralai_azure/utils/serializers.py +42 -8
- mistralai_azure/utils/url.py +13 -8
- mistralai_azure/utils/values.py +6 -0
- mistralai_gcp/__init__.py +4 -0
- mistralai_gcp/_hooks/sdkhooks.py +23 -4
- mistralai_gcp/_hooks/types.py +27 -9
- mistralai_gcp/_version.py +12 -0
- mistralai_gcp/basesdk.py +91 -6
- mistralai_gcp/chat.py +308 -166
- mistralai_gcp/fim.py +198 -132
- mistralai_gcp/models/__init__.py +186 -18
- mistralai_gcp/models/assistantmessage.py +29 -11
- mistralai_gcp/models/chatcompletionchoice.py +15 -6
- mistralai_gcp/models/chatcompletionrequest.py +91 -22
- mistralai_gcp/models/chatcompletionresponse.py +8 -4
- mistralai_gcp/models/chatcompletionstreamrequest.py +93 -22
- mistralai_gcp/models/completionchunk.py +12 -5
- mistralai_gcp/models/completionevent.py +2 -3
- mistralai_gcp/models/completionresponsestreamchoice.py +19 -8
- mistralai_gcp/models/contentchunk.py +4 -11
- mistralai_gcp/models/deltamessage.py +30 -12
- mistralai_gcp/models/fimcompletionrequest.py +51 -17
- mistralai_gcp/models/fimcompletionresponse.py +8 -4
- mistralai_gcp/models/fimcompletionstreamrequest.py +51 -17
- mistralai_gcp/models/function.py +5 -4
- mistralai_gcp/models/functioncall.py +4 -3
- mistralai_gcp/models/functionname.py +17 -0
- mistralai_gcp/models/httpvalidationerror.py +1 -3
- mistralai_gcp/models/responseformat.py +7 -8
- mistralai_gcp/models/responseformats.py +8 -0
- mistralai_gcp/models/security.py +13 -5
- mistralai_gcp/models/systemmessage.py +10 -9
- mistralai_gcp/models/textchunk.py +14 -5
- mistralai_gcp/models/tool.py +10 -9
- mistralai_gcp/models/toolcall.py +10 -8
- mistralai_gcp/models/toolchoice.py +29 -0
- mistralai_gcp/models/toolchoiceenum.py +7 -0
- mistralai_gcp/models/toolmessage.py +20 -7
- mistralai_gcp/models/tooltypes.py +8 -0
- mistralai_gcp/models/usageinfo.py +4 -3
- mistralai_gcp/models/usermessage.py +42 -10
- mistralai_gcp/models/validationerror.py +5 -3
- mistralai_gcp/sdk.py +6 -7
- mistralai_gcp/sdkconfiguration.py +9 -11
- mistralai_gcp/utils/__init__.py +16 -3
- mistralai_gcp/utils/annotations.py +13 -2
- mistralai_gcp/utils/forms.py +10 -9
- mistralai_gcp/utils/headers.py +8 -8
- mistralai_gcp/utils/logger.py +6 -0
- mistralai_gcp/utils/queryparams.py +16 -14
- mistralai_gcp/utils/retries.py +2 -1
- mistralai_gcp/utils/security.py +12 -6
- mistralai_gcp/utils/serializers.py +42 -8
- mistralai_gcp/utils/url.py +13 -8
- mistralai_gcp/utils/values.py +6 -0
- mistralai-1.0.3.dist-info/RECORD +0 -236
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
mistralai_azure/chat.py
CHANGED
@@ -7,24 +7,36 @@ from mistralai_azure.types import OptionalNullable, UNSET
 from mistralai_azure.utils import eventstreaming
 from typing import Any, AsyncGenerator, Generator, List, Optional, Union

+
 class Chat(BaseSDK):
     r"""Chat Completion API."""
-
-
+
     def stream(
-        self,
+        self,
+        *,
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -36,16 +48,18 @@ class Chat(BaseSDK):

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -55,26 +69,32 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/chat/completions#stream",
@@ -87,60 +107,84 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events(
+            return eventstreaming.stream_events(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )

-
-
     async def stream_async(
-        self,
+        self,
+        *,
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -152,16 +196,18 @@ class Chat(BaseSDK):

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -171,27 +217,33 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.
+
+        req = self.build_request_async(
             method="POST",
             path="/chat/completions#stream",
             base_url=base_url,
@@ -203,60 +255,92 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events_async(
+            return eventstreaming.stream_events_async(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )

-
-
     def complete(
-        self,
-
+        self,
+        *,
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
-        stop: Optional[
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -266,16 +350,18 @@ class Chat(BaseSDK):

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -285,26 +371,34 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(
-
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/chat/completions",
@@ -317,59 +411,88 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )

-
-
     async def complete_async(
-        self,
-
+        self,
+        *,
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
         model: OptionalNullable[str] = "azureai",
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
-        stop: Optional[
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -379,16 +502,18 @@ class Chat(BaseSDK):

         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -398,27 +523,35 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(
-
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.
+
+        req = self.build_request_async(
             method="POST",
             path="/chat/completions",
             base_url=base_url,
@@ -430,41 +563,50 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )

-
+        content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )