mistralai 1.0.3 (py3-none-any.whl) → 1.2.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mistralai/__init__.py +4 -0
- mistralai/_hooks/sdkhooks.py +23 -4
- mistralai/_hooks/types.py +27 -9
- mistralai/_version.py +12 -0
- mistralai/agents.py +334 -164
- mistralai/basesdk.py +90 -5
- mistralai/batch.py +17 -0
- mistralai/chat.py +316 -166
- mistralai/classifiers.py +396 -0
- mistralai/embeddings.py +79 -55
- mistralai/files.py +487 -194
- mistralai/fim.py +206 -132
- mistralai/fine_tuning.py +3 -2
- mistralai/jobs.py +392 -263
- mistralai/mistral_jobs.py +733 -0
- mistralai/models/__init__.py +593 -50
- mistralai/models/agentscompletionrequest.py +70 -17
- mistralai/models/agentscompletionstreamrequest.py +72 -17
- mistralai/models/apiendpoint.py +9 -0
- mistralai/models/archiveftmodelout.py +15 -5
- mistralai/models/assistantmessage.py +22 -10
- mistralai/models/{modelcard.py → basemodelcard.py} +53 -14
- mistralai/models/batcherror.py +17 -0
- mistralai/models/batchjobin.py +58 -0
- mistralai/models/batchjobout.py +117 -0
- mistralai/models/batchjobsout.py +30 -0
- mistralai/models/batchjobstatus.py +15 -0
- mistralai/models/chatclassificationrequest.py +104 -0
- mistralai/models/chatcompletionchoice.py +13 -6
- mistralai/models/chatcompletionrequest.py +86 -21
- mistralai/models/chatcompletionresponse.py +8 -4
- mistralai/models/chatcompletionstreamrequest.py +88 -21
- mistralai/models/checkpointout.py +4 -3
- mistralai/models/classificationobject.py +21 -0
- mistralai/models/classificationrequest.py +59 -0
- mistralai/models/classificationresponse.py +21 -0
- mistralai/models/completionchunk.py +12 -5
- mistralai/models/completionevent.py +2 -3
- mistralai/models/completionresponsestreamchoice.py +22 -8
- mistralai/models/contentchunk.py +13 -10
- mistralai/models/delete_model_v1_models_model_id_deleteop.py +5 -5
- mistralai/models/deletefileout.py +4 -3
- mistralai/models/deletemodelout.py +5 -4
- mistralai/models/deltamessage.py +23 -11
- mistralai/models/detailedjobout.py +70 -12
- mistralai/models/embeddingrequest.py +14 -9
- mistralai/models/embeddingresponse.py +7 -3
- mistralai/models/embeddingresponsedata.py +5 -4
- mistralai/models/eventout.py +11 -6
- mistralai/models/filepurpose.py +8 -0
- mistralai/models/files_api_routes_delete_fileop.py +5 -5
- mistralai/models/files_api_routes_download_fileop.py +16 -0
- mistralai/models/files_api_routes_list_filesop.py +96 -0
- mistralai/models/files_api_routes_retrieve_fileop.py +5 -5
- mistralai/models/files_api_routes_upload_fileop.py +33 -14
- mistralai/models/fileschema.py +22 -15
- mistralai/models/fimcompletionrequest.py +44 -16
- mistralai/models/fimcompletionresponse.py +8 -4
- mistralai/models/fimcompletionstreamrequest.py +44 -16
- mistralai/models/finetuneablemodel.py +7 -1
- mistralai/models/ftmodelcapabilitiesout.py +6 -4
- mistralai/models/ftmodelcard.py +121 -0
- mistralai/models/ftmodelout.py +39 -9
- mistralai/models/function.py +5 -4
- mistralai/models/functioncall.py +4 -3
- mistralai/models/functionname.py +17 -0
- mistralai/models/githubrepositoryin.py +24 -7
- mistralai/models/githubrepositoryout.py +24 -7
- mistralai/models/httpvalidationerror.py +1 -3
- mistralai/models/imageurl.py +47 -0
- mistralai/models/imageurlchunk.py +38 -0
- mistralai/models/jobin.py +24 -7
- mistralai/models/jobmetadataout.py +32 -8
- mistralai/models/jobout.py +65 -12
- mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
- mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_create_fine_tuning_jobop.py +3 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +85 -18
- mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +10 -6
- mistralai/models/jobsout.py +13 -5
- mistralai/models/legacyjobmetadataout.py +55 -9
- mistralai/models/listfilesout.py +7 -3
- mistralai/models/metricout.py +12 -8
- mistralai/models/modelcapabilities.py +9 -4
- mistralai/models/modellist.py +21 -7
- mistralai/models/responseformat.py +7 -8
- mistralai/models/responseformats.py +8 -0
- mistralai/models/retrieve_model_v1_models_model_id_getop.py +25 -6
- mistralai/models/retrievefileout.py +25 -15
- mistralai/models/sampletype.py +6 -2
- mistralai/models/security.py +14 -5
- mistralai/models/source.py +3 -2
- mistralai/models/systemmessage.py +10 -9
- mistralai/models/textchunk.py +14 -5
- mistralai/models/tool.py +10 -9
- mistralai/models/toolcall.py +10 -8
- mistralai/models/toolchoice.py +29 -0
- mistralai/models/toolchoiceenum.py +7 -0
- mistralai/models/toolmessage.py +13 -6
- mistralai/models/tooltypes.py +8 -0
- mistralai/models/trainingfile.py +4 -4
- mistralai/models/trainingparameters.py +34 -8
- mistralai/models/trainingparametersin.py +36 -10
- mistralai/models/unarchiveftmodelout.py +15 -5
- mistralai/models/updateftmodelin.py +9 -6
- mistralai/models/uploadfileout.py +22 -15
- mistralai/models/usageinfo.py +4 -3
- mistralai/models/usermessage.py +42 -10
- mistralai/models/validationerror.py +5 -3
- mistralai/models/wandbintegration.py +23 -7
- mistralai/models/wandbintegrationout.py +23 -8
- mistralai/models_.py +416 -294
- mistralai/sdk.py +31 -19
- mistralai/sdkconfiguration.py +9 -11
- mistralai/utils/__init__.py +14 -1
- mistralai/utils/annotations.py +13 -2
- mistralai/utils/logger.py +4 -1
- mistralai/utils/retries.py +2 -1
- mistralai/utils/security.py +13 -6
- mistralai/utils/serializers.py +25 -0
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/METADATA +171 -66
- mistralai-1.2.0.dist-info/RECORD +276 -0
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
- mistralai_azure/__init__.py +4 -0
- mistralai_azure/_hooks/sdkhooks.py +23 -4
- mistralai_azure/_hooks/types.py +27 -9
- mistralai_azure/_version.py +12 -0
- mistralai_azure/basesdk.py +91 -6
- mistralai_azure/chat.py +308 -166
- mistralai_azure/models/__init__.py +164 -16
- mistralai_azure/models/assistantmessage.py +29 -11
- mistralai_azure/models/chatcompletionchoice.py +15 -6
- mistralai_azure/models/chatcompletionrequest.py +94 -22
- mistralai_azure/models/chatcompletionresponse.py +8 -4
- mistralai_azure/models/chatcompletionstreamrequest.py +96 -22
- mistralai_azure/models/completionchunk.py +12 -5
- mistralai_azure/models/completionevent.py +2 -3
- mistralai_azure/models/completionresponsestreamchoice.py +19 -8
- mistralai_azure/models/contentchunk.py +4 -11
- mistralai_azure/models/deltamessage.py +30 -12
- mistralai_azure/models/function.py +5 -4
- mistralai_azure/models/functioncall.py +4 -3
- mistralai_azure/models/functionname.py +17 -0
- mistralai_azure/models/httpvalidationerror.py +1 -3
- mistralai_azure/models/responseformat.py +7 -8
- mistralai_azure/models/responseformats.py +8 -0
- mistralai_azure/models/security.py +13 -5
- mistralai_azure/models/systemmessage.py +10 -9
- mistralai_azure/models/textchunk.py +14 -5
- mistralai_azure/models/tool.py +10 -9
- mistralai_azure/models/toolcall.py +10 -8
- mistralai_azure/models/toolchoice.py +29 -0
- mistralai_azure/models/toolchoiceenum.py +7 -0
- mistralai_azure/models/toolmessage.py +20 -7
- mistralai_azure/models/tooltypes.py +8 -0
- mistralai_azure/models/usageinfo.py +4 -3
- mistralai_azure/models/usermessage.py +42 -10
- mistralai_azure/models/validationerror.py +5 -3
- mistralai_azure/sdkconfiguration.py +9 -11
- mistralai_azure/utils/__init__.py +16 -3
- mistralai_azure/utils/annotations.py +13 -2
- mistralai_azure/utils/forms.py +10 -9
- mistralai_azure/utils/headers.py +8 -8
- mistralai_azure/utils/logger.py +6 -0
- mistralai_azure/utils/queryparams.py +16 -14
- mistralai_azure/utils/retries.py +2 -1
- mistralai_azure/utils/security.py +12 -6
- mistralai_azure/utils/serializers.py +42 -8
- mistralai_azure/utils/url.py +13 -8
- mistralai_azure/utils/values.py +6 -0
- mistralai_gcp/__init__.py +4 -0
- mistralai_gcp/_hooks/sdkhooks.py +23 -4
- mistralai_gcp/_hooks/types.py +27 -9
- mistralai_gcp/_version.py +12 -0
- mistralai_gcp/basesdk.py +91 -6
- mistralai_gcp/chat.py +308 -166
- mistralai_gcp/fim.py +198 -132
- mistralai_gcp/models/__init__.py +186 -18
- mistralai_gcp/models/assistantmessage.py +29 -11
- mistralai_gcp/models/chatcompletionchoice.py +15 -6
- mistralai_gcp/models/chatcompletionrequest.py +91 -22
- mistralai_gcp/models/chatcompletionresponse.py +8 -4
- mistralai_gcp/models/chatcompletionstreamrequest.py +93 -22
- mistralai_gcp/models/completionchunk.py +12 -5
- mistralai_gcp/models/completionevent.py +2 -3
- mistralai_gcp/models/completionresponsestreamchoice.py +19 -8
- mistralai_gcp/models/contentchunk.py +4 -11
- mistralai_gcp/models/deltamessage.py +30 -12
- mistralai_gcp/models/fimcompletionrequest.py +51 -17
- mistralai_gcp/models/fimcompletionresponse.py +8 -4
- mistralai_gcp/models/fimcompletionstreamrequest.py +51 -17
- mistralai_gcp/models/function.py +5 -4
- mistralai_gcp/models/functioncall.py +4 -3
- mistralai_gcp/models/functionname.py +17 -0
- mistralai_gcp/models/httpvalidationerror.py +1 -3
- mistralai_gcp/models/responseformat.py +7 -8
- mistralai_gcp/models/responseformats.py +8 -0
- mistralai_gcp/models/security.py +13 -5
- mistralai_gcp/models/systemmessage.py +10 -9
- mistralai_gcp/models/textchunk.py +14 -5
- mistralai_gcp/models/tool.py +10 -9
- mistralai_gcp/models/toolcall.py +10 -8
- mistralai_gcp/models/toolchoice.py +29 -0
- mistralai_gcp/models/toolchoiceenum.py +7 -0
- mistralai_gcp/models/toolmessage.py +20 -7
- mistralai_gcp/models/tooltypes.py +8 -0
- mistralai_gcp/models/usageinfo.py +4 -3
- mistralai_gcp/models/usermessage.py +42 -10
- mistralai_gcp/models/validationerror.py +5 -3
- mistralai_gcp/sdk.py +6 -7
- mistralai_gcp/sdkconfiguration.py +9 -11
- mistralai_gcp/utils/__init__.py +16 -3
- mistralai_gcp/utils/annotations.py +13 -2
- mistralai_gcp/utils/forms.py +10 -9
- mistralai_gcp/utils/headers.py +8 -8
- mistralai_gcp/utils/logger.py +6 -0
- mistralai_gcp/utils/queryparams.py +16 -14
- mistralai_gcp/utils/retries.py +2 -1
- mistralai_gcp/utils/security.py +12 -6
- mistralai_gcp/utils/serializers.py +42 -8
- mistralai_gcp/utils/url.py +13 -8
- mistralai_gcp/utils/values.py +6 -0
- mistralai-1.0.3.dist-info/RECORD +0 -236
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
mistralai/chat.py
CHANGED
@@ -7,24 +7,36 @@ from mistralai.types import Nullable, OptionalNullable, UNSET
 from mistralai.utils import eventstreaming, get_security_from_env
 from typing import Any, AsyncGenerator, Generator, List, Optional, Union

+
 class Chat(BaseSDK):
     r"""Chat Completion API."""
-
-
+
     def complete(
-        self,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -34,16 +46,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -53,26 +67,32 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
            random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/v1/chat/completions",
@@ -85,59 +105,82 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )

-
-
     async def complete_async(
-        self,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -147,16 +190,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -166,27 +211,33 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.
+
+        req = self.build_request_async(
             method="POST",
             path="/v1/chat/completions",
             base_url=base_url,
@@ -198,59 +249,90 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )

-
-
     def stream(
-        self,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[
-
+        messages: Union[
+            List[models.ChatCompletionStreamRequestMessages],
+            List[models.ChatCompletionStreamRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
-        stop: Optional[
+        stop: Optional[
+            Union[
+                models.ChatCompletionStreamRequestStop,
+                models.ChatCompletionStreamRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -262,16 +344,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -281,26 +365,34 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(
-
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionStreamRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/v1/chat/completions#stream",
@@ -313,60 +405,94 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events(
+            return eventstreaming.stream_events(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )

-
-
     async def stream_async(
-        self,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[
-
+        messages: Union[
+            List[models.ChatCompletionStreamRequestMessages],
+            List[models.ChatCompletionStreamRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
-        stop: Optional[
+        stop: Optional[
+            Union[
+                models.ChatCompletionStreamRequestStop,
+                models.ChatCompletionStreamRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -378,16 +504,18 @@ class Chat(BaseSDK):

         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -397,27 +525,35 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(
-
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionStreamRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.
+
+        req = self.build_request_async(
             method="POST",
             path="/v1/chat/completions#stream",
             base_url=base_url,
@@ -429,42 +565,56 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config

         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
        )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events_async(
+            return eventstreaming.stream_events_async(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )

-
+        content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )