mistralai 1.0.3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- mistralai/__init__.py +4 -0
- mistralai/_hooks/sdkhooks.py +23 -4
- mistralai/_hooks/types.py +27 -9
- mistralai/_version.py +12 -0
- mistralai/agents.py +334 -164
- mistralai/basesdk.py +90 -5
- mistralai/batch.py +17 -0
- mistralai/chat.py +316 -166
- mistralai/classifiers.py +396 -0
- mistralai/embeddings.py +79 -55
- mistralai/files.py +487 -194
- mistralai/fim.py +206 -132
- mistralai/fine_tuning.py +3 -2
- mistralai/jobs.py +392 -263
- mistralai/mistral_jobs.py +733 -0
- mistralai/models/__init__.py +593 -50
- mistralai/models/agentscompletionrequest.py +70 -17
- mistralai/models/agentscompletionstreamrequest.py +72 -17
- mistralai/models/apiendpoint.py +9 -0
- mistralai/models/archiveftmodelout.py +15 -5
- mistralai/models/assistantmessage.py +22 -10
- mistralai/models/{modelcard.py → basemodelcard.py} +53 -14
- mistralai/models/batcherror.py +17 -0
- mistralai/models/batchjobin.py +58 -0
- mistralai/models/batchjobout.py +117 -0
- mistralai/models/batchjobsout.py +30 -0
- mistralai/models/batchjobstatus.py +15 -0
- mistralai/models/chatclassificationrequest.py +104 -0
- mistralai/models/chatcompletionchoice.py +13 -6
- mistralai/models/chatcompletionrequest.py +86 -21
- mistralai/models/chatcompletionresponse.py +8 -4
- mistralai/models/chatcompletionstreamrequest.py +88 -21
- mistralai/models/checkpointout.py +4 -3
- mistralai/models/classificationobject.py +21 -0
- mistralai/models/classificationrequest.py +59 -0
- mistralai/models/classificationresponse.py +21 -0
- mistralai/models/completionchunk.py +12 -5
- mistralai/models/completionevent.py +2 -3
- mistralai/models/completionresponsestreamchoice.py +22 -8
- mistralai/models/contentchunk.py +13 -10
- mistralai/models/delete_model_v1_models_model_id_deleteop.py +5 -5
- mistralai/models/deletefileout.py +4 -3
- mistralai/models/deletemodelout.py +5 -4
- mistralai/models/deltamessage.py +23 -11
- mistralai/models/detailedjobout.py +70 -12
- mistralai/models/embeddingrequest.py +14 -9
- mistralai/models/embeddingresponse.py +7 -3
- mistralai/models/embeddingresponsedata.py +5 -4
- mistralai/models/eventout.py +11 -6
- mistralai/models/filepurpose.py +8 -0
- mistralai/models/files_api_routes_delete_fileop.py +5 -5
- mistralai/models/files_api_routes_download_fileop.py +16 -0
- mistralai/models/files_api_routes_list_filesop.py +96 -0
- mistralai/models/files_api_routes_retrieve_fileop.py +5 -5
- mistralai/models/files_api_routes_upload_fileop.py +33 -14
- mistralai/models/fileschema.py +22 -15
- mistralai/models/fimcompletionrequest.py +44 -16
- mistralai/models/fimcompletionresponse.py +8 -4
- mistralai/models/fimcompletionstreamrequest.py +44 -16
- mistralai/models/finetuneablemodel.py +7 -1
- mistralai/models/ftmodelcapabilitiesout.py +6 -4
- mistralai/models/ftmodelcard.py +121 -0
- mistralai/models/ftmodelout.py +39 -9
- mistralai/models/function.py +5 -4
- mistralai/models/functioncall.py +4 -3
- mistralai/models/functionname.py +17 -0
- mistralai/models/githubrepositoryin.py +24 -7
- mistralai/models/githubrepositoryout.py +24 -7
- mistralai/models/httpvalidationerror.py +1 -3
- mistralai/models/imageurl.py +47 -0
- mistralai/models/imageurlchunk.py +38 -0
- mistralai/models/jobin.py +24 -7
- mistralai/models/jobmetadataout.py +32 -8
- mistralai/models/jobout.py +65 -12
- mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
- mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
- mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_create_fine_tuning_jobop.py +3 -2
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +85 -18
- mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +5 -5
- mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +10 -6
- mistralai/models/jobsout.py +13 -5
- mistralai/models/legacyjobmetadataout.py +55 -9
- mistralai/models/listfilesout.py +7 -3
- mistralai/models/metricout.py +12 -8
- mistralai/models/modelcapabilities.py +9 -4
- mistralai/models/modellist.py +21 -7
- mistralai/models/responseformat.py +7 -8
- mistralai/models/responseformats.py +8 -0
- mistralai/models/retrieve_model_v1_models_model_id_getop.py +25 -6
- mistralai/models/retrievefileout.py +25 -15
- mistralai/models/sampletype.py +6 -2
- mistralai/models/security.py +14 -5
- mistralai/models/source.py +3 -2
- mistralai/models/systemmessage.py +10 -9
- mistralai/models/textchunk.py +14 -5
- mistralai/models/tool.py +10 -9
- mistralai/models/toolcall.py +10 -8
- mistralai/models/toolchoice.py +29 -0
- mistralai/models/toolchoiceenum.py +7 -0
- mistralai/models/toolmessage.py +13 -6
- mistralai/models/tooltypes.py +8 -0
- mistralai/models/trainingfile.py +4 -4
- mistralai/models/trainingparameters.py +34 -8
- mistralai/models/trainingparametersin.py +36 -10
- mistralai/models/unarchiveftmodelout.py +15 -5
- mistralai/models/updateftmodelin.py +9 -6
- mistralai/models/uploadfileout.py +22 -15
- mistralai/models/usageinfo.py +4 -3
- mistralai/models/usermessage.py +42 -10
- mistralai/models/validationerror.py +5 -3
- mistralai/models/wandbintegration.py +23 -7
- mistralai/models/wandbintegrationout.py +23 -8
- mistralai/models_.py +416 -294
- mistralai/sdk.py +31 -19
- mistralai/sdkconfiguration.py +9 -11
- mistralai/utils/__init__.py +14 -1
- mistralai/utils/annotations.py +13 -2
- mistralai/utils/logger.py +4 -1
- mistralai/utils/retries.py +2 -1
- mistralai/utils/security.py +13 -6
- mistralai/utils/serializers.py +25 -0
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/METADATA +171 -66
- mistralai-1.2.0.dist-info/RECORD +276 -0
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
- mistralai_azure/__init__.py +4 -0
- mistralai_azure/_hooks/sdkhooks.py +23 -4
- mistralai_azure/_hooks/types.py +27 -9
- mistralai_azure/_version.py +12 -0
- mistralai_azure/basesdk.py +91 -6
- mistralai_azure/chat.py +308 -166
- mistralai_azure/models/__init__.py +164 -16
- mistralai_azure/models/assistantmessage.py +29 -11
- mistralai_azure/models/chatcompletionchoice.py +15 -6
- mistralai_azure/models/chatcompletionrequest.py +94 -22
- mistralai_azure/models/chatcompletionresponse.py +8 -4
- mistralai_azure/models/chatcompletionstreamrequest.py +96 -22
- mistralai_azure/models/completionchunk.py +12 -5
- mistralai_azure/models/completionevent.py +2 -3
- mistralai_azure/models/completionresponsestreamchoice.py +19 -8
- mistralai_azure/models/contentchunk.py +4 -11
- mistralai_azure/models/deltamessage.py +30 -12
- mistralai_azure/models/function.py +5 -4
- mistralai_azure/models/functioncall.py +4 -3
- mistralai_azure/models/functionname.py +17 -0
- mistralai_azure/models/httpvalidationerror.py +1 -3
- mistralai_azure/models/responseformat.py +7 -8
- mistralai_azure/models/responseformats.py +8 -0
- mistralai_azure/models/security.py +13 -5
- mistralai_azure/models/systemmessage.py +10 -9
- mistralai_azure/models/textchunk.py +14 -5
- mistralai_azure/models/tool.py +10 -9
- mistralai_azure/models/toolcall.py +10 -8
- mistralai_azure/models/toolchoice.py +29 -0
- mistralai_azure/models/toolchoiceenum.py +7 -0
- mistralai_azure/models/toolmessage.py +20 -7
- mistralai_azure/models/tooltypes.py +8 -0
- mistralai_azure/models/usageinfo.py +4 -3
- mistralai_azure/models/usermessage.py +42 -10
- mistralai_azure/models/validationerror.py +5 -3
- mistralai_azure/sdkconfiguration.py +9 -11
- mistralai_azure/utils/__init__.py +16 -3
- mistralai_azure/utils/annotations.py +13 -2
- mistralai_azure/utils/forms.py +10 -9
- mistralai_azure/utils/headers.py +8 -8
- mistralai_azure/utils/logger.py +6 -0
- mistralai_azure/utils/queryparams.py +16 -14
- mistralai_azure/utils/retries.py +2 -1
- mistralai_azure/utils/security.py +12 -6
- mistralai_azure/utils/serializers.py +42 -8
- mistralai_azure/utils/url.py +13 -8
- mistralai_azure/utils/values.py +6 -0
- mistralai_gcp/__init__.py +4 -0
- mistralai_gcp/_hooks/sdkhooks.py +23 -4
- mistralai_gcp/_hooks/types.py +27 -9
- mistralai_gcp/_version.py +12 -0
- mistralai_gcp/basesdk.py +91 -6
- mistralai_gcp/chat.py +308 -166
- mistralai_gcp/fim.py +198 -132
- mistralai_gcp/models/__init__.py +186 -18
- mistralai_gcp/models/assistantmessage.py +29 -11
- mistralai_gcp/models/chatcompletionchoice.py +15 -6
- mistralai_gcp/models/chatcompletionrequest.py +91 -22
- mistralai_gcp/models/chatcompletionresponse.py +8 -4
- mistralai_gcp/models/chatcompletionstreamrequest.py +93 -22
- mistralai_gcp/models/completionchunk.py +12 -5
- mistralai_gcp/models/completionevent.py +2 -3
- mistralai_gcp/models/completionresponsestreamchoice.py +19 -8
- mistralai_gcp/models/contentchunk.py +4 -11
- mistralai_gcp/models/deltamessage.py +30 -12
- mistralai_gcp/models/fimcompletionrequest.py +51 -17
- mistralai_gcp/models/fimcompletionresponse.py +8 -4
- mistralai_gcp/models/fimcompletionstreamrequest.py +51 -17
- mistralai_gcp/models/function.py +5 -4
- mistralai_gcp/models/functioncall.py +4 -3
- mistralai_gcp/models/functionname.py +17 -0
- mistralai_gcp/models/httpvalidationerror.py +1 -3
- mistralai_gcp/models/responseformat.py +7 -8
- mistralai_gcp/models/responseformats.py +8 -0
- mistralai_gcp/models/security.py +13 -5
- mistralai_gcp/models/systemmessage.py +10 -9
- mistralai_gcp/models/textchunk.py +14 -5
- mistralai_gcp/models/tool.py +10 -9
- mistralai_gcp/models/toolcall.py +10 -8
- mistralai_gcp/models/toolchoice.py +29 -0
- mistralai_gcp/models/toolchoiceenum.py +7 -0
- mistralai_gcp/models/toolmessage.py +20 -7
- mistralai_gcp/models/tooltypes.py +8 -0
- mistralai_gcp/models/usageinfo.py +4 -3
- mistralai_gcp/models/usermessage.py +42 -10
- mistralai_gcp/models/validationerror.py +5 -3
- mistralai_gcp/sdk.py +6 -7
- mistralai_gcp/sdkconfiguration.py +9 -11
- mistralai_gcp/utils/__init__.py +16 -3
- mistralai_gcp/utils/annotations.py +13 -2
- mistralai_gcp/utils/forms.py +10 -9
- mistralai_gcp/utils/headers.py +8 -8
- mistralai_gcp/utils/logger.py +6 -0
- mistralai_gcp/utils/queryparams.py +16 -14
- mistralai_gcp/utils/retries.py +2 -1
- mistralai_gcp/utils/security.py +12 -6
- mistralai_gcp/utils/serializers.py +42 -8
- mistralai_gcp/utils/url.py +13 -8
- mistralai_gcp/utils/values.py +6 -0
- mistralai-1.0.3.dist-info/RECORD +0 -236
- {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
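The most visible API change in this release is to the chat completion methods: the `min_tokens` parameter is removed, and `tool_choice`, `presence_penalty`, `frequency_penalty`, and `n` are added (the `mistralai_gcp/chat.py` diff below shows the change; `mistralai/chat.py` and the Azure variant gain the same parameters). A minimal caller-side sketch, assuming the main `mistralai` client, an API key in the environment, and an illustrative model id:

# Hypothetical usage sketch (not part of the diff): passing the chat parameters
# added in 1.2.0 through the main mistralai client. The model id and API-key
# handling are illustrative assumptions.
import os

from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

res = client.chat.complete(
    model="mistral-small-latest",  # assumed model id
    messages=[{"role": "user", "content": "Say hello in one word."}],
    temperature=0.3,
    presence_penalty=0.5,   # new in 1.2.0: discourage re-using words and phrases
    frequency_penalty=0.5,  # new in 1.2.0: penalize frequent repetition
    n=2,                    # new in 1.2.0: number of completions to return
)

if res is not None:
    for choice in res.choices:
        print(choice.message.content)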
mistralai_gcp/chat.py
CHANGED
@@ -7,24 +7,36 @@ from mistralai_gcp.types import Nullable, OptionalNullable, UNSET
 from mistralai_gcp.utils import eventstreaming
 from typing import Any, AsyncGenerator, Generator, List, Optional, Union
 
+
 class Chat(BaseSDK):
     r"""Chat Completion API."""
-
-
+
     def stream(
-        self,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -35,16 +47,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -53,25 +67,31 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
            timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/streamRawPredict",
@@ -84,60 +104,84 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events(
+            return eventstreaming.stream_events(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def stream_async(
-        self,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature:
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -148,16 +192,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -166,26 +212,32 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
-        req = self.
+
+        req = self.build_request_async(
             method="POST",
             path="/streamRawPredict",
             base_url=base_url,
@@ -197,60 +249,92 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events_async(
+            return eventstreaming.stream_events_async(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     def complete(
-        self,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[
-
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
-        stop: Optional[
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -259,16 +343,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -277,25 +363,33 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(
-
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/rawPredict",
@@ -308,59 +402,88 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def complete_async(
-        self,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[
-
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
-        stop: Optional[
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[
-
-
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -369,16 +492,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
        :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -387,26 +512,34 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
            temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(
-
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
-        req = self.
+
+        req = self.build_request_async(
             method="POST",
             path="/rawPredict",
             base_url=base_url,
@@ -418,41 +551,50 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
        )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-
-
-
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
 
-
+        content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )