mistralai 1.0.3-py3-none-any.whl → 1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230)
  1. mistralai/__init__.py +4 -0
  2. mistralai/_hooks/sdkhooks.py +23 -4
  3. mistralai/_hooks/types.py +27 -9
  4. mistralai/_version.py +12 -0
  5. mistralai/agents.py +334 -164
  6. mistralai/basesdk.py +90 -5
  7. mistralai/batch.py +17 -0
  8. mistralai/chat.py +316 -166
  9. mistralai/classifiers.py +396 -0
  10. mistralai/embeddings.py +79 -55
  11. mistralai/files.py +487 -194
  12. mistralai/fim.py +206 -132
  13. mistralai/fine_tuning.py +3 -2
  14. mistralai/jobs.py +392 -263
  15. mistralai/mistral_jobs.py +733 -0
  16. mistralai/models/__init__.py +593 -50
  17. mistralai/models/agentscompletionrequest.py +70 -17
  18. mistralai/models/agentscompletionstreamrequest.py +72 -17
  19. mistralai/models/apiendpoint.py +9 -0
  20. mistralai/models/archiveftmodelout.py +15 -5
  21. mistralai/models/assistantmessage.py +22 -10
  22. mistralai/models/{modelcard.py → basemodelcard.py} +53 -14
  23. mistralai/models/batcherror.py +17 -0
  24. mistralai/models/batchjobin.py +58 -0
  25. mistralai/models/batchjobout.py +117 -0
  26. mistralai/models/batchjobsout.py +30 -0
  27. mistralai/models/batchjobstatus.py +15 -0
  28. mistralai/models/chatclassificationrequest.py +104 -0
  29. mistralai/models/chatcompletionchoice.py +13 -6
  30. mistralai/models/chatcompletionrequest.py +86 -21
  31. mistralai/models/chatcompletionresponse.py +8 -4
  32. mistralai/models/chatcompletionstreamrequest.py +88 -21
  33. mistralai/models/checkpointout.py +4 -3
  34. mistralai/models/classificationobject.py +21 -0
  35. mistralai/models/classificationrequest.py +59 -0
  36. mistralai/models/classificationresponse.py +21 -0
  37. mistralai/models/completionchunk.py +12 -5
  38. mistralai/models/completionevent.py +2 -3
  39. mistralai/models/completionresponsestreamchoice.py +22 -8
  40. mistralai/models/contentchunk.py +13 -10
  41. mistralai/models/delete_model_v1_models_model_id_deleteop.py +5 -5
  42. mistralai/models/deletefileout.py +4 -3
  43. mistralai/models/deletemodelout.py +5 -4
  44. mistralai/models/deltamessage.py +23 -11
  45. mistralai/models/detailedjobout.py +70 -12
  46. mistralai/models/embeddingrequest.py +14 -9
  47. mistralai/models/embeddingresponse.py +7 -3
  48. mistralai/models/embeddingresponsedata.py +5 -4
  49. mistralai/models/eventout.py +11 -6
  50. mistralai/models/filepurpose.py +8 -0
  51. mistralai/models/files_api_routes_delete_fileop.py +5 -5
  52. mistralai/models/files_api_routes_download_fileop.py +16 -0
  53. mistralai/models/files_api_routes_list_filesop.py +96 -0
  54. mistralai/models/files_api_routes_retrieve_fileop.py +5 -5
  55. mistralai/models/files_api_routes_upload_fileop.py +33 -14
  56. mistralai/models/fileschema.py +22 -15
  57. mistralai/models/fimcompletionrequest.py +44 -16
  58. mistralai/models/fimcompletionresponse.py +8 -4
  59. mistralai/models/fimcompletionstreamrequest.py +44 -16
  60. mistralai/models/finetuneablemodel.py +7 -1
  61. mistralai/models/ftmodelcapabilitiesout.py +6 -4
  62. mistralai/models/ftmodelcard.py +121 -0
  63. mistralai/models/ftmodelout.py +39 -9
  64. mistralai/models/function.py +5 -4
  65. mistralai/models/functioncall.py +4 -3
  66. mistralai/models/functionname.py +17 -0
  67. mistralai/models/githubrepositoryin.py +24 -7
  68. mistralai/models/githubrepositoryout.py +24 -7
  69. mistralai/models/httpvalidationerror.py +1 -3
  70. mistralai/models/imageurl.py +47 -0
  71. mistralai/models/imageurlchunk.py +38 -0
  72. mistralai/models/jobin.py +24 -7
  73. mistralai/models/jobmetadataout.py +32 -8
  74. mistralai/models/jobout.py +65 -12
  75. mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
  76. mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
  77. mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
  78. mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +5 -5
  79. mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +5 -5
  80. mistralai/models/jobs_api_routes_fine_tuning_create_fine_tuning_jobop.py +3 -2
  81. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +5 -5
  82. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +85 -18
  83. mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +5 -5
  84. mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +5 -5
  85. mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +10 -6
  86. mistralai/models/jobsout.py +13 -5
  87. mistralai/models/legacyjobmetadataout.py +55 -9
  88. mistralai/models/listfilesout.py +7 -3
  89. mistralai/models/metricout.py +12 -8
  90. mistralai/models/modelcapabilities.py +9 -4
  91. mistralai/models/modellist.py +21 -7
  92. mistralai/models/responseformat.py +7 -8
  93. mistralai/models/responseformats.py +8 -0
  94. mistralai/models/retrieve_model_v1_models_model_id_getop.py +25 -6
  95. mistralai/models/retrievefileout.py +25 -15
  96. mistralai/models/sampletype.py +6 -2
  97. mistralai/models/security.py +14 -5
  98. mistralai/models/source.py +3 -2
  99. mistralai/models/systemmessage.py +10 -9
  100. mistralai/models/textchunk.py +14 -5
  101. mistralai/models/tool.py +10 -9
  102. mistralai/models/toolcall.py +10 -8
  103. mistralai/models/toolchoice.py +29 -0
  104. mistralai/models/toolchoiceenum.py +7 -0
  105. mistralai/models/toolmessage.py +13 -6
  106. mistralai/models/tooltypes.py +8 -0
  107. mistralai/models/trainingfile.py +4 -4
  108. mistralai/models/trainingparameters.py +34 -8
  109. mistralai/models/trainingparametersin.py +36 -10
  110. mistralai/models/unarchiveftmodelout.py +15 -5
  111. mistralai/models/updateftmodelin.py +9 -6
  112. mistralai/models/uploadfileout.py +22 -15
  113. mistralai/models/usageinfo.py +4 -3
  114. mistralai/models/usermessage.py +42 -10
  115. mistralai/models/validationerror.py +5 -3
  116. mistralai/models/wandbintegration.py +23 -7
  117. mistralai/models/wandbintegrationout.py +23 -8
  118. mistralai/models_.py +416 -294
  119. mistralai/sdk.py +31 -19
  120. mistralai/sdkconfiguration.py +9 -11
  121. mistralai/utils/__init__.py +14 -1
  122. mistralai/utils/annotations.py +13 -2
  123. mistralai/utils/logger.py +4 -1
  124. mistralai/utils/retries.py +2 -1
  125. mistralai/utils/security.py +13 -6
  126. mistralai/utils/serializers.py +25 -0
  127. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/METADATA +171 -66
  128. mistralai-1.2.0.dist-info/RECORD +276 -0
  129. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
  130. mistralai_azure/__init__.py +4 -0
  131. mistralai_azure/_hooks/sdkhooks.py +23 -4
  132. mistralai_azure/_hooks/types.py +27 -9
  133. mistralai_azure/_version.py +12 -0
  134. mistralai_azure/basesdk.py +91 -6
  135. mistralai_azure/chat.py +308 -166
  136. mistralai_azure/models/__init__.py +164 -16
  137. mistralai_azure/models/assistantmessage.py +29 -11
  138. mistralai_azure/models/chatcompletionchoice.py +15 -6
  139. mistralai_azure/models/chatcompletionrequest.py +94 -22
  140. mistralai_azure/models/chatcompletionresponse.py +8 -4
  141. mistralai_azure/models/chatcompletionstreamrequest.py +96 -22
  142. mistralai_azure/models/completionchunk.py +12 -5
  143. mistralai_azure/models/completionevent.py +2 -3
  144. mistralai_azure/models/completionresponsestreamchoice.py +19 -8
  145. mistralai_azure/models/contentchunk.py +4 -11
  146. mistralai_azure/models/deltamessage.py +30 -12
  147. mistralai_azure/models/function.py +5 -4
  148. mistralai_azure/models/functioncall.py +4 -3
  149. mistralai_azure/models/functionname.py +17 -0
  150. mistralai_azure/models/httpvalidationerror.py +1 -3
  151. mistralai_azure/models/responseformat.py +7 -8
  152. mistralai_azure/models/responseformats.py +8 -0
  153. mistralai_azure/models/security.py +13 -5
  154. mistralai_azure/models/systemmessage.py +10 -9
  155. mistralai_azure/models/textchunk.py +14 -5
  156. mistralai_azure/models/tool.py +10 -9
  157. mistralai_azure/models/toolcall.py +10 -8
  158. mistralai_azure/models/toolchoice.py +29 -0
  159. mistralai_azure/models/toolchoiceenum.py +7 -0
  160. mistralai_azure/models/toolmessage.py +20 -7
  161. mistralai_azure/models/tooltypes.py +8 -0
  162. mistralai_azure/models/usageinfo.py +4 -3
  163. mistralai_azure/models/usermessage.py +42 -10
  164. mistralai_azure/models/validationerror.py +5 -3
  165. mistralai_azure/sdkconfiguration.py +9 -11
  166. mistralai_azure/utils/__init__.py +16 -3
  167. mistralai_azure/utils/annotations.py +13 -2
  168. mistralai_azure/utils/forms.py +10 -9
  169. mistralai_azure/utils/headers.py +8 -8
  170. mistralai_azure/utils/logger.py +6 -0
  171. mistralai_azure/utils/queryparams.py +16 -14
  172. mistralai_azure/utils/retries.py +2 -1
  173. mistralai_azure/utils/security.py +12 -6
  174. mistralai_azure/utils/serializers.py +42 -8
  175. mistralai_azure/utils/url.py +13 -8
  176. mistralai_azure/utils/values.py +6 -0
  177. mistralai_gcp/__init__.py +4 -0
  178. mistralai_gcp/_hooks/sdkhooks.py +23 -4
  179. mistralai_gcp/_hooks/types.py +27 -9
  180. mistralai_gcp/_version.py +12 -0
  181. mistralai_gcp/basesdk.py +91 -6
  182. mistralai_gcp/chat.py +308 -166
  183. mistralai_gcp/fim.py +198 -132
  184. mistralai_gcp/models/__init__.py +186 -18
  185. mistralai_gcp/models/assistantmessage.py +29 -11
  186. mistralai_gcp/models/chatcompletionchoice.py +15 -6
  187. mistralai_gcp/models/chatcompletionrequest.py +91 -22
  188. mistralai_gcp/models/chatcompletionresponse.py +8 -4
  189. mistralai_gcp/models/chatcompletionstreamrequest.py +93 -22
  190. mistralai_gcp/models/completionchunk.py +12 -5
  191. mistralai_gcp/models/completionevent.py +2 -3
  192. mistralai_gcp/models/completionresponsestreamchoice.py +19 -8
  193. mistralai_gcp/models/contentchunk.py +4 -11
  194. mistralai_gcp/models/deltamessage.py +30 -12
  195. mistralai_gcp/models/fimcompletionrequest.py +51 -17
  196. mistralai_gcp/models/fimcompletionresponse.py +8 -4
  197. mistralai_gcp/models/fimcompletionstreamrequest.py +51 -17
  198. mistralai_gcp/models/function.py +5 -4
  199. mistralai_gcp/models/functioncall.py +4 -3
  200. mistralai_gcp/models/functionname.py +17 -0
  201. mistralai_gcp/models/httpvalidationerror.py +1 -3
  202. mistralai_gcp/models/responseformat.py +7 -8
  203. mistralai_gcp/models/responseformats.py +8 -0
  204. mistralai_gcp/models/security.py +13 -5
  205. mistralai_gcp/models/systemmessage.py +10 -9
  206. mistralai_gcp/models/textchunk.py +14 -5
  207. mistralai_gcp/models/tool.py +10 -9
  208. mistralai_gcp/models/toolcall.py +10 -8
  209. mistralai_gcp/models/toolchoice.py +29 -0
  210. mistralai_gcp/models/toolchoiceenum.py +7 -0
  211. mistralai_gcp/models/toolmessage.py +20 -7
  212. mistralai_gcp/models/tooltypes.py +8 -0
  213. mistralai_gcp/models/usageinfo.py +4 -3
  214. mistralai_gcp/models/usermessage.py +42 -10
  215. mistralai_gcp/models/validationerror.py +5 -3
  216. mistralai_gcp/sdk.py +6 -7
  217. mistralai_gcp/sdkconfiguration.py +9 -11
  218. mistralai_gcp/utils/__init__.py +16 -3
  219. mistralai_gcp/utils/annotations.py +13 -2
  220. mistralai_gcp/utils/forms.py +10 -9
  221. mistralai_gcp/utils/headers.py +8 -8
  222. mistralai_gcp/utils/logger.py +6 -0
  223. mistralai_gcp/utils/queryparams.py +16 -14
  224. mistralai_gcp/utils/retries.py +2 -1
  225. mistralai_gcp/utils/security.py +12 -6
  226. mistralai_gcp/utils/serializers.py +42 -8
  227. mistralai_gcp/utils/url.py +13 -8
  228. mistralai_gcp/utils/values.py +6 -0
  229. mistralai-1.0.3.dist-info/RECORD +0 -236
  230. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
mistralai_azure/chat.py CHANGED
@@ -7,24 +7,36 @@ from mistralai_azure.types import OptionalNullable, UNSET
 from mistralai_azure.utils import eventstreaming
 from typing import Any, AsyncGenerator, Generator, List, Optional, Union
 
+
 class Chat(BaseSDK):
     r"""Chat Completion API."""
-
-
+
     def stream(
-        self, *,
+        self,
+        *,
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
         model: OptionalNullable[str] = "azureai",
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -36,16 +48,18 @@ class Chat(BaseSDK):
 
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -55,26 +69,32 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/chat/completions#stream",
@@ -87,60 +107,84 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionStreamRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(operation_id="stream_chat", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events(http_res, lambda raw: utils.unmarshal_json(raw, models.CompletionEvent), sentinel="[DONE]")
+            return eventstreaming.stream_events(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-            data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def stream_async(
-        self, *,
+        self,
+        *,
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
         model: OptionalNullable[str] = "azureai",
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -152,16 +196,18 @@ class Chat(BaseSDK):
 
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -171,27 +217,33 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.build_request(
+
+        req = self.build_request_async(
             method="POST",
             path="/chat/completions#stream",
             base_url=base_url,
@@ -203,60 +255,92 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionStreamRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(operation_id="stream_chat", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events_async(http_res, lambda raw: utils.unmarshal_json(raw, models.CompletionEvent), sentinel="[DONE]")
+            return eventstreaming.stream_events_async(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-            data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     def complete(
-        self, *,
-        messages: Union[List[models.ChatCompletionRequestMessages], List[models.ChatCompletionRequestMessagesTypedDict]],
+        self,
+        *,
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
         model: OptionalNullable[str] = "azureai",
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
-        stop: Optional[Union[models.ChatCompletionRequestStop, models.ChatCompletionRequestStopTypedDict]] = None,
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ChatCompletionRequestToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -266,16 +350,18 @@ class Chat(BaseSDK):
 
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
         :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -285,26 +371,34 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(messages, List[models.ChatCompletionRequestMessages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/chat/completions",
@@ -317,59 +411,88 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(operation_id="chat_completion_v1_chat_completions_post", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, Optional[models.ChatCompletionResponse])
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def complete_async(
-        self, *,
-        messages: Union[List[models.ChatCompletionRequestMessages], List[models.ChatCompletionRequestMessagesTypedDict]],
+        self,
+        *,
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
         model: OptionalNullable[str] = "azureai",
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
        top_p: Optional[float] = 1,
        max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
        stream: Optional[bool] = False,
-        stop: Optional[Union[models.ChatCompletionRequestStop, models.ChatCompletionRequestStopTypedDict]] = None,
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ChatCompletionRequestToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -379,16 +502,18 @@ class Chat(BaseSDK):
 
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
        :param model: The ID of the model to use for this request.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -398,27 +523,35 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(messages, List[models.ChatCompletionRequestMessages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.build_request(
+
+        req = self.build_request_async(
             method="POST",
             path="/chat/completions",
             base_url=base_url,
@@ -430,41 +563,50 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(operation_id="chat_completion_v1_chat_completions_post", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, Optional[models.ChatCompletionResponse])
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
-        content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
 
-
+        content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
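
Taken together, the chat.py changes above reshape the Azure chat surface: `min_tokens` is removed, `temperature` now defaults to UNSET so the model's server-side default applies, `tool_choice` accepts a structured value instead of the bare string "auto", and `presence_penalty`, `frequency_penalty`, and `n` are new request fields. Below is a minimal usage sketch against the 1.2.0 `Chat.complete` signature reconstructed above; the `MistralAzure` client class name, its constructor arguments, the environment variable names, and the response attribute access are assumptions not shown in this diff.

import os

from mistralai_azure import MistralAzure  # assumed client class name

# Assumed constructor arguments; adjust to how the SDK is configured in your project.
client = MistralAzure(
    azure_api_key=os.environ["AZURE_API_KEY"],
    azure_endpoint=os.environ["AZURE_ENDPOINT"],
)

# Mirrors the new Chat.complete signature: no min_tokens, temperature optional,
# plus the new presence_penalty / frequency_penalty / n parameters.
res = client.chat.complete(
    messages=[{"role": "user", "content": "Say hello."}],
    temperature=0.3,        # omit to use the model's own default (UNSET)
    presence_penalty=0.1,   # new in 1.2.0
    frequency_penalty=0.1,  # new in 1.2.0
    n=1,                    # new in 1.2.0
)

if res is not None and res.choices:
    print(res.choices[0].message.content)  # assumed response shape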