mistralai 1.0.3-py3-none-any.whl → 1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230)
  1. mistralai/__init__.py +4 -0
  2. mistralai/_hooks/sdkhooks.py +23 -4
  3. mistralai/_hooks/types.py +27 -9
  4. mistralai/_version.py +12 -0
  5. mistralai/agents.py +334 -164
  6. mistralai/basesdk.py +90 -5
  7. mistralai/batch.py +17 -0
  8. mistralai/chat.py +316 -166
  9. mistralai/classifiers.py +396 -0
  10. mistralai/embeddings.py +79 -55
  11. mistralai/files.py +487 -194
  12. mistralai/fim.py +206 -132
  13. mistralai/fine_tuning.py +3 -2
  14. mistralai/jobs.py +392 -263
  15. mistralai/mistral_jobs.py +733 -0
  16. mistralai/models/__init__.py +593 -50
  17. mistralai/models/agentscompletionrequest.py +70 -17
  18. mistralai/models/agentscompletionstreamrequest.py +72 -17
  19. mistralai/models/apiendpoint.py +9 -0
  20. mistralai/models/archiveftmodelout.py +15 -5
  21. mistralai/models/assistantmessage.py +22 -10
  22. mistralai/models/{modelcard.py → basemodelcard.py} +53 -14
  23. mistralai/models/batcherror.py +17 -0
  24. mistralai/models/batchjobin.py +58 -0
  25. mistralai/models/batchjobout.py +117 -0
  26. mistralai/models/batchjobsout.py +30 -0
  27. mistralai/models/batchjobstatus.py +15 -0
  28. mistralai/models/chatclassificationrequest.py +104 -0
  29. mistralai/models/chatcompletionchoice.py +13 -6
  30. mistralai/models/chatcompletionrequest.py +86 -21
  31. mistralai/models/chatcompletionresponse.py +8 -4
  32. mistralai/models/chatcompletionstreamrequest.py +88 -21
  33. mistralai/models/checkpointout.py +4 -3
  34. mistralai/models/classificationobject.py +21 -0
  35. mistralai/models/classificationrequest.py +59 -0
  36. mistralai/models/classificationresponse.py +21 -0
  37. mistralai/models/completionchunk.py +12 -5
  38. mistralai/models/completionevent.py +2 -3
  39. mistralai/models/completionresponsestreamchoice.py +22 -8
  40. mistralai/models/contentchunk.py +13 -10
  41. mistralai/models/delete_model_v1_models_model_id_deleteop.py +5 -5
  42. mistralai/models/deletefileout.py +4 -3
  43. mistralai/models/deletemodelout.py +5 -4
  44. mistralai/models/deltamessage.py +23 -11
  45. mistralai/models/detailedjobout.py +70 -12
  46. mistralai/models/embeddingrequest.py +14 -9
  47. mistralai/models/embeddingresponse.py +7 -3
  48. mistralai/models/embeddingresponsedata.py +5 -4
  49. mistralai/models/eventout.py +11 -6
  50. mistralai/models/filepurpose.py +8 -0
  51. mistralai/models/files_api_routes_delete_fileop.py +5 -5
  52. mistralai/models/files_api_routes_download_fileop.py +16 -0
  53. mistralai/models/files_api_routes_list_filesop.py +96 -0
  54. mistralai/models/files_api_routes_retrieve_fileop.py +5 -5
  55. mistralai/models/files_api_routes_upload_fileop.py +33 -14
  56. mistralai/models/fileschema.py +22 -15
  57. mistralai/models/fimcompletionrequest.py +44 -16
  58. mistralai/models/fimcompletionresponse.py +8 -4
  59. mistralai/models/fimcompletionstreamrequest.py +44 -16
  60. mistralai/models/finetuneablemodel.py +7 -1
  61. mistralai/models/ftmodelcapabilitiesout.py +6 -4
  62. mistralai/models/ftmodelcard.py +121 -0
  63. mistralai/models/ftmodelout.py +39 -9
  64. mistralai/models/function.py +5 -4
  65. mistralai/models/functioncall.py +4 -3
  66. mistralai/models/functionname.py +17 -0
  67. mistralai/models/githubrepositoryin.py +24 -7
  68. mistralai/models/githubrepositoryout.py +24 -7
  69. mistralai/models/httpvalidationerror.py +1 -3
  70. mistralai/models/imageurl.py +47 -0
  71. mistralai/models/imageurlchunk.py +38 -0
  72. mistralai/models/jobin.py +24 -7
  73. mistralai/models/jobmetadataout.py +32 -8
  74. mistralai/models/jobout.py +65 -12
  75. mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
  76. mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
  77. mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
  78. mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +5 -5
  79. mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +5 -5
  80. mistralai/models/jobs_api_routes_fine_tuning_create_fine_tuning_jobop.py +3 -2
  81. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +5 -5
  82. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +85 -18
  83. mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +5 -5
  84. mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +5 -5
  85. mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +10 -6
  86. mistralai/models/jobsout.py +13 -5
  87. mistralai/models/legacyjobmetadataout.py +55 -9
  88. mistralai/models/listfilesout.py +7 -3
  89. mistralai/models/metricout.py +12 -8
  90. mistralai/models/modelcapabilities.py +9 -4
  91. mistralai/models/modellist.py +21 -7
  92. mistralai/models/responseformat.py +7 -8
  93. mistralai/models/responseformats.py +8 -0
  94. mistralai/models/retrieve_model_v1_models_model_id_getop.py +25 -6
  95. mistralai/models/retrievefileout.py +25 -15
  96. mistralai/models/sampletype.py +6 -2
  97. mistralai/models/security.py +14 -5
  98. mistralai/models/source.py +3 -2
  99. mistralai/models/systemmessage.py +10 -9
  100. mistralai/models/textchunk.py +14 -5
  101. mistralai/models/tool.py +10 -9
  102. mistralai/models/toolcall.py +10 -8
  103. mistralai/models/toolchoice.py +29 -0
  104. mistralai/models/toolchoiceenum.py +7 -0
  105. mistralai/models/toolmessage.py +13 -6
  106. mistralai/models/tooltypes.py +8 -0
  107. mistralai/models/trainingfile.py +4 -4
  108. mistralai/models/trainingparameters.py +34 -8
  109. mistralai/models/trainingparametersin.py +36 -10
  110. mistralai/models/unarchiveftmodelout.py +15 -5
  111. mistralai/models/updateftmodelin.py +9 -6
  112. mistralai/models/uploadfileout.py +22 -15
  113. mistralai/models/usageinfo.py +4 -3
  114. mistralai/models/usermessage.py +42 -10
  115. mistralai/models/validationerror.py +5 -3
  116. mistralai/models/wandbintegration.py +23 -7
  117. mistralai/models/wandbintegrationout.py +23 -8
  118. mistralai/models_.py +416 -294
  119. mistralai/sdk.py +31 -19
  120. mistralai/sdkconfiguration.py +9 -11
  121. mistralai/utils/__init__.py +14 -1
  122. mistralai/utils/annotations.py +13 -2
  123. mistralai/utils/logger.py +4 -1
  124. mistralai/utils/retries.py +2 -1
  125. mistralai/utils/security.py +13 -6
  126. mistralai/utils/serializers.py +25 -0
  127. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/METADATA +171 -66
  128. mistralai-1.2.0.dist-info/RECORD +276 -0
  129. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
  130. mistralai_azure/__init__.py +4 -0
  131. mistralai_azure/_hooks/sdkhooks.py +23 -4
  132. mistralai_azure/_hooks/types.py +27 -9
  133. mistralai_azure/_version.py +12 -0
  134. mistralai_azure/basesdk.py +91 -6
  135. mistralai_azure/chat.py +308 -166
  136. mistralai_azure/models/__init__.py +164 -16
  137. mistralai_azure/models/assistantmessage.py +29 -11
  138. mistralai_azure/models/chatcompletionchoice.py +15 -6
  139. mistralai_azure/models/chatcompletionrequest.py +94 -22
  140. mistralai_azure/models/chatcompletionresponse.py +8 -4
  141. mistralai_azure/models/chatcompletionstreamrequest.py +96 -22
  142. mistralai_azure/models/completionchunk.py +12 -5
  143. mistralai_azure/models/completionevent.py +2 -3
  144. mistralai_azure/models/completionresponsestreamchoice.py +19 -8
  145. mistralai_azure/models/contentchunk.py +4 -11
  146. mistralai_azure/models/deltamessage.py +30 -12
  147. mistralai_azure/models/function.py +5 -4
  148. mistralai_azure/models/functioncall.py +4 -3
  149. mistralai_azure/models/functionname.py +17 -0
  150. mistralai_azure/models/httpvalidationerror.py +1 -3
  151. mistralai_azure/models/responseformat.py +7 -8
  152. mistralai_azure/models/responseformats.py +8 -0
  153. mistralai_azure/models/security.py +13 -5
  154. mistralai_azure/models/systemmessage.py +10 -9
  155. mistralai_azure/models/textchunk.py +14 -5
  156. mistralai_azure/models/tool.py +10 -9
  157. mistralai_azure/models/toolcall.py +10 -8
  158. mistralai_azure/models/toolchoice.py +29 -0
  159. mistralai_azure/models/toolchoiceenum.py +7 -0
  160. mistralai_azure/models/toolmessage.py +20 -7
  161. mistralai_azure/models/tooltypes.py +8 -0
  162. mistralai_azure/models/usageinfo.py +4 -3
  163. mistralai_azure/models/usermessage.py +42 -10
  164. mistralai_azure/models/validationerror.py +5 -3
  165. mistralai_azure/sdkconfiguration.py +9 -11
  166. mistralai_azure/utils/__init__.py +16 -3
  167. mistralai_azure/utils/annotations.py +13 -2
  168. mistralai_azure/utils/forms.py +10 -9
  169. mistralai_azure/utils/headers.py +8 -8
  170. mistralai_azure/utils/logger.py +6 -0
  171. mistralai_azure/utils/queryparams.py +16 -14
  172. mistralai_azure/utils/retries.py +2 -1
  173. mistralai_azure/utils/security.py +12 -6
  174. mistralai_azure/utils/serializers.py +42 -8
  175. mistralai_azure/utils/url.py +13 -8
  176. mistralai_azure/utils/values.py +6 -0
  177. mistralai_gcp/__init__.py +4 -0
  178. mistralai_gcp/_hooks/sdkhooks.py +23 -4
  179. mistralai_gcp/_hooks/types.py +27 -9
  180. mistralai_gcp/_version.py +12 -0
  181. mistralai_gcp/basesdk.py +91 -6
  182. mistralai_gcp/chat.py +308 -166
  183. mistralai_gcp/fim.py +198 -132
  184. mistralai_gcp/models/__init__.py +186 -18
  185. mistralai_gcp/models/assistantmessage.py +29 -11
  186. mistralai_gcp/models/chatcompletionchoice.py +15 -6
  187. mistralai_gcp/models/chatcompletionrequest.py +91 -22
  188. mistralai_gcp/models/chatcompletionresponse.py +8 -4
  189. mistralai_gcp/models/chatcompletionstreamrequest.py +93 -22
  190. mistralai_gcp/models/completionchunk.py +12 -5
  191. mistralai_gcp/models/completionevent.py +2 -3
  192. mistralai_gcp/models/completionresponsestreamchoice.py +19 -8
  193. mistralai_gcp/models/contentchunk.py +4 -11
  194. mistralai_gcp/models/deltamessage.py +30 -12
  195. mistralai_gcp/models/fimcompletionrequest.py +51 -17
  196. mistralai_gcp/models/fimcompletionresponse.py +8 -4
  197. mistralai_gcp/models/fimcompletionstreamrequest.py +51 -17
  198. mistralai_gcp/models/function.py +5 -4
  199. mistralai_gcp/models/functioncall.py +4 -3
  200. mistralai_gcp/models/functionname.py +17 -0
  201. mistralai_gcp/models/httpvalidationerror.py +1 -3
  202. mistralai_gcp/models/responseformat.py +7 -8
  203. mistralai_gcp/models/responseformats.py +8 -0
  204. mistralai_gcp/models/security.py +13 -5
  205. mistralai_gcp/models/systemmessage.py +10 -9
  206. mistralai_gcp/models/textchunk.py +14 -5
  207. mistralai_gcp/models/tool.py +10 -9
  208. mistralai_gcp/models/toolcall.py +10 -8
  209. mistralai_gcp/models/toolchoice.py +29 -0
  210. mistralai_gcp/models/toolchoiceenum.py +7 -0
  211. mistralai_gcp/models/toolmessage.py +20 -7
  212. mistralai_gcp/models/tooltypes.py +8 -0
  213. mistralai_gcp/models/usageinfo.py +4 -3
  214. mistralai_gcp/models/usermessage.py +42 -10
  215. mistralai_gcp/models/validationerror.py +5 -3
  216. mistralai_gcp/sdk.py +6 -7
  217. mistralai_gcp/sdkconfiguration.py +9 -11
  218. mistralai_gcp/utils/__init__.py +16 -3
  219. mistralai_gcp/utils/annotations.py +13 -2
  220. mistralai_gcp/utils/forms.py +10 -9
  221. mistralai_gcp/utils/headers.py +8 -8
  222. mistralai_gcp/utils/logger.py +6 -0
  223. mistralai_gcp/utils/queryparams.py +16 -14
  224. mistralai_gcp/utils/retries.py +2 -1
  225. mistralai_gcp/utils/security.py +12 -6
  226. mistralai_gcp/utils/serializers.py +42 -8
  227. mistralai_gcp/utils/url.py +13 -8
  228. mistralai_gcp/utils/values.py +6 -0
  229. mistralai-1.0.3.dist-info/RECORD +0 -236
  230. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
mistralai/chat.py CHANGED
@@ -7,24 +7,36 @@ from mistralai.types import Nullable, OptionalNullable, UNSET
 from mistralai.utils import eventstreaming, get_security_from_env
 from typing import Any, AsyncGenerator, Generator, List, Optional, Union
 
+
 class Chat(BaseSDK):
     r"""Chat Completion API."""
-
-
+
     def complete(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -34,16 +46,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -53,26 +67,32 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/v1/chat/completions",
@@ -85,59 +105,82 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(operation_id="chat_completion_v1_chat_completions_post", oauth2_scopes=[], security_source=get_security_from_env(self.sdk_configuration.security, models.Security)),
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, Optional[models.ChatCompletionResponse])
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def complete_async(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -147,16 +190,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
        :param server_url: Override the default server URL for this method
@@ -166,27 +211,33 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.build_request(
+
+        req = self.build_request_async(
             method="POST",
             path="/v1/chat/completions",
             base_url=base_url,
@@ -198,59 +249,90 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(operation_id="chat_completion_v1_chat_completions_post", oauth2_scopes=[], security_source=get_security_from_env(self.sdk_configuration.security, models.Security)),
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, Optional[models.ChatCompletionResponse])
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     def stream(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[List[models.ChatCompletionStreamRequestMessages], List[models.ChatCompletionStreamRequestMessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        messages: Union[
+            List[models.ChatCompletionStreamRequestMessages],
+            List[models.ChatCompletionStreamRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
-        stop: Optional[Union[models.ChatCompletionStreamRequestStop, models.ChatCompletionStreamRequestStopTypedDict]] = None,
+        stop: Optional[
+            Union[
+                models.ChatCompletionStreamRequestStop,
+                models.ChatCompletionStreamRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ChatCompletionStreamRequestToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -262,16 +344,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -281,26 +365,34 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(messages, List[models.ChatCompletionStreamRequestMessages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionStreamRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/v1/chat/completions#stream",
@@ -313,60 +405,94 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionStreamRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(operation_id="stream_chat", oauth2_scopes=[], security_source=get_security_from_env(self.sdk_configuration.security, models.Security)),
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events(http_res, lambda raw: utils.unmarshal_json(raw, models.CompletionEvent), sentinel="[DONE]")
+            return eventstreaming.stream_events(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-            data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def stream_async(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[List[models.ChatCompletionStreamRequestMessages], List[models.ChatCompletionStreamRequestMessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        messages: Union[
+            List[models.ChatCompletionStreamRequestMessages],
+            List[models.ChatCompletionStreamRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
-        stop: Optional[Union[models.ChatCompletionStreamRequestStop, models.ChatCompletionStreamRequestStopTypedDict]] = None,
+        stop: Optional[
+            Union[
+                models.ChatCompletionStreamRequestStop,
+                models.ChatCompletionStreamRequestStopTypedDict,
+            ]
+        ] = None,
        random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ChatCompletionStreamRequestToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         safe_prompt: Optional[bool] = False,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
@@ -378,16 +504,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
        :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
@@ -397,27 +525,35 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(messages, List[models.ChatCompletionStreamRequestMessages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionStreamRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
             safe_prompt=safe_prompt,
         )
-
-        req = self.build_request(
+
+        req = self.build_request_async(
            method="POST",
            path="/v1/chat/completions#stream",
            base_url=base_url,
@@ -429,42 +565,56 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionStreamRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(operation_id="stream_chat", oauth2_scopes=[], security_source=get_security_from_env(self.sdk_configuration.security, models.Security)),
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=get_security_from_env(
+                    self.sdk_configuration.security, models.Security
+                ),
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
        )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events_async(http_res, lambda raw: utils.unmarshal_json(raw, models.CompletionEvent), sentinel="[DONE]")
+            return eventstreaming.stream_events_async(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-            data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
-        content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
 
-
+        content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
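
Taken together, the chat.py changes above mean that in 1.2.0 `min_tokens` is removed, `temperature` defaults to `UNSET` so the server applies a per-model default, `tool_choice` defaults to `None` instead of the `"auto"` literal and is serialized through `utils.get_pydantic_model`, and `presence_penalty`, `frequency_penalty`, and `n` are new request fields. Below is a minimal sketch of a 1.2.0-style call based on that signature; the model id and API-key lookup are illustrative assumptions and not part of this diff.

# Minimal sketch of calling the updated chat.complete() surface shown above.
# The model id and MISTRAL_API_KEY handling are assumptions for illustration only.
import os

from mistralai import Mistral

client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY", ""))

res = client.chat.complete(
    model="mistral-small-latest",  # assumed model id; any available model works
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    temperature=0.3,      # defaults to UNSET in 1.2.0; omitted from the request unless set
    presence_penalty=0,   # new in 1.2.0: penalizes repeated words or phrases
    frequency_penalty=0,  # new in 1.2.0: penalizes tokens that already appear frequently
    n=1,                  # new in 1.2.0: number of completions per request
)
if res is not None and res.choices:
    print(res.choices[0].message.content)

The `stream()`/`stream_async()` methods accept the same new parameters; per the diff they return server-sent events unmarshalled into `models.CompletionEvent` and terminated by a `[DONE]` sentinel, and `tool_choice` now takes a `ToolChoice` model or enum value rather than the old string default.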