mistralai 1.0.3__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230)
  1. mistralai/__init__.py +4 -0
  2. mistralai/_hooks/sdkhooks.py +23 -4
  3. mistralai/_hooks/types.py +27 -9
  4. mistralai/_version.py +12 -0
  5. mistralai/agents.py +334 -164
  6. mistralai/basesdk.py +90 -5
  7. mistralai/batch.py +17 -0
  8. mistralai/chat.py +316 -166
  9. mistralai/classifiers.py +396 -0
  10. mistralai/embeddings.py +79 -55
  11. mistralai/files.py +487 -194
  12. mistralai/fim.py +206 -132
  13. mistralai/fine_tuning.py +3 -2
  14. mistralai/jobs.py +392 -263
  15. mistralai/mistral_jobs.py +733 -0
  16. mistralai/models/__init__.py +593 -50
  17. mistralai/models/agentscompletionrequest.py +70 -17
  18. mistralai/models/agentscompletionstreamrequest.py +72 -17
  19. mistralai/models/apiendpoint.py +9 -0
  20. mistralai/models/archiveftmodelout.py +15 -5
  21. mistralai/models/assistantmessage.py +22 -10
  22. mistralai/models/{modelcard.py → basemodelcard.py} +53 -14
  23. mistralai/models/batcherror.py +17 -0
  24. mistralai/models/batchjobin.py +58 -0
  25. mistralai/models/batchjobout.py +117 -0
  26. mistralai/models/batchjobsout.py +30 -0
  27. mistralai/models/batchjobstatus.py +15 -0
  28. mistralai/models/chatclassificationrequest.py +104 -0
  29. mistralai/models/chatcompletionchoice.py +13 -6
  30. mistralai/models/chatcompletionrequest.py +86 -21
  31. mistralai/models/chatcompletionresponse.py +8 -4
  32. mistralai/models/chatcompletionstreamrequest.py +88 -21
  33. mistralai/models/checkpointout.py +4 -3
  34. mistralai/models/classificationobject.py +21 -0
  35. mistralai/models/classificationrequest.py +59 -0
  36. mistralai/models/classificationresponse.py +21 -0
  37. mistralai/models/completionchunk.py +12 -5
  38. mistralai/models/completionevent.py +2 -3
  39. mistralai/models/completionresponsestreamchoice.py +22 -8
  40. mistralai/models/contentchunk.py +13 -10
  41. mistralai/models/delete_model_v1_models_model_id_deleteop.py +5 -5
  42. mistralai/models/deletefileout.py +4 -3
  43. mistralai/models/deletemodelout.py +5 -4
  44. mistralai/models/deltamessage.py +23 -11
  45. mistralai/models/detailedjobout.py +70 -12
  46. mistralai/models/embeddingrequest.py +14 -9
  47. mistralai/models/embeddingresponse.py +7 -3
  48. mistralai/models/embeddingresponsedata.py +5 -4
  49. mistralai/models/eventout.py +11 -6
  50. mistralai/models/filepurpose.py +8 -0
  51. mistralai/models/files_api_routes_delete_fileop.py +5 -5
  52. mistralai/models/files_api_routes_download_fileop.py +16 -0
  53. mistralai/models/files_api_routes_list_filesop.py +96 -0
  54. mistralai/models/files_api_routes_retrieve_fileop.py +5 -5
  55. mistralai/models/files_api_routes_upload_fileop.py +33 -14
  56. mistralai/models/fileschema.py +22 -15
  57. mistralai/models/fimcompletionrequest.py +44 -16
  58. mistralai/models/fimcompletionresponse.py +8 -4
  59. mistralai/models/fimcompletionstreamrequest.py +44 -16
  60. mistralai/models/finetuneablemodel.py +7 -1
  61. mistralai/models/ftmodelcapabilitiesout.py +6 -4
  62. mistralai/models/ftmodelcard.py +121 -0
  63. mistralai/models/ftmodelout.py +39 -9
  64. mistralai/models/function.py +5 -4
  65. mistralai/models/functioncall.py +4 -3
  66. mistralai/models/functionname.py +17 -0
  67. mistralai/models/githubrepositoryin.py +24 -7
  68. mistralai/models/githubrepositoryout.py +24 -7
  69. mistralai/models/httpvalidationerror.py +1 -3
  70. mistralai/models/imageurl.py +47 -0
  71. mistralai/models/imageurlchunk.py +38 -0
  72. mistralai/models/jobin.py +24 -7
  73. mistralai/models/jobmetadataout.py +32 -8
  74. mistralai/models/jobout.py +65 -12
  75. mistralai/models/jobs_api_routes_batch_cancel_batch_jobop.py +16 -0
  76. mistralai/models/jobs_api_routes_batch_get_batch_jobop.py +16 -0
  77. mistralai/models/jobs_api_routes_batch_get_batch_jobsop.py +95 -0
  78. mistralai/models/jobs_api_routes_fine_tuning_archive_fine_tuned_modelop.py +5 -5
  79. mistralai/models/jobs_api_routes_fine_tuning_cancel_fine_tuning_jobop.py +5 -5
  80. mistralai/models/jobs_api_routes_fine_tuning_create_fine_tuning_jobop.py +3 -2
  81. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobop.py +5 -5
  82. mistralai/models/jobs_api_routes_fine_tuning_get_fine_tuning_jobsop.py +85 -18
  83. mistralai/models/jobs_api_routes_fine_tuning_start_fine_tuning_jobop.py +5 -5
  84. mistralai/models/jobs_api_routes_fine_tuning_unarchive_fine_tuned_modelop.py +5 -5
  85. mistralai/models/jobs_api_routes_fine_tuning_update_fine_tuned_modelop.py +10 -6
  86. mistralai/models/jobsout.py +13 -5
  87. mistralai/models/legacyjobmetadataout.py +55 -9
  88. mistralai/models/listfilesout.py +7 -3
  89. mistralai/models/metricout.py +12 -8
  90. mistralai/models/modelcapabilities.py +9 -4
  91. mistralai/models/modellist.py +21 -7
  92. mistralai/models/responseformat.py +7 -8
  93. mistralai/models/responseformats.py +8 -0
  94. mistralai/models/retrieve_model_v1_models_model_id_getop.py +25 -6
  95. mistralai/models/retrievefileout.py +25 -15
  96. mistralai/models/sampletype.py +6 -2
  97. mistralai/models/security.py +14 -5
  98. mistralai/models/source.py +3 -2
  99. mistralai/models/systemmessage.py +10 -9
  100. mistralai/models/textchunk.py +14 -5
  101. mistralai/models/tool.py +10 -9
  102. mistralai/models/toolcall.py +10 -8
  103. mistralai/models/toolchoice.py +29 -0
  104. mistralai/models/toolchoiceenum.py +7 -0
  105. mistralai/models/toolmessage.py +13 -6
  106. mistralai/models/tooltypes.py +8 -0
  107. mistralai/models/trainingfile.py +4 -4
  108. mistralai/models/trainingparameters.py +34 -8
  109. mistralai/models/trainingparametersin.py +36 -10
  110. mistralai/models/unarchiveftmodelout.py +15 -5
  111. mistralai/models/updateftmodelin.py +9 -6
  112. mistralai/models/uploadfileout.py +22 -15
  113. mistralai/models/usageinfo.py +4 -3
  114. mistralai/models/usermessage.py +42 -10
  115. mistralai/models/validationerror.py +5 -3
  116. mistralai/models/wandbintegration.py +23 -7
  117. mistralai/models/wandbintegrationout.py +23 -8
  118. mistralai/models_.py +416 -294
  119. mistralai/sdk.py +31 -19
  120. mistralai/sdkconfiguration.py +9 -11
  121. mistralai/utils/__init__.py +14 -1
  122. mistralai/utils/annotations.py +13 -2
  123. mistralai/utils/logger.py +4 -1
  124. mistralai/utils/retries.py +2 -1
  125. mistralai/utils/security.py +13 -6
  126. mistralai/utils/serializers.py +25 -0
  127. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/METADATA +171 -66
  128. mistralai-1.2.0.dist-info/RECORD +276 -0
  129. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/WHEEL +1 -1
  130. mistralai_azure/__init__.py +4 -0
  131. mistralai_azure/_hooks/sdkhooks.py +23 -4
  132. mistralai_azure/_hooks/types.py +27 -9
  133. mistralai_azure/_version.py +12 -0
  134. mistralai_azure/basesdk.py +91 -6
  135. mistralai_azure/chat.py +308 -166
  136. mistralai_azure/models/__init__.py +164 -16
  137. mistralai_azure/models/assistantmessage.py +29 -11
  138. mistralai_azure/models/chatcompletionchoice.py +15 -6
  139. mistralai_azure/models/chatcompletionrequest.py +94 -22
  140. mistralai_azure/models/chatcompletionresponse.py +8 -4
  141. mistralai_azure/models/chatcompletionstreamrequest.py +96 -22
  142. mistralai_azure/models/completionchunk.py +12 -5
  143. mistralai_azure/models/completionevent.py +2 -3
  144. mistralai_azure/models/completionresponsestreamchoice.py +19 -8
  145. mistralai_azure/models/contentchunk.py +4 -11
  146. mistralai_azure/models/deltamessage.py +30 -12
  147. mistralai_azure/models/function.py +5 -4
  148. mistralai_azure/models/functioncall.py +4 -3
  149. mistralai_azure/models/functionname.py +17 -0
  150. mistralai_azure/models/httpvalidationerror.py +1 -3
  151. mistralai_azure/models/responseformat.py +7 -8
  152. mistralai_azure/models/responseformats.py +8 -0
  153. mistralai_azure/models/security.py +13 -5
  154. mistralai_azure/models/systemmessage.py +10 -9
  155. mistralai_azure/models/textchunk.py +14 -5
  156. mistralai_azure/models/tool.py +10 -9
  157. mistralai_azure/models/toolcall.py +10 -8
  158. mistralai_azure/models/toolchoice.py +29 -0
  159. mistralai_azure/models/toolchoiceenum.py +7 -0
  160. mistralai_azure/models/toolmessage.py +20 -7
  161. mistralai_azure/models/tooltypes.py +8 -0
  162. mistralai_azure/models/usageinfo.py +4 -3
  163. mistralai_azure/models/usermessage.py +42 -10
  164. mistralai_azure/models/validationerror.py +5 -3
  165. mistralai_azure/sdkconfiguration.py +9 -11
  166. mistralai_azure/utils/__init__.py +16 -3
  167. mistralai_azure/utils/annotations.py +13 -2
  168. mistralai_azure/utils/forms.py +10 -9
  169. mistralai_azure/utils/headers.py +8 -8
  170. mistralai_azure/utils/logger.py +6 -0
  171. mistralai_azure/utils/queryparams.py +16 -14
  172. mistralai_azure/utils/retries.py +2 -1
  173. mistralai_azure/utils/security.py +12 -6
  174. mistralai_azure/utils/serializers.py +42 -8
  175. mistralai_azure/utils/url.py +13 -8
  176. mistralai_azure/utils/values.py +6 -0
  177. mistralai_gcp/__init__.py +4 -0
  178. mistralai_gcp/_hooks/sdkhooks.py +23 -4
  179. mistralai_gcp/_hooks/types.py +27 -9
  180. mistralai_gcp/_version.py +12 -0
  181. mistralai_gcp/basesdk.py +91 -6
  182. mistralai_gcp/chat.py +308 -166
  183. mistralai_gcp/fim.py +198 -132
  184. mistralai_gcp/models/__init__.py +186 -18
  185. mistralai_gcp/models/assistantmessage.py +29 -11
  186. mistralai_gcp/models/chatcompletionchoice.py +15 -6
  187. mistralai_gcp/models/chatcompletionrequest.py +91 -22
  188. mistralai_gcp/models/chatcompletionresponse.py +8 -4
  189. mistralai_gcp/models/chatcompletionstreamrequest.py +93 -22
  190. mistralai_gcp/models/completionchunk.py +12 -5
  191. mistralai_gcp/models/completionevent.py +2 -3
  192. mistralai_gcp/models/completionresponsestreamchoice.py +19 -8
  193. mistralai_gcp/models/contentchunk.py +4 -11
  194. mistralai_gcp/models/deltamessage.py +30 -12
  195. mistralai_gcp/models/fimcompletionrequest.py +51 -17
  196. mistralai_gcp/models/fimcompletionresponse.py +8 -4
  197. mistralai_gcp/models/fimcompletionstreamrequest.py +51 -17
  198. mistralai_gcp/models/function.py +5 -4
  199. mistralai_gcp/models/functioncall.py +4 -3
  200. mistralai_gcp/models/functionname.py +17 -0
  201. mistralai_gcp/models/httpvalidationerror.py +1 -3
  202. mistralai_gcp/models/responseformat.py +7 -8
  203. mistralai_gcp/models/responseformats.py +8 -0
  204. mistralai_gcp/models/security.py +13 -5
  205. mistralai_gcp/models/systemmessage.py +10 -9
  206. mistralai_gcp/models/textchunk.py +14 -5
  207. mistralai_gcp/models/tool.py +10 -9
  208. mistralai_gcp/models/toolcall.py +10 -8
  209. mistralai_gcp/models/toolchoice.py +29 -0
  210. mistralai_gcp/models/toolchoiceenum.py +7 -0
  211. mistralai_gcp/models/toolmessage.py +20 -7
  212. mistralai_gcp/models/tooltypes.py +8 -0
  213. mistralai_gcp/models/usageinfo.py +4 -3
  214. mistralai_gcp/models/usermessage.py +42 -10
  215. mistralai_gcp/models/validationerror.py +5 -3
  216. mistralai_gcp/sdk.py +6 -7
  217. mistralai_gcp/sdkconfiguration.py +9 -11
  218. mistralai_gcp/utils/__init__.py +16 -3
  219. mistralai_gcp/utils/annotations.py +13 -2
  220. mistralai_gcp/utils/forms.py +10 -9
  221. mistralai_gcp/utils/headers.py +8 -8
  222. mistralai_gcp/utils/logger.py +6 -0
  223. mistralai_gcp/utils/queryparams.py +16 -14
  224. mistralai_gcp/utils/retries.py +2 -1
  225. mistralai_gcp/utils/security.py +12 -6
  226. mistralai_gcp/utils/serializers.py +42 -8
  227. mistralai_gcp/utils/url.py +13 -8
  228. mistralai_gcp/utils/values.py +6 -0
  229. mistralai-1.0.3.dist-info/RECORD +0 -236
  230. {mistralai-1.0.3.dist-info → mistralai-1.2.0.dist-info}/LICENSE +0 -0
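Beyond the formatting churn, the file list shows new surface areas in the main mistralai package: classifiers.py (moderation), batch.py and mistral_jobs.py (batch jobs), file download/list operations, and new content types such as imageurlchunk.py. A rough usage sketch follows; the client constructor matches the released 1.x SDK, but the classifiers/batch method names below are assumptions inferred from the new module names, not something this file list confirms.

import os

from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])

# Moderation endpoint surfaced by mistralai/classifiers.py (assumed method name).
moderation = client.classifiers.moderate(
    model="mistral-moderation-latest",  # illustrative model id
    inputs=["Is this text acceptable?"],
)

# Batch-job surface added by mistralai/batch.py and mistralai/mistral_jobs.py
# (assumed attribute path and parameters).
batch_job = client.batch.jobs.create(
    input_files=["<uploaded-file-id>"],  # placeholder file id
    endpoint="/v1/chat/completions",
    model="mistral-small-latest",
)
print(moderation, batch_job)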
mistralai_gcp/chat.py CHANGED
@@ -7,24 +7,36 @@ from mistralai_gcp.types import Nullable, OptionalNullable, UNSET
 from mistralai_gcp.utils import eventstreaming
 from typing import Any, AsyncGenerator, Generator, List, Optional, Union
 
+
 class Chat(BaseSDK):
     r"""Chat Completion API."""
-
-
+
     def stream(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -35,16 +47,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -53,25 +67,31 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/streamRawPredict",
@@ -84,60 +104,84 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionStreamRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(operation_id="stream_chat", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events(http_res, lambda raw: utils.unmarshal_json(raw, models.CompletionEvent), sentinel="[DONE]")
+            return eventstreaming.stream_events(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-            data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+            http_res_text = utils.stream_to_text(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def stream_async(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
         messages: Union[List[models.Messages], List[models.MessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = True,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionStreamRequestToolChoice,
+                models.ChatCompletionStreamRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -148,16 +192,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
-        :param stream:
+        :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -166,26 +212,32 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionStreamRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionStreamRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
-        req = self.build_request(
+
+        req = self.build_request_async(
             method="POST",
             path="/streamRawPredict",
             base_url=base_url,
@@ -197,60 +249,92 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="text/event-stream",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionStreamRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionStreamRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(operation_id="stream_chat", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="stream_chat",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
+            error_status_codes=["422", "4XX", "5XX"],
             stream=True,
-            retry_config=retry_config
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "text/event-stream"):
-            return eventstreaming.stream_events_async(http_res, lambda raw: utils.unmarshal_json(raw, models.CompletionEvent), sentinel="[DONE]")
+            return eventstreaming.stream_events_async(
+                http_res,
+                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
+                sentinel="[DONE]",
+            )
         if utils.match_response(http_res, "422", "application/json"):
-            data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
+            http_res_text = await utils.stream_to_text_async(http_res)
+            data = utils.unmarshal_json(http_res_text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     def complete(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[List[models.ChatCompletionRequestMessages], List[models.ChatCompletionRequestMessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
-        stop: Optional[Union[models.ChatCompletionRequestStop, models.ChatCompletionRequestStopTypedDict]] = None,
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ChatCompletionRequestToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -259,16 +343,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -277,25 +363,33 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(messages, List[models.ChatCompletionRequestMessages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
+
         req = self.build_request(
             method="POST",
             path="/rawPredict",
@@ -308,59 +402,88 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = self.do_request(
-            hook_ctx=HookContext(operation_id="chat_completion_v1_chat_completions_post", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
         )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, Optional[models.ChatCompletionResponse])
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = utils.stream_to_text(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
+
         content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        http_res_text = utils.stream_to_text(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
 
-
-
     async def complete_async(
-        self, *,
+        self,
+        *,
         model: Nullable[str],
-        messages: Union[List[models.ChatCompletionRequestMessages], List[models.ChatCompletionRequestMessagesTypedDict]],
-        temperature: Optional[float] = 0.7,
+        messages: Union[
+            List[models.ChatCompletionRequestMessages],
+            List[models.ChatCompletionRequestMessagesTypedDict],
+        ],
+        temperature: OptionalNullable[float] = UNSET,
         top_p: Optional[float] = 1,
         max_tokens: OptionalNullable[int] = UNSET,
-        min_tokens: OptionalNullable[int] = UNSET,
         stream: Optional[bool] = False,
-        stop: Optional[Union[models.ChatCompletionRequestStop, models.ChatCompletionRequestStopTypedDict]] = None,
+        stop: Optional[
+            Union[
+                models.ChatCompletionRequestStop,
+                models.ChatCompletionRequestStopTypedDict,
+            ]
+        ] = None,
         random_seed: OptionalNullable[int] = UNSET,
-        response_format: Optional[Union[models.ResponseFormat, models.ResponseFormatTypedDict]] = None,
-        tools: OptionalNullable[Union[List[models.Tool], List[models.ToolTypedDict]]] = UNSET,
-        tool_choice: Optional[models.ChatCompletionRequestToolChoice] = "auto",
+        response_format: Optional[
+            Union[models.ResponseFormat, models.ResponseFormatTypedDict]
+        ] = None,
+        tools: OptionalNullable[
+            Union[List[models.Tool], List[models.ToolTypedDict]]
+        ] = UNSET,
+        tool_choice: Optional[
+            Union[
+                models.ChatCompletionRequestToolChoice,
+                models.ChatCompletionRequestToolChoiceTypedDict,
+            ]
+        ] = None,
+        presence_penalty: Optional[float] = 0,
+        frequency_penalty: Optional[float] = 0,
+        n: OptionalNullable[int] = UNSET,
         retries: OptionalNullable[utils.RetryConfig] = UNSET,
         server_url: Optional[str] = None,
         timeout_ms: Optional[int] = None,
@@ -369,16 +492,18 @@ class Chat(BaseSDK):
 
         :param model: ID of the model to use. You can use the [List Available Models](/api/#tag/models/operation/list_models_v1_models_get) API to see all of your available models, or see our [Model overview](/models) for model descriptions.
         :param messages: The prompt(s) to generate completions for, encoded as a list of dict with role and content.
-        :param temperature: What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+        :param temperature: What sampling temperature to use, we recommend between 0.0 and 0.7. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. The default value varies depending on the model you are targeting. Call the `/models` endpoint to retrieve the appropriate value.
         :param top_p: Nucleus sampling, where the model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
         :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
-        :param min_tokens: The minimum number of tokens to generate in the completion.
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
+        :param response_format:
+        :param tools:
+        :param tool_choice:
+        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param n: Number of completions to return for each request, input tokens are only billed once.
         :param retries: Override the default retry configuration for this method
         :param server_url: Override the default server URL for this method
         :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
@@ -387,26 +512,34 @@ class Chat(BaseSDK):
         url_variables = None
         if timeout_ms is None:
             timeout_ms = self.sdk_configuration.timeout_ms
-
+
         if server_url is not None:
             base_url = server_url
-
+
         request = models.ChatCompletionRequest(
             model=model,
             temperature=temperature,
             top_p=top_p,
             max_tokens=max_tokens,
-            min_tokens=min_tokens,
             stream=stream,
             stop=stop,
             random_seed=random_seed,
-            messages=utils.get_pydantic_model(messages, List[models.ChatCompletionRequestMessages]),
-            response_format=utils.get_pydantic_model(response_format, Optional[models.ResponseFormat]),
+            messages=utils.get_pydantic_model(
+                messages, List[models.ChatCompletionRequestMessages]
+            ),
+            response_format=utils.get_pydantic_model(
+                response_format, Optional[models.ResponseFormat]
+            ),
             tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
-            tool_choice=tool_choice,
+            tool_choice=utils.get_pydantic_model(
+                tool_choice, Optional[models.ChatCompletionRequestToolChoice]
+            ),
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            n=n,
         )
-
-        req = self.build_request(
+
+        req = self.build_request_async(
             method="POST",
             path="/rawPredict",
             base_url=base_url,
@@ -418,41 +551,50 @@ class Chat(BaseSDK):
             user_agent_header="user-agent",
             accept_header_value="application/json",
             security=self.sdk_configuration.security,
-            get_serialized_body=lambda: utils.serialize_request_body(request, False, False, "json", models.ChatCompletionRequest),
+            get_serialized_body=lambda: utils.serialize_request_body(
+                request, False, False, "json", models.ChatCompletionRequest
+            ),
             timeout_ms=timeout_ms,
         )
-
+
         if retries == UNSET:
             if self.sdk_configuration.retry_config is not UNSET:
                 retries = self.sdk_configuration.retry_config
 
         retry_config = None
         if isinstance(retries, utils.RetryConfig):
-            retry_config = (retries, [
-                "429",
-                "500",
-                "502",
-                "503",
-                "504"
-            ])
-
+            retry_config = (retries, ["429", "500", "502", "503", "504"])
+
         http_res = await self.do_request_async(
-            hook_ctx=HookContext(operation_id="chat_completion_v1_chat_completions_post", oauth2_scopes=[], security_source=self.sdk_configuration.security),
+            hook_ctx=HookContext(
+                operation_id="chat_completion_v1_chat_completions_post",
+                oauth2_scopes=[],
+                security_source=self.sdk_configuration.security,
+            ),
             request=req,
-            error_status_codes=["422","4XX","5XX"],
-            retry_config=retry_config
+            error_status_codes=["422", "4XX", "5XX"],
+            retry_config=retry_config,
        )
-
+
         data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, Optional[models.ChatCompletionResponse])
+            return utils.unmarshal_json(
+                http_res.text, Optional[models.ChatCompletionResponse]
+            )
         if utils.match_response(http_res, "422", "application/json"):
             data = utils.unmarshal_json(http_res.text, models.HTTPValidationErrorData)
             raise models.HTTPValidationError(data=data)
-        if utils.match_response(http_res, ["4XX","5XX"], "*"):
-            raise models.SDKError("API error occurred", http_res.status_code, http_res.text, http_res)
-
-        content_type = http_res.headers.get("Content-Type")
-        raise models.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res)
+        if utils.match_response(http_res, ["4XX", "5XX"], "*"):
+            http_res_text = await utils.stream_to_text_async(http_res)
+            raise models.SDKError(
+                "API error occurred", http_res.status_code, http_res_text, http_res
+            )
 
-
+        content_type = http_res.headers.get("Content-Type")
+        http_res_text = await utils.stream_to_text_async(http_res)
+        raise models.SDKError(
+            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
+            http_res.status_code,
+            http_res_text,
+            http_res,
+        )
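Taken together, the mistralai_gcp/chat.py changes drop min_tokens, let temperature fall back to the server-side default (UNSET), and add presence_penalty, frequency_penalty, and n to both complete and stream. A minimal sketch of a call against the new signature; the MistralGoogleCloud constructor arguments and the model id are illustrative assumptions, only the parameter names come from the diff above.

from mistralai_gcp import MistralGoogleCloud

# Region/project values are placeholders; credentials come from the ambient
# Google Cloud auth configuration.
client = MistralGoogleCloud(region="europe-west4", project_id="my-project")

res = client.chat.complete(
    model="mistral-large-2407",  # illustrative model id
    messages=[{"role": "user", "content": "Say hello in one word."}],
    # temperature is omitted here so the model's own default applies (UNSET),
    # and min_tokens no longer exists as a parameter in 1.2.0.
    presence_penalty=0.2,   # new in this version
    frequency_penalty=0.2,  # new in this version
    n=1,                    # new in this version
)
if res is not None:
    print(res.choices[0].message.content)

The same parameters apply to client.chat.stream(...), which returns a generator of server-sent CompletionEvent chunks instead of a single response.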