mistralai 1.9.10__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. mistralai/_hooks/registration.py +5 -0
  2. mistralai/_hooks/tracing.py +50 -0
  3. mistralai/_version.py +3 -3
  4. mistralai/accesses.py +51 -116
  5. mistralai/agents.py +58 -85
  6. mistralai/audio.py +8 -3
  7. mistralai/basesdk.py +15 -5
  8. mistralai/batch.py +6 -3
  9. mistralai/beta.py +10 -5
  10. mistralai/chat.py +70 -97
  11. mistralai/classifiers.py +57 -144
  12. mistralai/conversations.py +435 -412
  13. mistralai/documents.py +156 -359
  14. mistralai/embeddings.py +21 -42
  15. mistralai/extra/observability/__init__.py +15 -0
  16. mistralai/extra/observability/otel.py +393 -0
  17. mistralai/extra/run/tools.py +28 -16
  18. mistralai/files.py +53 -176
  19. mistralai/fim.py +46 -73
  20. mistralai/fine_tuning.py +6 -3
  21. mistralai/jobs.py +49 -158
  22. mistralai/libraries.py +71 -178
  23. mistralai/mistral_agents.py +298 -179
  24. mistralai/mistral_jobs.py +51 -138
  25. mistralai/models/__init__.py +94 -5
  26. mistralai/models/agent.py +15 -2
  27. mistralai/models/agentconversation.py +11 -3
  28. mistralai/models/agentcreationrequest.py +6 -2
  29. mistralai/models/agents_api_v1_agents_deleteop.py +16 -0
  30. mistralai/models/agents_api_v1_agents_getop.py +40 -3
  31. mistralai/models/agents_api_v1_agents_listop.py +72 -2
  32. mistralai/models/agents_api_v1_conversations_deleteop.py +18 -0
  33. mistralai/models/agents_api_v1_conversations_listop.py +39 -2
  34. mistralai/models/agentscompletionrequest.py +21 -6
  35. mistralai/models/agentscompletionstreamrequest.py +21 -6
  36. mistralai/models/agentupdaterequest.py +18 -2
  37. mistralai/models/audiotranscriptionrequest.py +2 -0
  38. mistralai/models/batchjobin.py +10 -0
  39. mistralai/models/chatcompletionrequest.py +22 -5
  40. mistralai/models/chatcompletionstreamrequest.py +22 -5
  41. mistralai/models/conversationrequest.py +15 -4
  42. mistralai/models/conversationrestartrequest.py +50 -2
  43. mistralai/models/conversationrestartstreamrequest.py +50 -2
  44. mistralai/models/conversationstreamrequest.py +15 -4
  45. mistralai/models/documentout.py +26 -10
  46. mistralai/models/documentupdatein.py +24 -3
  47. mistralai/models/embeddingrequest.py +8 -8
  48. mistralai/models/files_api_routes_list_filesop.py +7 -0
  49. mistralai/models/fimcompletionrequest.py +8 -9
  50. mistralai/models/fimcompletionstreamrequest.py +8 -9
  51. mistralai/models/httpvalidationerror.py +11 -6
  52. mistralai/models/libraries_documents_list_v1op.py +15 -2
  53. mistralai/models/libraryout.py +10 -7
  54. mistralai/models/listfilesout.py +35 -4
  55. mistralai/models/mistralerror.py +26 -0
  56. mistralai/models/modelcapabilities.py +13 -4
  57. mistralai/models/modelconversation.py +8 -2
  58. mistralai/models/no_response_error.py +13 -0
  59. mistralai/models/ocrpageobject.py +26 -5
  60. mistralai/models/ocrrequest.py +17 -1
  61. mistralai/models/ocrtableobject.py +31 -0
  62. mistralai/models/prediction.py +4 -0
  63. mistralai/models/requestsource.py +7 -0
  64. mistralai/models/responseformat.py +4 -2
  65. mistralai/models/responseformats.py +0 -1
  66. mistralai/models/responsevalidationerror.py +25 -0
  67. mistralai/models/sdkerror.py +30 -14
  68. mistralai/models/sharingdelete.py +36 -5
  69. mistralai/models/sharingin.py +36 -5
  70. mistralai/models/sharingout.py +3 -3
  71. mistralai/models/toolexecutiondeltaevent.py +13 -4
  72. mistralai/models/toolexecutiondoneevent.py +13 -4
  73. mistralai/models/toolexecutionentry.py +9 -4
  74. mistralai/models/toolexecutionstartedevent.py +13 -4
  75. mistralai/models_.py +67 -212
  76. mistralai/ocr.py +33 -36
  77. mistralai/sdk.py +15 -2
  78. mistralai/transcriptions.py +21 -60
  79. mistralai/utils/__init__.py +18 -5
  80. mistralai/utils/eventstreaming.py +10 -0
  81. mistralai/utils/serializers.py +3 -2
  82. mistralai/utils/unmarshal_json_response.py +24 -0
  83. {mistralai-1.9.10.dist-info → mistralai-1.10.0.dist-info}/METADATA +89 -40
  84. {mistralai-1.9.10.dist-info → mistralai-1.10.0.dist-info}/RECORD +86 -75
  85. {mistralai-1.9.10.dist-info → mistralai-1.10.0.dist-info}/WHEEL +1 -1
  86. {mistralai-1.9.10.dist-info → mistralai-1.10.0.dist-info/licenses}/LICENSE +0 -0
mistralai/chat.py CHANGED
@@ -5,7 +5,8 @@ from mistralai import models, utils
 from mistralai._hooks import HookContext
 from mistralai.types import OptionalNullable, UNSET
 from mistralai.utils import eventstreaming, get_security_from_env
-from typing import Any, List, Mapping, Optional, Union
+from mistralai.utils.unmarshal_json_response import unmarshal_json_response
+from typing import Any, Dict, List, Mapping, Optional, Union
 
 # region imports
 from typing import Type
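
Every response handler in this file now routes 200 and 422 bodies through the new `unmarshal_json_response` helper (added in `mistralai/utils/unmarshal_json_response.py`, +24 lines in the file list above). The helper's implementation is not part of this diff; the sketch below is inferred purely from the call sites that follow and is hypothetical, not the shipped code:

```python
from typing import Optional, Type, TypeVar

import httpx

from mistralai import utils

T = TypeVar("T")


def unmarshal_json_response(
    typ: Type[T], http_res: httpx.Response, body: Optional[str] = None
) -> T:
    # Call sites below pass the target model class first, then the httpx
    # response, and, on streaming paths where the body has already been
    # drained, the text as a third argument.
    raw = body if body is not None else http_res.text
    return utils.unmarshal_json(raw, typ)
```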
@@ -104,6 +105,7 @@ class Chat(BaseSDK):
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
+        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
         response_format: Optional[
             Union[models.ResponseFormat, models.ResponseFormatTypedDict]
         ] = None,
@@ -140,14 +142,15 @@
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
-        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
-        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param metadata:
+        :param response_format: Specify the format that the model must output. By default it will use `{ \"type\": \"text\" }`. Setting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is in JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ \"type\": \"json_schema\" }` enables JSON schema mode, which guarantees the message the model generates is in JSON and follows the schema you provide.
+        :param tools: A list of tools the model may call. Use this to provide a list of functions the model may generate JSON inputs for.
+        :param tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `any` or `required` means the model must call one or more tools. Specifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.
+        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
         :param n: Number of completions to return for each request, input tokens are only billed once.
-        :param prediction:
-        :param parallel_tool_calls:
+        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
+        :param parallel_tool_calls: Whether to enable parallel function calling during tool use, when enabled the model can call multiple tools in parallel.
         :param prompt_mode: Allows toggling between the reasoning mode and no system prompt. When set to `reasoning` the system prompt for reasoning models will be used.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
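
This docstring hunk documents the headline change to `chat.complete`: a new optional `metadata` dict (wired into the request in the next hunk), plus real descriptions for previously blank parameters. A usage sketch; the model name and metadata keys are illustrative placeholders, not taken from this diff:

```python
from mistralai import Mistral

with Mistral(api_key="...") as client:
    res = client.chat.complete(
        model="mistral-small-latest",  # placeholder model name
        messages=[{"role": "user", "content": "Return a JSON greeting."}],
        metadata={"trace_id": "abc123"},          # new in 1.10.0
        response_format={"type": "json_object"},  # JSON mode, per the docstring
    )
    print(res.choices[0].message.content)
```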
@@ -173,6 +176,7 @@ class Chat(BaseSDK):
             stream=stream,
             stop=stop,
             random_seed=random_seed,
+            metadata=metadata,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
             response_format=utils.get_pydantic_model(
                 response_format, Optional[models.ResponseFormat]
@@ -236,31 +240,20 @@
 
         response_data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, models.ChatCompletionResponse)
+            return unmarshal_json_response(models.ChatCompletionResponse, http_res)
         if utils.match_response(http_res, "422", "application/json"):
-            response_data = utils.unmarshal_json(
-                http_res.text, models.HTTPValidationErrorData
+            response_data = unmarshal_json_response(
+                models.HTTPValidationErrorData, http_res
             )
-            raise models.HTTPValidationError(data=response_data)
+            raise models.HTTPValidationError(response_data, http_res)
         if utils.match_response(http_res, "4XX", "*"):
             http_res_text = utils.stream_to_text(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
         if utils.match_response(http_res, "5XX", "*"):
             http_res_text = utils.stream_to_text(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
 
-        content_type = http_res.headers.get("Content-Type")
-        http_res_text = utils.stream_to_text(http_res)
-        raise models.SDKError(
-            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
-            http_res.status_code,
-            http_res_text,
-            http_res,
-        )
+        raise models.SDKError("Unexpected response received", http_res)
 
     async def complete_async(
         self,
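
The error paths above switch to the new exception signatures: `HTTPValidationError` and `SDKError` now take the `httpx` response positionally after the data or message, rather than a separate status code plus body plus response. Code that merely catches these exceptions should keep working; the sketch below assumes the `.data` attribute on `HTTPValidationError` survives from 1.9.x, which this diff does not show:

```python
from mistralai import Mistral, models

with Mistral(api_key="...") as client:
    try:
        res = client.chat.complete(
            model="mistral-small-latest",  # placeholder model name
            messages=[{"role": "user", "content": "hello"}],
        )
    except models.HTTPValidationError as e:
        # 422 responses still carry the parsed validation payload.
        print("validation error:", e.data)
    except models.SDKError as e:
        # str(e) keeps working regardless of the constructor change.
        print("API error:", e)
```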
@@ -273,6 +266,7 @@
         stream: Optional[bool] = False,
         stop: Optional[Union[models.Stop, models.StopTypedDict]] = None,
         random_seed: OptionalNullable[int] = UNSET,
+        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
         response_format: Optional[
             Union[models.ResponseFormat, models.ResponseFormatTypedDict]
         ] = None,
@@ -309,14 +303,15 @@
         :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-side events as they become available, with the stream terminated by a data: [DONE] message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
-        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
-        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param metadata:
+        :param response_format: Specify the format that the model must output. By default it will use `{ \"type\": \"text\" }`. Setting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is in JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ \"type\": \"json_schema\" }` enables JSON schema mode, which guarantees the message the model generates is in JSON and follows the schema you provide.
+        :param tools: A list of tools the model may call. Use this to provide a list of functions the model may generate JSON inputs for.
+        :param tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `any` or `required` means the model must call one or more tools. Specifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.
+        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
         :param n: Number of completions to return for each request, input tokens are only billed once.
-        :param prediction:
-        :param parallel_tool_calls:
+        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
+        :param parallel_tool_calls: Whether to enable parallel function calling during tool use, when enabled the model can call multiple tools in parallel.
         :param prompt_mode: Allows toggling between the reasoning mode and no system prompt. When set to `reasoning` the system prompt for reasoning models will be used.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
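
`complete_async` gains the identical `metadata` keyword; only the awaitable call path differs. A sketch of the async variant, with illustrative values:

```python
import asyncio

from mistralai import Mistral


async def main() -> None:
    async with Mistral(api_key="...") as client:
        res = await client.chat.complete_async(
            model="mistral-small-latest",  # placeholder model name
            messages=[{"role": "user", "content": "hello"}],
            metadata={"run": "nightly-eval"},  # new in 1.10.0
        )
        print(res.choices[0].message.content)


asyncio.run(main())
```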
@@ -342,6 +337,7 @@
             stream=stream,
             stop=stop,
             random_seed=random_seed,
+            metadata=metadata,
             messages=utils.get_pydantic_model(messages, List[models.Messages]),
             response_format=utils.get_pydantic_model(
                 response_format, Optional[models.ResponseFormat]
@@ -405,31 +401,20 @@
 
         response_data: Any = None
         if utils.match_response(http_res, "200", "application/json"):
-            return utils.unmarshal_json(http_res.text, models.ChatCompletionResponse)
+            return unmarshal_json_response(models.ChatCompletionResponse, http_res)
         if utils.match_response(http_res, "422", "application/json"):
-            response_data = utils.unmarshal_json(
-                http_res.text, models.HTTPValidationErrorData
+            response_data = unmarshal_json_response(
+                models.HTTPValidationErrorData, http_res
             )
-            raise models.HTTPValidationError(data=response_data)
+            raise models.HTTPValidationError(response_data, http_res)
         if utils.match_response(http_res, "4XX", "*"):
             http_res_text = await utils.stream_to_text_async(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
         if utils.match_response(http_res, "5XX", "*"):
             http_res_text = await utils.stream_to_text_async(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
 
-        content_type = http_res.headers.get("Content-Type")
-        http_res_text = await utils.stream_to_text_async(http_res)
-        raise models.SDKError(
-            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
-            http_res.status_code,
-            http_res_text,
-            http_res,
-        )
+        raise models.SDKError("Unexpected response received", http_res)
 
     def stream(
         self,
@@ -450,6 +435,7 @@
             ]
         ] = None,
         random_seed: OptionalNullable[int] = UNSET,
+        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
         response_format: Optional[
             Union[models.ResponseFormat, models.ResponseFormatTypedDict]
         ] = None,
@@ -488,14 +474,15 @@
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
-        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
-        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param metadata:
+        :param response_format: Specify the format that the model must output. By default it will use `{ \"type\": \"text\" }`. Setting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is in JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ \"type\": \"json_schema\" }` enables JSON schema mode, which guarantees the message the model generates is in JSON and follows the schema you provide.
+        :param tools: A list of tools the model may call. Use this to provide a list of functions the model may generate JSON inputs for.
+        :param tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `any` or `required` means the model must call one or more tools. Specifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.
+        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
         :param n: Number of completions to return for each request, input tokens are only billed once.
-        :param prediction:
-        :param parallel_tool_calls:
+        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
+        :param parallel_tool_calls: Whether to enable parallel function calling during tool use, when enabled the model can call multiple tools in parallel.
         :param prompt_mode: Allows toggling between the reasoning mode and no system prompt. When set to `reasoning` the system prompt for reasoning models will be used.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
@@ -521,6 +508,7 @@
             stream=stream,
             stop=stop,
             random_seed=random_seed,
+            metadata=metadata,
             messages=utils.get_pydantic_model(
                 messages, List[models.ChatCompletionStreamRequestMessages]
             ),
@@ -591,32 +579,23 @@
                 http_res,
                 lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
                 sentinel="[DONE]",
+                client_ref=self,
             )
         if utils.match_response(http_res, "422", "application/json"):
             http_res_text = utils.stream_to_text(http_res)
-            response_data = utils.unmarshal_json(
-                http_res_text, models.HTTPValidationErrorData
+            response_data = unmarshal_json_response(
+                models.HTTPValidationErrorData, http_res, http_res_text
             )
-            raise models.HTTPValidationError(data=response_data)
+            raise models.HTTPValidationError(response_data, http_res, http_res_text)
         if utils.match_response(http_res, "4XX", "*"):
             http_res_text = utils.stream_to_text(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
         if utils.match_response(http_res, "5XX", "*"):
             http_res_text = utils.stream_to_text(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
 
-        content_type = http_res.headers.get("Content-Type")
         http_res_text = utils.stream_to_text(http_res)
-        raise models.SDKError(
-            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
-            http_res.status_code,
-            http_res_text,
-            http_res,
-        )
+        raise models.SDKError("Unexpected response received", http_res, http_res_text)
 
     async def stream_async(
         self,
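
The streaming handler now hands `client_ref=self` to the event stream, presumably so the stream can reach back to the owning client; the companion change is `mistralai/utils/eventstreaming.py` (+10 lines) in the file list above. Consuming the stream is unchanged. A sketch following the SDK's documented iteration pattern, with a placeholder model name:

```python
from mistralai import Mistral

with Mistral(api_key="...") as client:
    res = client.chat.stream(
        model="mistral-small-latest",  # placeholder model name
        messages=[{"role": "user", "content": "Tell me a short story."}],
    )
    with res as event_stream:
        for event in event_stream:
            # Each event is a models.CompletionEvent; the chunk text lives
            # on event.data.choices[0].delta.content.
            delta = event.data.choices[0].delta.content
            if delta:
                print(delta, end="")
```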
@@ -637,6 +616,7 @@
             ]
         ] = None,
         random_seed: OptionalNullable[int] = UNSET,
+        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
         response_format: Optional[
             Union[models.ResponseFormat, models.ResponseFormatTypedDict]
         ] = None,
@@ -675,14 +655,15 @@
         :param stream:
         :param stop: Stop generation if this token is detected. Or if one of these tokens is detected when providing an array
         :param random_seed: The seed to use for random sampling. If set, different calls will generate deterministic results.
-        :param response_format:
-        :param tools:
-        :param tool_choice:
-        :param presence_penalty: presence_penalty determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
-        :param frequency_penalty: frequency_penalty penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
+        :param metadata:
+        :param response_format: Specify the format that the model must output. By default it will use `{ \"type\": \"text\" }`. Setting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is in JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ \"type\": \"json_schema\" }` enables JSON schema mode, which guarantees the message the model generates is in JSON and follows the schema you provide.
+        :param tools: A list of tools the model may call. Use this to provide a list of functions the model may generate JSON inputs for.
+        :param tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can pick between generating a message or calling one or more tools. `any` or `required` means the model must call one or more tools. Specifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.
+        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
+        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
         :param n: Number of completions to return for each request, input tokens are only billed once.
-        :param prediction:
-        :param parallel_tool_calls:
+        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
+        :param parallel_tool_calls: Whether to enable parallel function calling during tool use, when enabled the model can call multiple tools in parallel.
         :param prompt_mode: Allows toggling between the reasoning mode and no system prompt. When set to `reasoning` the system prompt for reasoning models will be used.
         :param safe_prompt: Whether to inject a safety prompt before all conversations.
         :param retries: Override the default retry configuration for this method
@@ -708,6 +689,7 @@
             stream=stream,
             stop=stop,
             random_seed=random_seed,
+            metadata=metadata,
             messages=utils.get_pydantic_model(
                 messages, List[models.ChatCompletionStreamRequestMessages]
             ),
@@ -778,29 +760,20 @@
                 http_res,
                 lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
                 sentinel="[DONE]",
+                client_ref=self,
             )
         if utils.match_response(http_res, "422", "application/json"):
             http_res_text = await utils.stream_to_text_async(http_res)
-            response_data = utils.unmarshal_json(
-                http_res_text, models.HTTPValidationErrorData
+            response_data = unmarshal_json_response(
+                models.HTTPValidationErrorData, http_res, http_res_text
             )
-            raise models.HTTPValidationError(data=response_data)
+            raise models.HTTPValidationError(response_data, http_res, http_res_text)
         if utils.match_response(http_res, "4XX", "*"):
             http_res_text = await utils.stream_to_text_async(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
         if utils.match_response(http_res, "5XX", "*"):
             http_res_text = await utils.stream_to_text_async(http_res)
-            raise models.SDKError(
-                "API error occurred", http_res.status_code, http_res_text, http_res
-            )
+            raise models.SDKError("API error occurred", http_res, http_res_text)
 
-        content_type = http_res.headers.get("Content-Type")
         http_res_text = await utils.stream_to_text_async(http_res)
-        raise models.SDKError(
-            f"Unexpected response received (code: {http_res.status_code}, type: {content_type})",
-            http_res.status_code,
-            http_res_text,
-            http_res,
-        )
+        raise models.SDKError("Unexpected response received", http_res, http_res_text)
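
The async streaming handler mirrors the sync one, down to the same `client_ref` and exception changes. A closing sketch that combines the new `metadata` keyword with async iteration; all values are illustrative placeholders:

```python
import asyncio

from mistralai import Mistral


async def main() -> None:
    async with Mistral(api_key="...") as client:
        res = await client.chat.stream_async(
            model="mistral-small-latest",  # placeholder model name
            messages=[{"role": "user", "content": "hello"}],
            metadata={"trace_id": "abc123"},  # new in 1.10.0
        )
        async with res as event_stream:
            async for event in event_stream:
                delta = event.data.choices[0].delta.content
                if delta:
                    print(delta, end="")


asyncio.run(main())
```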