openai 0.36.1 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/all_models.rb +1 -0
  5. data/lib/openai/models/beta/assistant_create_params.rb +4 -3
  6. data/lib/openai/models/beta/assistant_update_params.rb +4 -3
  7. data/lib/openai/models/beta/threads/run_create_params.rb +4 -3
  8. data/lib/openai/models/chat/completion_create_params.rb +4 -3
  9. data/lib/openai/models/container_create_params.rb +22 -1
  10. data/lib/openai/models/container_create_response.rb +32 -1
  11. data/lib/openai/models/container_list_response.rb +32 -1
  12. data/lib/openai/models/container_retrieve_response.rb +32 -1
  13. data/lib/openai/models/conversations/conversation_create_params.rb +2 -2
  14. data/lib/openai/models/conversations/item_create_params.rb +2 -2
  15. data/lib/openai/models/evals/create_eval_completions_run_data_source.rb +4 -3
  16. data/lib/openai/models/evals/run_cancel_response.rb +8 -6
  17. data/lib/openai/models/evals/run_create_params.rb +8 -6
  18. data/lib/openai/models/evals/run_create_response.rb +8 -6
  19. data/lib/openai/models/evals/run_list_response.rb +8 -6
  20. data/lib/openai/models/evals/run_retrieve_response.rb +8 -6
  21. data/lib/openai/models/graders/score_model_grader.rb +4 -3
  22. data/lib/openai/models/realtime/input_audio_buffer_dtmf_event_received_event.rb +43 -0
  23. data/lib/openai/models/realtime/output_audio_buffer_clear_event.rb +4 -4
  24. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +13 -5
  25. data/lib/openai/models/realtime/realtime_client_event.rb +1 -1
  26. data/lib/openai/models/realtime/realtime_server_event.rb +16 -9
  27. data/lib/openai/models/realtime/realtime_session.rb +13 -5
  28. data/lib/openai/models/realtime/realtime_session_create_request.rb +14 -9
  29. data/lib/openai/models/realtime/realtime_session_create_response.rb +27 -14
  30. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +13 -5
  31. data/lib/openai/models/realtime/realtime_truncation.rb +14 -9
  32. data/lib/openai/models/reasoning.rb +4 -3
  33. data/lib/openai/models/reasoning_effort.rb +5 -3
  34. data/lib/openai/models/responses/compacted_response.rb +56 -0
  35. data/lib/openai/models/responses/input_token_count_params.rb +4 -4
  36. data/lib/openai/models/responses/response.rb +6 -6
  37. data/lib/openai/models/responses/response_compact_params.rb +344 -0
  38. data/lib/openai/models/responses/response_compaction_item.rb +43 -0
  39. data/lib/openai/models/responses/response_compaction_item_param.rb +36 -0
  40. data/lib/openai/models/responses/response_create_params.rb +4 -4
  41. data/lib/openai/models/responses/response_function_shell_call_output_content.rb +10 -10
  42. data/lib/openai/models/responses/response_function_shell_tool_call.rb +5 -5
  43. data/lib/openai/models/responses/response_function_shell_tool_call_output.rb +2 -2
  44. data/lib/openai/models/responses/response_input_item.rb +20 -17
  45. data/lib/openai/models/responses/response_output_item.rb +4 -1
  46. data/lib/openai/models/responses/response_output_item_added_event.rb +2 -2
  47. data/lib/openai/models/responses/response_output_item_done_event.rb +2 -2
  48. data/lib/openai/models/responses/tool.rb +4 -2
  49. data/lib/openai/models/responses/tool_choice_shell.rb +1 -1
  50. data/lib/openai/models/responses_model.rb +1 -0
  51. data/lib/openai/models/video_create_params.rb +11 -6
  52. data/lib/openai/resources/containers.rb +3 -1
  53. data/lib/openai/resources/conversations/items.rb +1 -1
  54. data/lib/openai/resources/conversations.rb +1 -1
  55. data/lib/openai/resources/responses/input_tokens.rb +1 -1
  56. data/lib/openai/resources/responses.rb +33 -2
  57. data/lib/openai/resources/videos.rb +6 -3
  58. data/lib/openai/resources/webhooks.rb +0 -3
  59. data/lib/openai/version.rb +1 -1
  60. data/lib/openai.rb +6 -0
  61. data/manifest.yaml +1 -0
  62. data/rbi/openai/models/all_models.rbi +5 -0
  63. data/rbi/openai/models/beta/assistant_create_params.rbi +8 -6
  64. data/rbi/openai/models/beta/assistant_update_params.rbi +8 -6
  65. data/rbi/openai/models/beta/threads/run_create_params.rbi +8 -6
  66. data/rbi/openai/models/chat/completion_create_params.rbi +8 -6
  67. data/rbi/openai/models/container_create_params.rbi +51 -0
  68. data/rbi/openai/models/container_create_response.rbi +81 -3
  69. data/rbi/openai/models/container_list_response.rbi +80 -3
  70. data/rbi/openai/models/container_retrieve_response.rbi +83 -3
  71. data/rbi/openai/models/conversations/conversation_create_params.rbi +3 -0
  72. data/rbi/openai/models/conversations/item_create_params.rbi +3 -0
  73. data/rbi/openai/models/evals/create_eval_completions_run_data_source.rbi +8 -6
  74. data/rbi/openai/models/evals/run_cancel_response.rbi +16 -12
  75. data/rbi/openai/models/evals/run_create_params.rbi +16 -12
  76. data/rbi/openai/models/evals/run_create_response.rbi +16 -12
  77. data/rbi/openai/models/evals/run_list_response.rbi +16 -12
  78. data/rbi/openai/models/evals/run_retrieve_response.rbi +16 -12
  79. data/rbi/openai/models/graders/score_model_grader.rbi +8 -6
  80. data/rbi/openai/models/realtime/input_audio_buffer_dtmf_event_received_event.rbi +56 -0
  81. data/rbi/openai/models/realtime/output_audio_buffer_clear_event.rbi +4 -4
  82. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +24 -8
  83. data/rbi/openai/models/realtime/realtime_server_event.rbi +6 -5
  84. data/rbi/openai/models/realtime/realtime_session.rbi +24 -8
  85. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +28 -18
  86. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +52 -26
  87. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +24 -8
  88. data/rbi/openai/models/realtime/realtime_truncation.rbi +14 -9
  89. data/rbi/openai/models/reasoning.rbi +8 -6
  90. data/rbi/openai/models/reasoning_effort.rbi +5 -3
  91. data/rbi/openai/models/responses/compacted_response.rbi +105 -0
  92. data/rbi/openai/models/responses/response.rbi +1 -0
  93. data/rbi/openai/models/responses/response_compact_params.rbi +593 -0
  94. data/rbi/openai/models/responses/response_compaction_item.rbi +67 -0
  95. data/rbi/openai/models/responses/response_compaction_item_param.rbi +54 -0
  96. data/rbi/openai/models/responses/response_function_shell_call_output_content.rbi +9 -9
  97. data/rbi/openai/models/responses/response_function_shell_tool_call.rbi +6 -6
  98. data/rbi/openai/models/responses/response_function_shell_tool_call_output.rbi +1 -1
  99. data/rbi/openai/models/responses/response_input_item.rbi +18 -17
  100. data/rbi/openai/models/responses/response_output_item.rbi +1 -0
  101. data/rbi/openai/models/responses/response_output_item_added_event.rbi +1 -0
  102. data/rbi/openai/models/responses/response_output_item_done_event.rbi +1 -0
  103. data/rbi/openai/models/responses/tool.rbi +6 -3
  104. data/rbi/openai/models/responses/tool_choice_shell.rbi +1 -1
  105. data/rbi/openai/models/responses_model.rbi +5 -0
  106. data/rbi/openai/models/video_create_params.rbi +10 -6
  107. data/rbi/openai/resources/beta/assistants.rbi +8 -6
  108. data/rbi/openai/resources/beta/threads/runs.rbi +8 -6
  109. data/rbi/openai/resources/chat/completions.rbi +8 -6
  110. data/rbi/openai/resources/containers.rbi +3 -0
  111. data/rbi/openai/resources/conversations/items.rbi +1 -0
  112. data/rbi/openai/resources/conversations.rbi +1 -0
  113. data/rbi/openai/resources/realtime/calls.rbi +14 -9
  114. data/rbi/openai/resources/responses.rbi +42 -0
  115. data/rbi/openai/resources/videos.rbi +5 -3
  116. data/sig/openai/models/all_models.rbs +2 -0
  117. data/sig/openai/models/container_create_params.rbs +23 -1
  118. data/sig/openai/models/container_create_response.rbs +32 -3
  119. data/sig/openai/models/container_list_response.rbs +32 -3
  120. data/sig/openai/models/container_retrieve_response.rbs +32 -3
  121. data/sig/openai/models/realtime/input_audio_buffer_dtmf_event_received_event.rbs +32 -0
  122. data/sig/openai/models/realtime/realtime_server_event.rbs +1 -0
  123. data/sig/openai/models/reasoning_effort.rbs +2 -1
  124. data/sig/openai/models/responses/compacted_response.rbs +42 -0
  125. data/sig/openai/models/responses/response_compact_params.rbs +226 -0
  126. data/sig/openai/models/responses/response_compaction_item.rbs +39 -0
  127. data/sig/openai/models/responses/response_compaction_item_param.rbs +28 -0
  128. data/sig/openai/models/responses/response_input_item.rbs +1 -0
  129. data/sig/openai/models/responses/response_output_item.rbs +1 -0
  130. data/sig/openai/models/responses_model.rbs +2 -0
  131. data/sig/openai/resources/containers.rbs +1 -0
  132. data/sig/openai/resources/responses.rbs +8 -0
  133. metadata +31 -2
@@ -21,6 +21,7 @@ module OpenAI
  OpenAI::Realtime::RealtimeErrorEvent,
  OpenAI::Realtime::InputAudioBufferClearedEvent,
  OpenAI::Realtime::InputAudioBufferCommittedEvent,
+ OpenAI::Realtime::InputAudioBufferDtmfEventReceivedEvent,
  OpenAI::Realtime::InputAudioBufferSpeechStartedEvent,
  OpenAI::Realtime::InputAudioBufferSpeechStoppedEvent,
  OpenAI::Realtime::RateLimitsUpdatedEvent,
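
The server-event union gains `InputAudioBufferDtmfEventReceivedEvent`, so DTMF key presses on SIP calls now surface as a typed event. A minimal sketch of dispatching on it — only the event class names are confirmed by this diff; the event loop and handler methods are hypothetical:

    # Hypothetical event loop; how deserialized server events arrive is
    # transport-specific and not part of this diff.
    connection.each_event do |event|
      case event
      when OpenAI::Realtime::InputAudioBufferDtmfEventReceivedEvent
        # A DTMF tone was received on the call's input audio buffer.
        handle_dtmf(event)
      when OpenAI::Realtime::InputAudioBufferSpeechStartedEvent
        pause_playback
      end
    end
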
@@ -171,8 +172,8 @@ module OpenAI
  sig { returns(Symbol) }
  attr_accessor :type

- # **WebRTC Only:** Emitted when the server begins streaming audio to the client.
- # This event is emitted after an audio content part has been added
+ # **WebRTC/SIP Only:** Emitted when the server begins streaming audio to the
+ # client. This event is emitted after an audio content part has been added
  # (`response.content_part.added`) to the response.
  # [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
  sig do
@@ -220,7 +221,7 @@ module OpenAI
  sig { returns(Symbol) }
  attr_accessor :type

- # **WebRTC Only:** Emitted when the output audio buffer has been completely
+ # **WebRTC/SIP Only:** Emitted when the output audio buffer has been completely
  # drained on the server, and no more audio is forthcoming. This event is emitted
  # after the full response data has been sent to the client (`response.done`).
  # [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
@@ -269,8 +270,8 @@ module OpenAI
  sig { returns(Symbol) }
  attr_accessor :type

- # **WebRTC Only:** Emitted when the output audio buffer is cleared. This happens
- # either in VAD mode when the user has interrupted
+ # **WebRTC/SIP Only:** Emitted when the output audio buffer is cleared. This
+ # happens either in VAD mode when the user has interrupted
  # (`input_audio_buffer.speech_started`), or when the client has emitted the
  # `output_audio_buffer.clear` event to manually cut off the current audio
  # response.
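
With these buffer events now covering SIP as well as WebRTC, the manual-cutoff flow described above works the same on either transport. A hedged sketch of emitting the `output_audio_buffer.clear` client event — the model class comes from this release's `output_audio_buffer_clear_event.rb`, but the transport and serialization calls below are assumptions, not SDK API shown in this diff:

    # Build the client event, then send it over your own transport.
    clear_event = OpenAI::Realtime::OutputAudioBufferClearEvent.new
    data_channel.send_message(clear_event.to_json)  # hypothetical WebRTC data channel
    # The server then emits `output_audio_buffer.cleared` once the buffer is dropped.
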
@@ -933,7 +933,11 @@ module OpenAI
  attr_accessor :type

  # Whether or not to automatically generate a response when a VAD stop event
- # occurs.
+ # occurs. If `interrupt_response` is set to `false` this may fail to create a
+ # response if the model is already responding.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  sig { returns(T.nilable(T::Boolean)) }
  attr_reader :create_response

@@ -955,9 +959,13 @@ module OpenAI
  sig { returns(T.nilable(Integer)) }
  attr_accessor :idle_timeout_ms

- # Whether or not to automatically interrupt any ongoing response with output to
- # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- # occurs.
+ # Whether or not to automatically interrupt (cancel) any ongoing response with
+ # output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ # start event occurs. If `true` then the response will be cancelled, otherwise it
+ # will continue until complete.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  sig { returns(T.nilable(T::Boolean)) }
  attr_reader :interrupt_response

@@ -1005,7 +1013,11 @@ module OpenAI
  end
  def self.new(
  # Whether or not to automatically generate a response when a VAD stop event
- # occurs.
+ # occurs. If `interrupt_response` is set to `false` this may fail to create a
+ # response if the model is already responding.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  create_response: nil,
  # Optional timeout after which a model response will be triggered automatically.
  # This is useful for situations in which a long pause from the user is unexpected,
@@ -1020,9 +1032,13 @@ module OpenAI
  # Response) will be emitted when the timeout is reached. Idle timeout is currently
  # only supported for `server_vad` mode.
  idle_timeout_ms: nil,
- # Whether or not to automatically interrupt any ongoing response with output to
- # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- # occurs.
+ # Whether or not to automatically interrupt (cancel) any ongoing response with
+ # output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ # start event occurs. If `true` then the response will be cancelled, otherwise it
+ # will continue until complete.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  interrupt_response: nil,
  # Used only for `server_vad` mode. Amount of audio to include before the VAD
  # detected speech (in milliseconds). Defaults to 300ms.
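
The expanded doc comments make the interaction explicit: `create_response` governs whether a VAD stop auto-creates a response, `interrupt_response` governs whether a VAD start cancels one in flight, and disabling both yields pure VAD telemetry with no automatic responses. A sketch using only fields documented in this diff; nesting it under `turn_detection` in a session payload is an assumption from the Realtime API, not shown in these hunks:

    # Listen-only VAD: speech start/stop events are still emitted,
    # but the model never responds or interrupts automatically.
    turn_detection = {
      type: :server_vad,
      create_response: false,    # don't create a response on a VAD stop event
      interrupt_response: false  # don't cancel an in-flight response on a VAD start event
    }
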
@@ -214,15 +214,20 @@ module OpenAI
  # limit, the conversation be truncated, meaning messages (starting from the
  # oldest) will not be included in the model's context. A 32k context model with
  # 4,096 max output tokens can only include 28,224 tokens in the context before
- # truncation occurs. Clients can configure truncation behavior to truncate with a
- # lower max token limit, which is an effective way to control token usage and
- # cost. Truncation will reduce the number of cached tokens on the next turn
- # (busting the cache), since messages are dropped from the beginning of the
- # context. However, clients can also configure truncation to retain messages up to
- # a fraction of the maximum context size, which will reduce the need for future
- # truncations and thus improve the cache rate. Truncation can be disabled
- # entirely, which means the server will never truncate but would instead return an
- # error if the conversation exceeds the model's input token limit.
+ # truncation occurs.
+ #
+ # Clients can configure truncation behavior to truncate with a lower max token
+ # limit, which is an effective way to control token usage and cost.
+ #
+ # Truncation will reduce the number of cached tokens on the next turn (busting the
+ # cache), since messages are dropped from the beginning of the context. However,
+ # clients can also configure truncation to retain messages up to a fraction of the
+ # maximum context size, which will reduce the need for future truncations and thus
+ # improve the cache rate.
+ #
+ # Truncation can be disabled entirely, which means the server will never truncate
+ # but would instead return an error if the conversation exceeds the model's input
+ # token limit.
  sig do
  returns(
  T.nilable(
@@ -344,15 +349,20 @@ module OpenAI
  # limit, the conversation be truncated, meaning messages (starting from the
  # oldest) will not be included in the model's context. A 32k context model with
  # 4,096 max output tokens can only include 28,224 tokens in the context before
- # truncation occurs. Clients can configure truncation behavior to truncate with a
- # lower max token limit, which is an effective way to control token usage and
- # cost. Truncation will reduce the number of cached tokens on the next turn
- # (busting the cache), since messages are dropped from the beginning of the
- # context. However, clients can also configure truncation to retain messages up to
- # a fraction of the maximum context size, which will reduce the need for future
- # truncations and thus improve the cache rate. Truncation can be disabled
- # entirely, which means the server will never truncate but would instead return an
- # error if the conversation exceeds the model's input token limit.
+ # truncation occurs.
+ #
+ # Clients can configure truncation behavior to truncate with a lower max token
+ # limit, which is an effective way to control token usage and cost.
+ #
+ # Truncation will reduce the number of cached tokens on the next turn (busting the
+ # cache), since messages are dropped from the beginning of the context. However,
+ # clients can also configure truncation to retain messages up to a fraction of the
+ # maximum context size, which will reduce the need for future truncations and thus
+ # improve the cache rate.
+ #
+ # Truncation can be disabled entirely, which means the server will never truncate
+ # but would instead return an error if the conversation exceeds the model's input
+ # token limit.
  truncation: nil,
  # The type of session to create. Always `realtime` for the Realtime API.
  type: :realtime
@@ -227,15 +227,20 @@ module OpenAI
  # limit, the conversation be truncated, meaning messages (starting from the
  # oldest) will not be included in the model's context. A 32k context model with
  # 4,096 max output tokens can only include 28,224 tokens in the context before
- # truncation occurs. Clients can configure truncation behavior to truncate with a
- # lower max token limit, which is an effective way to control token usage and
- # cost. Truncation will reduce the number of cached tokens on the next turn
- # (busting the cache), since messages are dropped from the beginning of the
- # context. However, clients can also configure truncation to retain messages up to
- # a fraction of the maximum context size, which will reduce the need for future
- # truncations and thus improve the cache rate. Truncation can be disabled
- # entirely, which means the server will never truncate but would instead return an
- # error if the conversation exceeds the model's input token limit.
+ # truncation occurs.
+ #
+ # Clients can configure truncation behavior to truncate with a lower max token
+ # limit, which is an effective way to control token usage and cost.
+ #
+ # Truncation will reduce the number of cached tokens on the next turn (busting the
+ # cache), since messages are dropped from the beginning of the context. However,
+ # clients can also configure truncation to retain messages up to a fraction of the
+ # maximum context size, which will reduce the need for future truncations and thus
+ # improve the cache rate.
+ #
+ # Truncation can be disabled entirely, which means the server will never truncate
+ # but would instead return an error if the conversation exceeds the model's input
+ # token limit.
  sig do
  returns(T.nilable(OpenAI::Realtime::RealtimeTruncation::Variants))
  end
@@ -356,15 +361,20 @@ module OpenAI
  # limit, the conversation be truncated, meaning messages (starting from the
  # oldest) will not be included in the model's context. A 32k context model with
  # 4,096 max output tokens can only include 28,224 tokens in the context before
- # truncation occurs. Clients can configure truncation behavior to truncate with a
- # lower max token limit, which is an effective way to control token usage and
- # cost. Truncation will reduce the number of cached tokens on the next turn
- # (busting the cache), since messages are dropped from the beginning of the
- # context. However, clients can also configure truncation to retain messages up to
- # a fraction of the maximum context size, which will reduce the need for future
- # truncations and thus improve the cache rate. Truncation can be disabled
- # entirely, which means the server will never truncate but would instead return an
- # error if the conversation exceeds the model's input token limit.
+ # truncation occurs.
+ #
+ # Clients can configure truncation behavior to truncate with a lower max token
+ # limit, which is an effective way to control token usage and cost.
+ #
+ # Truncation will reduce the number of cached tokens on the next turn (busting the
+ # cache), since messages are dropped from the beginning of the context. However,
+ # clients can also configure truncation to retain messages up to a fraction of the
+ # maximum context size, which will reduce the need for future truncations and thus
+ # improve the cache rate.
+ #
+ # Truncation can be disabled entirely, which means the server will never truncate
+ # but would instead return an error if the conversation exceeds the model's input
+ # token limit.
  truncation: nil,
  # The type of session to create. Always `realtime` for the Realtime API.
  type: :realtime
@@ -730,7 +740,11 @@ module OpenAI
  attr_accessor :type

  # Whether or not to automatically generate a response when a VAD stop event
- # occurs.
+ # occurs. If `interrupt_response` is set to `false` this may fail to create a
+ # response if the model is already responding.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  sig { returns(T.nilable(T::Boolean)) }
  attr_reader :create_response

@@ -752,9 +766,13 @@ module OpenAI
  sig { returns(T.nilable(Integer)) }
  attr_accessor :idle_timeout_ms

- # Whether or not to automatically interrupt any ongoing response with output to
- # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- # occurs.
+ # Whether or not to automatically interrupt (cancel) any ongoing response with
+ # output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ # start event occurs. If `true` then the response will be cancelled, otherwise it
+ # will continue until complete.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  sig { returns(T.nilable(T::Boolean)) }
  attr_reader :interrupt_response

@@ -802,7 +820,11 @@ module OpenAI
  end
  def self.new(
  # Whether or not to automatically generate a response when a VAD stop event
- # occurs.
+ # occurs. If `interrupt_response` is set to `false` this may fail to create a
+ # response if the model is already responding.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  create_response: nil,
  # Optional timeout after which a model response will be triggered automatically.
  # This is useful for situations in which a long pause from the user is unexpected,
@@ -817,9 +839,13 @@ module OpenAI
  # Response) will be emitted when the timeout is reached. Idle timeout is currently
  # only supported for `server_vad` mode.
  idle_timeout_ms: nil,
- # Whether or not to automatically interrupt any ongoing response with output to
- # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- # occurs.
+ # Whether or not to automatically interrupt (cancel) any ongoing response with
+ # output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ # start event occurs. If `true` then the response will be cancelled, otherwise it
+ # will continue until complete.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  interrupt_response: nil,
  # Used only for `server_vad` mode. Amount of audio to include before the VAD
  # detected speech (in milliseconds). Defaults to 300ms.
@@ -41,7 +41,11 @@ module OpenAI
  attr_accessor :type

  # Whether or not to automatically generate a response when a VAD stop event
- # occurs.
+ # occurs. If `interrupt_response` is set to `false` this may fail to create a
+ # response if the model is already responding.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  sig { returns(T.nilable(T::Boolean)) }
  attr_reader :create_response

@@ -63,9 +67,13 @@ module OpenAI
  sig { returns(T.nilable(Integer)) }
  attr_accessor :idle_timeout_ms

- # Whether or not to automatically interrupt any ongoing response with output to
- # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- # occurs.
+ # Whether or not to automatically interrupt (cancel) any ongoing response with
+ # output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ # start event occurs. If `true` then the response will be cancelled, otherwise it
+ # will continue until complete.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  sig { returns(T.nilable(T::Boolean)) }
  attr_reader :interrupt_response

@@ -113,7 +121,11 @@ module OpenAI
  end
  def self.new(
  # Whether or not to automatically generate a response when a VAD stop event
- # occurs.
+ # occurs. If `interrupt_response` is set to `false` this may fail to create a
+ # response if the model is already responding.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  create_response: nil,
  # Optional timeout after which a model response will be triggered automatically.
  # This is useful for situations in which a long pause from the user is unexpected,
@@ -128,9 +140,13 @@ module OpenAI
  # Response) will be emitted when the timeout is reached. Idle timeout is currently
  # only supported for `server_vad` mode.
  idle_timeout_ms: nil,
- # Whether or not to automatically interrupt any ongoing response with output to
- # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- # occurs.
+ # Whether or not to automatically interrupt (cancel) any ongoing response with
+ # output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ # start event occurs. If `true` then the response will be cancelled, otherwise it
+ # will continue until complete.
+ #
+ # If both `create_response` and `interrupt_response` are set to `false`, the model
+ # will never respond automatically but VAD events will still be emitted.
  interrupt_response: nil,
  # Used only for `server_vad` mode. Amount of audio to include before the VAD
  # detected speech (in milliseconds). Defaults to 300ms.
@@ -7,15 +7,20 @@ module OpenAI
  # limit, the conversation be truncated, meaning messages (starting from the
  # oldest) will not be included in the model's context. A 32k context model with
  # 4,096 max output tokens can only include 28,224 tokens in the context before
- # truncation occurs. Clients can configure truncation behavior to truncate with a
- # lower max token limit, which is an effective way to control token usage and
- # cost. Truncation will reduce the number of cached tokens on the next turn
- # (busting the cache), since messages are dropped from the beginning of the
- # context. However, clients can also configure truncation to retain messages up to
- # a fraction of the maximum context size, which will reduce the need for future
- # truncations and thus improve the cache rate. Truncation can be disabled
- # entirely, which means the server will never truncate but would instead return an
- # error if the conversation exceeds the model's input token limit.
+ # truncation occurs.
+ #
+ # Clients can configure truncation behavior to truncate with a lower max token
+ # limit, which is an effective way to control token usage and cost.
+ #
+ # Truncation will reduce the number of cached tokens on the next turn (busting the
+ # cache), since messages are dropped from the beginning of the context. However,
+ # clients can also configure truncation to retain messages up to a fraction of the
+ # maximum context size, which will reduce the need for future truncations and thus
+ # improve the cache rate.
+ #
+ # Truncation can be disabled entirely, which means the server will never truncate
+ # but would instead return an error if the conversation exceeds the model's input
+ # token limit.
  module RealtimeTruncation
  extend OpenAI::Internal::Type::Union

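The reflowed comment separates the three truncation strategies: default truncation at the model's limit, retention-based truncation to protect the prompt cache, and disabling truncation in favor of an error. A hedged sketch of what those settings might look like on a session — the `auto`/`disabled` values and the `retention_ratio` field name are assumptions drawn from the prose, since this diff only reflows the comment:

    # One of the RealtimeTruncation union's variants, per the doc comment above.
    truncation = :auto      # server truncates at the model's input limit
    truncation = :disabled  # server errors instead of truncating
    # Retain only a fraction of the max context, reducing future truncations
    # and improving the cache rate:
    truncation = { type: :retention_ratio, retention_ratio: 0.75 }
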
@@ -8,9 +8,9 @@ module OpenAI

  # Constrains effort on reasoning for
  # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- # supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- # reasoning effort can result in faster responses and fewer tokens used on
- # reasoning in a response.
+ # supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ # Reducing reasoning effort can result in faster responses and fewer tokens used
+ # on reasoning in a response.
  #
  # - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
  # reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -18,6 +18,7 @@ module OpenAI
  # - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
  # support `none`.
  # - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ # - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
  sig { returns(T.nilable(OpenAI::ReasoningEffort::OrSymbol)) }
  attr_accessor :effort

@@ -52,9 +53,9 @@ module OpenAI
  def self.new(
  # Constrains effort on reasoning for
  # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- # supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- # reasoning effort can result in faster responses and fewer tokens used on
- # reasoning in a response.
+ # supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ # Reducing reasoning effort can result in faster responses and fewer tokens used
+ # on reasoning in a response.
  #
  # - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
  # reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -62,6 +63,7 @@ module OpenAI
  # - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
  # support `none`.
  # - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ # - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
  effort: nil,
  # **Deprecated:** use `summary` instead.
  #
@@ -4,9 +4,9 @@ module OpenAI
  module Models
  # Constrains effort on reasoning for
  # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- # supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- # reasoning effort can result in faster responses and fewer tokens used on
- # reasoning in a response.
+ # supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ # Reducing reasoning effort can result in faster responses and fewer tokens used
+ # on reasoning in a response.
  #
  # - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
  # reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -14,6 +14,7 @@ module OpenAI
  # - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
  # support `none`.
  # - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ # - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
  module ReasoningEffort
  extend OpenAI::Internal::Type::Enum

@@ -25,6 +26,7 @@ module OpenAI
  LOW = T.let(:low, OpenAI::ReasoningEffort::TaggedSymbol)
  MEDIUM = T.let(:medium, OpenAI::ReasoningEffort::TaggedSymbol)
  HIGH = T.let(:high, OpenAI::ReasoningEffort::TaggedSymbol)
+ XHIGH = T.let(:xhigh, OpenAI::ReasoningEffort::TaggedSymbol)

  sig { override.returns(T::Array[OpenAI::ReasoningEffort::TaggedSymbol]) }
  def self.values
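
With `XHIGH` added to the enum, the new effort level can be passed anywhere a `ReasoningEffort` is accepted. A short sketch against the Responses API; per the note above, `xhigh` is currently only supported for `gpt-5.1-codex-max`:

    require "openai"

    client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])
    response = client.responses.create(
      model: "gpt-5.1-codex-max",
      reasoning: { effort: :xhigh },  # new in this release
      input: "Refactor this method for clarity..."
    )
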
@@ -0,0 +1,105 @@
+ # typed: strong
+
+ module OpenAI
+ module Models
+ module Responses
+ class CompactedResponse < OpenAI::Internal::Type::BaseModel
+ OrHash =
+ T.type_alias do
+ T.any(
+ OpenAI::Responses::CompactedResponse,
+ OpenAI::Internal::AnyHash
+ )
+ end
+
+ # The unique identifier for the compacted response.
+ sig { returns(String) }
+ attr_accessor :id
+
+ # Unix timestamp (in seconds) when the compacted conversation was created.
+ sig { returns(Integer) }
+ attr_accessor :created_at
+
+ # The object type. Always `response.compaction`.
+ sig { returns(Symbol) }
+ attr_accessor :object
+
+ # The compacted list of output items. This is a list of all user messages,
+ # followed by a single compaction item.
+ sig do
+ returns(T::Array[OpenAI::Responses::ResponseOutputItem::Variants])
+ end
+ attr_accessor :output
+
+ # Token accounting for the compaction pass, including cached, reasoning, and total
+ # tokens.
+ sig { returns(OpenAI::Responses::ResponseUsage) }
+ attr_reader :usage
+
+ sig { params(usage: OpenAI::Responses::ResponseUsage::OrHash).void }
+ attr_writer :usage
+
+ sig do
+ params(
+ id: String,
+ created_at: Integer,
+ output:
+ T::Array[
+ T.any(
+ OpenAI::Responses::ResponseOutputMessage::OrHash,
+ OpenAI::Responses::ResponseFileSearchToolCall::OrHash,
+ OpenAI::Responses::ResponseFunctionToolCall::OrHash,
+ OpenAI::Responses::ResponseFunctionWebSearch::OrHash,
+ OpenAI::Responses::ResponseComputerToolCall::OrHash,
+ OpenAI::Responses::ResponseReasoningItem::OrHash,
+ OpenAI::Responses::ResponseCompactionItem::OrHash,
+ OpenAI::Responses::ResponseOutputItem::ImageGenerationCall::OrHash,
+ OpenAI::Responses::ResponseCodeInterpreterToolCall::OrHash,
+ OpenAI::Responses::ResponseOutputItem::LocalShellCall::OrHash,
+ OpenAI::Responses::ResponseFunctionShellToolCall::OrHash,
+ OpenAI::Responses::ResponseFunctionShellToolCallOutput::OrHash,
+ OpenAI::Responses::ResponseApplyPatchToolCall::OrHash,
+ OpenAI::Responses::ResponseApplyPatchToolCallOutput::OrHash,
+ OpenAI::Responses::ResponseOutputItem::McpCall::OrHash,
+ OpenAI::Responses::ResponseOutputItem::McpListTools::OrHash,
+ OpenAI::Responses::ResponseOutputItem::McpApprovalRequest::OrHash,
+ OpenAI::Responses::ResponseCustomToolCall::OrHash
+ )
+ ],
+ usage: OpenAI::Responses::ResponseUsage::OrHash,
+ object: Symbol
+ ).returns(T.attached_class)
+ end
+ def self.new(
+ # The unique identifier for the compacted response.
+ id:,
+ # Unix timestamp (in seconds) when the compacted conversation was created.
+ created_at:,
+ # The compacted list of output items. This is a list of all user messages,
+ # followed by a single compaction item.
+ output:,
+ # Token accounting for the compaction pass, including cached, reasoning, and total
+ # tokens.
+ usage:,
+ # The object type. Always `response.compaction`.
+ object: :"response.compaction"
+ )
+ end
+
+ sig do
+ override.returns(
+ {
+ id: String,
+ created_at: Integer,
+ object: Symbol,
+ output: T::Array[OpenAI::Responses::ResponseOutputItem::Variants],
+ usage: OpenAI::Responses::ResponseUsage
+ }
+ )
+ end
+ def to_hash
+ end
+ end
+ end
+ end
+ end
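
`CompactedResponse` pairs with the new `ResponseCompactParams` model and the `+33 -2` change to `resources/responses.rb` in the file list, which together suggest a compaction endpoint on the responses resource. A hedged sketch of how it might be called — the method name `compact` and its parameter are inferred from the new model names and are not shown in these hunks, while the `object`, `output`, and `usage` accessors are confirmed above:

    # Hypothetical: compact an existing conversation's history.
    compacted = client.responses.compact(previous_response_id: "resp_abc123")
    compacted.object  # => :"response.compaction"
    compacted.output  # all user messages followed by a single compaction item
    compacted.usage   # token accounting for the compaction pass
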
@@ -334,6 +334,7 @@ module OpenAI
  OpenAI::Responses::ResponseFunctionWebSearch::OrHash,
  OpenAI::Responses::ResponseComputerToolCall::OrHash,
  OpenAI::Responses::ResponseReasoningItem::OrHash,
+ OpenAI::Responses::ResponseCompactionItem::OrHash,
  OpenAI::Responses::ResponseOutputItem::ImageGenerationCall::OrHash,
  OpenAI::Responses::ResponseCodeInterpreterToolCall::OrHash,
  OpenAI::Responses::ResponseOutputItem::LocalShellCall::OrHash,