openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -30,7 +30,7 @@ module OpenAI
30
30
 
31
31
  # This event is the output of audio transcription for user audio written to the
32
32
  # user audio buffer. Transcription begins when the input audio buffer is
33
- # committed by the client or server (in `server_vad` mode). Transcription runs
33
+ # committed by the client or server (when VAD is enabled). Transcription runs
34
34
  # asynchronously with Response creation, so this event may come before or after
35
35
  # the Response events.
36
36
  #
@@ -41,7 +41,7 @@ module OpenAI
41
41
  variant :"conversation.item.input_audio_transcription.completed",
42
42
  -> { OpenAI::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent }
43
43
 
44
- # Returned when the text value of an input audio transcription content part is updated.
44
+ # Returned when the text value of an input audio transcription content part is updated with incremental transcription results.
45
45
  variant :"conversation.item.input_audio_transcription.delta",
46
46
  -> { OpenAI::Realtime::ConversationItemInputAudioTranscriptionDeltaEvent }
47
47
 
@@ -51,7 +51,7 @@ module OpenAI
51
51
  variant :"conversation.item.input_audio_transcription.failed",
52
52
  -> { OpenAI::Realtime::ConversationItemInputAudioTranscriptionFailedEvent }
53
53
 
54
- # Returned when a conversation item is retrieved with `conversation.item.retrieve`.
54
+ # Returned when a conversation item is retrieved with `conversation.item.retrieve`. This is provided as a way to fetch the server's representation of an item, for example to get access to the post-processed audio data after noise cancellation and VAD. It includes the full content of the Item, including audio data.
55
55
  variant :"conversation.item.retrieved",
56
56
  -> { OpenAI::Realtime::RealtimeServerEvent::ConversationItemRetrieved }
57
57
 
@@ -133,6 +133,12 @@ module OpenAI
133
133
  # Returned when a Response is done streaming. Always emitted, no matter the
134
134
  # final state. The Response object included in the `response.done` event will
135
135
  # include all output Items in the Response but will omit the raw audio data.
136
+ #
137
+ # Clients should check the `status` field of the Response to determine if it was successful
138
+ # (`completed`) or if there was another outcome: `cancelled`, `failed`, or `incomplete`.
139
+ #
140
+ # A response will contain all output items that were generated during the response, excluding
141
+ # any audio content.
136
142
  variant :"response.done", -> { OpenAI::Realtime::ResponseDoneEvent }
137
143
 
138
144
  # Returned when the model-generated function call arguments are updated.
@@ -196,10 +202,17 @@ module OpenAI
196
202
  variant :"output_audio_buffer.cleared",
197
203
  -> { OpenAI::Realtime::RealtimeServerEvent::OutputAudioBufferCleared }
198
204
 
199
- # Returned when a conversation item is added.
205
+ # Sent by the server when an Item is added to the default Conversation. This can happen in several cases:
206
+ # - When the client sends a `conversation.item.create` event.
207
+ # - When the input audio buffer is committed. In this case the item will be a user message containing the audio from the buffer.
208
+ # - When the model is generating a Response. In this case the `conversation.item.added` event will be sent when the model starts generating a specific Item, and thus it will not yet have any content (and `status` will be `in_progress`).
209
+ #
210
+ # Unless the model is still generating the Item as part of a Response, the event will include the full content of the Item, with the exception of audio data, which can be retrieved separately with a `conversation.item.retrieve` event if necessary.
200
211
  variant :"conversation.item.added", -> { OpenAI::Realtime::ConversationItemAdded }
201
212
 
202
213
  # Returned when a conversation item is finalized.
214
+ #
215
+ # The event will include the full content of the Item except for audio data, which can be retrieved separately with a `conversation.item.retrieve` event if needed.
203
216
  variant :"conversation.item.done", -> { OpenAI::Realtime::ConversationItemDone }
204
217
 
205
218
  # Returned when the server VAD timeout is triggered for the input audio buffer.
@@ -254,7 +267,10 @@ module OpenAI
254
267
 
255
268
  # @!method initialize(event_id:, item:, type: :"conversation.item.retrieved")
256
269
  # Returned when a conversation item is retrieved with
257
- # `conversation.item.retrieve`.
270
+ # `conversation.item.retrieve`. This is provided as a way to fetch the server's
271
+ # representation of an item, for example to get access to the post-processed audio
272
+ # data after noise cancellation and VAD. It includes the full content of the Item,
273
+ # including audio data.
258
274
  #
259
275
  # @param event_id [String] The unique ID of the server event.
260
276
  #
@@ -55,10 +55,8 @@ module OpenAI
55
55
  # what the model heard. The client can optionally set the language and prompt for
56
56
  # transcription, these offer additional guidance to the transcription service.
57
57
  #
58
- # @return [OpenAI::Models::Realtime::RealtimeSession::InputAudioTranscription, nil]
59
- optional :input_audio_transcription,
60
- -> { OpenAI::Realtime::RealtimeSession::InputAudioTranscription },
61
- nil?: true
58
+ # @return [OpenAI::Models::Realtime::AudioTranscription, nil]
59
+ optional :input_audio_transcription, -> { OpenAI::Realtime::AudioTranscription }, nil?: true
62
60
 
63
61
  # @!attribute instructions
64
62
  # The default system instructions (i.e. system message) prepended to model calls.
@@ -144,8 +142,8 @@ module OpenAI
144
142
  # @!attribute tools
145
143
  # Tools (functions) available to the model.
146
144
  #
147
- # @return [Array<OpenAI::Models::Realtime::RealtimeSession::Tool>, nil]
148
- optional :tools, -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Realtime::RealtimeSession::Tool] }
145
+ # @return [Array<OpenAI::Models::Realtime::Models>, nil]
146
+ optional :tools, -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Realtime::Models] }
149
147
 
150
148
  # @!attribute tracing
151
149
  # Configuration options for tracing. Set to null to disable tracing. Once tracing
@@ -196,7 +194,7 @@ module OpenAI
196
194
  #
197
195
  # @param input_audio_noise_reduction [OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn
198
196
  #
199
- # @param input_audio_transcription [OpenAI::Models::Realtime::RealtimeSession::InputAudioTranscription, nil] Configuration for input audio transcription, defaults to off and can be set to `
197
+ # @param input_audio_transcription [OpenAI::Models::Realtime::AudioTranscription, nil] Configuration for input audio transcription, defaults to off and can be set to `
200
198
  #
201
199
  # @param instructions [String] The default system instructions (i.e. system message) prepended to model
202
200
  #
@@ -218,7 +216,7 @@ module OpenAI
218
216
  #
219
217
  # @param tool_choice [String] How the model chooses tools. Options are `auto`, `none`, `required`, or
220
218
  #
221
- # @param tools [Array<OpenAI::Models::Realtime::RealtimeSession::Tool>] Tools (functions) available to the model.
219
+ # @param tools [Array<OpenAI::Models::Realtime::Models>] Tools (functions) available to the model.
222
220
  #
223
221
  # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeSession::Tracing::TracingConfiguration, nil] Configuration options for tracing. Set to null to disable tracing. Once
224
222
  #
@@ -258,8 +256,8 @@ module OpenAI
258
256
  # headphones, `far_field` is for far-field microphones such as laptop or
259
257
  # conference room microphones.
260
258
  #
261
- # @return [Symbol, OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::Type, nil]
262
- optional :type, enum: -> { OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type }
259
+ # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil]
260
+ optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType }
263
261
 
264
262
  # @!method initialize(type: nil)
265
263
  # Some parameter documentation has been truncated, see
@@ -272,70 +270,7 @@ module OpenAI
272
270
  # detection accuracy (reducing false positives) and model performance by improving
273
271
  # perception of the input audio.
274
272
  #
275
- # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction::Type] Type of noise reduction. `near_field` is for close-talking microphones such as h
276
-
277
- # Type of noise reduction. `near_field` is for close-talking microphones such as
278
- # headphones, `far_field` is for far-field microphones such as laptop or
279
- # conference room microphones.
280
- #
281
- # @see OpenAI::Models::Realtime::RealtimeSession::InputAudioNoiseReduction#type
282
- module Type
283
- extend OpenAI::Internal::Type::Enum
284
-
285
- NEAR_FIELD = :near_field
286
- FAR_FIELD = :far_field
287
-
288
- # @!method self.values
289
- # @return [Array<Symbol>]
290
- end
291
- end
292
-
293
- # @see OpenAI::Models::Realtime::RealtimeSession#input_audio_transcription
294
- class InputAudioTranscription < OpenAI::Internal::Type::BaseModel
295
- # @!attribute language
296
- # The language of the input audio. Supplying the input language in
297
- # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
298
- # format will improve accuracy and latency.
299
- #
300
- # @return [String, nil]
301
- optional :language, String
302
-
303
- # @!attribute model
304
- # The model to use for transcription, current options are `gpt-4o-transcribe`,
305
- # `gpt-4o-mini-transcribe`, and `whisper-1`.
306
- #
307
- # @return [String, nil]
308
- optional :model, String
309
-
310
- # @!attribute prompt
311
- # An optional text to guide the model's style or continue a previous audio
312
- # segment. For `whisper-1`, the
313
- # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
314
- # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
315
- # "expect words related to technology".
316
- #
317
- # @return [String, nil]
318
- optional :prompt, String
319
-
320
- # @!method initialize(language: nil, model: nil, prompt: nil)
321
- # Some parameter documentations has been truncated, see
322
- # {OpenAI::Models::Realtime::RealtimeSession::InputAudioTranscription} for more
323
- # details.
324
- #
325
- # Configuration for input audio transcription, defaults to off and can be set to
326
- # `null` to turn off once on. Input audio transcription is not native to the
327
- # model, since the model consumes audio directly. Transcription runs
328
- # asynchronously through
329
- # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
330
- # and should be treated as guidance of input audio content rather than precisely
331
- # what the model heard. The client can optionally set the language and prompt for
332
- # transcription, these offer additional guidance to the transcription service.
333
- #
334
- # @param language [String] The language of the input audio. Supplying the input language in
335
- #
336
- # @param model [String] The model to use for transcription, current options are `gpt-4o-transcribe`, `gp
337
- #
338
- # @param prompt [String] An optional text to guide the model's style or continue a previous audio
273
+ # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction. `near_field` is for close-talking microphones such as h
339
274
  end
340
275
 
341
276
  # Maximum number of output tokens for a single assistant response, inclusive of
@@ -410,57 +345,6 @@ module OpenAI
410
345
  # @return [Array<Symbol>]
411
346
  end
412
347
 
413
- class Tool < OpenAI::Internal::Type::BaseModel
414
- # @!attribute description
415
- # The description of the function, including guidance on when and how to call it,
416
- # and guidance about what to tell the user when calling (if anything).
417
- #
418
- # @return [String, nil]
419
- optional :description, String
420
-
421
- # @!attribute name
422
- # The name of the function.
423
- #
424
- # @return [String, nil]
425
- optional :name, String
426
-
427
- # @!attribute parameters
428
- # Parameters of the function in JSON Schema.
429
- #
430
- # @return [Object, nil]
431
- optional :parameters, OpenAI::Internal::Type::Unknown
432
-
433
- # @!attribute type
434
- # The type of the tool, i.e. `function`.
435
- #
436
- # @return [Symbol, OpenAI::Models::Realtime::RealtimeSession::Tool::Type, nil]
437
- optional :type, enum: -> { OpenAI::Realtime::RealtimeSession::Tool::Type }
438
-
439
- # @!method initialize(description: nil, name: nil, parameters: nil, type: nil)
440
- # Some parameter documentations has been truncated, see
441
- # {OpenAI::Models::Realtime::RealtimeSession::Tool} for more details.
442
- #
443
- # @param description [String] The description of the function, including guidance on when and how
444
- #
445
- # @param name [String] The name of the function.
446
- #
447
- # @param parameters [Object] Parameters of the function in JSON Schema.
448
- #
449
- # @param type [Symbol, OpenAI::Models::Realtime::RealtimeSession::Tool::Type] The type of the tool, i.e. `function`.
450
-
451
- # The type of the tool, i.e. `function`.
452
- #
453
- # @see OpenAI::Models::Realtime::RealtimeSession::Tool#type
454
- module Type
455
- extend OpenAI::Internal::Type::Enum
456
-
457
- FUNCTION = :function
458
-
459
- # @!method self.values
460
- # @return [Array<Symbol>]
461
- end
462
- end
463
-
464
348
  # Configuration options for tracing. Set to null to disable tracing. Once tracing
465
349
  # is enabled for a session, the configuration cannot be modified.
466
350
  #
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeSessionClientSecret < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute expires_at
8
+ # Timestamp for when the token expires. Currently, all tokens expire after one
9
+ # minute.
10
+ #
11
+ # @return [Integer]
12
+ required :expires_at, Integer
13
+
14
+ # @!attribute value
15
+ # Ephemeral key usable in client environments to authenticate connections to the
16
+ # Realtime API. Use this in client-side environments rather than a standard API
17
+ # token, which should only be used server-side.
18
+ #
19
+ # @return [String]
20
+ required :value, String
21
+
22
+ # @!method initialize(expires_at:, value:)
23
+ # Some parameter documentation has been truncated, see
24
+ # {OpenAI::Models::Realtime::RealtimeSessionClientSecret} for more details.
25
+ #
26
+ # Ephemeral key returned by the API.
27
+ #
28
+ # @param expires_at [Integer] Timestamp for when the token expires. Currently, all tokens expire
29
+ #
30
+ # @param value [String] Ephemeral key usable in client environments to authenticate connections to the R
31
+ end
32
+ end
33
+
34
+ RealtimeSessionClientSecret = Realtime::RealtimeSessionClientSecret
35
+ end
36
+ end
@@ -4,12 +4,6 @@ module OpenAI
4
4
  module Models
5
5
  module Realtime
6
6
  class RealtimeSessionCreateRequest < OpenAI::Internal::Type::BaseModel
7
- # @!attribute model
8
- # The Realtime model used for this session.
9
- #
10
- # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model]
11
- required :model, union: -> { OpenAI::Realtime::RealtimeSessionCreateRequest::Model }
12
-
13
7
  # @!attribute type
14
8
  # The type of session to create. Always `realtime` for the Realtime API.
15
9
  #
@@ -22,17 +16,11 @@ module OpenAI
22
16
  # @return [OpenAI::Models::Realtime::RealtimeAudioConfig, nil]
23
17
  optional :audio, -> { OpenAI::Realtime::RealtimeAudioConfig }
24
18
 
25
- # @!attribute client_secret
26
- # Configuration options for the generated client secret.
27
- #
28
- # @return [OpenAI::Models::Realtime::RealtimeClientSecretConfig, nil]
29
- optional :client_secret, -> { OpenAI::Realtime::RealtimeClientSecretConfig }
30
-
31
19
  # @!attribute include
32
20
  # Additional fields to include in server outputs.
33
21
  #
34
- # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
35
- # transcription.
22
+ # `item.input_audio_transcription.logprobs`: Include logprobs for input audio
23
+ # transcription.
36
24
  #
37
25
  # @return [Array<Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Include>, nil]
38
26
  optional :include,
@@ -62,9 +50,17 @@ module OpenAI
62
50
  # @return [Integer, Symbol, :inf, nil]
63
51
  optional :max_output_tokens, union: -> { OpenAI::Realtime::RealtimeSessionCreateRequest::MaxOutputTokens }
64
52
 
53
+ # @!attribute model
54
+ # The Realtime model used for this session.
55
+ #
56
+ # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model, nil]
57
+ optional :model, union: -> { OpenAI::Realtime::RealtimeSessionCreateRequest::Model }
58
+
65
59
  # @!attribute output_modalities
66
- # The set of modalities the model can respond with. To disable audio, set this to
67
- # ["text"].
60
+ # The set of modalities the model can respond with. It defaults to `["audio"]`,
61
+ # indicating that the model will respond with audio plus a transcript. `["text"]`
62
+ # can be used to make the model respond with text only. It is not possible to
63
+ # request both `text` and `audio` at the same time.
68
64
  #
69
65
  # @return [Array<Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::OutputModality>, nil]
70
66
  optional :output_modalities,
@@ -77,13 +73,6 @@ module OpenAI
77
73
  # @return [OpenAI::Models::Responses::ResponsePrompt, nil]
78
74
  optional :prompt, -> { OpenAI::Responses::ResponsePrompt }, nil?: true
79
75
 
80
- # @!attribute temperature
81
- # Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
82
- # temperature of 0.8 is highly recommended for best performance.
83
- #
84
- # @return [Float, nil]
85
- optional :temperature, Float
86
-
87
76
  # @!attribute tool_choice
88
77
  # How the model chooses tools. Provide one of the string modes or force a specific
89
78
  # function/MCP tool.
@@ -94,12 +83,13 @@ module OpenAI
94
83
  # @!attribute tools
95
84
  # Tools available to the model.
96
85
  #
97
- # @return [Array<OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp>, nil]
86
+ # @return [Array<OpenAI::Models::Realtime::Models, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp>, nil]
98
87
  optional :tools, -> { OpenAI::Internal::Type::ArrayOf[union: OpenAI::Realtime::RealtimeToolsConfigUnion] }
99
88
 
100
89
  # @!attribute tracing
101
- # Configuration options for tracing. Set to null to disable tracing. Once tracing
102
- # is enabled for a session, the configuration cannot be modified.
90
+ # Realtime API can write session traces to the
91
+ # [Traces Dashboard](https://platform.openai.com/logs?api=traces). Set to null to disable tracing. Once
92
+ # tracing is enabled for a session, the configuration cannot be modified.
103
93
  #
104
94
  # `auto` will create a trace for the session with default values for the workflow
105
95
  # name, group id, and metadata.
@@ -109,46 +99,66 @@ module OpenAI
109
99
 
110
100
  # @!attribute truncation
111
101
  # Controls how the realtime conversation is truncated prior to model inference.
112
- # The default is `auto`. When set to `retention_ratio`, the server retains a
113
- # fraction of the conversation tokens prior to the instructions.
102
+ # The default is `auto`.
114
103
  #
115
- # @return [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation, nil]
104
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio, nil]
116
105
  optional :truncation, union: -> { OpenAI::Realtime::RealtimeTruncation }
117
106
 
118
- # @!method initialize(model:, audio: nil, client_secret: nil, include: nil, instructions: nil, max_output_tokens: nil, output_modalities: nil, prompt: nil, temperature: nil, tool_choice: nil, tools: nil, tracing: nil, truncation: nil, type: :realtime)
107
+ # @!method initialize(audio: nil, include: nil, instructions: nil, max_output_tokens: nil, model: nil, output_modalities: nil, prompt: nil, tool_choice: nil, tools: nil, tracing: nil, truncation: nil, type: :realtime)
119
108
  # Some parameter documentations has been truncated, see
120
109
  # {OpenAI::Models::Realtime::RealtimeSessionCreateRequest} for more details.
121
110
  #
122
111
  # Realtime session object configuration.
123
112
  #
124
- # @param model [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model] The Realtime model used for this session.
125
- #
126
113
  # @param audio [OpenAI::Models::Realtime::RealtimeAudioConfig] Configuration for input and output audio.
127
114
  #
128
- # @param client_secret [OpenAI::Models::Realtime::RealtimeClientSecretConfig] Configuration options for the generated client secret.
129
- #
130
115
  # @param include [Array<Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Include>] Additional fields to include in server outputs.
131
116
  #
132
117
  # @param instructions [String] The default system instructions (i.e. system message) prepended to model calls.
133
118
  #
134
119
  # @param max_output_tokens [Integer, Symbol, :inf] Maximum number of output tokens for a single assistant response,
135
120
  #
136
- # @param output_modalities [Array<Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::OutputModality>] The set of modalities the model can respond with. To disable audio,
121
+ # @param model [String, Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model] The Realtime model used for this session.
122
+ #
123
+ # @param output_modalities [Array<Symbol, OpenAI::Models::Realtime::RealtimeSessionCreateRequest::OutputModality>] The set of modalities the model can respond with. It defaults to `["audio"]`, in
137
124
  #
138
125
  # @param prompt [OpenAI::Models::Responses::ResponsePrompt, nil] Reference to a prompt template and its variables.
139
126
  #
140
- # @param temperature [Float] Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a te
141
- #
142
127
  # @param tool_choice [Symbol, OpenAI::Models::Responses::ToolChoiceOptions, OpenAI::Models::Responses::ToolChoiceFunction, OpenAI::Models::Responses::ToolChoiceMcp] How the model chooses tools. Provide one of the string modes or force a specific
143
128
  #
144
- # @param tools [Array<OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp>] Tools available to the model.
129
+ # @param tools [Array<OpenAI::Models::Realtime::Models, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp>] Tools available to the model.
145
130
  #
146
- # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeTracingConfig::TracingConfiguration, nil] Configuration options for tracing. Set to null to disable tracing. Once
131
+ # @param tracing [Symbol, :auto, OpenAI::Models::Realtime::RealtimeTracingConfig::TracingConfiguration, nil] Realtime API can write session traces to the [Traces Dashboard](/logs?api=traces
147
132
  #
148
- # @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation] Controls how the realtime conversation is truncated prior to model inference.
133
+ # @param truncation [Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio] Controls how the realtime conversation is truncated prior to model inference.
149
134
  #
150
135
  # @param type [Symbol, :realtime] The type of session to create. Always `realtime` for the Realtime API.
151
136
 
137
+ module Include
138
+ extend OpenAI::Internal::Type::Enum
139
+
140
+ ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs"
141
+
142
+ # @!method self.values
143
+ # @return [Array<Symbol>]
144
+ end
145
+
146
+ # Maximum number of output tokens for a single assistant response, inclusive of
147
+ # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
148
+ # `inf` for the maximum available tokens for a given model. Defaults to `inf`.
149
+ #
150
+ # @see OpenAI::Models::Realtime::RealtimeSessionCreateRequest#max_output_tokens
151
+ module MaxOutputTokens
152
+ extend OpenAI::Internal::Type::Union
153
+
154
+ variant Integer
155
+
156
+ variant const: :inf
157
+
158
+ # @!method self.variants
159
+ # @return [Array(Integer, Symbol, :inf)]
160
+ end
161
+
152
162
  # The Realtime model used for this session.
153
163
  #
154
164
  # @see OpenAI::Models::Realtime::RealtimeSessionCreateRequest#model
@@ -161,10 +171,6 @@ module OpenAI
161
171
 
162
172
  variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_REALTIME_2025_08_28 }
163
173
 
164
- variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_REALTIME }
165
-
166
- variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_MINI_REALTIME }
167
-
168
174
  variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_REALTIME_PREVIEW }
169
175
 
170
176
  variant const: -> { OpenAI::Models::Realtime::RealtimeSessionCreateRequest::Model::GPT_4O_REALTIME_PREVIEW_2024_10_01 }
@@ -188,8 +194,6 @@ module OpenAI
188
194
 
189
195
  GPT_REALTIME = :"gpt-realtime"
190
196
  GPT_REALTIME_2025_08_28 = :"gpt-realtime-2025-08-28"
191
- GPT_4O_REALTIME = :"gpt-4o-realtime"
192
- GPT_4O_MINI_REALTIME = :"gpt-4o-mini-realtime"
193
197
  GPT_4O_REALTIME_PREVIEW = :"gpt-4o-realtime-preview"
194
198
  GPT_4O_REALTIME_PREVIEW_2024_10_01 = :"gpt-4o-realtime-preview-2024-10-01"
195
199
  GPT_4O_REALTIME_PREVIEW_2024_12_17 = :"gpt-4o-realtime-preview-2024-12-17"
@@ -200,31 +204,6 @@ module OpenAI
200
204
  # @!endgroup
201
205
  end
202
206
 
203
- module Include
204
- extend OpenAI::Internal::Type::Enum
205
-
206
- ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs"
207
-
208
- # @!method self.values
209
- # @return [Array<Symbol>]
210
- end
211
-
212
- # Maximum number of output tokens for a single assistant response, inclusive of
213
- # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
214
- # `inf` for the maximum available tokens for a given model. Defaults to `inf`.
215
- #
216
- # @see OpenAI::Models::Realtime::RealtimeSessionCreateRequest#max_output_tokens
217
- module MaxOutputTokens
218
- extend OpenAI::Internal::Type::Union
219
-
220
- variant Integer
221
-
222
- variant const: :inf
223
-
224
- # @!method self.variants
225
- # @return [Array(Integer, Symbol, :inf)]
226
- end
227
-
228
207
  module OutputModality
229
208
  extend OpenAI::Internal::Type::Enum
230
209