openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -9,20 +9,28 @@ module OpenAI
9
9
  T.any(OpenAI::Realtime::RealtimeResponse, OpenAI::Internal::AnyHash)
10
10
  end
11
11
 
12
- # The unique ID of the response.
12
+ # The unique ID of the response, will look like `resp_1234`.
13
13
  sig { returns(T.nilable(String)) }
14
14
  attr_reader :id
15
15
 
16
16
  sig { params(id: String).void }
17
17
  attr_writer :id
18
18
 
19
+ # Configuration for audio output.
20
+ sig { returns(T.nilable(OpenAI::Realtime::RealtimeResponse::Audio)) }
21
+ attr_reader :audio
22
+
23
+ sig do
24
+ params(audio: OpenAI::Realtime::RealtimeResponse::Audio::OrHash).void
25
+ end
26
+ attr_writer :audio
27
+
19
28
  # Which conversation the response is added to, determined by the `conversation`
20
29
  # field in the `response.create` event. If `auto`, the response will be added to
21
30
  # the default conversation and the value of `conversation_id` will be an id like
22
31
  # `conv_1234`. If `none`, the response will not be added to any conversation and
23
32
  # the value of `conversation_id` will be `null`. If responses are being triggered
24
- # by server VAD, the response will be added to the default conversation, thus the
25
- # `conversation_id` will be an id like `conv_1234`.
33
+ # automatically by VAD, the response will be added to the default conversation.
26
34
  sig { returns(T.nilable(String)) }
27
35
  attr_reader :conversation_id
28
36
 
@@ -46,26 +54,6 @@ module OpenAI
46
54
  sig { returns(T.nilable(T::Hash[Symbol, String])) }
47
55
  attr_accessor :metadata
48
56
 
49
- # The set of modalities the model used to respond. If there are multiple
50
- # modalities, the model will pick one, for example if `modalities` is
51
- # `["text", "audio"]`, the model could be responding in either text or audio.
52
- sig do
53
- returns(
54
- T.nilable(
55
- T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol]
56
- )
57
- )
58
- end
59
- attr_reader :modalities
60
-
61
- sig do
62
- params(
63
- modalities:
64
- T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol]
65
- ).void
66
- end
67
- attr_writer :modalities
68
-
69
57
  # The object type, must be `realtime.response`.
70
58
  sig do
71
59
  returns(
@@ -123,23 +111,30 @@ module OpenAI
123
111
  end
124
112
  attr_writer :output
125
113
 
126
- # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
114
+ # The set of modalities the model used to respond, currently the only possible
115
+ # values are `["audio"]` and `["text"]`. Audio output always includes a text
116
+ # transcript. Setting the output to mode `text` will disable audio output from the
117
+ # model.
127
118
  sig do
128
119
  returns(
129
120
  T.nilable(
130
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol
121
+ T::Array[
122
+ OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
123
+ ]
131
124
  )
132
125
  )
133
126
  end
134
- attr_reader :output_audio_format
127
+ attr_reader :output_modalities
135
128
 
136
129
  sig do
137
130
  params(
138
- output_audio_format:
139
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol
131
+ output_modalities:
132
+ T::Array[
133
+ OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
134
+ ]
140
135
  ).void
141
136
  end
142
- attr_writer :output_audio_format
137
+ attr_writer :output_modalities
143
138
 
144
139
  # The final status of the response (`completed`, `cancelled`, `failed`, or
145
140
  # `incomplete`, `in_progress`).
@@ -168,13 +163,6 @@ module OpenAI
168
163
  end
169
164
  attr_writer :status_details
170
165
 
171
- # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
172
- sig { returns(T.nilable(Float)) }
173
- attr_reader :temperature
174
-
175
- sig { params(temperature: Float).void }
176
- attr_writer :temperature
177
-
178
166
  # Usage statistics for the Response, this will correspond to billing. A Realtime
179
167
  # API session will maintain a conversation context and append new Items to the
180
168
  # Conversation, thus output from previous turns (text and audio tokens) will
@@ -187,34 +175,14 @@ module OpenAI
187
175
  end
188
176
  attr_writer :usage
189
177
 
190
- # The voice the model used to respond. Current voice options are `alloy`, `ash`,
191
- # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
192
- sig do
193
- returns(
194
- T.nilable(
195
- T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol)
196
- )
197
- )
198
- end
199
- attr_reader :voice
200
-
201
- sig do
202
- params(
203
- voice:
204
- T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol)
205
- ).void
206
- end
207
- attr_writer :voice
208
-
209
178
  # The response resource.
210
179
  sig do
211
180
  params(
212
181
  id: String,
182
+ audio: OpenAI::Realtime::RealtimeResponse::Audio::OrHash,
213
183
  conversation_id: String,
214
184
  max_output_tokens: T.any(Integer, Symbol),
215
185
  metadata: T.nilable(T::Hash[Symbol, String]),
216
- modalities:
217
- T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol],
218
186
  object: OpenAI::Realtime::RealtimeResponse::Object::OrSymbol,
219
187
  output:
220
188
  T::Array[
@@ -230,26 +198,26 @@ module OpenAI
230
198
  OpenAI::Realtime::RealtimeMcpApprovalRequest::OrHash
231
199
  )
232
200
  ],
233
- output_audio_format:
234
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol,
201
+ output_modalities:
202
+ T::Array[
203
+ OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
204
+ ],
235
205
  status: OpenAI::Realtime::RealtimeResponse::Status::OrSymbol,
236
206
  status_details: OpenAI::Realtime::RealtimeResponseStatus::OrHash,
237
- temperature: Float,
238
- usage: OpenAI::Realtime::RealtimeResponseUsage::OrHash,
239
- voice:
240
- T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol)
207
+ usage: OpenAI::Realtime::RealtimeResponseUsage::OrHash
241
208
  ).returns(T.attached_class)
242
209
  end
243
210
  def self.new(
244
- # The unique ID of the response.
211
+ # The unique ID of the response, will look like `resp_1234`.
245
212
  id: nil,
213
+ # Configuration for audio output.
214
+ audio: nil,
246
215
  # Which conversation the response is added to, determined by the `conversation`
247
216
  # field in the `response.create` event. If `auto`, the response will be added to
248
217
  # the default conversation and the value of `conversation_id` will be an id like
249
218
  # `conv_1234`. If `none`, the response will not be added to any conversation and
250
219
  # the value of `conversation_id` will be `null`. If responses are being triggered
251
- # by server VAD, the response will be added to the default conversation, thus the
252
- # `conversation_id` will be an id like `conv_1234`.
220
+ # automatically by VAD, the response will be added to the default conversation.
253
221
  conversation_id: nil,
254
222
  # Maximum number of output tokens for a single assistant response, inclusive of
255
223
  # tool calls, that was used in this response.
@@ -261,31 +229,25 @@ module OpenAI
261
229
  # Keys are strings with a maximum length of 64 characters. Values are strings with
262
230
  # a maximum length of 512 characters.
263
231
  metadata: nil,
264
- # The set of modalities the model used to respond. If there are multiple
265
- # modalities, the model will pick one, for example if `modalities` is
266
- # `["text", "audio"]`, the model could be responding in either text or audio.
267
- modalities: nil,
268
232
  # The object type, must be `realtime.response`.
269
233
  object: nil,
270
234
  # The list of output items generated by the response.
271
235
  output: nil,
272
- # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
273
- output_audio_format: nil,
236
+ # The set of modalities the model used to respond, currently the only possible
237
+ # values are `["audio"]` and `["text"]`. Audio output always includes a text
238
+ # transcript. Setting the output to mode `text` will disable audio output from the
239
+ # model.
240
+ output_modalities: nil,
274
241
  # The final status of the response (`completed`, `cancelled`, `failed`, or
275
242
  # `incomplete`, `in_progress`).
276
243
  status: nil,
277
244
  # Additional details about the status.
278
245
  status_details: nil,
279
- # Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
280
- temperature: nil,
281
246
  # Usage statistics for the Response, this will correspond to billing. A Realtime
282
247
  # API session will maintain a conversation context and append new Items to the
283
248
  # Conversation, thus output from previous turns (text and audio tokens) will
284
249
  # become the input for later turns.
285
- usage: nil,
286
- # The voice the model used to respond. Current voice options are `alloy`, `ash`,
287
- # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
288
- voice: nil
250
+ usage: nil
289
251
  )
290
252
  end
291
253
 
@@ -293,13 +255,10 @@ module OpenAI
293
255
  override.returns(
294
256
  {
295
257
  id: String,
258
+ audio: OpenAI::Realtime::RealtimeResponse::Audio,
296
259
  conversation_id: String,
297
260
  max_output_tokens: T.any(Integer, Symbol),
298
261
  metadata: T.nilable(T::Hash[Symbol, String]),
299
- modalities:
300
- T::Array[
301
- OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol
302
- ],
303
262
  object: OpenAI::Realtime::RealtimeResponse::Object::OrSymbol,
304
263
  output:
305
264
  T::Array[
@@ -315,69 +274,270 @@ module OpenAI
315
274
  OpenAI::Realtime::RealtimeMcpApprovalRequest
316
275
  )
317
276
  ],
318
- output_audio_format:
319
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::OrSymbol,
277
+ output_modalities:
278
+ T::Array[
279
+ OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
280
+ ],
320
281
  status: OpenAI::Realtime::RealtimeResponse::Status::OrSymbol,
321
282
  status_details: OpenAI::Realtime::RealtimeResponseStatus,
322
- temperature: Float,
323
- usage: OpenAI::Realtime::RealtimeResponseUsage,
324
- voice:
325
- T.any(
326
- String,
327
- OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol
328
- )
283
+ usage: OpenAI::Realtime::RealtimeResponseUsage
329
284
  }
330
285
  )
331
286
  end
332
287
  def to_hash
333
288
  end
334
289
 
335
- # Maximum number of output tokens for a single assistant response, inclusive of
336
- # tool calls, that was used in this response.
337
- module MaxOutputTokens
338
- extend OpenAI::Internal::Type::Union
290
+ class Audio < OpenAI::Internal::Type::BaseModel
291
+ OrHash =
292
+ T.type_alias do
293
+ T.any(
294
+ OpenAI::Realtime::RealtimeResponse::Audio,
295
+ OpenAI::Internal::AnyHash
296
+ )
297
+ end
339
298
 
340
- Variants = T.type_alias { T.any(Integer, Symbol) }
299
+ sig do
300
+ returns(
301
+ T.nilable(OpenAI::Realtime::RealtimeResponse::Audio::Output)
302
+ )
303
+ end
304
+ attr_reader :output
305
+
306
+ sig do
307
+ params(
308
+ output: OpenAI::Realtime::RealtimeResponse::Audio::Output::OrHash
309
+ ).void
310
+ end
311
+ attr_writer :output
312
+
313
+ # Configuration for audio output.
314
+ sig do
315
+ params(
316
+ output: OpenAI::Realtime::RealtimeResponse::Audio::Output::OrHash
317
+ ).returns(T.attached_class)
318
+ end
319
+ def self.new(output: nil)
320
+ end
341
321
 
342
322
  sig do
343
323
  override.returns(
344
- T::Array[
345
- OpenAI::Realtime::RealtimeResponse::MaxOutputTokens::Variants
346
- ]
324
+ { output: OpenAI::Realtime::RealtimeResponse::Audio::Output }
347
325
  )
348
326
  end
349
- def self.variants
327
+ def to_hash
350
328
  end
351
- end
352
329
 
353
- module Modality
354
- extend OpenAI::Internal::Type::Enum
330
+ class Output < OpenAI::Internal::Type::BaseModel
331
+ OrHash =
332
+ T.type_alias do
333
+ T.any(
334
+ OpenAI::Realtime::RealtimeResponse::Audio::Output,
335
+ OpenAI::Internal::AnyHash
336
+ )
337
+ end
355
338
 
356
- TaggedSymbol =
357
- T.type_alias do
358
- T.all(Symbol, OpenAI::Realtime::RealtimeResponse::Modality)
339
+ # The format of the output audio.
340
+ sig do
341
+ returns(
342
+ T.nilable(
343
+ T.any(
344
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
345
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
346
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
347
+ )
348
+ )
349
+ )
359
350
  end
360
- OrSymbol = T.type_alias { T.any(Symbol, String) }
351
+ attr_reader :format_
361
352
 
362
- TEXT =
363
- T.let(
364
- :text,
365
- OpenAI::Realtime::RealtimeResponse::Modality::TaggedSymbol
366
- )
367
- AUDIO =
368
- T.let(
369
- :audio,
370
- OpenAI::Realtime::RealtimeResponse::Modality::TaggedSymbol
353
+ sig do
354
+ params(
355
+ format_:
356
+ T.any(
357
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
358
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
359
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
360
+ )
361
+ ).void
362
+ end
363
+ attr_writer :format_
364
+
365
+ # The voice the model uses to respond. Voice cannot be changed during the session
366
+ # once the model has responded with audio at least once. Current voice options are
367
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
368
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
369
+ sig do
370
+ returns(
371
+ T.nilable(
372
+ T.any(
373
+ String,
374
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
375
+ )
376
+ )
377
+ )
378
+ end
379
+ attr_reader :voice
380
+
381
+ sig do
382
+ params(
383
+ voice:
384
+ T.any(
385
+ String,
386
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
387
+ )
388
+ ).void
389
+ end
390
+ attr_writer :voice
391
+
392
+ sig do
393
+ params(
394
+ format_:
395
+ T.any(
396
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
397
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
398
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
399
+ ),
400
+ voice:
401
+ T.any(
402
+ String,
403
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
404
+ )
405
+ ).returns(T.attached_class)
406
+ end
407
+ def self.new(
408
+ # The format of the output audio.
409
+ format_: nil,
410
+ # The voice the model uses to respond. Voice cannot be changed during the session
411
+ # once the model has responded with audio at least once. Current voice options are
412
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
413
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
414
+ voice: nil
371
415
  )
416
+ end
417
+
418
+ sig do
419
+ override.returns(
420
+ {
421
+ format_:
422
+ T.any(
423
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
424
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
425
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
426
+ ),
427
+ voice:
428
+ T.any(
429
+ String,
430
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
431
+ )
432
+ }
433
+ )
434
+ end
435
+ def to_hash
436
+ end
437
+
438
+ # The voice the model uses to respond. Voice cannot be changed during the session
439
+ # once the model has responded with audio at least once. Current voice options are
440
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
441
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
442
+ module Voice
443
+ extend OpenAI::Internal::Type::Union
444
+
445
+ Variants =
446
+ T.type_alias do
447
+ T.any(
448
+ String,
449
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
450
+ )
451
+ end
452
+
453
+ sig do
454
+ override.returns(
455
+ T::Array[
456
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::Variants
457
+ ]
458
+ )
459
+ end
460
+ def self.variants
461
+ end
462
+
463
+ TaggedSymbol =
464
+ T.type_alias do
465
+ T.all(
466
+ Symbol,
467
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice
468
+ )
469
+ end
470
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
471
+
472
+ ALLOY =
473
+ T.let(
474
+ :alloy,
475
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
476
+ )
477
+ ASH =
478
+ T.let(
479
+ :ash,
480
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
481
+ )
482
+ BALLAD =
483
+ T.let(
484
+ :ballad,
485
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
486
+ )
487
+ CORAL =
488
+ T.let(
489
+ :coral,
490
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
491
+ )
492
+ ECHO =
493
+ T.let(
494
+ :echo,
495
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
496
+ )
497
+ SAGE =
498
+ T.let(
499
+ :sage,
500
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
501
+ )
502
+ SHIMMER =
503
+ T.let(
504
+ :shimmer,
505
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
506
+ )
507
+ VERSE =
508
+ T.let(
509
+ :verse,
510
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
511
+ )
512
+ MARIN =
513
+ T.let(
514
+ :marin,
515
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
516
+ )
517
+ CEDAR =
518
+ T.let(
519
+ :cedar,
520
+ OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
521
+ )
522
+ end
523
+ end
524
+ end
525
+
526
+ # Maximum number of output tokens for a single assistant response, inclusive of
527
+ # tool calls, that was used in this response.
528
+ module MaxOutputTokens
529
+ extend OpenAI::Internal::Type::Union
530
+
531
+ Variants = T.type_alias { T.any(Integer, Symbol) }
372
532
 
373
533
  sig do
374
534
  override.returns(
375
535
  T::Array[
376
- OpenAI::Realtime::RealtimeResponse::Modality::TaggedSymbol
536
+ OpenAI::Realtime::RealtimeResponse::MaxOutputTokens::Variants
377
537
  ]
378
538
  )
379
539
  end
380
- def self.values
540
+ def self.variants
381
541
  end
382
542
  end
383
543
 
@@ -406,39 +566,30 @@ module OpenAI
406
566
  end
407
567
  end
408
568
 
409
- # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
410
- module OutputAudioFormat
569
+ module OutputModality
411
570
  extend OpenAI::Internal::Type::Enum
412
571
 
413
572
  TaggedSymbol =
414
573
  T.type_alias do
415
- T.all(
416
- Symbol,
417
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat
418
- )
574
+ T.all(Symbol, OpenAI::Realtime::RealtimeResponse::OutputModality)
419
575
  end
420
576
  OrSymbol = T.type_alias { T.any(Symbol, String) }
421
577
 
422
- PCM16 =
423
- T.let(
424
- :pcm16,
425
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol
426
- )
427
- G711_ULAW =
578
+ TEXT =
428
579
  T.let(
429
- :g711_ulaw,
430
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol
580
+ :text,
581
+ OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol
431
582
  )
432
- G711_ALAW =
583
+ AUDIO =
433
584
  T.let(
434
- :g711_alaw,
435
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol
585
+ :audio,
586
+ OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol
436
587
  )
437
588
 
438
589
  sig do
439
590
  override.returns(
440
591
  T::Array[
441
- OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol
592
+ OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol
442
593
  ]
443
594
  )
444
595
  end
@@ -491,82 +642,6 @@ module OpenAI
491
642
  def self.values
492
643
  end
493
644
  end
494
-
495
- # The voice the model used to respond. Current voice options are `alloy`, `ash`,
496
- # `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
497
- module Voice
498
- extend OpenAI::Internal::Type::Union
499
-
500
- Variants =
501
- T.type_alias do
502
- T.any(
503
- String,
504
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
505
- )
506
- end
507
-
508
- sig do
509
- override.returns(
510
- T::Array[OpenAI::Realtime::RealtimeResponse::Voice::Variants]
511
- )
512
- end
513
- def self.variants
514
- end
515
-
516
- TaggedSymbol =
517
- T.type_alias do
518
- T.all(Symbol, OpenAI::Realtime::RealtimeResponse::Voice)
519
- end
520
- OrSymbol = T.type_alias { T.any(Symbol, String) }
521
-
522
- ALLOY =
523
- T.let(
524
- :alloy,
525
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
526
- )
527
- ASH =
528
- T.let(:ash, OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol)
529
- BALLAD =
530
- T.let(
531
- :ballad,
532
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
533
- )
534
- CORAL =
535
- T.let(
536
- :coral,
537
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
538
- )
539
- ECHO =
540
- T.let(
541
- :echo,
542
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
543
- )
544
- SAGE =
545
- T.let(
546
- :sage,
547
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
548
- )
549
- SHIMMER =
550
- T.let(
551
- :shimmer,
552
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
553
- )
554
- VERSE =
555
- T.let(
556
- :verse,
557
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
558
- )
559
- MARIN =
560
- T.let(
561
- :marin,
562
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
563
- )
564
- CEDAR =
565
- T.let(
566
- :cedar,
567
- OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
568
- )
569
- end
570
645
  end
571
646
  end
572
647
  end