openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -14,14 +14,7 @@ module OpenAI
14
14
  )
15
15
  end
16
16
 
17
- # Unique identifier for the session that looks like `sess_1234567890abcdef`.
18
- sig { returns(T.nilable(String)) }
19
- attr_reader :id
20
-
21
- sig { params(id: String).void }
22
- attr_writer :id
23
-
24
- # Configuration for input and output audio for the session.
17
+ # Configuration for input and output audio.
25
18
  sig do
26
19
  returns(
27
20
  T.nilable(OpenAI::Realtime::RealtimeSessionCreateResponse::Audio)
@@ -37,17 +30,23 @@ module OpenAI
37
30
  end
38
31
  attr_writer :audio
39
32
 
40
- # Expiration timestamp for the session, in seconds since epoch.
41
- sig { returns(T.nilable(Integer)) }
42
- attr_reader :expires_at
33
+ # Ephemeral key returned by the API.
34
+ sig do
35
+ returns(T.nilable(OpenAI::Realtime::RealtimeSessionClientSecret))
36
+ end
37
+ attr_reader :client_secret
43
38
 
44
- sig { params(expires_at: Integer).void }
45
- attr_writer :expires_at
39
+ sig do
40
+ params(
41
+ client_secret: OpenAI::Realtime::RealtimeSessionClientSecret::OrHash
42
+ ).void
43
+ end
44
+ attr_writer :client_secret
46
45
 
47
46
  # Additional fields to include in server outputs.
48
47
  #
49
- # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
50
- # transcription.
48
+ # `item.input_audio_transcription.logprobs`: Include logprobs for input audio
49
+ # transcription.
51
50
  sig do
52
51
  returns(
53
52
  T.nilable(
@@ -102,21 +101,30 @@ module OpenAI
102
101
  attr_writer :max_output_tokens
103
102
 
104
103
  # The Realtime model used for this session.
105
- sig { returns(T.nilable(String)) }
104
+ sig do
105
+ returns(
106
+ T.nilable(
107
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::Variants
108
+ )
109
+ )
110
+ end
106
111
  attr_reader :model
107
112
 
108
- sig { params(model: String).void }
113
+ sig do
114
+ params(
115
+ model:
116
+ T.any(
117
+ String,
118
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::OrSymbol
119
+ )
120
+ ).void
121
+ end
109
122
  attr_writer :model
110
123
 
111
- # The object type. Always `realtime.session`.
112
- sig { returns(T.nilable(String)) }
113
- attr_reader :object
114
-
115
- sig { params(object: String).void }
116
- attr_writer :object
117
-
118
- # The set of modalities the model can respond with. To disable audio, set this to
119
- # ["text"].
124
+ # The set of modalities the model can respond with. It defaults to `["audio"]`,
125
+ # indicating that the model will respond with audio plus a transcript. `["text"]`
126
+ # can be used to make the model respond with text only. It is not possible to
127
+ # request both `text` and `audio` at the same time.
120
128
  sig do
121
129
  returns(
122
130
  T.nilable(
@@ -138,19 +146,48 @@ module OpenAI
138
146
  end
139
147
  attr_writer :output_modalities
140
148
 
141
- # How the model chooses tools. Options are `auto`, `none`, `required`, or specify
142
- # a function.
143
- sig { returns(T.nilable(String)) }
149
+ # Reference to a prompt template and its variables.
150
+ # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
151
+ sig { returns(T.nilable(OpenAI::Responses::ResponsePrompt)) }
152
+ attr_reader :prompt
153
+
154
+ sig do
155
+ params(
156
+ prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash)
157
+ ).void
158
+ end
159
+ attr_writer :prompt
160
+
161
+ # How the model chooses tools. Provide one of the string modes or force a specific
162
+ # function/MCP tool.
163
+ sig do
164
+ returns(
165
+ T.nilable(
166
+ OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice::Variants
167
+ )
168
+ )
169
+ end
144
170
  attr_reader :tool_choice
145
171
 
146
- sig { params(tool_choice: String).void }
172
+ sig do
173
+ params(
174
+ tool_choice:
175
+ T.any(
176
+ OpenAI::Responses::ToolChoiceOptions::OrSymbol,
177
+ OpenAI::Responses::ToolChoiceFunction::OrHash,
178
+ OpenAI::Responses::ToolChoiceMcp::OrHash
179
+ )
180
+ ).void
181
+ end
147
182
  attr_writer :tool_choice
148
183
 
149
- # Tools (functions) available to the model.
184
+ # Tools available to the model.
150
185
  sig do
151
186
  returns(
152
187
  T.nilable(
153
- T::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool]
188
+ T::Array[
189
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Variants
190
+ ]
154
191
  )
155
192
  )
156
193
  end
@@ -160,14 +197,18 @@ module OpenAI
160
197
  params(
161
198
  tools:
162
199
  T::Array[
163
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::OrHash
200
+ T.any(
201
+ OpenAI::Realtime::Models::OrHash,
202
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::OrHash
203
+ )
164
204
  ]
165
205
  ).void
166
206
  end
167
207
  attr_writer :tools
168
208
 
169
- # Configuration options for tracing. Set to null to disable tracing. Once tracing
170
- # is enabled for a session, the configuration cannot be modified.
209
+ # Realtime API can write session traces to the
210
+ # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
211
+ # tracing is enabled for a session, the configuration cannot be modified.
171
212
  #
172
213
  # `auto` will create a trace for the session with default values for the workflow
173
214
  # name, group id, and metadata.
@@ -178,83 +219,106 @@ module OpenAI
178
219
  )
179
220
  )
180
221
  end
181
- attr_reader :tracing
222
+ attr_accessor :tracing
223
+
224
+ # Controls how the realtime conversation is truncated prior to model inference.
225
+ # The default is `auto`.
226
+ sig do
227
+ returns(T.nilable(OpenAI::Realtime::RealtimeTruncation::Variants))
228
+ end
229
+ attr_reader :truncation
182
230
 
183
231
  sig do
184
232
  params(
185
- tracing:
233
+ truncation:
186
234
  T.any(
187
- Symbol,
188
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration::OrHash
235
+ OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol,
236
+ OpenAI::Realtime::RealtimeTruncationRetentionRatio::OrHash
189
237
  )
190
238
  ).void
191
239
  end
192
- attr_writer :tracing
240
+ attr_writer :truncation
193
241
 
194
- # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
195
- # means that the model will detect the start and end of speech based on audio
196
- # volume and respond at the end of user speech.
242
+ # The type of session to create. Always `realtime` for the Realtime API.
197
243
  sig do
198
244
  returns(
199
245
  T.nilable(
200
- OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection
246
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol
201
247
  )
202
248
  )
203
249
  end
204
- attr_reader :turn_detection
250
+ attr_reader :type
205
251
 
206
252
  sig do
207
253
  params(
208
- turn_detection:
209
- OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection::OrHash
254
+ type:
255
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Type::OrSymbol
210
256
  ).void
211
257
  end
212
- attr_writer :turn_detection
258
+ attr_writer :type
213
259
 
214
- # A Realtime session configuration object.
260
+ # A new Realtime session configuration, with an ephemeral key. Default TTL for
261
+ # keys is one minute.
215
262
  sig do
216
263
  params(
217
- id: String,
218
264
  audio:
219
265
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::OrHash,
220
- expires_at: Integer,
266
+ client_secret:
267
+ OpenAI::Realtime::RealtimeSessionClientSecret::OrHash,
221
268
  include:
222
269
  T::Array[
223
270
  OpenAI::Realtime::RealtimeSessionCreateResponse::Include::OrSymbol
224
271
  ],
225
272
  instructions: String,
226
273
  max_output_tokens: T.any(Integer, Symbol),
227
- model: String,
228
- object: String,
274
+ model:
275
+ T.any(
276
+ String,
277
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::OrSymbol
278
+ ),
229
279
  output_modalities:
230
280
  T::Array[
231
281
  OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::OrSymbol
232
282
  ],
233
- tool_choice: String,
283
+ prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash),
284
+ tool_choice:
285
+ T.any(
286
+ OpenAI::Responses::ToolChoiceOptions::OrSymbol,
287
+ OpenAI::Responses::ToolChoiceFunction::OrHash,
288
+ OpenAI::Responses::ToolChoiceMcp::OrHash
289
+ ),
234
290
  tools:
235
291
  T::Array[
236
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::OrHash
292
+ T.any(
293
+ OpenAI::Realtime::Models::OrHash,
294
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::OrHash
295
+ )
237
296
  ],
238
297
  tracing:
298
+ T.nilable(
299
+ T.any(
300
+ Symbol,
301
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration::OrHash
302
+ )
303
+ ),
304
+ truncation:
239
305
  T.any(
240
- Symbol,
241
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::TracingConfiguration::OrHash
306
+ OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol,
307
+ OpenAI::Realtime::RealtimeTruncationRetentionRatio::OrHash
242
308
  ),
243
- turn_detection:
244
- OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection::OrHash
309
+ type:
310
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Type::OrSymbol
245
311
  ).returns(T.attached_class)
246
312
  end
247
313
  def self.new(
248
- # Unique identifier for the session that looks like `sess_1234567890abcdef`.
249
- id: nil,
250
- # Configuration for input and output audio for the session.
314
+ # Configuration for input and output audio.
251
315
  audio: nil,
252
- # Expiration timestamp for the session, in seconds since epoch.
253
- expires_at: nil,
316
+ # Ephemeral key returned by the API.
317
+ client_secret: nil,
254
318
  # Additional fields to include in server outputs.
255
319
  #
256
- # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
257
- # transcription.
320
+ # `item.input_audio_transcription.logprobs`: Include logprobs for input audio
321
+ # transcription.
258
322
  include: nil,
259
323
  # The default system instructions (i.e. system message) prepended to model calls.
260
324
  # This field allows the client to guide the model on desired responses. The model
@@ -274,35 +338,39 @@ module OpenAI
274
338
  max_output_tokens: nil,
275
339
  # The Realtime model used for this session.
276
340
  model: nil,
277
- # The object type. Always `realtime.session`.
278
- object: nil,
279
- # The set of modalities the model can respond with. To disable audio, set this to
280
- # ["text"].
341
+ # The set of modalities the model can respond with. It defaults to `["audio"]`,
342
+ # indicating that the model will respond with audio plus a transcript. `["text"]`
343
+ # can be used to make the model respond with text only. It is not possible to
344
+ # request both `text` and `audio` at the same time.
281
345
  output_modalities: nil,
282
- # How the model chooses tools. Options are `auto`, `none`, `required`, or specify
283
- # a function.
346
+ # Reference to a prompt template and its variables.
347
+ # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
348
+ prompt: nil,
349
+ # How the model chooses tools. Provide one of the string modes or force a specific
350
+ # function/MCP tool.
284
351
  tool_choice: nil,
285
- # Tools (functions) available to the model.
352
+ # Tools available to the model.
286
353
  tools: nil,
287
- # Configuration options for tracing. Set to null to disable tracing. Once tracing
288
- # is enabled for a session, the configuration cannot be modified.
354
+ # Realtime API can write session traces to the
355
+ # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
356
+ # tracing is enabled for a session, the configuration cannot be modified.
289
357
  #
290
358
  # `auto` will create a trace for the session with default values for the workflow
291
359
  # name, group id, and metadata.
292
360
  tracing: nil,
293
- # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
294
- # means that the model will detect the start and end of speech based on audio
295
- # volume and respond at the end of user speech.
296
- turn_detection: nil
361
+ # Controls how the realtime conversation is truncated prior to model inference.
362
+ # The default is `auto`.
363
+ truncation: nil,
364
+ # The type of session to create. Always `realtime` for the Realtime API.
365
+ type: nil
297
366
  )
298
367
  end
299
368
 
300
369
  sig do
301
370
  override.returns(
302
371
  {
303
- id: String,
304
372
  audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio,
305
- expires_at: Integer,
373
+ client_secret: OpenAI::Realtime::RealtimeSessionClientSecret,
306
374
  include:
307
375
  T::Array[
308
376
  OpenAI::Realtime::RealtimeSessionCreateResponse::Include::TaggedSymbol
@@ -310,19 +378,26 @@ module OpenAI
310
378
  instructions: String,
311
379
  max_output_tokens:
312
380
  OpenAI::Realtime::RealtimeSessionCreateResponse::MaxOutputTokens::Variants,
313
- model: String,
314
- object: String,
381
+ model:
382
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::Variants,
315
383
  output_modalities:
316
384
  T::Array[
317
385
  OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
318
386
  ],
319
- tool_choice: String,
387
+ prompt: T.nilable(OpenAI::Responses::ResponsePrompt),
388
+ tool_choice:
389
+ OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice::Variants,
320
390
  tools:
321
- T::Array[OpenAI::Realtime::RealtimeSessionCreateResponse::Tool],
391
+ T::Array[
392
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Variants
393
+ ],
322
394
  tracing:
323
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::Variants,
324
- turn_detection:
325
- OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection
395
+ T.nilable(
396
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tracing::Variants
397
+ ),
398
+ truncation: OpenAI::Realtime::RealtimeTruncation::Variants,
399
+ type:
400
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol
326
401
  }
327
402
  )
328
403
  end
@@ -372,7 +447,7 @@ module OpenAI
372
447
  end
373
448
  attr_writer :output
374
449
 
375
- # Configuration for input and output audio for the session.
450
+ # Configuration for input and output audio.
376
451
  sig do
377
452
  params(
378
453
  input:
@@ -406,14 +481,31 @@ module OpenAI
406
481
  )
407
482
  end
408
483
 
409
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
410
- sig { returns(T.nilable(String)) }
484
+ # The format of the input audio.
485
+ sig do
486
+ returns(
487
+ T.nilable(OpenAI::Realtime::RealtimeAudioFormats::Variants)
488
+ )
489
+ end
411
490
  attr_reader :format_
412
491
 
413
- sig { params(format_: String).void }
492
+ sig do
493
+ params(
494
+ format_:
495
+ T.any(
496
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
497
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
498
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
499
+ )
500
+ ).void
501
+ end
414
502
  attr_writer :format_
415
503
 
416
- # Configuration for input audio noise reduction.
504
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
505
+ # off. Noise reduction filters audio added to the input audio buffer before it is
506
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
507
+ # detection accuracy (reducing false positives) and model performance by improving
508
+ # perception of the input audio.
417
509
  sig do
418
510
  returns(
419
511
  T.nilable(
@@ -431,25 +523,34 @@ module OpenAI
431
523
  end
432
524
  attr_writer :noise_reduction
433
525
 
434
- # Configuration for input audio transcription.
435
- sig do
436
- returns(
437
- T.nilable(
438
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription
439
- )
440
- )
441
- end
526
+ # Configuration for input audio transcription, defaults to off and can be set to
527
+ # `null` to turn off once on. Input audio transcription is not native to the
528
+ # model, since the model consumes audio directly. Transcription runs
529
+ # asynchronously through
530
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
531
+ # and should be treated as guidance of input audio content rather than precisely
532
+ # what the model heard. The client can optionally set the language and prompt for
533
+ # transcription, these offer additional guidance to the transcription service.
534
+ sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
442
535
  attr_reader :transcription
443
536
 
444
537
  sig do
445
538
  params(
446
- transcription:
447
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription::OrHash
539
+ transcription: OpenAI::Realtime::AudioTranscription::OrHash
448
540
  ).void
449
541
  end
450
542
  attr_writer :transcription
451
543
 
452
- # Configuration for turn detection.
544
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
545
+ # set to `null` to turn off, in which case the client must manually trigger model
546
+ # response. Server VAD means that the model will detect the start and end of
547
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
548
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
549
+ # semantically estimate whether the user has finished speaking, then dynamically
550
+ # sets a timeout based on this probability. For example, if user audio trails off
551
+ # with "uhhm", the model will score a low probability of turn end and wait longer
552
+ # for the user to continue speaking. This can be useful for more natural
553
+ # conversations, but may have a higher latency.
453
554
  sig do
454
555
  returns(
455
556
  T.nilable(
@@ -469,23 +570,47 @@ module OpenAI
469
570
 
470
571
  sig do
471
572
  params(
472
- format_: String,
573
+ format_:
574
+ T.any(
575
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
576
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
577
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
578
+ ),
473
579
  noise_reduction:
474
580
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::OrHash,
475
- transcription:
476
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription::OrHash,
581
+ transcription: OpenAI::Realtime::AudioTranscription::OrHash,
477
582
  turn_detection:
478
583
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::OrHash
479
584
  ).returns(T.attached_class)
480
585
  end
481
586
  def self.new(
482
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
587
+ # The format of the input audio.
483
588
  format_: nil,
484
- # Configuration for input audio noise reduction.
589
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
590
+ # off. Noise reduction filters audio added to the input audio buffer before it is
591
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
592
+ # detection accuracy (reducing false positives) and model performance by improving
593
+ # perception of the input audio.
485
594
  noise_reduction: nil,
486
- # Configuration for input audio transcription.
595
+ # Configuration for input audio transcription, defaults to off and can be set to
596
+ # `null` to turn off once on. Input audio transcription is not native to the
597
+ # model, since the model consumes audio directly. Transcription runs
598
+ # asynchronously through
599
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
600
+ # and should be treated as guidance of input audio content rather than precisely
601
+ # what the model heard. The client can optionally set the language and prompt for
602
+ # transcription, these offer additional guidance to the transcription service.
487
603
  transcription: nil,
488
- # Configuration for turn detection.
604
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
605
+ # set to `null` to turn off, in which case the client must manually trigger model
606
+ # response. Server VAD means that the model will detect the start and end of
607
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
608
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
609
+ # semantically estimate whether the user has finished speaking, then dynamically
610
+ # sets a timeout based on this probability. For example, if user audio trails off
611
+ # with "uhhm", the model will score a low probability of turn end and wait longer
612
+ # for the user to continue speaking. This can be useful for more natural
613
+ # conversations, but may have a higher latency.
489
614
  turn_detection: nil
490
615
  )
491
616
  end
@@ -493,11 +618,10 @@ module OpenAI
493
618
  sig do
494
619
  override.returns(
495
620
  {
496
- format_: String,
621
+ format_: OpenAI::Realtime::RealtimeAudioFormats::Variants,
497
622
  noise_reduction:
498
623
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction,
499
- transcription:
500
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription,
624
+ transcription: OpenAI::Realtime::AudioTranscription,
501
625
  turn_detection:
502
626
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection
503
627
  }
@@ -515,182 +639,198 @@ module OpenAI
515
639
  )
516
640
  end
517
641
 
642
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
643
+ # headphones, `far_field` is for far-field microphones such as laptop or
644
+ # conference room microphones.
518
645
  sig do
519
646
  returns(
520
- T.nilable(
521
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
522
- )
647
+ T.nilable(OpenAI::Realtime::NoiseReductionType::TaggedSymbol)
523
648
  )
524
649
  end
525
650
  attr_reader :type
526
651
 
527
652
  sig do
528
653
  params(
529
- type:
530
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol
654
+ type: OpenAI::Realtime::NoiseReductionType::OrSymbol
531
655
  ).void
532
656
  end
533
657
  attr_writer :type
534
658
 
535
- # Configuration for input audio noise reduction.
659
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
660
+ # off. Noise reduction filters audio added to the input audio buffer before it is
661
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
662
+ # detection accuracy (reducing false positives) and model performance by improving
663
+ # perception of the input audio.
536
664
  sig do
537
665
  params(
538
- type:
539
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol
666
+ type: OpenAI::Realtime::NoiseReductionType::OrSymbol
540
667
  ).returns(T.attached_class)
541
668
  end
542
- def self.new(type: nil)
669
+ def self.new(
670
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
671
+ # headphones, `far_field` is for far-field microphones such as laptop or
672
+ # conference room microphones.
673
+ type: nil
674
+ )
543
675
  end
544
676
 
545
677
  sig do
546
678
  override.returns(
547
- {
548
- type:
549
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
550
- }
679
+ { type: OpenAI::Realtime::NoiseReductionType::TaggedSymbol }
551
680
  )
552
681
  end
553
682
  def to_hash
554
683
  end
555
-
556
- module Type
557
- extend OpenAI::Internal::Type::Enum
558
-
559
- TaggedSymbol =
560
- T.type_alias do
561
- T.all(
562
- Symbol,
563
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type
564
- )
565
- end
566
- OrSymbol = T.type_alias { T.any(Symbol, String) }
567
-
568
- NEAR_FIELD =
569
- T.let(
570
- :near_field,
571
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
572
- )
573
- FAR_FIELD =
574
- T.let(
575
- :far_field,
576
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
577
- )
578
-
579
- sig do
580
- override.returns(
581
- T::Array[
582
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
583
- ]
584
- )
585
- end
586
- def self.values
587
- end
588
- end
589
684
  end
590
685
 
591
- class Transcription < OpenAI::Internal::Type::BaseModel
686
+ class TurnDetection < OpenAI::Internal::Type::BaseModel
592
687
  OrHash =
593
688
  T.type_alias do
594
689
  T.any(
595
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::Transcription,
690
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection,
596
691
  OpenAI::Internal::AnyHash
597
692
  )
598
693
  end
599
694
 
600
- # The language of the input audio.
601
- sig { returns(T.nilable(String)) }
602
- attr_reader :language
603
-
604
- sig { params(language: String).void }
605
- attr_writer :language
606
-
607
- # The model to use for transcription.
608
- sig { returns(T.nilable(String)) }
609
- attr_reader :model
610
-
611
- sig { params(model: String).void }
612
- attr_writer :model
695
+ # Whether or not to automatically generate a response when a VAD stop event
696
+ # occurs.
697
+ sig { returns(T.nilable(T::Boolean)) }
698
+ attr_reader :create_response
613
699
 
614
- # Optional text to guide the model's style or continue a previous audio segment.
615
- sig { returns(T.nilable(String)) }
616
- attr_reader :prompt
700
+ sig { params(create_response: T::Boolean).void }
701
+ attr_writer :create_response
617
702
 
618
- sig { params(prompt: String).void }
619
- attr_writer :prompt
620
-
621
- # Configuration for input audio transcription.
703
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
704
+ # will wait longer for the user to continue speaking, `high` will respond more
705
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
706
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
622
707
  sig do
623
- params(language: String, model: String, prompt: String).returns(
624
- T.attached_class
708
+ returns(
709
+ T.nilable(
710
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol
711
+ )
625
712
  )
626
713
  end
627
- def self.new(
628
- # The language of the input audio.
629
- language: nil,
630
- # The model to use for transcription.
631
- model: nil,
632
- # Optional text to guide the model's style or continue a previous audio segment.
633
- prompt: nil
634
- )
635
- end
714
+ attr_reader :eagerness
636
715
 
637
716
  sig do
638
- override.returns(
639
- { language: String, model: String, prompt: String }
640
- )
641
- end
642
- def to_hash
717
+ params(
718
+ eagerness:
719
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::OrSymbol
720
+ ).void
643
721
  end
644
- end
722
+ attr_writer :eagerness
645
723
 
646
- class TurnDetection < OpenAI::Internal::Type::BaseModel
647
- OrHash =
648
- T.type_alias do
649
- T.any(
650
- OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection,
651
- OpenAI::Internal::AnyHash
652
- )
653
- end
724
+ # Optional idle timeout after which turn detection will auto-timeout when no
725
+ # additional audio is received.
726
+ sig { returns(T.nilable(Integer)) }
727
+ attr_accessor :idle_timeout_ms
728
+
729
+ # Whether or not to automatically interrupt any ongoing response with output to
730
+ # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
731
+ # occurs.
732
+ sig { returns(T.nilable(T::Boolean)) }
733
+ attr_reader :interrupt_response
654
734
 
735
+ sig { params(interrupt_response: T::Boolean).void }
736
+ attr_writer :interrupt_response
737
+
738
+ # Used only for `server_vad` mode. Amount of audio to include before the VAD
739
+ # detected speech (in milliseconds). Defaults to 300ms.
655
740
  sig { returns(T.nilable(Integer)) }
656
741
  attr_reader :prefix_padding_ms
657
742
 
658
743
  sig { params(prefix_padding_ms: Integer).void }
659
744
  attr_writer :prefix_padding_ms
660
745
 
746
+ # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
747
+ # milliseconds). Defaults to 500ms. With shorter values the model will respond
748
+ # more quickly, but may jump in on short pauses from the user.
661
749
  sig { returns(T.nilable(Integer)) }
662
750
  attr_reader :silence_duration_ms
663
751
 
664
752
  sig { params(silence_duration_ms: Integer).void }
665
753
  attr_writer :silence_duration_ms
666
754
 
755
+ # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
756
+ # defaults to 0.5. A higher threshold will require louder audio to activate the
757
+ # model, and thus might perform better in noisy environments.
667
758
  sig { returns(T.nilable(Float)) }
668
759
  attr_reader :threshold
669
760
 
670
761
  sig { params(threshold: Float).void }
671
762
  attr_writer :threshold
672
763
 
673
- # Type of turn detection, only `server_vad` is currently supported.
674
- sig { returns(T.nilable(String)) }
764
+ # Type of turn detection.
765
+ sig do
766
+ returns(
767
+ T.nilable(
768
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol
769
+ )
770
+ )
771
+ end
675
772
  attr_reader :type
676
773
 
677
- sig { params(type: String).void }
774
+ sig do
775
+ params(
776
+ type:
777
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::OrSymbol
778
+ ).void
779
+ end
678
780
  attr_writer :type
679
781
 
680
- # Configuration for turn detection.
782
+ # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
783
+ # set to `null` to turn off, in which case the client must manually trigger model
784
+ # response. Server VAD means that the model will detect the start and end of
785
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
786
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
787
+ # semantically estimate whether the user has finished speaking, then dynamically
788
+ # sets a timeout based on this probability. For example, if user audio trails off
789
+ # with "uhhm", the model will score a low probability of turn end and wait longer
790
+ # for the user to continue speaking. This can be useful for more natural
791
+ # conversations, but may have a higher latency.
681
792
  sig do
682
793
  params(
794
+ create_response: T::Boolean,
795
+ eagerness:
796
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::OrSymbol,
797
+ idle_timeout_ms: T.nilable(Integer),
798
+ interrupt_response: T::Boolean,
683
799
  prefix_padding_ms: Integer,
684
800
  silence_duration_ms: Integer,
685
801
  threshold: Float,
686
- type: String
802
+ type:
803
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::OrSymbol
687
804
  ).returns(T.attached_class)
688
805
  end
689
806
  def self.new(
807
+ # Whether or not to automatically generate a response when a VAD stop event
808
+ # occurs.
809
+ create_response: nil,
810
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
811
+ # will wait longer for the user to continue speaking, `high` will respond more
812
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
813
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
814
+ eagerness: nil,
815
+ # Optional idle timeout after which turn detection will auto-timeout when no
816
+ # additional audio is received.
817
+ idle_timeout_ms: nil,
818
+ # Whether or not to automatically interrupt any ongoing response with output to
819
+ # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
820
+ # occurs.
821
+ interrupt_response: nil,
822
+ # Used only for `server_vad` mode. Amount of audio to include before the VAD
823
+ # detected speech (in milliseconds). Defaults to 300ms.
690
824
  prefix_padding_ms: nil,
825
+ # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
826
+ # milliseconds). Defaults to 500ms. With shorter values the model will respond
827
+ # more quickly, but may jump in on short pauses from the user.
691
828
  silence_duration_ms: nil,
829
+ # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
830
+ # defaults to 0.5. A higher threshold will require louder audio to activate the
831
+ # model, and thus might perform better in noisy environments.
692
832
  threshold: nil,
693
- # Type of turn detection, only `server_vad` is currently supported.
833
+ # Type of turn detection.
694
834
  type: nil
695
835
  )
696
836
  end
@@ -698,15 +838,104 @@ module OpenAI
698
838
  sig do
699
839
  override.returns(
700
840
  {
841
+ create_response: T::Boolean,
842
+ eagerness:
843
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol,
844
+ idle_timeout_ms: T.nilable(Integer),
845
+ interrupt_response: T::Boolean,
701
846
  prefix_padding_ms: Integer,
702
847
  silence_duration_ms: Integer,
703
848
  threshold: Float,
704
- type: String
849
+ type:
850
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol
705
851
  }
706
852
  )
707
853
  end
708
854
  def to_hash
709
855
  end
856
+
857
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
858
+ # will wait longer for the user to continue speaking, `high` will respond more
859
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
860
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
861
+ module Eagerness
862
+ extend OpenAI::Internal::Type::Enum
863
+
864
+ TaggedSymbol =
865
+ T.type_alias do
866
+ T.all(
867
+ Symbol,
868
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness
869
+ )
870
+ end
871
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
872
+
873
+ LOW =
874
+ T.let(
875
+ :low,
876
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol
877
+ )
878
+ MEDIUM =
879
+ T.let(
880
+ :medium,
881
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol
882
+ )
883
+ HIGH =
884
+ T.let(
885
+ :high,
886
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol
887
+ )
888
+ AUTO =
889
+ T.let(
890
+ :auto,
891
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol
892
+ )
893
+
894
+ sig do
895
+ override.returns(
896
+ T::Array[
897
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Eagerness::TaggedSymbol
898
+ ]
899
+ )
900
+ end
901
+ def self.values
902
+ end
903
+ end
904
+
905
+ # Type of turn detection.
906
+ module Type
907
+ extend OpenAI::Internal::Type::Enum
908
+
909
+ TaggedSymbol =
910
+ T.type_alias do
911
+ T.all(
912
+ Symbol,
913
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type
914
+ )
915
+ end
916
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
917
+
918
+ SERVER_VAD =
919
+ T.let(
920
+ :server_vad,
921
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol
922
+ )
923
+ SEMANTIC_VAD =
924
+ T.let(
925
+ :semantic_vad,
926
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol
927
+ )
928
+
929
+ sig do
930
+ override.returns(
931
+ T::Array[
932
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Input::TurnDetection::Type::TaggedSymbol
933
+ ]
934
+ )
935
+ end
936
+ def self.values
937
+ end
938
+ end
710
939
  end
711
940
  end
712
941
 
@@ -719,19 +948,43 @@ module OpenAI
719
948
  )
720
949
  end
721
950
 
722
- # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
723
- sig { returns(T.nilable(String)) }
951
+ # The format of the output audio.
952
+ sig do
953
+ returns(
954
+ T.nilable(OpenAI::Realtime::RealtimeAudioFormats::Variants)
955
+ )
956
+ end
724
957
  attr_reader :format_
725
958
 
726
- sig { params(format_: String).void }
959
+ sig do
960
+ params(
961
+ format_:
962
+ T.any(
963
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
964
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
965
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
966
+ )
967
+ ).void
968
+ end
727
969
  attr_writer :format_
728
970
 
971
+ # The speed of the model's spoken response as a multiple of the original speed.
972
+ # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
973
+ # This value can only be changed in between model turns, not while a response is
974
+ # in progress.
975
+ #
976
+ # This parameter is a post-processing adjustment to the audio after it is
977
+ # generated, it's also possible to prompt the model to speak faster or slower.
729
978
  sig { returns(T.nilable(Float)) }
730
979
  attr_reader :speed
731
980
 
732
981
  sig { params(speed: Float).void }
733
982
  attr_writer :speed
734
983
 
984
+ # The voice the model uses to respond. Voice cannot be changed during the session
985
+ # once the model has responded with audio at least once. Current voice options are
986
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
987
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
735
988
  sig do
736
989
  returns(
737
990
  T.nilable(
@@ -754,7 +1007,12 @@ module OpenAI
754
1007
 
755
1008
  sig do
756
1009
  params(
757
- format_: String,
1010
+ format_:
1011
+ T.any(
1012
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
1013
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
1014
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
1015
+ ),
758
1016
  speed: Float,
759
1017
  voice:
760
1018
  T.any(
@@ -764,9 +1022,20 @@ module OpenAI
764
1022
  ).returns(T.attached_class)
765
1023
  end
766
1024
  def self.new(
767
- # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
1025
+ # The format of the output audio.
768
1026
  format_: nil,
1027
+ # The speed of the model's spoken response as a multiple of the original speed.
1028
+ # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
1029
+ # This value can only be changed in between model turns, not while a response is
1030
+ # in progress.
1031
+ #
1032
+ # This parameter is a post-processing adjustment to the audio after it is
1033
+ # generated, it's also possible to prompt the model to speak faster or slower.
769
1034
  speed: nil,
1035
+ # The voice the model uses to respond. Voice cannot be changed during the session
1036
+ # once the model has responded with audio at least once. Current voice options are
1037
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
1038
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
770
1039
  voice: nil
771
1040
  )
772
1041
  end
@@ -774,7 +1043,7 @@ module OpenAI
774
1043
  sig do
775
1044
  override.returns(
776
1045
  {
777
- format_: String,
1046
+ format_: OpenAI::Realtime::RealtimeAudioFormats::Variants,
778
1047
  speed: Float,
779
1048
  voice:
780
1049
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::Output::Voice::Variants
@@ -784,6 +1053,10 @@ module OpenAI
784
1053
  def to_hash
785
1054
  end
786
1055
 
1056
+ # The voice the model uses to respond. Voice cannot be changed during the session
1057
+ # once the model has responded with audio at least once. Current voice options are
1058
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
1059
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
787
1060
  module Voice
788
1061
  extend OpenAI::Internal::Type::Union
789
1062
 
@@ -916,158 +1189,763 @@ module OpenAI
916
1189
  end
917
1190
  end
918
1191
 
919
- module OutputModality
920
- extend OpenAI::Internal::Type::Enum
1192
+ # The Realtime model used for this session.
1193
+ module Model
1194
+ extend OpenAI::Internal::Type::Union
921
1195
 
922
- TaggedSymbol =
1196
+ Variants =
923
1197
  T.type_alias do
924
- T.all(
925
- Symbol,
926
- OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality
1198
+ T.any(
1199
+ String,
1200
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
927
1201
  )
928
1202
  end
929
- OrSymbol = T.type_alias { T.any(Symbol, String) }
930
-
931
- TEXT =
932
- T.let(
933
- :text,
934
- OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
935
- )
936
- AUDIO =
937
- T.let(
938
- :audio,
939
- OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
940
- )
941
1203
 
942
1204
  sig do
943
1205
  override.returns(
944
1206
  T::Array[
945
- OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
1207
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::Variants
946
1208
  ]
947
1209
  )
948
1210
  end
949
- def self.values
1211
+ def self.variants
950
1212
  end
951
- end
952
1213
 
953
- class Tool < OpenAI::Internal::Type::BaseModel
954
- OrHash =
1214
+ TaggedSymbol =
955
1215
  T.type_alias do
956
- T.any(
957
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool,
958
- OpenAI::Internal::AnyHash
1216
+ T.all(
1217
+ Symbol,
1218
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model
959
1219
  )
960
1220
  end
1221
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
961
1222
 
962
- # The description of the function, including guidance on when and how to call it,
963
- # and guidance about what to tell the user when calling (if anything).
964
- sig { returns(T.nilable(String)) }
965
- attr_reader :description
966
-
967
- sig { params(description: String).void }
968
- attr_writer :description
969
-
970
- # The name of the function.
971
- sig { returns(T.nilable(String)) }
972
- attr_reader :name
1223
+ GPT_REALTIME =
1224
+ T.let(
1225
+ :"gpt-realtime",
1226
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1227
+ )
1228
+ GPT_REALTIME_2025_08_28 =
1229
+ T.let(
1230
+ :"gpt-realtime-2025-08-28",
1231
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1232
+ )
1233
+ GPT_4O_REALTIME_PREVIEW =
1234
+ T.let(
1235
+ :"gpt-4o-realtime-preview",
1236
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1237
+ )
1238
+ GPT_4O_REALTIME_PREVIEW_2024_10_01 =
1239
+ T.let(
1240
+ :"gpt-4o-realtime-preview-2024-10-01",
1241
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1242
+ )
1243
+ GPT_4O_REALTIME_PREVIEW_2024_12_17 =
1244
+ T.let(
1245
+ :"gpt-4o-realtime-preview-2024-12-17",
1246
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1247
+ )
1248
+ GPT_4O_REALTIME_PREVIEW_2025_06_03 =
1249
+ T.let(
1250
+ :"gpt-4o-realtime-preview-2025-06-03",
1251
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1252
+ )
1253
+ GPT_4O_MINI_REALTIME_PREVIEW =
1254
+ T.let(
1255
+ :"gpt-4o-mini-realtime-preview",
1256
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1257
+ )
1258
+ GPT_4O_MINI_REALTIME_PREVIEW_2024_12_17 =
1259
+ T.let(
1260
+ :"gpt-4o-mini-realtime-preview-2024-12-17",
1261
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1262
+ )
1263
+ end
973
1264
 
974
- sig { params(name: String).void }
975
- attr_writer :name
1265
+ module OutputModality
1266
+ extend OpenAI::Internal::Type::Enum
976
1267
 
977
- # Parameters of the function in JSON Schema.
978
- sig { returns(T.nilable(T.anything)) }
979
- attr_reader :parameters
1268
+ TaggedSymbol =
1269
+ T.type_alias do
1270
+ T.all(
1271
+ Symbol,
1272
+ OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality
1273
+ )
1274
+ end
1275
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
980
1276
 
981
- sig { params(parameters: T.anything).void }
982
- attr_writer :parameters
1277
+ TEXT =
1278
+ T.let(
1279
+ :text,
1280
+ OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
1281
+ )
1282
+ AUDIO =
1283
+ T.let(
1284
+ :audio,
1285
+ OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
1286
+ )
983
1287
 
984
- # The type of the tool, i.e. `function`.
985
1288
  sig do
986
- returns(
987
- T.nilable(
988
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol
989
- )
1289
+ override.returns(
1290
+ T::Array[
1291
+ OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
1292
+ ]
990
1293
  )
991
1294
  end
992
- attr_reader :type
993
-
994
- sig do
995
- params(
996
- type:
997
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::OrSymbol
998
- ).void
1295
+ def self.values
999
1296
  end
1000
- attr_writer :type
1297
+ end
1001
1298
 
1002
- sig do
1003
- params(
1004
- description: String,
1005
- name: String,
1006
- parameters: T.anything,
1007
- type:
1008
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::OrSymbol
1009
- ).returns(T.attached_class)
1010
- end
1011
- def self.new(
1012
- # The description of the function, including guidance on when and how to call it,
1013
- # and guidance about what to tell the user when calling (if anything).
1014
- description: nil,
1015
- # The name of the function.
1016
- name: nil,
1017
- # Parameters of the function in JSON Schema.
1018
- parameters: nil,
1019
- # The type of the tool, i.e. `function`.
1020
- type: nil
1021
- )
1022
- end
1299
+ # How the model chooses tools. Provide one of the string modes or force a specific
1300
+ # function/MCP tool.
1301
+ module ToolChoice
1302
+ extend OpenAI::Internal::Type::Union
1303
+
1304
+ Variants =
1305
+ T.type_alias do
1306
+ T.any(
1307
+ OpenAI::Responses::ToolChoiceOptions::TaggedSymbol,
1308
+ OpenAI::Responses::ToolChoiceFunction,
1309
+ OpenAI::Responses::ToolChoiceMcp
1310
+ )
1311
+ end
1023
1312
 
1024
1313
  sig do
1025
1314
  override.returns(
1026
- {
1027
- description: String,
1028
- name: String,
1029
- parameters: T.anything,
1030
- type:
1031
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol
1032
- }
1315
+ T::Array[
1316
+ OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice::Variants
1317
+ ]
1033
1318
  )
1034
1319
  end
1035
- def to_hash
1320
+ def self.variants
1036
1321
  end
1322
+ end
1037
1323
 
1038
- # The type of the tool, i.e. `function`.
1039
- module Type
1040
- extend OpenAI::Internal::Type::Enum
1324
+ # Give the model access to additional tools via remote Model Context Protocol
1325
+ # (MCP) servers.
1326
+ # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
1327
+ module Tool
1328
+ extend OpenAI::Internal::Type::Union
1041
1329
 
1042
- TaggedSymbol =
1330
+ Variants =
1331
+ T.type_alias do
1332
+ T.any(
1333
+ OpenAI::Realtime::Models,
1334
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool
1335
+ )
1336
+ end
1337
+
1338
+ class McpTool < OpenAI::Internal::Type::BaseModel
1339
+ OrHash =
1043
1340
  T.type_alias do
1044
- T.all(
1045
- Symbol,
1046
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type
1341
+ T.any(
1342
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool,
1343
+ OpenAI::Internal::AnyHash
1047
1344
  )
1048
1345
  end
1049
- OrSymbol = T.type_alias { T.any(Symbol, String) }
1050
1346
 
1051
- FUNCTION =
1052
- T.let(
1053
- :function,
1054
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol
1347
+ # A label for this MCP server, used to identify it in tool calls.
1348
+ sig { returns(String) }
1349
+ attr_accessor :server_label
1350
+
1351
+ # The type of the MCP tool. Always `mcp`.
1352
+ sig { returns(Symbol) }
1353
+ attr_accessor :type
1354
+
1355
+ # List of allowed tool names or a filter object.
1356
+ sig do
1357
+ returns(
1358
+ T.nilable(
1359
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::Variants
1360
+ )
1361
+ )
1362
+ end
1363
+ attr_accessor :allowed_tools
1364
+
1365
+ # An OAuth access token that can be used with a remote MCP server, either with a
1366
+ # custom MCP server URL or a service connector. Your application must handle the
1367
+ # OAuth authorization flow and provide the token here.
1368
+ sig { returns(T.nilable(String)) }
1369
+ attr_reader :authorization
1370
+
1371
+ sig { params(authorization: String).void }
1372
+ attr_writer :authorization
1373
+
1374
+ # Identifier for service connectors, like those available in ChatGPT. One of
1375
+ # `server_url` or `connector_id` must be provided. Learn more about service
1376
+ # connectors
1377
+ # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
1378
+ #
1379
+ # Currently supported `connector_id` values are:
1380
+ #
1381
+ # - Dropbox: `connector_dropbox`
1382
+ # - Gmail: `connector_gmail`
1383
+ # - Google Calendar: `connector_googlecalendar`
1384
+ # - Google Drive: `connector_googledrive`
1385
+ # - Microsoft Teams: `connector_microsoftteams`
1386
+ # - Outlook Calendar: `connector_outlookcalendar`
1387
+ # - Outlook Email: `connector_outlookemail`
1388
+ # - SharePoint: `connector_sharepoint`
1389
+ sig do
1390
+ returns(
1391
+ T.nilable(
1392
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1393
+ )
1394
+ )
1395
+ end
1396
+ attr_reader :connector_id
1397
+
1398
+ sig do
1399
+ params(
1400
+ connector_id:
1401
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::OrSymbol
1402
+ ).void
1403
+ end
1404
+ attr_writer :connector_id
1405
+
1406
+ # Optional HTTP headers to send to the MCP server. Use for authentication or other
1407
+ # purposes.
1408
+ sig { returns(T.nilable(T::Hash[Symbol, String])) }
1409
+ attr_accessor :headers
1410
+
1411
+ # Specify which of the MCP server's tools require approval.
1412
+ sig do
1413
+ returns(
1414
+ T.nilable(
1415
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::Variants
1416
+ )
1055
1417
  )
1418
+ end
1419
+ attr_accessor :require_approval
1420
+
1421
+ # Optional description of the MCP server, used to provide more context.
1422
+ sig { returns(T.nilable(String)) }
1423
+ attr_reader :server_description
1424
+
1425
+ sig { params(server_description: String).void }
1426
+ attr_writer :server_description
1427
+
1428
+ # The URL for the MCP server. One of `server_url` or `connector_id` must be
1429
+ # provided.
1430
+ sig { returns(T.nilable(String)) }
1431
+ attr_reader :server_url
1432
+
1433
+ sig { params(server_url: String).void }
1434
+ attr_writer :server_url
1435
+
1436
+ # Give the model access to additional tools via remote Model Context Protocol
1437
+ # (MCP) servers.
1438
+ # [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
1439
+ sig do
1440
+ params(
1441
+ server_label: String,
1442
+ allowed_tools:
1443
+ T.nilable(
1444
+ T.any(
1445
+ T::Array[String],
1446
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter::OrHash
1447
+ )
1448
+ ),
1449
+ authorization: String,
1450
+ connector_id:
1451
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::OrSymbol,
1452
+ headers: T.nilable(T::Hash[Symbol, String]),
1453
+ require_approval:
1454
+ T.nilable(
1455
+ T.any(
1456
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::OrHash,
1457
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::OrSymbol
1458
+ )
1459
+ ),
1460
+ server_description: String,
1461
+ server_url: String,
1462
+ type: Symbol
1463
+ ).returns(T.attached_class)
1464
+ end
1465
+ def self.new(
1466
+ # A label for this MCP server, used to identify it in tool calls.
1467
+ server_label:,
1468
+ # List of allowed tool names or a filter object.
1469
+ allowed_tools: nil,
1470
+ # An OAuth access token that can be used with a remote MCP server, either with a
1471
+ # custom MCP server URL or a service connector. Your application must handle the
1472
+ # OAuth authorization flow and provide the token here.
1473
+ authorization: nil,
1474
+ # Identifier for service connectors, like those available in ChatGPT. One of
1475
+ # `server_url` or `connector_id` must be provided. Learn more about service
1476
+ # connectors
1477
+ # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
1478
+ #
1479
+ # Currently supported `connector_id` values are:
1480
+ #
1481
+ # - Dropbox: `connector_dropbox`
1482
+ # - Gmail: `connector_gmail`
1483
+ # - Google Calendar: `connector_googlecalendar`
1484
+ # - Google Drive: `connector_googledrive`
1485
+ # - Microsoft Teams: `connector_microsoftteams`
1486
+ # - Outlook Calendar: `connector_outlookcalendar`
1487
+ # - Outlook Email: `connector_outlookemail`
1488
+ # - SharePoint: `connector_sharepoint`
1489
+ connector_id: nil,
1490
+ # Optional HTTP headers to send to the MCP server. Use for authentication or other
1491
+ # purposes.
1492
+ headers: nil,
1493
+ # Specify which of the MCP server's tools require approval.
1494
+ require_approval: nil,
1495
+ # Optional description of the MCP server, used to provide more context.
1496
+ server_description: nil,
1497
+ # The URL for the MCP server. One of `server_url` or `connector_id` must be
1498
+ # provided.
1499
+ server_url: nil,
1500
+ # The type of the MCP tool. Always `mcp`.
1501
+ type: :mcp
1502
+ )
1503
+ end
1056
1504
 
1057
1505
  sig do
1058
1506
  override.returns(
1059
- T::Array[
1060
- OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Type::TaggedSymbol
1061
- ]
1507
+ {
1508
+ server_label: String,
1509
+ type: Symbol,
1510
+ allowed_tools:
1511
+ T.nilable(
1512
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::Variants
1513
+ ),
1514
+ authorization: String,
1515
+ connector_id:
1516
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol,
1517
+ headers: T.nilable(T::Hash[Symbol, String]),
1518
+ require_approval:
1519
+ T.nilable(
1520
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::Variants
1521
+ ),
1522
+ server_description: String,
1523
+ server_url: String
1524
+ }
1062
1525
  )
1063
1526
  end
1064
- def self.values
1527
+ def to_hash
1528
+ end
1529
+
1530
+ # List of allowed tool names or a filter object.
1531
+ module AllowedTools
1532
+ extend OpenAI::Internal::Type::Union
1533
+
1534
+ Variants =
1535
+ T.type_alias do
1536
+ T.any(
1537
+ T::Array[String],
1538
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter
1539
+ )
1540
+ end
1541
+
1542
+ class McpToolFilter < OpenAI::Internal::Type::BaseModel
1543
+ OrHash =
1544
+ T.type_alias do
1545
+ T.any(
1546
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::McpToolFilter,
1547
+ OpenAI::Internal::AnyHash
1548
+ )
1549
+ end
1550
+
1551
+ # Indicates whether or not a tool modifies data or is read-only. If an MCP server
1552
+ # is
1553
+ # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
1554
+ # it will match this filter.
1555
+ sig { returns(T.nilable(T::Boolean)) }
1556
+ attr_reader :read_only
1557
+
1558
+ sig { params(read_only: T::Boolean).void }
1559
+ attr_writer :read_only
1560
+
1561
+ # List of allowed tool names.
1562
+ sig { returns(T.nilable(T::Array[String])) }
1563
+ attr_reader :tool_names
1564
+
1565
+ sig { params(tool_names: T::Array[String]).void }
1566
+ attr_writer :tool_names
1567
+
1568
+ # A filter object to specify which tools are allowed.
1569
+ sig do
1570
+ params(
1571
+ read_only: T::Boolean,
1572
+ tool_names: T::Array[String]
1573
+ ).returns(T.attached_class)
1574
+ end
1575
+ def self.new(
1576
+ # Indicates whether or not a tool modifies data or is read-only. If an MCP server
1577
+ # is
1578
+ # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
1579
+ # it will match this filter.
1580
+ read_only: nil,
1581
+ # List of allowed tool names.
1582
+ tool_names: nil
1583
+ )
1584
+ end
1585
+
1586
+ sig do
1587
+ override.returns(
1588
+ { read_only: T::Boolean, tool_names: T::Array[String] }
1589
+ )
1590
+ end
1591
+ def to_hash
1592
+ end
1593
+ end
1594
+
1595
+ sig do
1596
+ override.returns(
1597
+ T::Array[
1598
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::AllowedTools::Variants
1599
+ ]
1600
+ )
1601
+ end
1602
+ def self.variants
1603
+ end
1604
+
1605
+ StringArray =
1606
+ T.let(
1607
+ OpenAI::Internal::Type::ArrayOf[String],
1608
+ OpenAI::Internal::Type::Converter
1609
+ )
1610
+ end
1611
+
1612
+ # Identifier for service connectors, like those available in ChatGPT. One of
1613
+ # `server_url` or `connector_id` must be provided. Learn more about service
1614
+ # connectors
1615
+ # [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
1616
+ #
1617
+ # Currently supported `connector_id` values are:
1618
+ #
1619
+ # - Dropbox: `connector_dropbox`
1620
+ # - Gmail: `connector_gmail`
1621
+ # - Google Calendar: `connector_googlecalendar`
1622
+ # - Google Drive: `connector_googledrive`
1623
+ # - Microsoft Teams: `connector_microsoftteams`
1624
+ # - Outlook Calendar: `connector_outlookcalendar`
1625
+ # - Outlook Email: `connector_outlookemail`
1626
+ # - SharePoint: `connector_sharepoint`
1627
+ module ConnectorID
1628
+ extend OpenAI::Internal::Type::Enum
1629
+
1630
+ TaggedSymbol =
1631
+ T.type_alias do
1632
+ T.all(
1633
+ Symbol,
1634
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID
1635
+ )
1636
+ end
1637
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
1638
+
1639
+ CONNECTOR_DROPBOX =
1640
+ T.let(
1641
+ :connector_dropbox,
1642
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1643
+ )
1644
+ CONNECTOR_GMAIL =
1645
+ T.let(
1646
+ :connector_gmail,
1647
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1648
+ )
1649
+ CONNECTOR_GOOGLECALENDAR =
1650
+ T.let(
1651
+ :connector_googlecalendar,
1652
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1653
+ )
1654
+ CONNECTOR_GOOGLEDRIVE =
1655
+ T.let(
1656
+ :connector_googledrive,
1657
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1658
+ )
1659
+ CONNECTOR_MICROSOFTTEAMS =
1660
+ T.let(
1661
+ :connector_microsoftteams,
1662
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1663
+ )
1664
+ CONNECTOR_OUTLOOKCALENDAR =
1665
+ T.let(
1666
+ :connector_outlookcalendar,
1667
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1668
+ )
1669
+ CONNECTOR_OUTLOOKEMAIL =
1670
+ T.let(
1671
+ :connector_outlookemail,
1672
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1673
+ )
1674
+ CONNECTOR_SHAREPOINT =
1675
+ T.let(
1676
+ :connector_sharepoint,
1677
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1678
+ )
1679
+
1680
+ sig do
1681
+ override.returns(
1682
+ T::Array[
1683
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::ConnectorID::TaggedSymbol
1684
+ ]
1685
+ )
1686
+ end
1687
+ def self.values
1688
+ end
1065
1689
  end
1690
+
1691
+ # Specify which of the MCP server's tools require approval.
1692
+ module RequireApproval
1693
+ extend OpenAI::Internal::Type::Union
1694
+
1695
+ Variants =
1696
+ T.type_alias do
1697
+ T.any(
1698
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter,
1699
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol
1700
+ )
1701
+ end
1702
+
1703
+ class McpToolApprovalFilter < OpenAI::Internal::Type::BaseModel
1704
+ OrHash =
1705
+ T.type_alias do
1706
+ T.any(
1707
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter,
1708
+ OpenAI::Internal::AnyHash
1709
+ )
1710
+ end
1711
+
1712
+ # A filter object to specify which tools are allowed.
1713
+ sig do
1714
+ returns(
1715
+ T.nilable(
1716
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always
1717
+ )
1718
+ )
1719
+ end
1720
+ attr_reader :always
1721
+
1722
+ sig do
1723
+ params(
1724
+ always:
1725
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always::OrHash
1726
+ ).void
1727
+ end
1728
+ attr_writer :always
1729
+
1730
+ # A filter object to specify which tools are allowed.
1731
+ sig do
1732
+ returns(
1733
+ T.nilable(
1734
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never
1735
+ )
1736
+ )
1737
+ end
1738
+ attr_reader :never
1739
+
1740
+ sig do
1741
+ params(
1742
+ never:
1743
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never::OrHash
1744
+ ).void
1745
+ end
1746
+ attr_writer :never
1747
+
1748
+ # Specify which of the MCP server's tools require approval. Can be `always`,
1749
+ # `never`, or a filter object associated with tools that require approval.
1750
+ sig do
1751
+ params(
1752
+ always:
1753
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always::OrHash,
1754
+ never:
1755
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never::OrHash
1756
+ ).returns(T.attached_class)
1757
+ end
1758
+ def self.new(
1759
+ # A filter object to specify which tools are allowed.
1760
+ always: nil,
1761
+ # A filter object to specify which tools are allowed.
1762
+ never: nil
1763
+ )
1764
+ end
1765
+
1766
+ sig do
1767
+ override.returns(
1768
+ {
1769
+ always:
1770
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always,
1771
+ never:
1772
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never
1773
+ }
1774
+ )
1775
+ end
1776
+ def to_hash
1777
+ end
1778
+
1779
+ class Always < OpenAI::Internal::Type::BaseModel
1780
+ OrHash =
1781
+ T.type_alias do
1782
+ T.any(
1783
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Always,
1784
+ OpenAI::Internal::AnyHash
1785
+ )
1786
+ end
1787
+
1788
+ # Indicates whether or not a tool modifies data or is read-only. If an MCP server
1789
+ # is
1790
+ # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
1791
+ # it will match this filter.
1792
+ sig { returns(T.nilable(T::Boolean)) }
1793
+ attr_reader :read_only
1794
+
1795
+ sig { params(read_only: T::Boolean).void }
1796
+ attr_writer :read_only
1797
+
1798
+ # List of allowed tool names.
1799
+ sig { returns(T.nilable(T::Array[String])) }
1800
+ attr_reader :tool_names
1801
+
1802
+ sig { params(tool_names: T::Array[String]).void }
1803
+ attr_writer :tool_names
1804
+
1805
+ # A filter object to specify which tools are allowed.
1806
+ sig do
1807
+ params(
1808
+ read_only: T::Boolean,
1809
+ tool_names: T::Array[String]
1810
+ ).returns(T.attached_class)
1811
+ end
1812
+ def self.new(
1813
+ # Indicates whether or not a tool modifies data or is read-only. If an MCP server
1814
+ # is
1815
+ # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
1816
+ # it will match this filter.
1817
+ read_only: nil,
1818
+ # List of allowed tool names.
1819
+ tool_names: nil
1820
+ )
1821
+ end
1822
+
1823
+ sig do
1824
+ override.returns(
1825
+ { read_only: T::Boolean, tool_names: T::Array[String] }
1826
+ )
1827
+ end
1828
+ def to_hash
1829
+ end
1830
+ end
1831
+
1832
+ class Never < OpenAI::Internal::Type::BaseModel
1833
+ OrHash =
1834
+ T.type_alias do
1835
+ T.any(
1836
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalFilter::Never,
1837
+ OpenAI::Internal::AnyHash
1838
+ )
1839
+ end
1840
+
1841
+ # Indicates whether or not a tool modifies data or is read-only. If an MCP server
1842
+ # is
1843
+ # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
1844
+ # it will match this filter.
1845
+ sig { returns(T.nilable(T::Boolean)) }
1846
+ attr_reader :read_only
1847
+
1848
+ sig { params(read_only: T::Boolean).void }
1849
+ attr_writer :read_only
1850
+
1851
+ # List of allowed tool names.
1852
+ sig { returns(T.nilable(T::Array[String])) }
1853
+ attr_reader :tool_names
1854
+
1855
+ sig { params(tool_names: T::Array[String]).void }
1856
+ attr_writer :tool_names
1857
+
1858
+ # A filter object to specify which tools are allowed.
1859
+ sig do
1860
+ params(
1861
+ read_only: T::Boolean,
1862
+ tool_names: T::Array[String]
1863
+ ).returns(T.attached_class)
1864
+ end
1865
+ def self.new(
1866
+ # Indicates whether or not a tool modifies data or is read-only. If an MCP server
1867
+ # is
1868
+ # [annotated with `readOnlyHint`](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
1869
+ # it will match this filter.
1870
+ read_only: nil,
1871
+ # List of allowed tool names.
1872
+ tool_names: nil
1873
+ )
1874
+ end
1875
+
1876
+ sig do
1877
+ override.returns(
1878
+ { read_only: T::Boolean, tool_names: T::Array[String] }
1879
+ )
1880
+ end
1881
+ def to_hash
1882
+ end
1883
+ end
1884
+ end
1885
+
1886
+ # Specify a single approval policy for all tools. One of `always` or `never`. When
1887
+ # set to `always`, all tools will require approval. When set to `never`, all tools
1888
+ # will not require approval.
1889
+ module McpToolApprovalSetting
1890
+ extend OpenAI::Internal::Type::Enum
1891
+
1892
+ TaggedSymbol =
1893
+ T.type_alias do
1894
+ T.all(
1895
+ Symbol,
1896
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting
1897
+ )
1898
+ end
1899
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
1900
+
1901
+ ALWAYS =
1902
+ T.let(
1903
+ :always,
1904
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol
1905
+ )
1906
+ NEVER =
1907
+ T.let(
1908
+ :never,
1909
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol
1910
+ )
1911
+
1912
+ sig do
1913
+ override.returns(
1914
+ T::Array[
1915
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::McpToolApprovalSetting::TaggedSymbol
1916
+ ]
1917
+ )
1918
+ end
1919
+ def self.values
1920
+ end
1921
+ end
1922
+
1923
+ sig do
1924
+ override.returns(
1925
+ T::Array[
1926
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::McpTool::RequireApproval::Variants
1927
+ ]
1928
+ )
1929
+ end
1930
+ def self.variants
1931
+ end
1932
+ end
1933
+ end
1934
+
1935
+ sig do
1936
+ override.returns(
1937
+ T::Array[
1938
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Tool::Variants
1939
+ ]
1940
+ )
1941
+ end
1942
+ def self.variants
1066
1943
  end
1067
1944
  end
1068
1945
 
1069
- # Configuration options for tracing. Set to null to disable tracing. Once tracing
1070
- # is enabled for a session, the configuration cannot be modified.
1946
+ # Realtime API can write session traces to the
1947
+ # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
1948
+ # tracing is enabled for a session, the configuration cannot be modified.
1071
1949
  #
1072
1950
  # `auto` will create a trace for the session with default values for the workflow
1073
1951
  # name, group id, and metadata.
@@ -1092,15 +1970,15 @@ module OpenAI
1092
1970
  end
1093
1971
 
1094
1972
  # The group id to attach to this trace to enable filtering and grouping in the
1095
- # traces dashboard.
1973
+ # Traces Dashboard.
1096
1974
  sig { returns(T.nilable(String)) }
1097
1975
  attr_reader :group_id
1098
1976
 
1099
1977
  sig { params(group_id: String).void }
1100
1978
  attr_writer :group_id
1101
1979
 
1102
- # The arbitrary metadata to attach to this trace to enable filtering in the traces
1103
- # dashboard.
1980
+ # The arbitrary metadata to attach to this trace to enable filtering in the Traces
1981
+ # Dashboard.
1104
1982
  sig { returns(T.nilable(T.anything)) }
1105
1983
  attr_reader :metadata
1106
1984
 
@@ -1108,7 +1986,7 @@ module OpenAI
1108
1986
  attr_writer :metadata
1109
1987
 
1110
1988
  # The name of the workflow to attach to this trace. This is used to name the trace
1111
- # in the traces dashboard.
1989
+ # in the Traces Dashboard.
1112
1990
  sig { returns(T.nilable(String)) }
1113
1991
  attr_reader :workflow_name
1114
1992
 
@@ -1125,13 +2003,13 @@ module OpenAI
1125
2003
  end
1126
2004
  def self.new(
1127
2005
  # The group id to attach to this trace to enable filtering and grouping in the
1128
- # traces dashboard.
2006
+ # Traces Dashboard.
1129
2007
  group_id: nil,
1130
- # The arbitrary metadata to attach to this trace to enable filtering in the traces
1131
- # dashboard.
2008
+ # The arbitrary metadata to attach to this trace to enable filtering in the Traces
2009
+ # Dashboard.
1132
2010
  metadata: nil,
1133
2011
  # The name of the workflow to attach to this trace. This is used to name the trace
1134
- # in the traces dashboard.
2012
+ # in the Traces Dashboard.
1135
2013
  workflow_name: nil
1136
2014
  )
1137
2015
  end
@@ -1160,87 +2038,33 @@ module OpenAI
1160
2038
  end
1161
2039
  end
1162
2040
 
1163
- class TurnDetection < OpenAI::Internal::Type::BaseModel
1164
- OrHash =
2041
+ # The type of session to create. Always `realtime` for the Realtime API.
2042
+ module Type
2043
+ extend OpenAI::Internal::Type::Enum
2044
+
2045
+ TaggedSymbol =
1165
2046
  T.type_alias do
1166
- T.any(
1167
- OpenAI::Realtime::RealtimeSessionCreateResponse::TurnDetection,
1168
- OpenAI::Internal::AnyHash
2047
+ T.all(
2048
+ Symbol,
2049
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Type
1169
2050
  )
1170
2051
  end
2052
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
1171
2053
 
1172
- # Amount of audio to include before the VAD detected speech (in milliseconds).
1173
- # Defaults to 300ms.
1174
- sig { returns(T.nilable(Integer)) }
1175
- attr_reader :prefix_padding_ms
1176
-
1177
- sig { params(prefix_padding_ms: Integer).void }
1178
- attr_writer :prefix_padding_ms
1179
-
1180
- # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
1181
- # With shorter values the model will respond more quickly, but may jump in on
1182
- # short pauses from the user.
1183
- sig { returns(T.nilable(Integer)) }
1184
- attr_reader :silence_duration_ms
1185
-
1186
- sig { params(silence_duration_ms: Integer).void }
1187
- attr_writer :silence_duration_ms
1188
-
1189
- # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
1190
- # threshold will require louder audio to activate the model, and thus might
1191
- # perform better in noisy environments.
1192
- sig { returns(T.nilable(Float)) }
1193
- attr_reader :threshold
1194
-
1195
- sig { params(threshold: Float).void }
1196
- attr_writer :threshold
1197
-
1198
- # Type of turn detection, only `server_vad` is currently supported.
1199
- sig { returns(T.nilable(String)) }
1200
- attr_reader :type
1201
-
1202
- sig { params(type: String).void }
1203
- attr_writer :type
1204
-
1205
- # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
1206
- # means that the model will detect the start and end of speech based on audio
1207
- # volume and respond at the end of user speech.
1208
- sig do
1209
- params(
1210
- prefix_padding_ms: Integer,
1211
- silence_duration_ms: Integer,
1212
- threshold: Float,
1213
- type: String
1214
- ).returns(T.attached_class)
1215
- end
1216
- def self.new(
1217
- # Amount of audio to include before the VAD detected speech (in milliseconds).
1218
- # Defaults to 300ms.
1219
- prefix_padding_ms: nil,
1220
- # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
1221
- # With shorter values the model will respond more quickly, but may jump in on
1222
- # short pauses from the user.
1223
- silence_duration_ms: nil,
1224
- # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
1225
- # threshold will require louder audio to activate the model, and thus might
1226
- # perform better in noisy environments.
1227
- threshold: nil,
1228
- # Type of turn detection, only `server_vad` is currently supported.
1229
- type: nil
1230
- )
1231
- end
2054
+ REALTIME =
2055
+ T.let(
2056
+ :realtime,
2057
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol
2058
+ )
1232
2059
 
1233
2060
  sig do
1234
2061
  override.returns(
1235
- {
1236
- prefix_padding_ms: Integer,
1237
- silence_duration_ms: Integer,
1238
- threshold: Float,
1239
- type: String
1240
- }
2062
+ T::Array[
2063
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Type::TaggedSymbol
2064
+ ]
1241
2065
  )
1242
2066
  end
1243
- def to_hash
2067
+ def self.values
1244
2068
  end
1245
2069
  end
1246
2070
  end