openai 0.22.1 → 0.23.0

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
data/rbi/openai/models/realtime/realtime_audio_formats.rbi
@@ -0,0 +1,329 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Realtime
+      # The PCM audio format. Only a 24kHz sample rate is supported.
+      module RealtimeAudioFormats
+        extend OpenAI::Internal::Type::Union
+
+        Variants =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
+              OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
+              OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
+            )
+          end
+
+        class AudioPCM < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # The sample rate of the audio. Always `24000`.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger
+              )
+            )
+          end
+          attr_reader :rate
+
+          sig do
+            params(
+              rate:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger
+            ).void
+          end
+          attr_writer :rate
+
+          # The audio format. Always `audio/pcm`.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
+              )
+            )
+          end
+          attr_reader :type
+
+          sig do
+            params(
+              type:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
+            ).void
+          end
+          attr_writer :type
+
+          # The PCM audio format. Only a 24kHz sample rate is supported.
+          sig do
+            params(
+              rate:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger,
+              type:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # The sample rate of the audio. Always `24000`.
+            rate: nil,
+            # The audio format. Always `audio/pcm`.
+            type: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                rate:
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger,
+                type:
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
+              }
+            )
+          end
+          def to_hash
+          end
+
+          # The sample rate of the audio. Always `24000`.
+          module Rate
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedInteger =
+              T.type_alias do
+                T.all(
+                  Integer,
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate
+                )
+              end
+            OrInteger = T.type_alias { Integer }
+
+            RATE_24000 =
+              T.let(
+                24_000,
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::TaggedInteger
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::TaggedInteger
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+
+          # The audio format. Always `audio/pcm`.
+          module Type
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            AUDIO_PCM =
+              T.let(
+                :"audio/pcm",
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+        end
+
+        class AudioPCMU < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # The audio format. Always `audio/pcmu`.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
+              )
+            )
+          end
+          attr_reader :type
+
+          sig do
+            params(
+              type:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
+            ).void
+          end
+          attr_writer :type
+
+          # The G.711 μ-law format.
+          sig do
+            params(
+              type:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # The audio format. Always `audio/pcmu`.
+            type: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                type:
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
+              }
+            )
+          end
+          def to_hash
+          end
+
+          # The audio format. Always `audio/pcmu`.
+          module Type
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            AUDIO_PCMU =
+              T.let(
+                :"audio/pcmu",
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+        end
+
+        class AudioPCMA < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # The audio format. Always `audio/pcma`.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
+              )
+            )
+          end
+          attr_reader :type
+
+          sig do
+            params(
+              type:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
+            ).void
+          end
+          attr_writer :type
+
+          # The G.711 A-law format.
+          sig do
+            params(
+              type:
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # The audio format. Always `audio/pcma`.
+            type: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                type:
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
+              }
+            )
+          end
+          def to_hash
+          end
+
+          # The audio format. Always `audio/pcma`.
+          module Type
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            AUDIO_PCMA =
+              T.let(
+                :"audio/pcma",
+                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+        end
+
+        sig do
+          override.returns(
+            T::Array[OpenAI::Realtime::RealtimeAudioFormats::Variants]
+          )
+        end
+        def self.variants
+        end
+      end
+    end
+  end
+end
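For orientation, here is a minimal usage sketch of the new RealtimeAudioFormats variants added above. This is a hypothetical example, not taken from the gem's documentation: the keyword arguments follow the `self.new` signatures in the RBI, and the `to_hash` shape shown is the expected one per the `override` signature.

require "openai"

# PCM carries an optional sample rate (always 24000) and a type; the two
# G.711 variants only carry a type. Symbols or strings are accepted (OrSymbol).
pcm  = OpenAI::Realtime::RealtimeAudioFormats::AudioPCM.new(rate: 24_000, type: :"audio/pcm")
ulaw = OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU.new(type: :"audio/pcmu")
alaw = OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA.new(type: :"audio/pcma")

pcm.to_hash # expected shape: { rate: 24_000, type: :"audio/pcm" }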
data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi
@@ -0,0 +1,262 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # Whether or not to automatically generate a response when a VAD stop event
+        # occurs.
+        sig { returns(T.nilable(T::Boolean)) }
+        attr_reader :create_response
+
+        sig { params(create_response: T::Boolean).void }
+        attr_writer :create_response
+
+        # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+        # will wait longer for the user to continue speaking, `high` will respond more
+        # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
+        # and `high` have max timeouts of 8s, 4s, and 2s respectively.
+        sig do
+          returns(
+            T.nilable(
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol
+            )
+          )
+        end
+        attr_reader :eagerness
+
+        sig do
+          params(
+            eagerness:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol
+          ).void
+        end
+        attr_writer :eagerness
+
+        # Optional idle timeout after which turn detection will auto-timeout when no
+        # additional audio is received.
+        sig { returns(T.nilable(Integer)) }
+        attr_accessor :idle_timeout_ms
+
+        # Whether or not to automatically interrupt any ongoing response with output to
+        # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+        # occurs.
+        sig { returns(T.nilable(T::Boolean)) }
+        attr_reader :interrupt_response
+
+        sig { params(interrupt_response: T::Boolean).void }
+        attr_writer :interrupt_response
+
+        # Used only for `server_vad` mode. Amount of audio to include before the VAD
+        # detected speech (in milliseconds). Defaults to 300ms.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :prefix_padding_ms
+
+        sig { params(prefix_padding_ms: Integer).void }
+        attr_writer :prefix_padding_ms
+
+        # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+        # milliseconds). Defaults to 500ms. With shorter values the model will respond
+        # more quickly, but may jump in on short pauses from the user.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :silence_duration_ms
+
+        sig { params(silence_duration_ms: Integer).void }
+        attr_writer :silence_duration_ms
+
+        # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
+        # defaults to 0.5. A higher threshold will require louder audio to activate the
+        # model, and thus might perform better in noisy environments.
+        sig { returns(T.nilable(Float)) }
+        attr_reader :threshold
+
+        sig { params(threshold: Float).void }
+        attr_writer :threshold
+
+        # Type of turn detection.
+        sig do
+          returns(
+            T.nilable(
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
+            )
+          )
+        end
+        attr_reader :type
+
+        sig do
+          params(
+            type:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
+          ).void
+        end
+        attr_writer :type
+
+        # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
+        # set to `null` to turn off, in which case the client must manually trigger model
+        # response. Server VAD means that the model will detect the start and end of
+        # speech based on audio volume and respond at the end of user speech. Semantic VAD
+        # is more advanced and uses a turn detection model (in conjunction with VAD) to
+        # semantically estimate whether the user has finished speaking, then dynamically
+        # sets a timeout based on this probability. For example, if user audio trails off
+        # with "uhhm", the model will score a low probability of turn end and wait longer
+        # for the user to continue speaking. This can be useful for more natural
+        # conversations, but may have a higher latency.
+        sig do
+          params(
+            create_response: T::Boolean,
+            eagerness:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol,
+            idle_timeout_ms: T.nilable(Integer),
+            interrupt_response: T::Boolean,
+            prefix_padding_ms: Integer,
+            silence_duration_ms: Integer,
+            threshold: Float,
+            type:
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Whether or not to automatically generate a response when a VAD stop event
+          # occurs.
+          create_response: nil,
+          # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+          # will wait longer for the user to continue speaking, `high` will respond more
+          # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
+          # and `high` have max timeouts of 8s, 4s, and 2s respectively.
+          eagerness: nil,
+          # Optional idle timeout after which turn detection will auto-timeout when no
+          # additional audio is received.
+          idle_timeout_ms: nil,
+          # Whether or not to automatically interrupt any ongoing response with output to
+          # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+          # occurs.
+          interrupt_response: nil,
+          # Used only for `server_vad` mode. Amount of audio to include before the VAD
+          # detected speech (in milliseconds). Defaults to 300ms.
+          prefix_padding_ms: nil,
+          # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+          # milliseconds). Defaults to 500ms. With shorter values the model will respond
+          # more quickly, but may jump in on short pauses from the user.
+          silence_duration_ms: nil,
+          # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
+          # defaults to 0.5. A higher threshold will require louder audio to activate the
+          # model, and thus might perform better in noisy environments.
+          threshold: nil,
+          # Type of turn detection.
+          type: nil
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              create_response: T::Boolean,
+              eagerness:
+                OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol,
+              idle_timeout_ms: T.nilable(Integer),
+              interrupt_response: T::Boolean,
+              prefix_padding_ms: Integer,
+              silence_duration_ms: Integer,
+              threshold: Float,
+              type:
+                OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
+            }
+          )
+        end
+        def to_hash
+        end
+
+        # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+        # will wait longer for the user to continue speaking, `high` will respond more
+        # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
+        # and `high` have max timeouts of 8s, 4s, and 2s respectively.
+        module Eagerness
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          LOW =
+            T.let(
+              :low,
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
+            )
+          MEDIUM =
+            T.let(
+              :medium,
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
+            )
+          HIGH =
+            T.let(
+              :high,
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
+            )
+          AUTO =
+            T.let(
+              :auto,
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+
+        # Type of turn detection.
+        module Type
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          SERVER_VAD =
+            T.let(
+              :server_vad,
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol
+            )
+          SEMANTIC_VAD =
+            T.let(
+              :semantic_vad,
+              OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+      end
+    end
+  end
+end
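Similarly, a minimal sketch of populating the new RealtimeAudioInputTurnDetection model. The values are hypothetical, the keyword arguments mirror the `self.new` signature above, and how the object is attached to a session or transcription-session audio config is an assumption not shown in this hunk.

require "openai"

# Server VAD tuned slightly stricter than the documented defaults
# (threshold 0.5, prefix 300ms, silence 500ms).
server_vad = OpenAI::Realtime::RealtimeAudioInputTurnDetection.new(
  type: :server_vad,
  threshold: 0.6,
  prefix_padding_ms: 300,
  silence_duration_ms: 500,
  idle_timeout_ms: 1_000,   # optional; turn detection times out when no audio arrives
  create_response: true,
  interrupt_response: true
)

# Semantic VAD keys off `eagerness` (:low, :medium, :high, or :auto) instead.
semantic_vad = OpenAI::Realtime::RealtimeAudioInputTurnDetection.new(
  type: :semantic_vad,
  eagerness: :low
)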