openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -13,15 +13,13 @@ module OpenAI
13
13
  end
14
14
 
15
15
  # Realtime transcription session object configuration.
16
- sig do
17
- returns(OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest)
18
- end
16
+ sig { returns(OpenAI::Realtime::TranscriptionSessionUpdate::Session) }
19
17
  attr_reader :session
20
18
 
21
19
  sig do
22
20
  params(
23
21
  session:
24
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::OrHash
22
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::OrHash
25
23
  ).void
26
24
  end
27
25
  attr_writer :session
@@ -41,7 +39,7 @@ module OpenAI
41
39
  sig do
42
40
  params(
43
41
  session:
44
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::OrHash,
42
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::OrHash,
45
43
  event_id: String,
46
44
  type: Symbol
47
45
  ).returns(T.attached_class)
@@ -59,8 +57,7 @@ module OpenAI
59
57
  sig do
60
58
  override.returns(
61
59
  {
62
- session:
63
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest,
60
+ session: OpenAI::Realtime::TranscriptionSessionUpdate::Session,
64
61
  type: Symbol,
65
62
  event_id: String
66
63
  }
@@ -68,6 +65,427 @@ module OpenAI
68
65
  end
69
66
  def to_hash
70
67
  end
68
+
69
+ class Session < OpenAI::Internal::Type::BaseModel
70
+ OrHash =
71
+ T.type_alias do
72
+ T.any(
73
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session,
74
+ OpenAI::Internal::AnyHash
75
+ )
76
+ end
77
+
78
+ # The set of items to include in the transcription. Current available items are:
79
+ # `item.input_audio_transcription.logprobs`
80
+ sig do
81
+ returns(
82
+ T.nilable(
83
+ T::Array[
84
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol
85
+ ]
86
+ )
87
+ )
88
+ end
89
+ attr_reader :include
90
+
91
+ sig do
92
+ params(
93
+ include:
94
+ T::Array[
95
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol
96
+ ]
97
+ ).void
98
+ end
99
+ attr_writer :include
100
+
101
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
102
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
103
+ # (mono), and little-endian byte order.
104
+ sig do
105
+ returns(
106
+ T.nilable(
107
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol
108
+ )
109
+ )
110
+ end
111
+ attr_reader :input_audio_format
112
+
113
+ sig do
114
+ params(
115
+ input_audio_format:
116
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol
117
+ ).void
118
+ end
119
+ attr_writer :input_audio_format
120
+
121
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
122
+ # off. Noise reduction filters audio added to the input audio buffer before it is
123
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
124
+ # detection accuracy (reducing false positives) and model performance by improving
125
+ # perception of the input audio.
126
+ sig do
127
+ returns(
128
+ T.nilable(
129
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction
130
+ )
131
+ )
132
+ end
133
+ attr_reader :input_audio_noise_reduction
134
+
135
+ sig do
136
+ params(
137
+ input_audio_noise_reduction:
138
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction::OrHash
139
+ ).void
140
+ end
141
+ attr_writer :input_audio_noise_reduction
142
+
143
+ # Configuration for input audio transcription. The client can optionally set the
144
+ # language and prompt for transcription, these offer additional guidance to the
145
+ # transcription service.
146
+ sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
147
+ attr_reader :input_audio_transcription
148
+
149
+ sig do
150
+ params(
151
+ input_audio_transcription:
152
+ OpenAI::Realtime::AudioTranscription::OrHash
153
+ ).void
154
+ end
155
+ attr_writer :input_audio_transcription
156
+
157
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
158
+ # means that the model will detect the start and end of speech based on audio
159
+ # volume and respond at the end of user speech.
160
+ sig do
161
+ returns(
162
+ T.nilable(
163
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection
164
+ )
165
+ )
166
+ end
167
+ attr_reader :turn_detection
168
+
169
+ sig do
170
+ params(
171
+ turn_detection:
172
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::OrHash
173
+ ).void
174
+ end
175
+ attr_writer :turn_detection
176
+
177
+ # Realtime transcription session object configuration.
178
+ sig do
179
+ params(
180
+ include:
181
+ T::Array[
182
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol
183
+ ],
184
+ input_audio_format:
185
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol,
186
+ input_audio_noise_reduction:
187
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction::OrHash,
188
+ input_audio_transcription:
189
+ OpenAI::Realtime::AudioTranscription::OrHash,
190
+ turn_detection:
191
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::OrHash
192
+ ).returns(T.attached_class)
193
+ end
194
+ def self.new(
195
+ # The set of items to include in the transcription. Current available items are:
196
+ # `item.input_audio_transcription.logprobs`
197
+ include: nil,
198
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
199
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
200
+ # (mono), and little-endian byte order.
201
+ input_audio_format: nil,
202
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
203
+ # off. Noise reduction filters audio added to the input audio buffer before it is
204
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
205
+ # detection accuracy (reducing false positives) and model performance by improving
206
+ # perception of the input audio.
207
+ input_audio_noise_reduction: nil,
208
+ # Configuration for input audio transcription. The client can optionally set the
209
+ # language and prompt for transcription, these offer additional guidance to the
210
+ # transcription service.
211
+ input_audio_transcription: nil,
212
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
213
+ # means that the model will detect the start and end of speech based on audio
214
+ # volume and respond at the end of user speech.
215
+ turn_detection: nil
216
+ )
217
+ end
218
+
219
+ sig do
220
+ override.returns(
221
+ {
222
+ include:
223
+ T::Array[
224
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::OrSymbol
225
+ ],
226
+ input_audio_format:
227
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::OrSymbol,
228
+ input_audio_noise_reduction:
229
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction,
230
+ input_audio_transcription: OpenAI::Realtime::AudioTranscription,
231
+ turn_detection:
232
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection
233
+ }
234
+ )
235
+ end
236
+ def to_hash
237
+ end
238
+
239
+ module Include
240
+ extend OpenAI::Internal::Type::Enum
241
+
242
+ TaggedSymbol =
243
+ T.type_alias do
244
+ T.all(
245
+ Symbol,
246
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include
247
+ )
248
+ end
249
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
250
+
251
+ ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS =
252
+ T.let(
253
+ :"item.input_audio_transcription.logprobs",
254
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::TaggedSymbol
255
+ )
256
+
257
+ sig do
258
+ override.returns(
259
+ T::Array[
260
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include::TaggedSymbol
261
+ ]
262
+ )
263
+ end
264
+ def self.values
265
+ end
266
+ end
267
+
268
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
269
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
270
+ # (mono), and little-endian byte order.
271
+ module InputAudioFormat
272
+ extend OpenAI::Internal::Type::Enum
273
+
274
+ TaggedSymbol =
275
+ T.type_alias do
276
+ T.all(
277
+ Symbol,
278
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat
279
+ )
280
+ end
281
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
282
+
283
+ PCM16 =
284
+ T.let(
285
+ :pcm16,
286
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol
287
+ )
288
+ G711_ULAW =
289
+ T.let(
290
+ :g711_ulaw,
291
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol
292
+ )
293
+ G711_ALAW =
294
+ T.let(
295
+ :g711_alaw,
296
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol
297
+ )
298
+
299
+ sig do
300
+ override.returns(
301
+ T::Array[
302
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat::TaggedSymbol
303
+ ]
304
+ )
305
+ end
306
+ def self.values
307
+ end
308
+ end
309
+
310
+ class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel
311
+ OrHash =
312
+ T.type_alias do
313
+ T.any(
314
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction,
315
+ OpenAI::Internal::AnyHash
316
+ )
317
+ end
318
+
319
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
320
+ # headphones, `far_field` is for far-field microphones such as laptop or
321
+ # conference room microphones.
322
+ sig do
323
+ returns(T.nilable(OpenAI::Realtime::NoiseReductionType::OrSymbol))
324
+ end
325
+ attr_reader :type
326
+
327
+ sig do
328
+ params(type: OpenAI::Realtime::NoiseReductionType::OrSymbol).void
329
+ end
330
+ attr_writer :type
331
+
332
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
333
+ # off. Noise reduction filters audio added to the input audio buffer before it is
334
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
335
+ # detection accuracy (reducing false positives) and model performance by improving
336
+ # perception of the input audio.
337
+ sig do
338
+ params(
339
+ type: OpenAI::Realtime::NoiseReductionType::OrSymbol
340
+ ).returns(T.attached_class)
341
+ end
342
+ def self.new(
343
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
344
+ # headphones, `far_field` is for far-field microphones such as laptop or
345
+ # conference room microphones.
346
+ type: nil
347
+ )
348
+ end
349
+
350
+ sig do
351
+ override.returns(
352
+ { type: OpenAI::Realtime::NoiseReductionType::OrSymbol }
353
+ )
354
+ end
355
+ def to_hash
356
+ end
357
+ end
358
+
359
+ class TurnDetection < OpenAI::Internal::Type::BaseModel
360
+ OrHash =
361
+ T.type_alias do
362
+ T.any(
363
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection,
364
+ OpenAI::Internal::AnyHash
365
+ )
366
+ end
367
+
368
+ # Amount of audio to include before the VAD detected speech (in milliseconds).
369
+ # Defaults to 300ms.
370
+ sig { returns(T.nilable(Integer)) }
371
+ attr_reader :prefix_padding_ms
372
+
373
+ sig { params(prefix_padding_ms: Integer).void }
374
+ attr_writer :prefix_padding_ms
375
+
376
+ # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
377
+ # With shorter values the model will respond more quickly, but may jump in on
378
+ # short pauses from the user.
379
+ sig { returns(T.nilable(Integer)) }
380
+ attr_reader :silence_duration_ms
381
+
382
+ sig { params(silence_duration_ms: Integer).void }
383
+ attr_writer :silence_duration_ms
384
+
385
+ # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
386
+ # threshold will require louder audio to activate the model, and thus might
387
+ # perform better in noisy environments.
388
+ sig { returns(T.nilable(Float)) }
389
+ attr_reader :threshold
390
+
391
+ sig { params(threshold: Float).void }
392
+ attr_writer :threshold
393
+
394
+ # Type of turn detection. Only `server_vad` is currently supported for
395
+ # transcription sessions.
396
+ sig do
397
+ returns(
398
+ T.nilable(
399
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol
400
+ )
401
+ )
402
+ end
403
+ attr_reader :type
404
+
405
+ sig do
406
+ params(
407
+ type:
408
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol
409
+ ).void
410
+ end
411
+ attr_writer :type
412
+
413
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
414
+ # means that the model will detect the start and end of speech based on audio
415
+ # volume and respond at the end of user speech.
416
+ sig do
417
+ params(
418
+ prefix_padding_ms: Integer,
419
+ silence_duration_ms: Integer,
420
+ threshold: Float,
421
+ type:
422
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol
423
+ ).returns(T.attached_class)
424
+ end
425
+ def self.new(
426
+ # Amount of audio to include before the VAD detected speech (in milliseconds).
427
+ # Defaults to 300ms.
428
+ prefix_padding_ms: nil,
429
+ # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
430
+ # With shorter values the model will respond more quickly, but may jump in on
431
+ # short pauses from the user.
432
+ silence_duration_ms: nil,
433
+ # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
434
+ # threshold will require louder audio to activate the model, and thus might
435
+ # perform better in noisy environments.
436
+ threshold: nil,
437
+ # Type of turn detection. Only `server_vad` is currently supported for
438
+ # transcription sessions.
439
+ type: nil
440
+ )
441
+ end
442
+
443
+ sig do
444
+ override.returns(
445
+ {
446
+ prefix_padding_ms: Integer,
447
+ silence_duration_ms: Integer,
448
+ threshold: Float,
449
+ type:
450
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::OrSymbol
451
+ }
452
+ )
453
+ end
454
+ def to_hash
455
+ end
456
+
457
+ # Type of turn detection. Only `server_vad` is currently supported for
458
+ # transcription sessions.
459
+ module Type
460
+ extend OpenAI::Internal::Type::Enum
461
+
462
+ TaggedSymbol =
463
+ T.type_alias do
464
+ T.all(
465
+ Symbol,
466
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type
467
+ )
468
+ end
469
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
470
+
471
+ SERVER_VAD =
472
+ T.let(
473
+ :server_vad,
474
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::TaggedSymbol
475
+ )
476
+
477
+ sig do
478
+ override.returns(
479
+ T::Array[
480
+ OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type::TaggedSymbol
481
+ ]
482
+ )
483
+ end
484
+ def self.values
485
+ end
486
+ end
487
+ end
488
+ end
71
489
  end
72
490
  end
73
491
  end