openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -35,7 +35,7 @@ module OpenAI
35
35
  session:
36
36
  T.any(
37
37
  OpenAI::Realtime::RealtimeSessionCreateResponse::OrHash,
38
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::OrHash
38
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::OrHash
39
39
  ),
40
40
  value: String
41
41
  ).returns(T.attached_class)
@@ -71,595 +71,10 @@ module OpenAI
71
71
  T.type_alias do
72
72
  T.any(
73
73
  OpenAI::Realtime::RealtimeSessionCreateResponse,
74
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse
74
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse
75
75
  )
76
76
  end
77
77
 
78
- class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel
79
- OrHash =
80
- T.type_alias do
81
- T.any(
82
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse,
83
- OpenAI::Internal::AnyHash
84
- )
85
- end
86
-
87
- # Unique identifier for the session that looks like `sess_1234567890abcdef`.
88
- sig { returns(T.nilable(String)) }
89
- attr_reader :id
90
-
91
- sig { params(id: String).void }
92
- attr_writer :id
93
-
94
- # Configuration for input audio for the session.
95
- sig do
96
- returns(
97
- T.nilable(
98
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio
99
- )
100
- )
101
- end
102
- attr_reader :audio
103
-
104
- sig do
105
- params(
106
- audio:
107
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash
108
- ).void
109
- end
110
- attr_writer :audio
111
-
112
- # Expiration timestamp for the session, in seconds since epoch.
113
- sig { returns(T.nilable(Integer)) }
114
- attr_reader :expires_at
115
-
116
- sig { params(expires_at: Integer).void }
117
- attr_writer :expires_at
118
-
119
- # Additional fields to include in server outputs.
120
- #
121
- # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
122
- # transcription.
123
- sig do
124
- returns(
125
- T.nilable(
126
- T::Array[
127
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
128
- ]
129
- )
130
- )
131
- end
132
- attr_reader :include
133
-
134
- sig do
135
- params(
136
- include:
137
- T::Array[
138
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol
139
- ]
140
- ).void
141
- end
142
- attr_writer :include
143
-
144
- # The object type. Always `realtime.transcription_session`.
145
- sig { returns(T.nilable(String)) }
146
- attr_reader :object
147
-
148
- sig { params(object: String).void }
149
- attr_writer :object
150
-
151
- # A Realtime transcription session configuration object.
152
- sig do
153
- params(
154
- id: String,
155
- audio:
156
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash,
157
- expires_at: Integer,
158
- include:
159
- T::Array[
160
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol
161
- ],
162
- object: String
163
- ).returns(T.attached_class)
164
- end
165
- def self.new(
166
- # Unique identifier for the session that looks like `sess_1234567890abcdef`.
167
- id: nil,
168
- # Configuration for input audio for the session.
169
- audio: nil,
170
- # Expiration timestamp for the session, in seconds since epoch.
171
- expires_at: nil,
172
- # Additional fields to include in server outputs.
173
- #
174
- # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
175
- # transcription.
176
- include: nil,
177
- # The object type. Always `realtime.transcription_session`.
178
- object: nil
179
- )
180
- end
181
-
182
- sig do
183
- override.returns(
184
- {
185
- id: String,
186
- audio:
187
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio,
188
- expires_at: Integer,
189
- include:
190
- T::Array[
191
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
192
- ],
193
- object: String
194
- }
195
- )
196
- end
197
- def to_hash
198
- end
199
-
200
- class Audio < OpenAI::Internal::Type::BaseModel
201
- OrHash =
202
- T.type_alias do
203
- T.any(
204
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio,
205
- OpenAI::Internal::AnyHash
206
- )
207
- end
208
-
209
- sig do
210
- returns(
211
- T.nilable(
212
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input
213
- )
214
- )
215
- end
216
- attr_reader :input
217
-
218
- sig do
219
- params(
220
- input:
221
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash
222
- ).void
223
- end
224
- attr_writer :input
225
-
226
- # Configuration for input audio for the session.
227
- sig do
228
- params(
229
- input:
230
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash
231
- ).returns(T.attached_class)
232
- end
233
- def self.new(input: nil)
234
- end
235
-
236
- sig do
237
- override.returns(
238
- {
239
- input:
240
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input
241
- }
242
- )
243
- end
244
- def to_hash
245
- end
246
-
247
- class Input < OpenAI::Internal::Type::BaseModel
248
- OrHash =
249
- T.type_alias do
250
- T.any(
251
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input,
252
- OpenAI::Internal::AnyHash
253
- )
254
- end
255
-
256
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
257
- sig { returns(T.nilable(String)) }
258
- attr_reader :format_
259
-
260
- sig { params(format_: String).void }
261
- attr_writer :format_
262
-
263
- # Configuration for input audio noise reduction.
264
- sig do
265
- returns(
266
- T.nilable(
267
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction
268
- )
269
- )
270
- end
271
- attr_reader :noise_reduction
272
-
273
- sig do
274
- params(
275
- noise_reduction:
276
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash
277
- ).void
278
- end
279
- attr_writer :noise_reduction
280
-
281
- # Configuration of the transcription model.
282
- sig do
283
- returns(
284
- T.nilable(
285
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription
286
- )
287
- )
288
- end
289
- attr_reader :transcription
290
-
291
- sig do
292
- params(
293
- transcription:
294
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::OrHash
295
- ).void
296
- end
297
- attr_writer :transcription
298
-
299
- # Configuration for turn detection.
300
- sig do
301
- returns(
302
- T.nilable(
303
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection
304
- )
305
- )
306
- end
307
- attr_reader :turn_detection
308
-
309
- sig do
310
- params(
311
- turn_detection:
312
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection::OrHash
313
- ).void
314
- end
315
- attr_writer :turn_detection
316
-
317
- sig do
318
- params(
319
- format_: String,
320
- noise_reduction:
321
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash,
322
- transcription:
323
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::OrHash,
324
- turn_detection:
325
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection::OrHash
326
- ).returns(T.attached_class)
327
- end
328
- def self.new(
329
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
330
- format_: nil,
331
- # Configuration for input audio noise reduction.
332
- noise_reduction: nil,
333
- # Configuration of the transcription model.
334
- transcription: nil,
335
- # Configuration for turn detection.
336
- turn_detection: nil
337
- )
338
- end
339
-
340
- sig do
341
- override.returns(
342
- {
343
- format_: String,
344
- noise_reduction:
345
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction,
346
- transcription:
347
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription,
348
- turn_detection:
349
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection
350
- }
351
- )
352
- end
353
- def to_hash
354
- end
355
-
356
- class NoiseReduction < OpenAI::Internal::Type::BaseModel
357
- OrHash =
358
- T.type_alias do
359
- T.any(
360
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction,
361
- OpenAI::Internal::AnyHash
362
- )
363
- end
364
-
365
- sig do
366
- returns(
367
- T.nilable(
368
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
369
- )
370
- )
371
- end
372
- attr_reader :type
373
-
374
- sig do
375
- params(
376
- type:
377
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol
378
- ).void
379
- end
380
- attr_writer :type
381
-
382
- # Configuration for input audio noise reduction.
383
- sig do
384
- params(
385
- type:
386
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::OrSymbol
387
- ).returns(T.attached_class)
388
- end
389
- def self.new(type: nil)
390
- end
391
-
392
- sig do
393
- override.returns(
394
- {
395
- type:
396
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
397
- }
398
- )
399
- end
400
- def to_hash
401
- end
402
-
403
- module Type
404
- extend OpenAI::Internal::Type::Enum
405
-
406
- TaggedSymbol =
407
- T.type_alias do
408
- T.all(
409
- Symbol,
410
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type
411
- )
412
- end
413
- OrSymbol = T.type_alias { T.any(Symbol, String) }
414
-
415
- NEAR_FIELD =
416
- T.let(
417
- :near_field,
418
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
419
- )
420
- FAR_FIELD =
421
- T.let(
422
- :far_field,
423
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
424
- )
425
-
426
- sig do
427
- override.returns(
428
- T::Array[
429
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type::TaggedSymbol
430
- ]
431
- )
432
- end
433
- def self.values
434
- end
435
- end
436
- end
437
-
438
- class Transcription < OpenAI::Internal::Type::BaseModel
439
- OrHash =
440
- T.type_alias do
441
- T.any(
442
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription,
443
- OpenAI::Internal::AnyHash
444
- )
445
- end
446
-
447
- # The language of the input audio. Supplying the input language in
448
- # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
449
- # format will improve accuracy and latency.
450
- sig { returns(T.nilable(String)) }
451
- attr_reader :language
452
-
453
- sig { params(language: String).void }
454
- attr_writer :language
455
-
456
- # The model to use for transcription. Can be `gpt-4o-transcribe`,
457
- # `gpt-4o-mini-transcribe`, or `whisper-1`.
458
- sig do
459
- returns(
460
- T.nilable(
461
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol
462
- )
463
- )
464
- end
465
- attr_reader :model
466
-
467
- sig do
468
- params(
469
- model:
470
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::OrSymbol
471
- ).void
472
- end
473
- attr_writer :model
474
-
475
- # An optional text to guide the model's style or continue a previous audio
476
- # segment. The
477
- # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
478
- # should match the audio language.
479
- sig { returns(T.nilable(String)) }
480
- attr_reader :prompt
481
-
482
- sig { params(prompt: String).void }
483
- attr_writer :prompt
484
-
485
- # Configuration of the transcription model.
486
- sig do
487
- params(
488
- language: String,
489
- model:
490
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::OrSymbol,
491
- prompt: String
492
- ).returns(T.attached_class)
493
- end
494
- def self.new(
495
- # The language of the input audio. Supplying the input language in
496
- # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
497
- # format will improve accuracy and latency.
498
- language: nil,
499
- # The model to use for transcription. Can be `gpt-4o-transcribe`,
500
- # `gpt-4o-mini-transcribe`, or `whisper-1`.
501
- model: nil,
502
- # An optional text to guide the model's style or continue a previous audio
503
- # segment. The
504
- # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
505
- # should match the audio language.
506
- prompt: nil
507
- )
508
- end
509
-
510
- sig do
511
- override.returns(
512
- {
513
- language: String,
514
- model:
515
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol,
516
- prompt: String
517
- }
518
- )
519
- end
520
- def to_hash
521
- end
522
-
523
- # The model to use for transcription. Can be `gpt-4o-transcribe`,
524
- # `gpt-4o-mini-transcribe`, or `whisper-1`.
525
- module Model
526
- extend OpenAI::Internal::Type::Enum
527
-
528
- TaggedSymbol =
529
- T.type_alias do
530
- T.all(
531
- Symbol,
532
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model
533
- )
534
- end
535
- OrSymbol = T.type_alias { T.any(Symbol, String) }
536
-
537
- GPT_4O_TRANSCRIBE =
538
- T.let(
539
- :"gpt-4o-transcribe",
540
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol
541
- )
542
- GPT_4O_MINI_TRANSCRIBE =
543
- T.let(
544
- :"gpt-4o-mini-transcribe",
545
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol
546
- )
547
- WHISPER_1 =
548
- T.let(
549
- :"whisper-1",
550
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol
551
- )
552
-
553
- sig do
554
- override.returns(
555
- T::Array[
556
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model::TaggedSymbol
557
- ]
558
- )
559
- end
560
- def self.values
561
- end
562
- end
563
- end
564
-
565
- class TurnDetection < OpenAI::Internal::Type::BaseModel
566
- OrHash =
567
- T.type_alias do
568
- T.any(
569
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection,
570
- OpenAI::Internal::AnyHash
571
- )
572
- end
573
-
574
- sig { returns(T.nilable(Integer)) }
575
- attr_reader :prefix_padding_ms
576
-
577
- sig { params(prefix_padding_ms: Integer).void }
578
- attr_writer :prefix_padding_ms
579
-
580
- sig { returns(T.nilable(Integer)) }
581
- attr_reader :silence_duration_ms
582
-
583
- sig { params(silence_duration_ms: Integer).void }
584
- attr_writer :silence_duration_ms
585
-
586
- sig { returns(T.nilable(Float)) }
587
- attr_reader :threshold
588
-
589
- sig { params(threshold: Float).void }
590
- attr_writer :threshold
591
-
592
- # Type of turn detection, only `server_vad` is currently supported.
593
- sig { returns(T.nilable(String)) }
594
- attr_reader :type
595
-
596
- sig { params(type: String).void }
597
- attr_writer :type
598
-
599
- # Configuration for turn detection.
600
- sig do
601
- params(
602
- prefix_padding_ms: Integer,
603
- silence_duration_ms: Integer,
604
- threshold: Float,
605
- type: String
606
- ).returns(T.attached_class)
607
- end
608
- def self.new(
609
- prefix_padding_ms: nil,
610
- silence_duration_ms: nil,
611
- threshold: nil,
612
- # Type of turn detection, only `server_vad` is currently supported.
613
- type: nil
614
- )
615
- end
616
-
617
- sig do
618
- override.returns(
619
- {
620
- prefix_padding_ms: Integer,
621
- silence_duration_ms: Integer,
622
- threshold: Float,
623
- type: String
624
- }
625
- )
626
- end
627
- def to_hash
628
- end
629
- end
630
- end
631
- end
632
-
633
- module Include
634
- extend OpenAI::Internal::Type::Enum
635
-
636
- TaggedSymbol =
637
- T.type_alias do
638
- T.all(
639
- Symbol,
640
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include
641
- )
642
- end
643
- OrSymbol = T.type_alias { T.any(Symbol, String) }
644
-
645
- ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS =
646
- T.let(
647
- :"item.input_audio_transcription.logprobs",
648
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
649
- )
650
-
651
- sig do
652
- override.returns(
653
- T::Array[
654
- OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
655
- ]
656
- )
657
- end
658
- def self.values
659
- end
660
- end
661
- end
662
-
663
78
  sig do
664
79
  override.returns(
665
80
  T::Array[
@@ -43,7 +43,20 @@ module OpenAI
43
43
  sig { returns(T.nilable(String)) }
44
44
  attr_accessor :previous_item_id
45
45
 
46
- # Returned when a conversation item is added.
46
+ # Sent by the server when an Item is added to the default Conversation. This can
47
+ # happen in several cases:
48
+ #
49
+ # - When the client sends a `conversation.item.create` event.
50
+ # - When the input audio buffer is committed. In this case the item will be a user
51
+ # message containing the audio from the buffer.
52
+ # - When the model is generating a Response. In this case the
53
+ # `conversation.item.added` event will be sent when the model starts generating
54
+ # a specific Item, and thus it will not yet have any content (and `status` will
55
+ # be `in_progress`).
56
+ #
57
+ # The event will include the full content of the Item (except when model is
58
+ # generating a Response) except for audio data, which can be retrieved separately
59
+ # with a `conversation.item.retrieve` event if necessary.
47
60
  sig do
48
61
  params(
49
62
  event_id: String,
@@ -44,6 +44,9 @@ module OpenAI
44
44
  attr_accessor :previous_item_id
45
45
 
46
46
  # Returned when a conversation item is finalized.
47
+ #
48
+ # The event will include the full content of the Item except for audio data, which
49
+ # can be retrieved separately with a `conversation.item.retrieve` event if needed.
47
50
  sig do
48
51
  params(
49
52
  event_id: String,
@@ -20,7 +20,7 @@ module OpenAI
20
20
  sig { returns(String) }
21
21
  attr_accessor :event_id
22
22
 
23
- # The ID of the user message item containing the audio.
23
+ # The ID of the item containing the audio that is being transcribed.
24
24
  sig { returns(String) }
25
25
  attr_accessor :item_id
26
26
 
@@ -32,7 +32,8 @@ module OpenAI
32
32
  sig { returns(Symbol) }
33
33
  attr_accessor :type
34
34
 
35
- # Usage statistics for the transcription.
35
+ # Usage statistics for the transcription, this is billed according to the ASR
36
+ # model's pricing rather than the realtime model's pricing.
36
37
  sig do
37
38
  returns(
38
39
  T.any(
@@ -51,9 +52,9 @@ module OpenAI
51
52
 
52
53
  # This event is the output of audio transcription for user audio written to the
53
54
  # user audio buffer. Transcription begins when the input audio buffer is committed
54
- # by the client or server (in `server_vad` mode). Transcription runs
55
- # asynchronously with Response creation, so this event may come before or after
56
- # the Response events.
55
+ # by the client or server (when VAD is enabled). Transcription runs asynchronously
56
+ # with Response creation, so this event may come before or after the Response
57
+ # events.
57
58
  #
58
59
  # Realtime API models accept audio natively, and thus input transcription is a
59
60
  # separate process run on a separate ASR (Automatic Speech Recognition) model. The
@@ -80,11 +81,12 @@ module OpenAI
80
81
  content_index:,
81
82
  # The unique ID of the server event.
82
83
  event_id:,
83
- # The ID of the user message item containing the audio.
84
+ # The ID of the item containing the audio that is being transcribed.
84
85
  item_id:,
85
86
  # The transcribed text.
86
87
  transcript:,
87
- # Usage statistics for the transcription.
88
+ # Usage statistics for the transcription, this is billed according to the ASR
89
+ # model's pricing rather than the realtime model's pricing.
88
90
  usage:,
89
91
  # The log probabilities of the transcription.
90
92
  logprobs: nil,
@@ -113,7 +115,8 @@ module OpenAI
113
115
  def to_hash
114
116
  end
115
117
 
116
- # Usage statistics for the transcription.
118
+ # Usage statistics for the transcription, this is billed according to the ASR
119
+ # model's pricing rather than the realtime model's pricing.
117
120
  module Usage
118
121
  extend OpenAI::Internal::Type::Union
119
122