openai 0.23.0 → 0.23.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/client_secret_create_response.rb +6 -8
  5. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +1 -1
  6. data/lib/openai/models/realtime/realtime_client_event.rb +2 -6
  7. data/lib/openai/models/realtime/{models.rb → realtime_function_tool.rb} +6 -6
  8. data/lib/openai/models/realtime/realtime_response_create_params.rb +4 -4
  9. data/lib/openai/models/realtime/realtime_server_event.rb +1 -8
  10. data/lib/openai/models/realtime/realtime_session.rb +3 -3
  11. data/lib/openai/models/realtime/realtime_session_create_request.rb +2 -2
  12. data/lib/openai/models/realtime/realtime_session_create_response.rb +21 -33
  13. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -2
  14. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +117 -40
  15. data/lib/openai/models/realtime/transcription_session_updated_event.rb +152 -3
  16. data/lib/openai/version.rb +1 -1
  17. data/lib/openai.rb +1 -4
  18. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +2 -2
  19. data/rbi/openai/models/realtime/realtime_client_event.rbi +1 -2
  20. data/rbi/openai/models/realtime/{models.rbi → realtime_function_tool.rbi} +27 -9
  21. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +5 -5
  22. data/rbi/openai/models/realtime/realtime_server_event.rbi +0 -2
  23. data/rbi/openai/models/realtime/realtime_session.rbi +10 -4
  24. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +4 -4
  25. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +29 -77
  26. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -1
  27. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +290 -101
  28. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +311 -4
  29. data/sig/openai/models/realtime/realtime_client_event.rbs +0 -1
  30. data/sig/openai/models/realtime/{models.rbs → realtime_function_tool.rbs} +9 -9
  31. data/sig/openai/models/realtime/realtime_response_create_params.rbs +1 -1
  32. data/sig/openai/models/realtime/realtime_server_event.rbs +0 -2
  33. data/sig/openai/models/realtime/realtime_session.rbs +6 -6
  34. data/sig/openai/models/realtime/realtime_session_create_response.rbs +13 -31
  35. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -1
  36. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +123 -35
  37. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +118 -4
  38. metadata +5 -14
  39. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +0 -38
  40. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +0 -66
  41. data/lib/openai/models/realtime/transcription_session_created.rb +0 -43
  42. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +0 -51
  43. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +0 -144
  44. data/rbi/openai/models/realtime/transcription_session_created.rbi +0 -79
  45. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +0 -20
  46. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +0 -59
  47. data/sig/openai/models/realtime/transcription_session_created.rbs +0 -32
@@ -15,174 +15,363 @@ module OpenAI
15
15
  )
16
16
  end
17
17
 
18
- # Ephemeral key returned by the API. Only present when the session is created on
19
- # the server via REST API.
20
- sig do
21
- returns(OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret)
22
- end
23
- attr_reader :client_secret
18
+ # Unique identifier for the session that looks like `sess_1234567890abcdef`.
19
+ sig { returns(String) }
20
+ attr_accessor :id
24
21
 
25
- sig do
26
- params(
27
- client_secret:
28
- OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash
29
- ).void
30
- end
31
- attr_writer :client_secret
22
+ # The object type. Always `realtime.transcription_session`.
23
+ sig { returns(String) }
24
+ attr_accessor :object
32
25
 
33
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
34
- sig { returns(T.nilable(String)) }
35
- attr_reader :input_audio_format
26
+ # The type of session. Always `transcription` for transcription sessions.
27
+ sig { returns(Symbol) }
28
+ attr_accessor :type
36
29
 
37
- sig { params(input_audio_format: String).void }
38
- attr_writer :input_audio_format
39
-
40
- # Configuration of the transcription model.
30
+ # Configuration for input audio for the session.
41
31
  sig do
42
32
  returns(
43
33
  T.nilable(
44
- OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription
34
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio
45
35
  )
46
36
  )
47
37
  end
48
- attr_reader :input_audio_transcription
38
+ attr_reader :audio
49
39
 
50
40
  sig do
51
41
  params(
52
- input_audio_transcription:
53
- OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash
42
+ audio:
43
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash
54
44
  ).void
55
45
  end
56
- attr_writer :input_audio_transcription
46
+ attr_writer :audio
47
+
48
+ # Expiration timestamp for the session, in seconds since epoch.
49
+ sig { returns(T.nilable(Integer)) }
50
+ attr_reader :expires_at
51
+
52
+ sig { params(expires_at: Integer).void }
53
+ attr_writer :expires_at
57
54
 
58
- # The set of modalities the model can respond with. To disable audio, set this to
59
- # ["text"].
55
+ # Additional fields to include in server outputs.
56
+ #
57
+ # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
58
+ # transcription.
60
59
  sig do
61
60
  returns(
62
61
  T.nilable(
63
62
  T::Array[
64
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
63
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
65
64
  ]
66
65
  )
67
66
  )
68
67
  end
69
- attr_reader :modalities
68
+ attr_reader :include
70
69
 
71
70
  sig do
72
71
  params(
73
- modalities:
72
+ include:
74
73
  T::Array[
75
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol
74
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol
76
75
  ]
77
76
  ).void
78
77
  end
79
- attr_writer :modalities
80
-
81
- # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
82
- # means that the model will detect the start and end of speech based on audio
83
- # volume and respond at the end of user speech.
84
- sig do
85
- returns(
86
- T.nilable(
87
- OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
88
- )
89
- )
90
- end
91
- attr_reader :turn_detection
92
-
93
- sig do
94
- params(
95
- turn_detection:
96
- OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
97
- ).void
98
- end
99
- attr_writer :turn_detection
78
+ attr_writer :include
100
79
 
101
- # A new Realtime transcription session configuration.
102
- #
103
- # When a session is created on the server via REST API, the session object also
104
- # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
105
- # not present when a session is updated via the WebSocket API.
80
+ # A Realtime transcription session configuration object.
106
81
  sig do
107
82
  params(
108
- client_secret:
109
- OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash,
110
- input_audio_format: String,
111
- input_audio_transcription:
112
- OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash,
113
- modalities:
83
+ id: String,
84
+ object: String,
85
+ audio:
86
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash,
87
+ expires_at: Integer,
88
+ include:
114
89
  T::Array[
115
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol
90
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol
116
91
  ],
117
- turn_detection:
118
- OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
92
+ type: Symbol
119
93
  ).returns(T.attached_class)
120
94
  end
121
95
  def self.new(
122
- # Ephemeral key returned by the API. Only present when the session is created on
123
- # the server via REST API.
124
- client_secret:,
125
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
126
- input_audio_format: nil,
127
- # Configuration of the transcription model.
128
- input_audio_transcription: nil,
129
- # The set of modalities the model can respond with. To disable audio, set this to
130
- # ["text"].
131
- modalities: nil,
132
- # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
133
- # means that the model will detect the start and end of speech based on audio
134
- # volume and respond at the end of user speech.
135
- turn_detection: nil
96
+ # Unique identifier for the session that looks like `sess_1234567890abcdef`.
97
+ id:,
98
+ # The object type. Always `realtime.transcription_session`.
99
+ object:,
100
+ # Configuration for input audio for the session.
101
+ audio: nil,
102
+ # Expiration timestamp for the session, in seconds since epoch.
103
+ expires_at: nil,
104
+ # Additional fields to include in server outputs.
105
+ #
106
+ # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
107
+ # transcription.
108
+ include: nil,
109
+ # The type of session. Always `transcription` for transcription sessions.
110
+ type: :transcription
136
111
  )
137
112
  end
138
113
 
139
114
  sig do
140
115
  override.returns(
141
116
  {
142
- client_secret:
143
- OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret,
144
- input_audio_format: String,
145
- input_audio_transcription:
146
- OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription,
147
- modalities:
117
+ id: String,
118
+ object: String,
119
+ type: Symbol,
120
+ audio:
121
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio,
122
+ expires_at: Integer,
123
+ include:
148
124
  T::Array[
149
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
150
- ],
151
- turn_detection:
152
- OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
125
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
126
+ ]
153
127
  }
154
128
  )
155
129
  end
156
130
  def to_hash
157
131
  end
158
132
 
159
- module Modality
133
+ class Audio < OpenAI::Internal::Type::BaseModel
134
+ OrHash =
135
+ T.type_alias do
136
+ T.any(
137
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio,
138
+ OpenAI::Internal::AnyHash
139
+ )
140
+ end
141
+
142
+ sig do
143
+ returns(
144
+ T.nilable(
145
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input
146
+ )
147
+ )
148
+ end
149
+ attr_reader :input
150
+
151
+ sig do
152
+ params(
153
+ input:
154
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash
155
+ ).void
156
+ end
157
+ attr_writer :input
158
+
159
+ # Configuration for input audio for the session.
160
+ sig do
161
+ params(
162
+ input:
163
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash
164
+ ).returns(T.attached_class)
165
+ end
166
+ def self.new(input: nil)
167
+ end
168
+
169
+ sig do
170
+ override.returns(
171
+ {
172
+ input:
173
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input
174
+ }
175
+ )
176
+ end
177
+ def to_hash
178
+ end
179
+
180
+ class Input < OpenAI::Internal::Type::BaseModel
181
+ OrHash =
182
+ T.type_alias do
183
+ T.any(
184
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input,
185
+ OpenAI::Internal::AnyHash
186
+ )
187
+ end
188
+
189
+ # The PCM audio format. Only a 24kHz sample rate is supported.
190
+ sig do
191
+ returns(
192
+ T.nilable(OpenAI::Realtime::RealtimeAudioFormats::Variants)
193
+ )
194
+ end
195
+ attr_reader :format_
196
+
197
+ sig do
198
+ params(
199
+ format_:
200
+ T.any(
201
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
202
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
203
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
204
+ )
205
+ ).void
206
+ end
207
+ attr_writer :format_
208
+
209
+ # Configuration for input audio noise reduction.
210
+ sig do
211
+ returns(
212
+ T.nilable(
213
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction
214
+ )
215
+ )
216
+ end
217
+ attr_reader :noise_reduction
218
+
219
+ sig do
220
+ params(
221
+ noise_reduction:
222
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash
223
+ ).void
224
+ end
225
+ attr_writer :noise_reduction
226
+
227
+ # Configuration of the transcription model.
228
+ sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
229
+ attr_reader :transcription
230
+
231
+ sig do
232
+ params(
233
+ transcription: OpenAI::Realtime::AudioTranscription::OrHash
234
+ ).void
235
+ end
236
+ attr_writer :transcription
237
+
238
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
239
+ # means that the model will detect the start and end of speech based on audio
240
+ # volume and respond at the end of user speech.
241
+ sig do
242
+ returns(
243
+ T.nilable(
244
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
245
+ )
246
+ )
247
+ end
248
+ attr_reader :turn_detection
249
+
250
+ sig do
251
+ params(
252
+ turn_detection:
253
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
254
+ ).void
255
+ end
256
+ attr_writer :turn_detection
257
+
258
+ sig do
259
+ params(
260
+ format_:
261
+ T.any(
262
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
263
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
264
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
265
+ ),
266
+ noise_reduction:
267
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash,
268
+ transcription: OpenAI::Realtime::AudioTranscription::OrHash,
269
+ turn_detection:
270
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
271
+ ).returns(T.attached_class)
272
+ end
273
+ def self.new(
274
+ # The PCM audio format. Only a 24kHz sample rate is supported.
275
+ format_: nil,
276
+ # Configuration for input audio noise reduction.
277
+ noise_reduction: nil,
278
+ # Configuration of the transcription model.
279
+ transcription: nil,
280
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
281
+ # means that the model will detect the start and end of speech based on audio
282
+ # volume and respond at the end of user speech.
283
+ turn_detection: nil
284
+ )
285
+ end
286
+
287
+ sig do
288
+ override.returns(
289
+ {
290
+ format_: OpenAI::Realtime::RealtimeAudioFormats::Variants,
291
+ noise_reduction:
292
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction,
293
+ transcription: OpenAI::Realtime::AudioTranscription,
294
+ turn_detection:
295
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
296
+ }
297
+ )
298
+ end
299
+ def to_hash
300
+ end
301
+
302
+ class NoiseReduction < OpenAI::Internal::Type::BaseModel
303
+ OrHash =
304
+ T.type_alias do
305
+ T.any(
306
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction,
307
+ OpenAI::Internal::AnyHash
308
+ )
309
+ end
310
+
311
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
312
+ # headphones, `far_field` is for far-field microphones such as laptop or
313
+ # conference room microphones.
314
+ sig do
315
+ returns(
316
+ T.nilable(OpenAI::Realtime::NoiseReductionType::TaggedSymbol)
317
+ )
318
+ end
319
+ attr_reader :type
320
+
321
+ sig do
322
+ params(
323
+ type: OpenAI::Realtime::NoiseReductionType::OrSymbol
324
+ ).void
325
+ end
326
+ attr_writer :type
327
+
328
+ # Configuration for input audio noise reduction.
329
+ sig do
330
+ params(
331
+ type: OpenAI::Realtime::NoiseReductionType::OrSymbol
332
+ ).returns(T.attached_class)
333
+ end
334
+ def self.new(
335
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
336
+ # headphones, `far_field` is for far-field microphones such as laptop or
337
+ # conference room microphones.
338
+ type: nil
339
+ )
340
+ end
341
+
342
+ sig do
343
+ override.returns(
344
+ { type: OpenAI::Realtime::NoiseReductionType::TaggedSymbol }
345
+ )
346
+ end
347
+ def to_hash
348
+ end
349
+ end
350
+ end
351
+ end
352
+
353
+ module Include
160
354
  extend OpenAI::Internal::Type::Enum
161
355
 
162
356
  TaggedSymbol =
163
357
  T.type_alias do
164
358
  T.all(
165
359
  Symbol,
166
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality
360
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include
167
361
  )
168
362
  end
169
363
  OrSymbol = T.type_alias { T.any(Symbol, String) }
170
364
 
171
- TEXT =
172
- T.let(
173
- :text,
174
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
175
- )
176
- AUDIO =
365
+ ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS =
177
366
  T.let(
178
- :audio,
179
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
367
+ :"item.input_audio_transcription.logprobs",
368
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
180
369
  )
181
370
 
182
371
  sig do
183
372
  override.returns(
184
373
  T::Array[
185
- OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
374
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
186
375
  ]
187
376
  )
188
377
  end