openai 0.23.0 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/lib/openai/models/realtime/client_secret_create_response.rb +6 -8
- data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +1 -1
- data/lib/openai/models/realtime/realtime_client_event.rb +2 -6
- data/lib/openai/models/realtime/{models.rb → realtime_function_tool.rb} +6 -6
- data/lib/openai/models/realtime/realtime_response_create_params.rb +4 -4
- data/lib/openai/models/realtime/realtime_server_event.rb +1 -8
- data/lib/openai/models/realtime/realtime_session.rb +3 -3
- data/lib/openai/models/realtime/realtime_session_create_request.rb +2 -2
- data/lib/openai/models/realtime/realtime_session_create_response.rb +21 -33
- data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -2
- data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +117 -40
- data/lib/openai/models/realtime/transcription_session_updated_event.rb +152 -3
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +1 -4
- data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +2 -2
- data/rbi/openai/models/realtime/realtime_client_event.rbi +1 -2
- data/rbi/openai/models/realtime/{models.rbi → realtime_function_tool.rbi} +27 -9
- data/rbi/openai/models/realtime/realtime_response_create_params.rbi +5 -5
- data/rbi/openai/models/realtime/realtime_server_event.rbi +0 -2
- data/rbi/openai/models/realtime/realtime_session.rbi +10 -4
- data/rbi/openai/models/realtime/realtime_session_create_request.rbi +4 -4
- data/rbi/openai/models/realtime/realtime_session_create_response.rbi +29 -77
- data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -1
- data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +290 -101
- data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +311 -4
- data/sig/openai/models/realtime/realtime_client_event.rbs +0 -1
- data/sig/openai/models/realtime/{models.rbs → realtime_function_tool.rbs} +9 -9
- data/sig/openai/models/realtime/realtime_response_create_params.rbs +1 -1
- data/sig/openai/models/realtime/realtime_server_event.rbs +0 -2
- data/sig/openai/models/realtime/realtime_session.rbs +6 -6
- data/sig/openai/models/realtime/realtime_session_create_response.rbs +13 -31
- data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -1
- data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +123 -35
- data/sig/openai/models/realtime/transcription_session_updated_event.rbs +118 -4
- metadata +5 -14
- data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +0 -38
- data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +0 -66
- data/lib/openai/models/realtime/transcription_session_created.rb +0 -43
- data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +0 -51
- data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +0 -144
- data/rbi/openai/models/realtime/transcription_session_created.rbi +0 -79
- data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +0 -20
- data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +0 -59
- data/sig/openai/models/realtime/transcription_session_created.rbs +0 -32
@@ -15,174 +15,363 @@ module OpenAI
|
|
15
15
|
)
|
16
16
|
end
|
17
17
|
|
18
|
-
#
|
19
|
-
|
20
|
-
|
21
|
-
returns(OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret)
|
22
|
-
end
|
23
|
-
attr_reader :client_secret
|
18
|
+
# Unique identifier for the session that looks like `sess_1234567890abcdef`.
|
19
|
+
sig { returns(String) }
|
20
|
+
attr_accessor :id
|
24
21
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash
|
29
|
-
).void
|
30
|
-
end
|
31
|
-
attr_writer :client_secret
|
22
|
+
# The object type. Always `realtime.transcription_session`.
|
23
|
+
sig { returns(String) }
|
24
|
+
attr_accessor :object
|
32
25
|
|
33
|
-
# The
|
34
|
-
sig { returns(
|
35
|
-
|
26
|
+
# The type of session. Always `transcription` for transcription sessions.
|
27
|
+
sig { returns(Symbol) }
|
28
|
+
attr_accessor :type
|
36
29
|
|
37
|
-
|
38
|
-
attr_writer :input_audio_format
|
39
|
-
|
40
|
-
# Configuration of the transcription model.
|
30
|
+
# Configuration for input audio for the session.
|
41
31
|
sig do
|
42
32
|
returns(
|
43
33
|
T.nilable(
|
44
|
-
OpenAI::Realtime::
|
34
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio
|
45
35
|
)
|
46
36
|
)
|
47
37
|
end
|
48
|
-
attr_reader :
|
38
|
+
attr_reader :audio
|
49
39
|
|
50
40
|
sig do
|
51
41
|
params(
|
52
|
-
|
53
|
-
OpenAI::Realtime::
|
42
|
+
audio:
|
43
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash
|
54
44
|
).void
|
55
45
|
end
|
56
|
-
attr_writer :
|
46
|
+
attr_writer :audio
|
47
|
+
|
48
|
+
# Expiration timestamp for the session, in seconds since epoch.
|
49
|
+
sig { returns(T.nilable(Integer)) }
|
50
|
+
attr_reader :expires_at
|
51
|
+
|
52
|
+
sig { params(expires_at: Integer).void }
|
53
|
+
attr_writer :expires_at
|
57
54
|
|
58
|
-
#
|
59
|
-
#
|
55
|
+
# Additional fields to include in server outputs.
|
56
|
+
#
|
57
|
+
# - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
|
58
|
+
# transcription.
|
60
59
|
sig do
|
61
60
|
returns(
|
62
61
|
T.nilable(
|
63
62
|
T::Array[
|
64
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::
|
63
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
|
65
64
|
]
|
66
65
|
)
|
67
66
|
)
|
68
67
|
end
|
69
|
-
attr_reader :
|
68
|
+
attr_reader :include
|
70
69
|
|
71
70
|
sig do
|
72
71
|
params(
|
73
|
-
|
72
|
+
include:
|
74
73
|
T::Array[
|
75
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::
|
74
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol
|
76
75
|
]
|
77
76
|
).void
|
78
77
|
end
|
79
|
-
attr_writer :
|
80
|
-
|
81
|
-
# Configuration for turn detection. Can be set to `null` to turn off. Server VAD
|
82
|
-
# means that the model will detect the start and end of speech based on audio
|
83
|
-
# volume and respond at the end of user speech.
|
84
|
-
sig do
|
85
|
-
returns(
|
86
|
-
T.nilable(
|
87
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
|
88
|
-
)
|
89
|
-
)
|
90
|
-
end
|
91
|
-
attr_reader :turn_detection
|
92
|
-
|
93
|
-
sig do
|
94
|
-
params(
|
95
|
-
turn_detection:
|
96
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
|
97
|
-
).void
|
98
|
-
end
|
99
|
-
attr_writer :turn_detection
|
78
|
+
attr_writer :include
|
100
79
|
|
101
|
-
# A
|
102
|
-
#
|
103
|
-
# When a session is created on the server via REST API, the session object also
|
104
|
-
# contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
|
105
|
-
# not present when a session is updated via the WebSocket API.
|
80
|
+
# A Realtime transcription session configuration object.
|
106
81
|
sig do
|
107
82
|
params(
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
83
|
+
id: String,
|
84
|
+
object: String,
|
85
|
+
audio:
|
86
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::OrHash,
|
87
|
+
expires_at: Integer,
|
88
|
+
include:
|
114
89
|
T::Array[
|
115
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::
|
90
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::OrSymbol
|
116
91
|
],
|
117
|
-
|
118
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
|
92
|
+
type: Symbol
|
119
93
|
).returns(T.attached_class)
|
120
94
|
end
|
121
95
|
def self.new(
|
122
|
-
#
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
#
|
131
|
-
|
132
|
-
#
|
133
|
-
#
|
134
|
-
|
135
|
-
|
96
|
+
# Unique identifier for the session that looks like `sess_1234567890abcdef`.
|
97
|
+
id:,
|
98
|
+
# The object type. Always `realtime.transcription_session`.
|
99
|
+
object:,
|
100
|
+
# Configuration for input audio for the session.
|
101
|
+
audio: nil,
|
102
|
+
# Expiration timestamp for the session, in seconds since epoch.
|
103
|
+
expires_at: nil,
|
104
|
+
# Additional fields to include in server outputs.
|
105
|
+
#
|
106
|
+
# - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
|
107
|
+
# transcription.
|
108
|
+
include: nil,
|
109
|
+
# The type of session. Always `transcription` for transcription sessions.
|
110
|
+
type: :transcription
|
136
111
|
)
|
137
112
|
end
|
138
113
|
|
139
114
|
sig do
|
140
115
|
override.returns(
|
141
116
|
{
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
OpenAI::Realtime::
|
147
|
-
|
117
|
+
id: String,
|
118
|
+
object: String,
|
119
|
+
type: Symbol,
|
120
|
+
audio:
|
121
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio,
|
122
|
+
expires_at: Integer,
|
123
|
+
include:
|
148
124
|
T::Array[
|
149
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::
|
150
|
-
]
|
151
|
-
turn_detection:
|
152
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
|
125
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
|
126
|
+
]
|
153
127
|
}
|
154
128
|
)
|
155
129
|
end
|
156
130
|
def to_hash
|
157
131
|
end
|
158
132
|
|
159
|
-
|
133
|
+
class Audio < OpenAI::Internal::Type::BaseModel
|
134
|
+
OrHash =
|
135
|
+
T.type_alias do
|
136
|
+
T.any(
|
137
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio,
|
138
|
+
OpenAI::Internal::AnyHash
|
139
|
+
)
|
140
|
+
end
|
141
|
+
|
142
|
+
sig do
|
143
|
+
returns(
|
144
|
+
T.nilable(
|
145
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input
|
146
|
+
)
|
147
|
+
)
|
148
|
+
end
|
149
|
+
attr_reader :input
|
150
|
+
|
151
|
+
sig do
|
152
|
+
params(
|
153
|
+
input:
|
154
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash
|
155
|
+
).void
|
156
|
+
end
|
157
|
+
attr_writer :input
|
158
|
+
|
159
|
+
# Configuration for input audio for the session.
|
160
|
+
sig do
|
161
|
+
params(
|
162
|
+
input:
|
163
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::OrHash
|
164
|
+
).returns(T.attached_class)
|
165
|
+
end
|
166
|
+
def self.new(input: nil)
|
167
|
+
end
|
168
|
+
|
169
|
+
sig do
|
170
|
+
override.returns(
|
171
|
+
{
|
172
|
+
input:
|
173
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input
|
174
|
+
}
|
175
|
+
)
|
176
|
+
end
|
177
|
+
def to_hash
|
178
|
+
end
|
179
|
+
|
180
|
+
class Input < OpenAI::Internal::Type::BaseModel
|
181
|
+
OrHash =
|
182
|
+
T.type_alias do
|
183
|
+
T.any(
|
184
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input,
|
185
|
+
OpenAI::Internal::AnyHash
|
186
|
+
)
|
187
|
+
end
|
188
|
+
|
189
|
+
# The PCM audio format. Only a 24kHz sample rate is supported.
|
190
|
+
sig do
|
191
|
+
returns(
|
192
|
+
T.nilable(OpenAI::Realtime::RealtimeAudioFormats::Variants)
|
193
|
+
)
|
194
|
+
end
|
195
|
+
attr_reader :format_
|
196
|
+
|
197
|
+
sig do
|
198
|
+
params(
|
199
|
+
format_:
|
200
|
+
T.any(
|
201
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
|
202
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
|
203
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
|
204
|
+
)
|
205
|
+
).void
|
206
|
+
end
|
207
|
+
attr_writer :format_
|
208
|
+
|
209
|
+
# Configuration for input audio noise reduction.
|
210
|
+
sig do
|
211
|
+
returns(
|
212
|
+
T.nilable(
|
213
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction
|
214
|
+
)
|
215
|
+
)
|
216
|
+
end
|
217
|
+
attr_reader :noise_reduction
|
218
|
+
|
219
|
+
sig do
|
220
|
+
params(
|
221
|
+
noise_reduction:
|
222
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash
|
223
|
+
).void
|
224
|
+
end
|
225
|
+
attr_writer :noise_reduction
|
226
|
+
|
227
|
+
# Configuration of the transcription model.
|
228
|
+
sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
|
229
|
+
attr_reader :transcription
|
230
|
+
|
231
|
+
sig do
|
232
|
+
params(
|
233
|
+
transcription: OpenAI::Realtime::AudioTranscription::OrHash
|
234
|
+
).void
|
235
|
+
end
|
236
|
+
attr_writer :transcription
|
237
|
+
|
238
|
+
# Configuration for turn detection. Can be set to `null` to turn off. Server VAD
|
239
|
+
# means that the model will detect the start and end of speech based on audio
|
240
|
+
# volume and respond at the end of user speech.
|
241
|
+
sig do
|
242
|
+
returns(
|
243
|
+
T.nilable(
|
244
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
|
245
|
+
)
|
246
|
+
)
|
247
|
+
end
|
248
|
+
attr_reader :turn_detection
|
249
|
+
|
250
|
+
sig do
|
251
|
+
params(
|
252
|
+
turn_detection:
|
253
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
|
254
|
+
).void
|
255
|
+
end
|
256
|
+
attr_writer :turn_detection
|
257
|
+
|
258
|
+
sig do
|
259
|
+
params(
|
260
|
+
format_:
|
261
|
+
T.any(
|
262
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
|
263
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
|
264
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
|
265
|
+
),
|
266
|
+
noise_reduction:
|
267
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash,
|
268
|
+
transcription: OpenAI::Realtime::AudioTranscription::OrHash,
|
269
|
+
turn_detection:
|
270
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
|
271
|
+
).returns(T.attached_class)
|
272
|
+
end
|
273
|
+
def self.new(
|
274
|
+
# The PCM audio format. Only a 24kHz sample rate is supported.
|
275
|
+
format_: nil,
|
276
|
+
# Configuration for input audio noise reduction.
|
277
|
+
noise_reduction: nil,
|
278
|
+
# Configuration of the transcription model.
|
279
|
+
transcription: nil,
|
280
|
+
# Configuration for turn detection. Can be set to `null` to turn off. Server VAD
|
281
|
+
# means that the model will detect the start and end of speech based on audio
|
282
|
+
# volume and respond at the end of user speech.
|
283
|
+
turn_detection: nil
|
284
|
+
)
|
285
|
+
end
|
286
|
+
|
287
|
+
sig do
|
288
|
+
override.returns(
|
289
|
+
{
|
290
|
+
format_: OpenAI::Realtime::RealtimeAudioFormats::Variants,
|
291
|
+
noise_reduction:
|
292
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction,
|
293
|
+
transcription: OpenAI::Realtime::AudioTranscription,
|
294
|
+
turn_detection:
|
295
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
|
296
|
+
}
|
297
|
+
)
|
298
|
+
end
|
299
|
+
def to_hash
|
300
|
+
end
|
301
|
+
|
302
|
+
class NoiseReduction < OpenAI::Internal::Type::BaseModel
|
303
|
+
OrHash =
|
304
|
+
T.type_alias do
|
305
|
+
T.any(
|
306
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction,
|
307
|
+
OpenAI::Internal::AnyHash
|
308
|
+
)
|
309
|
+
end
|
310
|
+
|
311
|
+
# Type of noise reduction. `near_field` is for close-talking microphones such as
|
312
|
+
# headphones, `far_field` is for far-field microphones such as laptop or
|
313
|
+
# conference room microphones.
|
314
|
+
sig do
|
315
|
+
returns(
|
316
|
+
T.nilable(OpenAI::Realtime::NoiseReductionType::TaggedSymbol)
|
317
|
+
)
|
318
|
+
end
|
319
|
+
attr_reader :type
|
320
|
+
|
321
|
+
sig do
|
322
|
+
params(
|
323
|
+
type: OpenAI::Realtime::NoiseReductionType::OrSymbol
|
324
|
+
).void
|
325
|
+
end
|
326
|
+
attr_writer :type
|
327
|
+
|
328
|
+
# Configuration for input audio noise reduction.
|
329
|
+
sig do
|
330
|
+
params(
|
331
|
+
type: OpenAI::Realtime::NoiseReductionType::OrSymbol
|
332
|
+
).returns(T.attached_class)
|
333
|
+
end
|
334
|
+
def self.new(
|
335
|
+
# Type of noise reduction. `near_field` is for close-talking microphones such as
|
336
|
+
# headphones, `far_field` is for far-field microphones such as laptop or
|
337
|
+
# conference room microphones.
|
338
|
+
type: nil
|
339
|
+
)
|
340
|
+
end
|
341
|
+
|
342
|
+
sig do
|
343
|
+
override.returns(
|
344
|
+
{ type: OpenAI::Realtime::NoiseReductionType::TaggedSymbol }
|
345
|
+
)
|
346
|
+
end
|
347
|
+
def to_hash
|
348
|
+
end
|
349
|
+
end
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
module Include
|
160
354
|
extend OpenAI::Internal::Type::Enum
|
161
355
|
|
162
356
|
TaggedSymbol =
|
163
357
|
T.type_alias do
|
164
358
|
T.all(
|
165
359
|
Symbol,
|
166
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::
|
360
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include
|
167
361
|
)
|
168
362
|
end
|
169
363
|
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
170
364
|
|
171
|
-
|
172
|
-
T.let(
|
173
|
-
:text,
|
174
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
|
175
|
-
)
|
176
|
-
AUDIO =
|
365
|
+
ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS =
|
177
366
|
T.let(
|
178
|
-
:
|
179
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::
|
367
|
+
:"item.input_audio_transcription.logprobs",
|
368
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
|
180
369
|
)
|
181
370
|
|
182
371
|
sig do
|
183
372
|
override.returns(
|
184
373
|
T::Array[
|
185
|
-
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::
|
374
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Include::TaggedSymbol
|
186
375
|
]
|
187
376
|
)
|
188
377
|
end
|