openai 0.22.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/lib/openai/models/realtime/audio_transcription.rb +60 -0
- data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
- data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
- data/lib/openai/models/realtime/conversation_item.rb +1 -1
- data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
- data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
- data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
- data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
- data/lib/openai/models/realtime/models.rb +58 -0
- data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
- data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
- data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
- data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
- data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
- data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
- data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
- data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
- data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
- data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
- data/lib/openai/models/realtime/realtime_response.rb +117 -107
- data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
- data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
- data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
- data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
- data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
- data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
- data/lib/openai/models/realtime/realtime_session.rb +9 -125
- data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
- data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
- data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
- data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
- data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
- data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
- data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
- data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
- data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
- data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
- data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
- data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
- data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
- data/lib/openai/models/realtime/response_create_event.rb +18 -348
- data/lib/openai/models/realtime/response_done_event.rb +7 -0
- data/lib/openai/models/realtime/session_created_event.rb +20 -4
- data/lib/openai/models/realtime/session_update_event.rb +36 -12
- data/lib/openai/models/realtime/session_updated_event.rb +20 -4
- data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
- data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
- data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
- data/lib/openai/resources/realtime/client_secrets.rb +2 -3
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +19 -1
- data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
- data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
- data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
- data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
- data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
- data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
- data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
- data/rbi/openai/models/realtime/models.rbi +97 -0
- data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
- data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
- data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
- data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
- data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
- data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
- data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
- data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
- data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
- data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
- data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
- data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
- data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
- data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
- data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
- data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
- data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
- data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
- data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
- data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
- data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
- data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
- data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
- data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
- data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
- data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
- data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
- data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
- data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
- data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
- data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
- data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
- data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
- data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
- data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
- data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
- data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
- data/sig/openai/models/realtime/models.rbs +57 -0
- data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
- data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
- data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
- data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
- data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
- data/sig/openai/models/realtime/realtime_response.rbs +103 -82
- data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
- data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
- data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
- data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
- data/sig/openai/models/realtime/realtime_session.rbs +16 -106
- data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
- data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
- data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
- data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
- data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
- data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
- data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
- data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
- data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
- data/sig/openai/models/realtime/response_create_event.rbs +6 -249
- data/sig/openai/models/realtime/session_created_event.rbs +14 -4
- data/sig/openai/models/realtime/session_update_event.rbs +14 -4
- data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
- data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
- data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
- data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
- metadata +59 -5
- data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
- data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
- data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
|
@@ -9,20 +9,28 @@ module OpenAI
|
|
|
9
9
|
T.any(OpenAI::Realtime::RealtimeResponse, OpenAI::Internal::AnyHash)
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
-
# The unique ID of the response
|
|
12
|
+
# The unique ID of the response, will look like `resp_1234`.
|
|
13
13
|
sig { returns(T.nilable(String)) }
|
|
14
14
|
attr_reader :id
|
|
15
15
|
|
|
16
16
|
sig { params(id: String).void }
|
|
17
17
|
attr_writer :id
|
|
18
18
|
|
|
19
|
+
# Configuration for audio output.
|
|
20
|
+
sig { returns(T.nilable(OpenAI::Realtime::RealtimeResponse::Audio)) }
|
|
21
|
+
attr_reader :audio
|
|
22
|
+
|
|
23
|
+
sig do
|
|
24
|
+
params(audio: OpenAI::Realtime::RealtimeResponse::Audio::OrHash).void
|
|
25
|
+
end
|
|
26
|
+
attr_writer :audio
|
|
27
|
+
|
|
19
28
|
# Which conversation the response is added to, determined by the `conversation`
|
|
20
29
|
# field in the `response.create` event. If `auto`, the response will be added to
|
|
21
30
|
# the default conversation and the value of `conversation_id` will be an id like
|
|
22
31
|
# `conv_1234`. If `none`, the response will not be added to any conversation and
|
|
23
32
|
# the value of `conversation_id` will be `null`. If responses are being triggered
|
|
24
|
-
# by
|
|
25
|
-
# `conversation_id` will be an id like `conv_1234`.
|
|
33
|
+
# automatically by VAD the response will be added to the default conversation
|
|
26
34
|
sig { returns(T.nilable(String)) }
|
|
27
35
|
attr_reader :conversation_id
|
|
28
36
|
|
|
@@ -46,26 +54,6 @@ module OpenAI
|
|
|
46
54
|
sig { returns(T.nilable(T::Hash[Symbol, String])) }
|
|
47
55
|
attr_accessor :metadata
|
|
48
56
|
|
|
49
|
-
# The set of modalities the model used to respond. If there are multiple
|
|
50
|
-
# modalities, the model will pick one, for example if `modalities` is
|
|
51
|
-
# `["text", "audio"]`, the model could be responding in either text or audio.
|
|
52
|
-
sig do
|
|
53
|
-
returns(
|
|
54
|
-
T.nilable(
|
|
55
|
-
T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol]
|
|
56
|
-
)
|
|
57
|
-
)
|
|
58
|
-
end
|
|
59
|
-
attr_reader :modalities
|
|
60
|
-
|
|
61
|
-
sig do
|
|
62
|
-
params(
|
|
63
|
-
modalities:
|
|
64
|
-
T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol]
|
|
65
|
-
).void
|
|
66
|
-
end
|
|
67
|
-
attr_writer :modalities
|
|
68
|
-
|
|
69
57
|
# The object type, must be `realtime.response`.
|
|
70
58
|
sig do
|
|
71
59
|
returns(
|
|
@@ -123,23 +111,30 @@ module OpenAI
|
|
|
123
111
|
end
|
|
124
112
|
attr_writer :output
|
|
125
113
|
|
|
126
|
-
# The
|
|
114
|
+
# The set of modalities the model used to respond, currently the only possible
|
|
115
|
+
# values are `[\"audio\"]`, `[\"text\"]`. Audio output always include a text
|
|
116
|
+
# transcript. Setting the output to mode `text` will disable audio output from the
|
|
117
|
+
# model.
|
|
127
118
|
sig do
|
|
128
119
|
returns(
|
|
129
120
|
T.nilable(
|
|
130
|
-
|
|
121
|
+
T::Array[
|
|
122
|
+
OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
|
|
123
|
+
]
|
|
131
124
|
)
|
|
132
125
|
)
|
|
133
126
|
end
|
|
134
|
-
attr_reader :
|
|
127
|
+
attr_reader :output_modalities
|
|
135
128
|
|
|
136
129
|
sig do
|
|
137
130
|
params(
|
|
138
|
-
|
|
139
|
-
|
|
131
|
+
output_modalities:
|
|
132
|
+
T::Array[
|
|
133
|
+
OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
|
|
134
|
+
]
|
|
140
135
|
).void
|
|
141
136
|
end
|
|
142
|
-
attr_writer :
|
|
137
|
+
attr_writer :output_modalities
|
|
143
138
|
|
|
144
139
|
# The final status of the response (`completed`, `cancelled`, `failed`, or
|
|
145
140
|
# `incomplete`, `in_progress`).
|
|
@@ -168,13 +163,6 @@ module OpenAI
|
|
|
168
163
|
end
|
|
169
164
|
attr_writer :status_details
|
|
170
165
|
|
|
171
|
-
# Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
|
|
172
|
-
sig { returns(T.nilable(Float)) }
|
|
173
|
-
attr_reader :temperature
|
|
174
|
-
|
|
175
|
-
sig { params(temperature: Float).void }
|
|
176
|
-
attr_writer :temperature
|
|
177
|
-
|
|
178
166
|
# Usage statistics for the Response, this will correspond to billing. A Realtime
|
|
179
167
|
# API session will maintain a conversation context and append new Items to the
|
|
180
168
|
# Conversation, thus output from previous turns (text and audio tokens) will
|
|
@@ -187,34 +175,14 @@ module OpenAI
|
|
|
187
175
|
end
|
|
188
176
|
attr_writer :usage
|
|
189
177
|
|
|
190
|
-
# The voice the model used to respond. Current voice options are `alloy`, `ash`,
|
|
191
|
-
# `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
|
|
192
|
-
sig do
|
|
193
|
-
returns(
|
|
194
|
-
T.nilable(
|
|
195
|
-
T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol)
|
|
196
|
-
)
|
|
197
|
-
)
|
|
198
|
-
end
|
|
199
|
-
attr_reader :voice
|
|
200
|
-
|
|
201
|
-
sig do
|
|
202
|
-
params(
|
|
203
|
-
voice:
|
|
204
|
-
T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol)
|
|
205
|
-
).void
|
|
206
|
-
end
|
|
207
|
-
attr_writer :voice
|
|
208
|
-
|
|
209
178
|
# The response resource.
|
|
210
179
|
sig do
|
|
211
180
|
params(
|
|
212
181
|
id: String,
|
|
182
|
+
audio: OpenAI::Realtime::RealtimeResponse::Audio::OrHash,
|
|
213
183
|
conversation_id: String,
|
|
214
184
|
max_output_tokens: T.any(Integer, Symbol),
|
|
215
185
|
metadata: T.nilable(T::Hash[Symbol, String]),
|
|
216
|
-
modalities:
|
|
217
|
-
T::Array[OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol],
|
|
218
186
|
object: OpenAI::Realtime::RealtimeResponse::Object::OrSymbol,
|
|
219
187
|
output:
|
|
220
188
|
T::Array[
|
|
@@ -230,26 +198,26 @@ module OpenAI
|
|
|
230
198
|
OpenAI::Realtime::RealtimeMcpApprovalRequest::OrHash
|
|
231
199
|
)
|
|
232
200
|
],
|
|
233
|
-
|
|
234
|
-
|
|
201
|
+
output_modalities:
|
|
202
|
+
T::Array[
|
|
203
|
+
OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
|
|
204
|
+
],
|
|
235
205
|
status: OpenAI::Realtime::RealtimeResponse::Status::OrSymbol,
|
|
236
206
|
status_details: OpenAI::Realtime::RealtimeResponseStatus::OrHash,
|
|
237
|
-
|
|
238
|
-
usage: OpenAI::Realtime::RealtimeResponseUsage::OrHash,
|
|
239
|
-
voice:
|
|
240
|
-
T.any(String, OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol)
|
|
207
|
+
usage: OpenAI::Realtime::RealtimeResponseUsage::OrHash
|
|
241
208
|
).returns(T.attached_class)
|
|
242
209
|
end
|
|
243
210
|
def self.new(
|
|
244
|
-
# The unique ID of the response
|
|
211
|
+
# The unique ID of the response, will look like `resp_1234`.
|
|
245
212
|
id: nil,
|
|
213
|
+
# Configuration for audio output.
|
|
214
|
+
audio: nil,
|
|
246
215
|
# Which conversation the response is added to, determined by the `conversation`
|
|
247
216
|
# field in the `response.create` event. If `auto`, the response will be added to
|
|
248
217
|
# the default conversation and the value of `conversation_id` will be an id like
|
|
249
218
|
# `conv_1234`. If `none`, the response will not be added to any conversation and
|
|
250
219
|
# the value of `conversation_id` will be `null`. If responses are being triggered
|
|
251
|
-
# by
|
|
252
|
-
# `conversation_id` will be an id like `conv_1234`.
|
|
220
|
+
# automatically by VAD the response will be added to the default conversation
|
|
253
221
|
conversation_id: nil,
|
|
254
222
|
# Maximum number of output tokens for a single assistant response, inclusive of
|
|
255
223
|
# tool calls, that was used in this response.
|
|
@@ -261,31 +229,25 @@ module OpenAI
|
|
|
261
229
|
# Keys are strings with a maximum length of 64 characters. Values are strings with
|
|
262
230
|
# a maximum length of 512 characters.
|
|
263
231
|
metadata: nil,
|
|
264
|
-
# The set of modalities the model used to respond. If there are multiple
|
|
265
|
-
# modalities, the model will pick one, for example if `modalities` is
|
|
266
|
-
# `["text", "audio"]`, the model could be responding in either text or audio.
|
|
267
|
-
modalities: nil,
|
|
268
232
|
# The object type, must be `realtime.response`.
|
|
269
233
|
object: nil,
|
|
270
234
|
# The list of output items generated by the response.
|
|
271
235
|
output: nil,
|
|
272
|
-
# The
|
|
273
|
-
|
|
236
|
+
# The set of modalities the model used to respond, currently the only possible
|
|
237
|
+
# values are `[\"audio\"]`, `[\"text\"]`. Audio output always include a text
|
|
238
|
+
# transcript. Setting the output to mode `text` will disable audio output from the
|
|
239
|
+
# model.
|
|
240
|
+
output_modalities: nil,
|
|
274
241
|
# The final status of the response (`completed`, `cancelled`, `failed`, or
|
|
275
242
|
# `incomplete`, `in_progress`).
|
|
276
243
|
status: nil,
|
|
277
244
|
# Additional details about the status.
|
|
278
245
|
status_details: nil,
|
|
279
|
-
# Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
|
|
280
|
-
temperature: nil,
|
|
281
246
|
# Usage statistics for the Response, this will correspond to billing. A Realtime
|
|
282
247
|
# API session will maintain a conversation context and append new Items to the
|
|
283
248
|
# Conversation, thus output from previous turns (text and audio tokens) will
|
|
284
249
|
# become the input for later turns.
|
|
285
|
-
usage: nil
|
|
286
|
-
# The voice the model used to respond. Current voice options are `alloy`, `ash`,
|
|
287
|
-
# `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
|
|
288
|
-
voice: nil
|
|
250
|
+
usage: nil
|
|
289
251
|
)
|
|
290
252
|
end
|
|
291
253
|
|
|
@@ -293,13 +255,10 @@ module OpenAI
|
|
|
293
255
|
override.returns(
|
|
294
256
|
{
|
|
295
257
|
id: String,
|
|
258
|
+
audio: OpenAI::Realtime::RealtimeResponse::Audio,
|
|
296
259
|
conversation_id: String,
|
|
297
260
|
max_output_tokens: T.any(Integer, Symbol),
|
|
298
261
|
metadata: T.nilable(T::Hash[Symbol, String]),
|
|
299
|
-
modalities:
|
|
300
|
-
T::Array[
|
|
301
|
-
OpenAI::Realtime::RealtimeResponse::Modality::OrSymbol
|
|
302
|
-
],
|
|
303
262
|
object: OpenAI::Realtime::RealtimeResponse::Object::OrSymbol,
|
|
304
263
|
output:
|
|
305
264
|
T::Array[
|
|
@@ -315,69 +274,270 @@ module OpenAI
|
|
|
315
274
|
OpenAI::Realtime::RealtimeMcpApprovalRequest
|
|
316
275
|
)
|
|
317
276
|
],
|
|
318
|
-
|
|
319
|
-
|
|
277
|
+
output_modalities:
|
|
278
|
+
T::Array[
|
|
279
|
+
OpenAI::Realtime::RealtimeResponse::OutputModality::OrSymbol
|
|
280
|
+
],
|
|
320
281
|
status: OpenAI::Realtime::RealtimeResponse::Status::OrSymbol,
|
|
321
282
|
status_details: OpenAI::Realtime::RealtimeResponseStatus,
|
|
322
|
-
|
|
323
|
-
usage: OpenAI::Realtime::RealtimeResponseUsage,
|
|
324
|
-
voice:
|
|
325
|
-
T.any(
|
|
326
|
-
String,
|
|
327
|
-
OpenAI::Realtime::RealtimeResponse::Voice::OrSymbol
|
|
328
|
-
)
|
|
283
|
+
usage: OpenAI::Realtime::RealtimeResponseUsage
|
|
329
284
|
}
|
|
330
285
|
)
|
|
331
286
|
end
|
|
332
287
|
def to_hash
|
|
333
288
|
end
|
|
334
289
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
290
|
+
class Audio < OpenAI::Internal::Type::BaseModel
|
|
291
|
+
OrHash =
|
|
292
|
+
T.type_alias do
|
|
293
|
+
T.any(
|
|
294
|
+
OpenAI::Realtime::RealtimeResponse::Audio,
|
|
295
|
+
OpenAI::Internal::AnyHash
|
|
296
|
+
)
|
|
297
|
+
end
|
|
339
298
|
|
|
340
|
-
|
|
299
|
+
sig do
|
|
300
|
+
returns(
|
|
301
|
+
T.nilable(OpenAI::Realtime::RealtimeResponse::Audio::Output)
|
|
302
|
+
)
|
|
303
|
+
end
|
|
304
|
+
attr_reader :output
|
|
305
|
+
|
|
306
|
+
sig do
|
|
307
|
+
params(
|
|
308
|
+
output: OpenAI::Realtime::RealtimeResponse::Audio::Output::OrHash
|
|
309
|
+
).void
|
|
310
|
+
end
|
|
311
|
+
attr_writer :output
|
|
312
|
+
|
|
313
|
+
# Configuration for audio output.
|
|
314
|
+
sig do
|
|
315
|
+
params(
|
|
316
|
+
output: OpenAI::Realtime::RealtimeResponse::Audio::Output::OrHash
|
|
317
|
+
).returns(T.attached_class)
|
|
318
|
+
end
|
|
319
|
+
def self.new(output: nil)
|
|
320
|
+
end
|
|
341
321
|
|
|
342
322
|
sig do
|
|
343
323
|
override.returns(
|
|
344
|
-
|
|
345
|
-
OpenAI::Realtime::RealtimeResponse::MaxOutputTokens::Variants
|
|
346
|
-
]
|
|
324
|
+
{ output: OpenAI::Realtime::RealtimeResponse::Audio::Output }
|
|
347
325
|
)
|
|
348
326
|
end
|
|
349
|
-
def
|
|
327
|
+
def to_hash
|
|
350
328
|
end
|
|
351
|
-
end
|
|
352
329
|
|
|
353
|
-
|
|
354
|
-
|
|
330
|
+
class Output < OpenAI::Internal::Type::BaseModel
|
|
331
|
+
OrHash =
|
|
332
|
+
T.type_alias do
|
|
333
|
+
T.any(
|
|
334
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output,
|
|
335
|
+
OpenAI::Internal::AnyHash
|
|
336
|
+
)
|
|
337
|
+
end
|
|
355
338
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
339
|
+
# The format of the output audio.
|
|
340
|
+
sig do
|
|
341
|
+
returns(
|
|
342
|
+
T.nilable(
|
|
343
|
+
T.any(
|
|
344
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
|
|
345
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
|
|
346
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
|
|
347
|
+
)
|
|
348
|
+
)
|
|
349
|
+
)
|
|
359
350
|
end
|
|
360
|
-
|
|
351
|
+
attr_reader :format_
|
|
361
352
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
353
|
+
sig do
|
|
354
|
+
params(
|
|
355
|
+
format_:
|
|
356
|
+
T.any(
|
|
357
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
|
|
358
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
|
|
359
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
|
|
360
|
+
)
|
|
361
|
+
).void
|
|
362
|
+
end
|
|
363
|
+
attr_writer :format_
|
|
364
|
+
|
|
365
|
+
# The voice the model uses to respond. Voice cannot be changed during the session
|
|
366
|
+
# once the model has responded with audio at least once. Current voice options are
|
|
367
|
+
# `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
|
|
368
|
+
# and `cedar`. We recommend `marin` and `cedar` for best quality.
|
|
369
|
+
sig do
|
|
370
|
+
returns(
|
|
371
|
+
T.nilable(
|
|
372
|
+
T.any(
|
|
373
|
+
String,
|
|
374
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
|
|
375
|
+
)
|
|
376
|
+
)
|
|
377
|
+
)
|
|
378
|
+
end
|
|
379
|
+
attr_reader :voice
|
|
380
|
+
|
|
381
|
+
sig do
|
|
382
|
+
params(
|
|
383
|
+
voice:
|
|
384
|
+
T.any(
|
|
385
|
+
String,
|
|
386
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
|
|
387
|
+
)
|
|
388
|
+
).void
|
|
389
|
+
end
|
|
390
|
+
attr_writer :voice
|
|
391
|
+
|
|
392
|
+
sig do
|
|
393
|
+
params(
|
|
394
|
+
format_:
|
|
395
|
+
T.any(
|
|
396
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
|
|
397
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
|
|
398
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
|
|
399
|
+
),
|
|
400
|
+
voice:
|
|
401
|
+
T.any(
|
|
402
|
+
String,
|
|
403
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
|
|
404
|
+
)
|
|
405
|
+
).returns(T.attached_class)
|
|
406
|
+
end
|
|
407
|
+
def self.new(
|
|
408
|
+
# The format of the output audio.
|
|
409
|
+
format_: nil,
|
|
410
|
+
# The voice the model uses to respond. Voice cannot be changed during the session
|
|
411
|
+
# once the model has responded with audio at least once. Current voice options are
|
|
412
|
+
# `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
|
|
413
|
+
# and `cedar`. We recommend `marin` and `cedar` for best quality.
|
|
414
|
+
voice: nil
|
|
371
415
|
)
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
sig do
|
|
419
|
+
override.returns(
|
|
420
|
+
{
|
|
421
|
+
format_:
|
|
422
|
+
T.any(
|
|
423
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
|
|
424
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
|
|
425
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
|
|
426
|
+
),
|
|
427
|
+
voice:
|
|
428
|
+
T.any(
|
|
429
|
+
String,
|
|
430
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::OrSymbol
|
|
431
|
+
)
|
|
432
|
+
}
|
|
433
|
+
)
|
|
434
|
+
end
|
|
435
|
+
def to_hash
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
# The voice the model uses to respond. Voice cannot be changed during the session
|
|
439
|
+
# once the model has responded with audio at least once. Current voice options are
|
|
440
|
+
# `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
|
|
441
|
+
# and `cedar`. We recommend `marin` and `cedar` for best quality.
|
|
442
|
+
module Voice
|
|
443
|
+
extend OpenAI::Internal::Type::Union
|
|
444
|
+
|
|
445
|
+
Variants =
|
|
446
|
+
T.type_alias do
|
|
447
|
+
T.any(
|
|
448
|
+
String,
|
|
449
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
450
|
+
)
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
sig do
|
|
454
|
+
override.returns(
|
|
455
|
+
T::Array[
|
|
456
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::Variants
|
|
457
|
+
]
|
|
458
|
+
)
|
|
459
|
+
end
|
|
460
|
+
def self.variants
|
|
461
|
+
end
|
|
462
|
+
|
|
463
|
+
TaggedSymbol =
|
|
464
|
+
T.type_alias do
|
|
465
|
+
T.all(
|
|
466
|
+
Symbol,
|
|
467
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice
|
|
468
|
+
)
|
|
469
|
+
end
|
|
470
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
471
|
+
|
|
472
|
+
ALLOY =
|
|
473
|
+
T.let(
|
|
474
|
+
:alloy,
|
|
475
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
476
|
+
)
|
|
477
|
+
ASH =
|
|
478
|
+
T.let(
|
|
479
|
+
:ash,
|
|
480
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
481
|
+
)
|
|
482
|
+
BALLAD =
|
|
483
|
+
T.let(
|
|
484
|
+
:ballad,
|
|
485
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
486
|
+
)
|
|
487
|
+
CORAL =
|
|
488
|
+
T.let(
|
|
489
|
+
:coral,
|
|
490
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
491
|
+
)
|
|
492
|
+
ECHO =
|
|
493
|
+
T.let(
|
|
494
|
+
:echo,
|
|
495
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
496
|
+
)
|
|
497
|
+
SAGE =
|
|
498
|
+
T.let(
|
|
499
|
+
:sage,
|
|
500
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
501
|
+
)
|
|
502
|
+
SHIMMER =
|
|
503
|
+
T.let(
|
|
504
|
+
:shimmer,
|
|
505
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
506
|
+
)
|
|
507
|
+
VERSE =
|
|
508
|
+
T.let(
|
|
509
|
+
:verse,
|
|
510
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
511
|
+
)
|
|
512
|
+
MARIN =
|
|
513
|
+
T.let(
|
|
514
|
+
:marin,
|
|
515
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
516
|
+
)
|
|
517
|
+
CEDAR =
|
|
518
|
+
T.let(
|
|
519
|
+
:cedar,
|
|
520
|
+
OpenAI::Realtime::RealtimeResponse::Audio::Output::Voice::TaggedSymbol
|
|
521
|
+
)
|
|
522
|
+
end
|
|
523
|
+
end
|
|
524
|
+
end
|
|
525
|
+
|
|
526
|
+
# Maximum number of output tokens for a single assistant response, inclusive of
|
|
527
|
+
# tool calls, that was used in this response.
|
|
528
|
+
module MaxOutputTokens
|
|
529
|
+
extend OpenAI::Internal::Type::Union
|
|
530
|
+
|
|
531
|
+
Variants = T.type_alias { T.any(Integer, Symbol) }
|
|
372
532
|
|
|
373
533
|
sig do
|
|
374
534
|
override.returns(
|
|
375
535
|
T::Array[
|
|
376
|
-
OpenAI::Realtime::RealtimeResponse::
|
|
536
|
+
OpenAI::Realtime::RealtimeResponse::MaxOutputTokens::Variants
|
|
377
537
|
]
|
|
378
538
|
)
|
|
379
539
|
end
|
|
380
|
-
def self.
|
|
540
|
+
def self.variants
|
|
381
541
|
end
|
|
382
542
|
end
|
|
383
543
|
|
|
@@ -406,39 +566,30 @@ module OpenAI
|
|
|
406
566
|
end
|
|
407
567
|
end
|
|
408
568
|
|
|
409
|
-
|
|
410
|
-
module OutputAudioFormat
|
|
569
|
+
module OutputModality
|
|
411
570
|
extend OpenAI::Internal::Type::Enum
|
|
412
571
|
|
|
413
572
|
TaggedSymbol =
|
|
414
573
|
T.type_alias do
|
|
415
|
-
T.all(
|
|
416
|
-
Symbol,
|
|
417
|
-
OpenAI::Realtime::RealtimeResponse::OutputAudioFormat
|
|
418
|
-
)
|
|
574
|
+
T.all(Symbol, OpenAI::Realtime::RealtimeResponse::OutputModality)
|
|
419
575
|
end
|
|
420
576
|
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
421
577
|
|
|
422
|
-
|
|
423
|
-
T.let(
|
|
424
|
-
:pcm16,
|
|
425
|
-
OpenAI::Realtime::RealtimeResponse::OutputAudioFormat::TaggedSymbol
|
|
426
|
-
)
|
|
427
|
-
G711_ULAW =
|
|
578
|
+
TEXT =
|
|
428
579
|
T.let(
|
|
429
|
-
:
|
|
430
|
-
OpenAI::Realtime::RealtimeResponse::
|
|
580
|
+
:text,
|
|
581
|
+
OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol
|
|
431
582
|
)
|
|
432
|
-
|
|
583
|
+
AUDIO =
|
|
433
584
|
T.let(
|
|
434
|
-
:
|
|
435
|
-
OpenAI::Realtime::RealtimeResponse::
|
|
585
|
+
:audio,
|
|
586
|
+
OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol
|
|
436
587
|
)
|
|
437
588
|
|
|
438
589
|
sig do
|
|
439
590
|
override.returns(
|
|
440
591
|
T::Array[
|
|
441
|
-
OpenAI::Realtime::RealtimeResponse::
|
|
592
|
+
OpenAI::Realtime::RealtimeResponse::OutputModality::TaggedSymbol
|
|
442
593
|
]
|
|
443
594
|
)
|
|
444
595
|
end
|
|
@@ -491,82 +642,6 @@ module OpenAI
|
|
|
491
642
|
def self.values
|
|
492
643
|
end
|
|
493
644
|
end
|
|
494
|
-
|
|
495
|
-
# The voice the model used to respond. Current voice options are `alloy`, `ash`,
|
|
496
|
-
# `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
|
|
497
|
-
module Voice
|
|
498
|
-
extend OpenAI::Internal::Type::Union
|
|
499
|
-
|
|
500
|
-
Variants =
|
|
501
|
-
T.type_alias do
|
|
502
|
-
T.any(
|
|
503
|
-
String,
|
|
504
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
505
|
-
)
|
|
506
|
-
end
|
|
507
|
-
|
|
508
|
-
sig do
|
|
509
|
-
override.returns(
|
|
510
|
-
T::Array[OpenAI::Realtime::RealtimeResponse::Voice::Variants]
|
|
511
|
-
)
|
|
512
|
-
end
|
|
513
|
-
def self.variants
|
|
514
|
-
end
|
|
515
|
-
|
|
516
|
-
TaggedSymbol =
|
|
517
|
-
T.type_alias do
|
|
518
|
-
T.all(Symbol, OpenAI::Realtime::RealtimeResponse::Voice)
|
|
519
|
-
end
|
|
520
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
521
|
-
|
|
522
|
-
ALLOY =
|
|
523
|
-
T.let(
|
|
524
|
-
:alloy,
|
|
525
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
526
|
-
)
|
|
527
|
-
ASH =
|
|
528
|
-
T.let(:ash, OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol)
|
|
529
|
-
BALLAD =
|
|
530
|
-
T.let(
|
|
531
|
-
:ballad,
|
|
532
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
533
|
-
)
|
|
534
|
-
CORAL =
|
|
535
|
-
T.let(
|
|
536
|
-
:coral,
|
|
537
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
538
|
-
)
|
|
539
|
-
ECHO =
|
|
540
|
-
T.let(
|
|
541
|
-
:echo,
|
|
542
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
543
|
-
)
|
|
544
|
-
SAGE =
|
|
545
|
-
T.let(
|
|
546
|
-
:sage,
|
|
547
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
548
|
-
)
|
|
549
|
-
SHIMMER =
|
|
550
|
-
T.let(
|
|
551
|
-
:shimmer,
|
|
552
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
553
|
-
)
|
|
554
|
-
VERSE =
|
|
555
|
-
T.let(
|
|
556
|
-
:verse,
|
|
557
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
558
|
-
)
|
|
559
|
-
MARIN =
|
|
560
|
-
T.let(
|
|
561
|
-
:marin,
|
|
562
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
563
|
-
)
|
|
564
|
-
CEDAR =
|
|
565
|
-
T.let(
|
|
566
|
-
:cedar,
|
|
567
|
-
OpenAI::Realtime::RealtimeResponse::Voice::TaggedSymbol
|
|
568
|
-
)
|
|
569
|
-
end
|
|
570
645
|
end
|
|
571
646
|
end
|
|
572
647
|
end
|