openai 0.23.1 → 0.23.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/lib/openai/models/realtime/input_audio_buffer_timeout_triggered.rb +25 -5
- data/lib/openai/models/realtime/realtime_audio_config_input.rb +14 -11
- data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +173 -117
- data/lib/openai/models/realtime/realtime_server_event.rb +13 -1
- data/lib/openai/models/realtime/realtime_session.rb +179 -118
- data/lib/openai/models/realtime/realtime_session_create_response.rb +184 -122
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +16 -11
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +175 -117
- data/lib/openai/models/responses/response.rb +8 -8
- data/lib/openai/models/responses/response_create_params.rb +8 -8
- data/lib/openai/version.rb +1 -1
- data/rbi/openai/models/realtime/input_audio_buffer_timeout_triggered.rbi +24 -5
- data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +44 -28
- data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +264 -203
- data/rbi/openai/models/realtime/realtime_session.rbi +306 -231
- data/rbi/openai/models/realtime/realtime_session_create_response.rbi +298 -232
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +39 -28
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +264 -200
- data/rbi/openai/models/responses/response.rbi +12 -12
- data/rbi/openai/models/responses/response_create_params.rbi +12 -12
- data/rbi/openai/resources/responses.rbi +8 -8
- data/sig/openai/models/realtime/realtime_audio_config_input.rbs +4 -8
- data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +91 -65
- data/sig/openai/models/realtime/realtime_session.rbs +95 -69
- data/sig/openai/models/realtime/realtime_session_create_response.rbs +95 -73
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +4 -8
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +91 -65
- metadata +2 -2
@@ -3,259 +3,320 @@
|
|
3
3
|
module OpenAI
|
4
4
|
module Models
|
5
5
|
module Realtime
|
6
|
-
|
7
|
-
|
6
|
+
# Configuration for turn detection, either Server VAD or Semantic VAD. This can be
|
7
|
+
# set to `null` to turn off, in which case the client must manually trigger model
|
8
|
+
# response.
|
9
|
+
#
|
10
|
+
# Server VAD means that the model will detect the start and end of speech based on
|
11
|
+
# audio volume and respond at the end of user speech.
|
12
|
+
#
|
13
|
+
# Semantic VAD is more advanced and uses a turn detection model (in conjunction
|
14
|
+
# with VAD) to semantically estimate whether the user has finished speaking, then
|
15
|
+
# dynamically sets a timeout based on this probability. For example, if user audio
|
16
|
+
# trails off with "uhhm", the model will score a low probability of turn end and
|
17
|
+
# wait longer for the user to continue speaking. This can be useful for more
|
18
|
+
# natural conversations, but may have a higher latency.
|
19
|
+
module RealtimeAudioInputTurnDetection
|
20
|
+
extend OpenAI::Internal::Type::Union
|
21
|
+
|
22
|
+
Variants =
|
8
23
|
T.type_alias do
|
9
24
|
T.any(
|
10
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection,
|
11
|
-
OpenAI::
|
25
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::ServerVad,
|
26
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad
|
12
27
|
)
|
13
28
|
end
|
14
29
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
24
|
-
# will wait longer for the user to continue speaking, `high` will respond more
|
25
|
-
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
26
|
-
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
27
|
-
sig do
|
28
|
-
returns(
|
29
|
-
T.nilable(
|
30
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol
|
31
|
-
)
|
32
|
-
)
|
33
|
-
end
|
34
|
-
attr_reader :eagerness
|
35
|
-
|
36
|
-
sig do
|
37
|
-
params(
|
38
|
-
eagerness:
|
39
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol
|
40
|
-
).void
|
41
|
-
end
|
42
|
-
attr_writer :eagerness
|
43
|
-
|
44
|
-
# Optional idle timeout after which turn detection will auto-timeout when no
|
45
|
-
# additional audio is received and emits a `timeout_triggered` event.
|
46
|
-
sig { returns(T.nilable(Integer)) }
|
47
|
-
attr_accessor :idle_timeout_ms
|
48
|
-
|
49
|
-
# Whether or not to automatically interrupt any ongoing response with output to
|
50
|
-
# the default conversation (i.e. `conversation` of `auto`) when a VAD start event
|
51
|
-
# occurs.
|
52
|
-
sig { returns(T.nilable(T::Boolean)) }
|
53
|
-
attr_reader :interrupt_response
|
54
|
-
|
55
|
-
sig { params(interrupt_response: T::Boolean).void }
|
56
|
-
attr_writer :interrupt_response
|
57
|
-
|
58
|
-
# Used only for `server_vad` mode. Amount of audio to include before the VAD
|
59
|
-
# detected speech (in milliseconds). Defaults to 300ms.
|
60
|
-
sig { returns(T.nilable(Integer)) }
|
61
|
-
attr_reader :prefix_padding_ms
|
62
|
-
|
63
|
-
sig { params(prefix_padding_ms: Integer).void }
|
64
|
-
attr_writer :prefix_padding_ms
|
65
|
-
|
66
|
-
# Used only for `server_vad` mode. Duration of silence to detect speech stop (in
|
67
|
-
# milliseconds). Defaults to 500ms. With shorter values the model will respond
|
68
|
-
# more quickly, but may jump in on short pauses from the user.
|
69
|
-
sig { returns(T.nilable(Integer)) }
|
70
|
-
attr_reader :silence_duration_ms
|
30
|
+
class ServerVad < OpenAI::Internal::Type::BaseModel
|
31
|
+
OrHash =
|
32
|
+
T.type_alias do
|
33
|
+
T.any(
|
34
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::ServerVad,
|
35
|
+
OpenAI::Internal::AnyHash
|
36
|
+
)
|
37
|
+
end
|
71
38
|
|
72
|
-
|
73
|
-
|
39
|
+
# Type of turn detection, `server_vad` to turn on simple Server VAD.
|
40
|
+
sig { returns(Symbol) }
|
41
|
+
attr_accessor :type
|
74
42
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
attr_reader :threshold
|
43
|
+
# Whether or not to automatically generate a response when a VAD stop event
|
44
|
+
# occurs.
|
45
|
+
sig { returns(T.nilable(T::Boolean)) }
|
46
|
+
attr_reader :create_response
|
80
47
|
|
81
|
-
|
82
|
-
|
48
|
+
sig { params(create_response: T::Boolean).void }
|
49
|
+
attr_writer :create_response
|
83
50
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
51
|
+
# Optional timeout after which a model response will be triggered automatically.
|
52
|
+
# This is useful for situations in which a long pause from the user is unexpected,
|
53
|
+
# such as a phone call. The model will effectively prompt the user to continue the
|
54
|
+
# conversation based on the current context.
|
55
|
+
#
|
56
|
+
# The timeout value will be applied after the last model response's audio has
|
57
|
+
# finished playing, i.e. it's set to the `response.done` time plus audio playback
|
58
|
+
# duration.
|
59
|
+
#
|
60
|
+
# An `input_audio_buffer.timeout_triggered` event (plus events associated with the
|
61
|
+
# Response) will be emitted when the timeout is reached. Idle timeout is currently
|
62
|
+
# only supported for `server_vad` mode.
|
63
|
+
sig { returns(T.nilable(Integer)) }
|
64
|
+
attr_accessor :idle_timeout_ms
|
93
65
|
|
94
|
-
sig do
|
95
|
-
params(
|
96
|
-
type:
|
97
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
|
98
|
-
).void
|
99
|
-
end
|
100
|
-
attr_writer :type
|
101
|
-
|
102
|
-
# Configuration for turn detection, either Server VAD or Semantic VAD. This can be
|
103
|
-
# set to `null` to turn off, in which case the client must manually trigger model
|
104
|
-
# response. Server VAD means that the model will detect the start and end of
|
105
|
-
# speech based on audio volume and respond at the end of user speech. Semantic VAD
|
106
|
-
# is more advanced and uses a turn detection model (in conjunction with VAD) to
|
107
|
-
# semantically estimate whether the user has finished speaking, then dynamically
|
108
|
-
# sets a timeout based on this probability. For example, if user audio trails off
|
109
|
-
# with "uhhm", the model will score a low probability of turn end and wait longer
|
110
|
-
# for the user to continue speaking. This can be useful for more natural
|
111
|
-
# conversations, but may have a higher latency.
|
112
|
-
sig do
|
113
|
-
params(
|
114
|
-
create_response: T::Boolean,
|
115
|
-
eagerness:
|
116
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol,
|
117
|
-
idle_timeout_ms: T.nilable(Integer),
|
118
|
-
interrupt_response: T::Boolean,
|
119
|
-
prefix_padding_ms: Integer,
|
120
|
-
silence_duration_ms: Integer,
|
121
|
-
threshold: Float,
|
122
|
-
type:
|
123
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
|
124
|
-
).returns(T.attached_class)
|
125
|
-
end
|
126
|
-
def self.new(
|
127
|
-
# Whether or not to automatically generate a response when a VAD stop event
|
128
|
-
# occurs.
|
129
|
-
create_response: nil,
|
130
|
-
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
131
|
-
# will wait longer for the user to continue speaking, `high` will respond more
|
132
|
-
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
133
|
-
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
134
|
-
eagerness: nil,
|
135
|
-
# Optional idle timeout after which turn detection will auto-timeout when no
|
136
|
-
# additional audio is received and emits a `timeout_triggered` event.
|
137
|
-
idle_timeout_ms: nil,
|
138
66
|
# Whether or not to automatically interrupt any ongoing response with output to
|
139
67
|
# the default conversation (i.e. `conversation` of `auto`) when a VAD start event
|
140
68
|
# occurs.
|
141
|
-
|
69
|
+
sig { returns(T.nilable(T::Boolean)) }
|
70
|
+
attr_reader :interrupt_response
|
71
|
+
|
72
|
+
sig { params(interrupt_response: T::Boolean).void }
|
73
|
+
attr_writer :interrupt_response
|
74
|
+
|
142
75
|
# Used only for `server_vad` mode. Amount of audio to include before the VAD
|
143
76
|
# detected speech (in milliseconds). Defaults to 300ms.
|
144
|
-
|
77
|
+
sig { returns(T.nilable(Integer)) }
|
78
|
+
attr_reader :prefix_padding_ms
|
79
|
+
|
80
|
+
sig { params(prefix_padding_ms: Integer).void }
|
81
|
+
attr_writer :prefix_padding_ms
|
82
|
+
|
145
83
|
# Used only for `server_vad` mode. Duration of silence to detect speech stop (in
|
146
84
|
# milliseconds). Defaults to 500ms. With shorter values the model will respond
|
147
85
|
# more quickly, but may jump in on short pauses from the user.
|
148
|
-
|
86
|
+
sig { returns(T.nilable(Integer)) }
|
87
|
+
attr_reader :silence_duration_ms
|
88
|
+
|
89
|
+
sig { params(silence_duration_ms: Integer).void }
|
90
|
+
attr_writer :silence_duration_ms
|
91
|
+
|
149
92
|
# Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
|
150
93
|
# defaults to 0.5. A higher threshold will require louder audio to activate the
|
151
94
|
# model, and thus might perform better in noisy environments.
|
152
|
-
|
153
|
-
|
154
|
-
type: nil
|
155
|
-
)
|
156
|
-
end
|
95
|
+
sig { returns(T.nilable(Float)) }
|
96
|
+
attr_reader :threshold
|
157
97
|
|
158
|
-
|
159
|
-
|
160
|
-
|
98
|
+
sig { params(threshold: Float).void }
|
99
|
+
attr_writer :threshold
|
100
|
+
|
101
|
+
# Server-side voice activity detection (VAD) which flips on when user speech is
|
102
|
+
# detected and off after a period of silence.
|
103
|
+
sig do
|
104
|
+
params(
|
161
105
|
create_response: T::Boolean,
|
162
|
-
eagerness:
|
163
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol,
|
164
106
|
idle_timeout_ms: T.nilable(Integer),
|
165
107
|
interrupt_response: T::Boolean,
|
166
108
|
prefix_padding_ms: Integer,
|
167
109
|
silence_duration_ms: Integer,
|
168
110
|
threshold: Float,
|
169
|
-
type:
|
170
|
-
|
171
|
-
|
111
|
+
type: Symbol
|
112
|
+
).returns(T.attached_class)
|
113
|
+
end
|
114
|
+
def self.new(
|
115
|
+
# Whether or not to automatically generate a response when a VAD stop event
|
116
|
+
# occurs.
|
117
|
+
create_response: nil,
|
118
|
+
# Optional timeout after which a model response will be triggered automatically.
|
119
|
+
# This is useful for situations in which a long pause from the user is unexpected,
|
120
|
+
# such as a phone call. The model will effectively prompt the user to continue the
|
121
|
+
# conversation based on the current context.
|
122
|
+
#
|
123
|
+
# The timeout value will be applied after the last model response's audio has
|
124
|
+
# finished playing, i.e. it's set to the `response.done` time plus audio playback
|
125
|
+
# duration.
|
126
|
+
#
|
127
|
+
# An `input_audio_buffer.timeout_triggered` event (plus events associated with the
|
128
|
+
# Response) will be emitted when the timeout is reached. Idle timeout is currently
|
129
|
+
# only supported for `server_vad` mode.
|
130
|
+
idle_timeout_ms: nil,
|
131
|
+
# Whether or not to automatically interrupt any ongoing response with output to
|
132
|
+
# the default conversation (i.e. `conversation` of `auto`) when a VAD start event
|
133
|
+
# occurs.
|
134
|
+
interrupt_response: nil,
|
135
|
+
# Used only for `server_vad` mode. Amount of audio to include before the VAD
|
136
|
+
# detected speech (in milliseconds). Defaults to 300ms.
|
137
|
+
prefix_padding_ms: nil,
|
138
|
+
# Used only for `server_vad` mode. Duration of silence to detect speech stop (in
|
139
|
+
# milliseconds). Defaults to 500ms. With shorter values the model will respond
|
140
|
+
# more quickly, but may jump in on short pauses from the user.
|
141
|
+
silence_duration_ms: nil,
|
142
|
+
# Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
|
143
|
+
# defaults to 0.5. A higher threshold will require louder audio to activate the
|
144
|
+
# model, and thus might perform better in noisy environments.
|
145
|
+
threshold: nil,
|
146
|
+
# Type of turn detection, `server_vad` to turn on simple Server VAD.
|
147
|
+
type: :server_vad
|
172
148
|
)
|
173
|
-
|
174
|
-
def to_hash
|
175
|
-
end
|
149
|
+
end
|
176
150
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
151
|
+
sig do
|
152
|
+
override.returns(
|
153
|
+
{
|
154
|
+
type: Symbol,
|
155
|
+
create_response: T::Boolean,
|
156
|
+
idle_timeout_ms: T.nilable(Integer),
|
157
|
+
interrupt_response: T::Boolean,
|
158
|
+
prefix_padding_ms: Integer,
|
159
|
+
silence_duration_ms: Integer,
|
160
|
+
threshold: Float
|
161
|
+
}
|
162
|
+
)
|
163
|
+
end
|
164
|
+
def to_hash
|
165
|
+
end
|
166
|
+
end
|
183
167
|
|
184
|
-
|
168
|
+
class SemanticVad < OpenAI::Internal::Type::BaseModel
|
169
|
+
OrHash =
|
185
170
|
T.type_alias do
|
186
|
-
T.
|
187
|
-
|
188
|
-
OpenAI::
|
171
|
+
T.any(
|
172
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad,
|
173
|
+
OpenAI::Internal::AnyHash
|
189
174
|
)
|
190
175
|
end
|
191
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
192
176
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
T.let(
|
205
|
-
:high,
|
206
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
|
207
|
-
)
|
208
|
-
AUTO =
|
209
|
-
T.let(
|
210
|
-
:auto,
|
211
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
|
212
|
-
)
|
177
|
+
# Type of turn detection, `semantic_vad` to turn on Semantic VAD.
|
178
|
+
sig { returns(Symbol) }
|
179
|
+
attr_accessor :type
|
180
|
+
|
181
|
+
# Whether or not to automatically generate a response when a VAD stop event
|
182
|
+
# occurs.
|
183
|
+
sig { returns(T.nilable(T::Boolean)) }
|
184
|
+
attr_reader :create_response
|
185
|
+
|
186
|
+
sig { params(create_response: T::Boolean).void }
|
187
|
+
attr_writer :create_response
|
213
188
|
|
189
|
+
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
190
|
+
# will wait longer for the user to continue speaking, `high` will respond more
|
191
|
+
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
192
|
+
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
214
193
|
sig do
|
215
|
-
|
216
|
-
T
|
217
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::
|
218
|
-
|
194
|
+
returns(
|
195
|
+
T.nilable(
|
196
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::OrSymbol
|
197
|
+
)
|
219
198
|
)
|
220
199
|
end
|
221
|
-
|
200
|
+
attr_reader :eagerness
|
201
|
+
|
202
|
+
sig do
|
203
|
+
params(
|
204
|
+
eagerness:
|
205
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::OrSymbol
|
206
|
+
).void
|
222
207
|
end
|
223
|
-
|
208
|
+
attr_writer :eagerness
|
224
209
|
|
225
|
-
|
226
|
-
|
227
|
-
|
210
|
+
# Whether or not to automatically interrupt any ongoing response with output to
|
211
|
+
# the default conversation (i.e. `conversation` of `auto`) when a VAD start event
|
212
|
+
# occurs.
|
213
|
+
sig { returns(T.nilable(T::Boolean)) }
|
214
|
+
attr_reader :interrupt_response
|
228
215
|
|
229
|
-
|
230
|
-
|
231
|
-
T.all(
|
232
|
-
Symbol,
|
233
|
-
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type
|
234
|
-
)
|
235
|
-
end
|
236
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
216
|
+
sig { params(interrupt_response: T::Boolean).void }
|
217
|
+
attr_writer :interrupt_response
|
237
218
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
:
|
246
|
-
|
247
|
-
)
|
219
|
+
# Server-side semantic turn detection which uses a model to determine when the
|
220
|
+
# user has finished speaking.
|
221
|
+
sig do
|
222
|
+
params(
|
223
|
+
create_response: T::Boolean,
|
224
|
+
eagerness:
|
225
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::OrSymbol,
|
226
|
+
interrupt_response: T::Boolean,
|
227
|
+
type: Symbol
|
228
|
+
).returns(T.attached_class)
|
229
|
+
end
|
230
|
+
def self.new(
|
231
|
+
# Whether or not to automatically generate a response when a VAD stop event
|
232
|
+
# occurs.
|
233
|
+
create_response: nil,
|
234
|
+
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
235
|
+
# will wait longer for the user to continue speaking, `high` will respond more
|
236
|
+
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
237
|
+
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
238
|
+
eagerness: nil,
|
239
|
+
# Whether or not to automatically interrupt any ongoing response with output to
|
240
|
+
# the default conversation (i.e. `conversation` of `auto`) when a VAD start event
|
241
|
+
# occurs.
|
242
|
+
interrupt_response: nil,
|
243
|
+
# Type of turn detection, `semantic_vad` to turn on Semantic VAD.
|
244
|
+
type: :semantic_vad
|
245
|
+
)
|
246
|
+
end
|
248
247
|
|
249
248
|
sig do
|
250
249
|
override.returns(
|
251
|
-
|
252
|
-
|
253
|
-
|
250
|
+
{
|
251
|
+
type: Symbol,
|
252
|
+
create_response: T::Boolean,
|
253
|
+
eagerness:
|
254
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::OrSymbol,
|
255
|
+
interrupt_response: T::Boolean
|
256
|
+
}
|
254
257
|
)
|
255
258
|
end
|
256
|
-
def
|
259
|
+
def to_hash
|
260
|
+
end
|
261
|
+
|
262
|
+
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
263
|
+
# will wait longer for the user to continue speaking, `high` will respond more
|
264
|
+
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
265
|
+
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
266
|
+
module Eagerness
|
267
|
+
extend OpenAI::Internal::Type::Enum
|
268
|
+
|
269
|
+
TaggedSymbol =
|
270
|
+
T.type_alias do
|
271
|
+
T.all(
|
272
|
+
Symbol,
|
273
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness
|
274
|
+
)
|
275
|
+
end
|
276
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
277
|
+
|
278
|
+
LOW =
|
279
|
+
T.let(
|
280
|
+
:low,
|
281
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::TaggedSymbol
|
282
|
+
)
|
283
|
+
MEDIUM =
|
284
|
+
T.let(
|
285
|
+
:medium,
|
286
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::TaggedSymbol
|
287
|
+
)
|
288
|
+
HIGH =
|
289
|
+
T.let(
|
290
|
+
:high,
|
291
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::TaggedSymbol
|
292
|
+
)
|
293
|
+
AUTO =
|
294
|
+
T.let(
|
295
|
+
:auto,
|
296
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::TaggedSymbol
|
297
|
+
)
|
298
|
+
|
299
|
+
sig do
|
300
|
+
override.returns(
|
301
|
+
T::Array[
|
302
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness::TaggedSymbol
|
303
|
+
]
|
304
|
+
)
|
305
|
+
end
|
306
|
+
def self.values
|
307
|
+
end
|
257
308
|
end
|
258
309
|
end
|
310
|
+
|
311
|
+
sig do
|
312
|
+
override.returns(
|
313
|
+
T::Array[
|
314
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Variants
|
315
|
+
]
|
316
|
+
)
|
317
|
+
end
|
318
|
+
def self.variants
|
319
|
+
end
|
259
320
|
end
|
260
321
|
end
|
261
322
|
end
|