openai 0.22.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/lib/openai/models/realtime/audio_transcription.rb +60 -0
- data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
- data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
- data/lib/openai/models/realtime/conversation_item.rb +1 -1
- data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
- data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
- data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
- data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
- data/lib/openai/models/realtime/models.rb +58 -0
- data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
- data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
- data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
- data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
- data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
- data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
- data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
- data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
- data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
- data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
- data/lib/openai/models/realtime/realtime_response.rb +117 -107
- data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
- data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
- data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
- data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
- data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
- data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
- data/lib/openai/models/realtime/realtime_session.rb +9 -125
- data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
- data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
- data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
- data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
- data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
- data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
- data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
- data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
- data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
- data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
- data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
- data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
- data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
- data/lib/openai/models/realtime/response_create_event.rb +18 -348
- data/lib/openai/models/realtime/response_done_event.rb +7 -0
- data/lib/openai/models/realtime/session_created_event.rb +20 -4
- data/lib/openai/models/realtime/session_update_event.rb +36 -12
- data/lib/openai/models/realtime/session_updated_event.rb +20 -4
- data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
- data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
- data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
- data/lib/openai/resources/realtime/client_secrets.rb +2 -3
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +19 -1
- data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
- data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
- data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
- data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
- data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
- data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
- data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
- data/rbi/openai/models/realtime/models.rbi +97 -0
- data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
- data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
- data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
- data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
- data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
- data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
- data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
- data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
- data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
- data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
- data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
- data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
- data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
- data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
- data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
- data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
- data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
- data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
- data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
- data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
- data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
- data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
- data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
- data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
- data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
- data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
- data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
- data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
- data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
- data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
- data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
- data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
- data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
- data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
- data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
- data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
- data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
- data/sig/openai/models/realtime/models.rbs +57 -0
- data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
- data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
- data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
- data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
- data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
- data/sig/openai/models/realtime/realtime_response.rbs +103 -82
- data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
- data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
- data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
- data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
- data/sig/openai/models/realtime/realtime_session.rbs +16 -106
- data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
- data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
- data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
- data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
- data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
- data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
- data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
- data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
- data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
- data/sig/openai/models/realtime/response_create_event.rbs +6 -249
- data/sig/openai/models/realtime/session_created_event.rbs +14 -4
- data/sig/openai/models/realtime/session_update_event.rbs +14 -4
- data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
- data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
- data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
- data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
- metadata +59 -5
- data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
- data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
- data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# typed: strong
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
# The audio format: PCM (only a 24kHz sample rate is supported), G.711 μ-law, or G.711 A-law.
|
|
7
|
+
module RealtimeAudioFormats
|
|
8
|
+
extend OpenAI::Internal::Type::Union
|
|
9
|
+
|
|
10
|
+
Variants =
|
|
11
|
+
T.type_alias do
|
|
12
|
+
T.any(
|
|
13
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
|
|
14
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
|
|
15
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
|
|
16
|
+
)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
class AudioPCM < OpenAI::Internal::Type::BaseModel
|
|
20
|
+
OrHash =
|
|
21
|
+
T.type_alias do
|
|
22
|
+
T.any(
|
|
23
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
|
|
24
|
+
OpenAI::Internal::AnyHash
|
|
25
|
+
)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# The sample rate of the audio. Always `24000`.
|
|
29
|
+
sig do
|
|
30
|
+
returns(
|
|
31
|
+
T.nilable(
|
|
32
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger
|
|
33
|
+
)
|
|
34
|
+
)
|
|
35
|
+
end
|
|
36
|
+
attr_reader :rate
|
|
37
|
+
|
|
38
|
+
sig do
|
|
39
|
+
params(
|
|
40
|
+
rate:
|
|
41
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger
|
|
42
|
+
).void
|
|
43
|
+
end
|
|
44
|
+
attr_writer :rate
|
|
45
|
+
|
|
46
|
+
# The audio format. Always `audio/pcm`.
|
|
47
|
+
sig do
|
|
48
|
+
returns(
|
|
49
|
+
T.nilable(
|
|
50
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
|
|
51
|
+
)
|
|
52
|
+
)
|
|
53
|
+
end
|
|
54
|
+
attr_reader :type
|
|
55
|
+
|
|
56
|
+
sig do
|
|
57
|
+
params(
|
|
58
|
+
type:
|
|
59
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
|
|
60
|
+
).void
|
|
61
|
+
end
|
|
62
|
+
attr_writer :type
|
|
63
|
+
|
|
64
|
+
# The PCM audio format. Only a 24kHz sample rate is supported.
|
|
65
|
+
sig do
|
|
66
|
+
params(
|
|
67
|
+
rate:
|
|
68
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger,
|
|
69
|
+
type:
|
|
70
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
|
|
71
|
+
).returns(T.attached_class)
|
|
72
|
+
end
|
|
73
|
+
def self.new(
|
|
74
|
+
# The sample rate of the audio. Always `24000`.
|
|
75
|
+
rate: nil,
|
|
76
|
+
# The audio format. Always `audio/pcm`.
|
|
77
|
+
type: nil
|
|
78
|
+
)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
sig do
|
|
82
|
+
override.returns(
|
|
83
|
+
{
|
|
84
|
+
rate:
|
|
85
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::OrInteger,
|
|
86
|
+
type:
|
|
87
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::OrSymbol
|
|
88
|
+
}
|
|
89
|
+
)
|
|
90
|
+
end
|
|
91
|
+
def to_hash
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# The sample rate of the audio. Always `24000`.
|
|
95
|
+
module Rate
|
|
96
|
+
extend OpenAI::Internal::Type::Enum
|
|
97
|
+
|
|
98
|
+
TaggedInteger =
|
|
99
|
+
T.type_alias do
|
|
100
|
+
T.all(
|
|
101
|
+
Integer,
|
|
102
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate
|
|
103
|
+
)
|
|
104
|
+
end
|
|
105
|
+
OrInteger = T.type_alias { Integer }
|
|
106
|
+
|
|
107
|
+
RATE_24000 =
|
|
108
|
+
T.let(
|
|
109
|
+
24_000,
|
|
110
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::TaggedInteger
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
sig do
|
|
114
|
+
override.returns(
|
|
115
|
+
T::Array[
|
|
116
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate::TaggedInteger
|
|
117
|
+
]
|
|
118
|
+
)
|
|
119
|
+
end
|
|
120
|
+
def self.values
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# The audio format. Always `audio/pcm`.
|
|
125
|
+
module Type
|
|
126
|
+
extend OpenAI::Internal::Type::Enum
|
|
127
|
+
|
|
128
|
+
TaggedSymbol =
|
|
129
|
+
T.type_alias do
|
|
130
|
+
T.all(
|
|
131
|
+
Symbol,
|
|
132
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type
|
|
133
|
+
)
|
|
134
|
+
end
|
|
135
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
136
|
+
|
|
137
|
+
AUDIO_PCM =
|
|
138
|
+
T.let(
|
|
139
|
+
:"audio/pcm",
|
|
140
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::TaggedSymbol
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
sig do
|
|
144
|
+
override.returns(
|
|
145
|
+
T::Array[
|
|
146
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type::TaggedSymbol
|
|
147
|
+
]
|
|
148
|
+
)
|
|
149
|
+
end
|
|
150
|
+
def self.values
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
class AudioPCMU < OpenAI::Internal::Type::BaseModel
|
|
156
|
+
OrHash =
|
|
157
|
+
T.type_alias do
|
|
158
|
+
T.any(
|
|
159
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
|
|
160
|
+
OpenAI::Internal::AnyHash
|
|
161
|
+
)
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
# The audio format. Always `audio/pcmu`.
|
|
165
|
+
sig do
|
|
166
|
+
returns(
|
|
167
|
+
T.nilable(
|
|
168
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
|
|
169
|
+
)
|
|
170
|
+
)
|
|
171
|
+
end
|
|
172
|
+
attr_reader :type
|
|
173
|
+
|
|
174
|
+
sig do
|
|
175
|
+
params(
|
|
176
|
+
type:
|
|
177
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
|
|
178
|
+
).void
|
|
179
|
+
end
|
|
180
|
+
attr_writer :type
|
|
181
|
+
|
|
182
|
+
# The G.711 μ-law format.
|
|
183
|
+
sig do
|
|
184
|
+
params(
|
|
185
|
+
type:
|
|
186
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
|
|
187
|
+
).returns(T.attached_class)
|
|
188
|
+
end
|
|
189
|
+
def self.new(
|
|
190
|
+
# The audio format. Always `audio/pcmu`.
|
|
191
|
+
type: nil
|
|
192
|
+
)
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
sig do
|
|
196
|
+
override.returns(
|
|
197
|
+
{
|
|
198
|
+
type:
|
|
199
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::OrSymbol
|
|
200
|
+
}
|
|
201
|
+
)
|
|
202
|
+
end
|
|
203
|
+
def to_hash
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
# The audio format. Always `audio/pcmu`.
|
|
207
|
+
module Type
|
|
208
|
+
extend OpenAI::Internal::Type::Enum
|
|
209
|
+
|
|
210
|
+
TaggedSymbol =
|
|
211
|
+
T.type_alias do
|
|
212
|
+
T.all(
|
|
213
|
+
Symbol,
|
|
214
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type
|
|
215
|
+
)
|
|
216
|
+
end
|
|
217
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
218
|
+
|
|
219
|
+
AUDIO_PCMU =
|
|
220
|
+
T.let(
|
|
221
|
+
:"audio/pcmu",
|
|
222
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::TaggedSymbol
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
sig do
|
|
226
|
+
override.returns(
|
|
227
|
+
T::Array[
|
|
228
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type::TaggedSymbol
|
|
229
|
+
]
|
|
230
|
+
)
|
|
231
|
+
end
|
|
232
|
+
def self.values
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
class AudioPCMA < OpenAI::Internal::Type::BaseModel
|
|
238
|
+
OrHash =
|
|
239
|
+
T.type_alias do
|
|
240
|
+
T.any(
|
|
241
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA,
|
|
242
|
+
OpenAI::Internal::AnyHash
|
|
243
|
+
)
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
# The audio format. Always `audio/pcma`.
|
|
247
|
+
sig do
|
|
248
|
+
returns(
|
|
249
|
+
T.nilable(
|
|
250
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
|
|
251
|
+
)
|
|
252
|
+
)
|
|
253
|
+
end
|
|
254
|
+
attr_reader :type
|
|
255
|
+
|
|
256
|
+
sig do
|
|
257
|
+
params(
|
|
258
|
+
type:
|
|
259
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
|
|
260
|
+
).void
|
|
261
|
+
end
|
|
262
|
+
attr_writer :type
|
|
263
|
+
|
|
264
|
+
# The G.711 A-law format.
|
|
265
|
+
sig do
|
|
266
|
+
params(
|
|
267
|
+
type:
|
|
268
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
|
|
269
|
+
).returns(T.attached_class)
|
|
270
|
+
end
|
|
271
|
+
def self.new(
|
|
272
|
+
# The audio format. Always `audio/pcma`.
|
|
273
|
+
type: nil
|
|
274
|
+
)
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
sig do
|
|
278
|
+
override.returns(
|
|
279
|
+
{
|
|
280
|
+
type:
|
|
281
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::OrSymbol
|
|
282
|
+
}
|
|
283
|
+
)
|
|
284
|
+
end
|
|
285
|
+
def to_hash
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
# The audio format. Always `audio/pcma`.
|
|
289
|
+
module Type
|
|
290
|
+
extend OpenAI::Internal::Type::Enum
|
|
291
|
+
|
|
292
|
+
TaggedSymbol =
|
|
293
|
+
T.type_alias do
|
|
294
|
+
T.all(
|
|
295
|
+
Symbol,
|
|
296
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type
|
|
297
|
+
)
|
|
298
|
+
end
|
|
299
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
300
|
+
|
|
301
|
+
AUDIO_PCMA =
|
|
302
|
+
T.let(
|
|
303
|
+
:"audio/pcma",
|
|
304
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::TaggedSymbol
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
sig do
|
|
308
|
+
override.returns(
|
|
309
|
+
T::Array[
|
|
310
|
+
OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type::TaggedSymbol
|
|
311
|
+
]
|
|
312
|
+
)
|
|
313
|
+
end
|
|
314
|
+
def self.values
|
|
315
|
+
end
|
|
316
|
+
end
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
sig do
|
|
320
|
+
override.returns(
|
|
321
|
+
T::Array[OpenAI::Realtime::RealtimeAudioFormats::Variants]
|
|
322
|
+
)
|
|
323
|
+
end
|
|
324
|
+
def self.variants
|
|
325
|
+
end
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
end
|
|
329
|
+
end
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# typed: strong
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel
|
|
7
|
+
OrHash =
|
|
8
|
+
T.type_alias do
|
|
9
|
+
T.any(
|
|
10
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection,
|
|
11
|
+
OpenAI::Internal::AnyHash
|
|
12
|
+
)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Whether or not to automatically generate a response when a VAD stop event
|
|
16
|
+
# occurs.
|
|
17
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
18
|
+
attr_reader :create_response
|
|
19
|
+
|
|
20
|
+
sig { params(create_response: T::Boolean).void }
|
|
21
|
+
attr_writer :create_response
|
|
22
|
+
|
|
23
|
+
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
|
24
|
+
# will wait longer for the user to continue speaking, `high` will respond more
|
|
25
|
+
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
|
26
|
+
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
|
27
|
+
sig do
|
|
28
|
+
returns(
|
|
29
|
+
T.nilable(
|
|
30
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol
|
|
31
|
+
)
|
|
32
|
+
)
|
|
33
|
+
end
|
|
34
|
+
attr_reader :eagerness
|
|
35
|
+
|
|
36
|
+
sig do
|
|
37
|
+
params(
|
|
38
|
+
eagerness:
|
|
39
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol
|
|
40
|
+
).void
|
|
41
|
+
end
|
|
42
|
+
attr_writer :eagerness
|
|
43
|
+
|
|
44
|
+
# Optional idle timeout after which turn detection will auto-timeout when no
|
|
45
|
+
# additional audio is received.
|
|
46
|
+
sig { returns(T.nilable(Integer)) }
|
|
47
|
+
attr_accessor :idle_timeout_ms
|
|
48
|
+
|
|
49
|
+
# Whether or not to automatically interrupt any ongoing response with output to
|
|
50
|
+
# the default conversation (i.e. `conversation` of `auto`) when a VAD start event
|
|
51
|
+
# occurs.
|
|
52
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
53
|
+
attr_reader :interrupt_response
|
|
54
|
+
|
|
55
|
+
sig { params(interrupt_response: T::Boolean).void }
|
|
56
|
+
attr_writer :interrupt_response
|
|
57
|
+
|
|
58
|
+
# Used only for `server_vad` mode. Amount of audio to include before the VAD
|
|
59
|
+
# detected speech (in milliseconds). Defaults to 300ms.
|
|
60
|
+
sig { returns(T.nilable(Integer)) }
|
|
61
|
+
attr_reader :prefix_padding_ms
|
|
62
|
+
|
|
63
|
+
sig { params(prefix_padding_ms: Integer).void }
|
|
64
|
+
attr_writer :prefix_padding_ms
|
|
65
|
+
|
|
66
|
+
# Used only for `server_vad` mode. Duration of silence to detect speech stop (in
|
|
67
|
+
# milliseconds). Defaults to 500ms. With shorter values the model will respond
|
|
68
|
+
# more quickly, but may jump in on short pauses from the user.
|
|
69
|
+
sig { returns(T.nilable(Integer)) }
|
|
70
|
+
attr_reader :silence_duration_ms
|
|
71
|
+
|
|
72
|
+
sig { params(silence_duration_ms: Integer).void }
|
|
73
|
+
attr_writer :silence_duration_ms
|
|
74
|
+
|
|
75
|
+
# Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0); this
|
|
76
|
+
# defaults to 0.5. A higher threshold will require louder audio to activate the
|
|
77
|
+
# model, and thus might perform better in noisy environments.
|
|
78
|
+
sig { returns(T.nilable(Float)) }
|
|
79
|
+
attr_reader :threshold
|
|
80
|
+
|
|
81
|
+
sig { params(threshold: Float).void }
|
|
82
|
+
attr_writer :threshold
|
|
83
|
+
|
|
84
|
+
# Type of turn detection.
|
|
85
|
+
sig do
|
|
86
|
+
returns(
|
|
87
|
+
T.nilable(
|
|
88
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
|
|
89
|
+
)
|
|
90
|
+
)
|
|
91
|
+
end
|
|
92
|
+
attr_reader :type
|
|
93
|
+
|
|
94
|
+
sig do
|
|
95
|
+
params(
|
|
96
|
+
type:
|
|
97
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
|
|
98
|
+
).void
|
|
99
|
+
end
|
|
100
|
+
attr_writer :type
|
|
101
|
+
|
|
102
|
+
# Configuration for turn detection, either Server VAD or Semantic VAD. This can be
|
|
103
|
+
# set to `null` to turn off, in which case the client must manually trigger model
|
|
104
|
+
# response. Server VAD means that the model will detect the start and end of
|
|
105
|
+
# speech based on audio volume and respond at the end of user speech. Semantic VAD
|
|
106
|
+
# is more advanced and uses a turn detection model (in conjunction with VAD) to
|
|
107
|
+
# semantically estimate whether the user has finished speaking, then dynamically
|
|
108
|
+
# sets a timeout based on this probability. For example, if user audio trails off
|
|
109
|
+
# with "uhhm", the model will score a low probability of turn end and wait longer
|
|
110
|
+
# for the user to continue speaking. This can be useful for more natural
|
|
111
|
+
# conversations, but may have a higher latency.
|
|
112
|
+
sig do
|
|
113
|
+
params(
|
|
114
|
+
create_response: T::Boolean,
|
|
115
|
+
eagerness:
|
|
116
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol,
|
|
117
|
+
idle_timeout_ms: T.nilable(Integer),
|
|
118
|
+
interrupt_response: T::Boolean,
|
|
119
|
+
prefix_padding_ms: Integer,
|
|
120
|
+
silence_duration_ms: Integer,
|
|
121
|
+
threshold: Float,
|
|
122
|
+
type:
|
|
123
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
|
|
124
|
+
).returns(T.attached_class)
|
|
125
|
+
end
|
|
126
|
+
def self.new(
|
|
127
|
+
# Whether or not to automatically generate a response when a VAD stop event
|
|
128
|
+
# occurs.
|
|
129
|
+
create_response: nil,
|
|
130
|
+
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
|
131
|
+
# will wait longer for the user to continue speaking, `high` will respond more
|
|
132
|
+
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
|
133
|
+
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
|
134
|
+
eagerness: nil,
|
|
135
|
+
# Optional idle timeout after which turn detection will auto-timeout when no
|
|
136
|
+
# additional audio is received.
|
|
137
|
+
idle_timeout_ms: nil,
|
|
138
|
+
# Whether or not to automatically interrupt any ongoing response with output to
|
|
139
|
+
# the default conversation (i.e. `conversation` of `auto`) when a VAD start event
|
|
140
|
+
# occurs.
|
|
141
|
+
interrupt_response: nil,
|
|
142
|
+
# Used only for `server_vad` mode. Amount of audio to include before the VAD
|
|
143
|
+
# detected speech (in milliseconds). Defaults to 300ms.
|
|
144
|
+
prefix_padding_ms: nil,
|
|
145
|
+
# Used only for `server_vad` mode. Duration of silence to detect speech stop (in
|
|
146
|
+
# milliseconds). Defaults to 500ms. With shorter values the model will respond
|
|
147
|
+
# more quickly, but may jump in on short pauses from the user.
|
|
148
|
+
silence_duration_ms: nil,
|
|
149
|
+
# Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0); this
|
|
150
|
+
# defaults to 0.5. A higher threshold will require louder audio to activate the
|
|
151
|
+
# model, and thus might perform better in noisy environments.
|
|
152
|
+
threshold: nil,
|
|
153
|
+
# Type of turn detection.
|
|
154
|
+
type: nil
|
|
155
|
+
)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
sig do
|
|
159
|
+
override.returns(
|
|
160
|
+
{
|
|
161
|
+
create_response: T::Boolean,
|
|
162
|
+
eagerness:
|
|
163
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::OrSymbol,
|
|
164
|
+
idle_timeout_ms: T.nilable(Integer),
|
|
165
|
+
interrupt_response: T::Boolean,
|
|
166
|
+
prefix_padding_ms: Integer,
|
|
167
|
+
silence_duration_ms: Integer,
|
|
168
|
+
threshold: Float,
|
|
169
|
+
type:
|
|
170
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::OrSymbol
|
|
171
|
+
}
|
|
172
|
+
)
|
|
173
|
+
end
|
|
174
|
+
def to_hash
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
|
|
178
|
+
# will wait longer for the user to continue speaking, `high` will respond more
|
|
179
|
+
# quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
|
|
180
|
+
# and `high` have max timeouts of 8s, 4s, and 2s respectively.
|
|
181
|
+
module Eagerness
|
|
182
|
+
extend OpenAI::Internal::Type::Enum
|
|
183
|
+
|
|
184
|
+
TaggedSymbol =
|
|
185
|
+
T.type_alias do
|
|
186
|
+
T.all(
|
|
187
|
+
Symbol,
|
|
188
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness
|
|
189
|
+
)
|
|
190
|
+
end
|
|
191
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
192
|
+
|
|
193
|
+
LOW =
|
|
194
|
+
T.let(
|
|
195
|
+
:low,
|
|
196
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
|
|
197
|
+
)
|
|
198
|
+
MEDIUM =
|
|
199
|
+
T.let(
|
|
200
|
+
:medium,
|
|
201
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
|
|
202
|
+
)
|
|
203
|
+
HIGH =
|
|
204
|
+
T.let(
|
|
205
|
+
:high,
|
|
206
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
|
|
207
|
+
)
|
|
208
|
+
AUTO =
|
|
209
|
+
T.let(
|
|
210
|
+
:auto,
|
|
211
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
sig do
|
|
215
|
+
override.returns(
|
|
216
|
+
T::Array[
|
|
217
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness::TaggedSymbol
|
|
218
|
+
]
|
|
219
|
+
)
|
|
220
|
+
end
|
|
221
|
+
def self.values
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
# Type of turn detection.
|
|
226
|
+
module Type
|
|
227
|
+
extend OpenAI::Internal::Type::Enum
|
|
228
|
+
|
|
229
|
+
TaggedSymbol =
|
|
230
|
+
T.type_alias do
|
|
231
|
+
T.all(
|
|
232
|
+
Symbol,
|
|
233
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type
|
|
234
|
+
)
|
|
235
|
+
end
|
|
236
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
237
|
+
|
|
238
|
+
SERVER_VAD =
|
|
239
|
+
T.let(
|
|
240
|
+
:server_vad,
|
|
241
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol
|
|
242
|
+
)
|
|
243
|
+
SEMANTIC_VAD =
|
|
244
|
+
T.let(
|
|
245
|
+
:semantic_vad,
|
|
246
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
sig do
|
|
250
|
+
override.returns(
|
|
251
|
+
T::Array[
|
|
252
|
+
OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type::TaggedSymbol
|
|
253
|
+
]
|
|
254
|
+
)
|
|
255
|
+
end
|
|
256
|
+
def self.values
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
end
|