openai 0.22.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/lib/openai/models/realtime/audio_transcription.rb +60 -0
- data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
- data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
- data/lib/openai/models/realtime/conversation_item.rb +1 -1
- data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
- data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
- data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
- data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
- data/lib/openai/models/realtime/models.rb +58 -0
- data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
- data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
- data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
- data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
- data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
- data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
- data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
- data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
- data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
- data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
- data/lib/openai/models/realtime/realtime_response.rb +117 -107
- data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
- data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
- data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
- data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
- data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
- data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
- data/lib/openai/models/realtime/realtime_session.rb +9 -125
- data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
- data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
- data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
- data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
- data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
- data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
- data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
- data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
- data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
- data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
- data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
- data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
- data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
- data/lib/openai/models/realtime/response_create_event.rb +18 -348
- data/lib/openai/models/realtime/response_done_event.rb +7 -0
- data/lib/openai/models/realtime/session_created_event.rb +20 -4
- data/lib/openai/models/realtime/session_update_event.rb +36 -12
- data/lib/openai/models/realtime/session_updated_event.rb +20 -4
- data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
- data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
- data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
- data/lib/openai/resources/realtime/client_secrets.rb +2 -3
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +19 -1
- data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
- data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
- data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
- data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
- data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
- data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
- data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
- data/rbi/openai/models/realtime/models.rbi +97 -0
- data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
- data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
- data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
- data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
- data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
- data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
- data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
- data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
- data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
- data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
- data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
- data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
- data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
- data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
- data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
- data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
- data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
- data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
- data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
- data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
- data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
- data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
- data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
- data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
- data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
- data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
- data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
- data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
- data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
- data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
- data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
- data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
- data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
- data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
- data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
- data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
- data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
- data/sig/openai/models/realtime/models.rbs +57 -0
- data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
- data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
- data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
- data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
- data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
- data/sig/openai/models/realtime/realtime_response.rbs +103 -82
- data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
- data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
- data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
- data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
- data/sig/openai/models/realtime/realtime_session.rbs +16 -106
- data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
- data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
- data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
- data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
- data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
- data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
- data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
- data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
- data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
- data/sig/openai/models/realtime/response_create_event.rbs +6 -249
- data/sig/openai/models/realtime/session_created_event.rbs +14 -4
- data/sig/openai/models/realtime/session_update_event.rbs +14 -4
- data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
- data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
- data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
- data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
- metadata +59 -5
- data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
- data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
- data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi (new file, +195):

```diff
@@ -0,0 +1,195 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    RealtimeTranscriptionSessionCreateResponse =
+      Realtime::RealtimeTranscriptionSessionCreateResponse
+
+    module Realtime
+      class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # Ephemeral key returned by the API. Only present when the session is created on
+        # the server via REST API.
+        sig do
+          returns(OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret)
+        end
+        attr_reader :client_secret
+
+        sig do
+          params(
+            client_secret:
+              OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash
+          ).void
+        end
+        attr_writer :client_secret
+
+        # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+        sig { returns(T.nilable(String)) }
+        attr_reader :input_audio_format
+
+        sig { params(input_audio_format: String).void }
+        attr_writer :input_audio_format
+
+        # Configuration of the transcription model.
+        sig do
+          returns(
+            T.nilable(
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription
+            )
+          )
+        end
+        attr_reader :input_audio_transcription
+
+        sig do
+          params(
+            input_audio_transcription:
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash
+          ).void
+        end
+        attr_writer :input_audio_transcription
+
+        # The set of modalities the model can respond with. To disable audio, set this to
+        # ["text"].
+        sig do
+          returns(
+            T.nilable(
+              T::Array[
+                OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
+              ]
+            )
+          )
+        end
+        attr_reader :modalities
+
+        sig do
+          params(
+            modalities:
+              T::Array[
+                OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol
+              ]
+          ).void
+        end
+        attr_writer :modalities
+
+        # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+        # means that the model will detect the start and end of speech based on audio
+        # volume and respond at the end of user speech.
+        sig do
+          returns(
+            T.nilable(
+              OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
+            )
+          )
+        end
+        attr_reader :turn_detection
+
+        sig do
+          params(
+            turn_detection:
+              OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
+          ).void
+        end
+        attr_writer :turn_detection
+
+        # A new Realtime transcription session configuration.
+        #
+        # When a session is created on the server via REST API, the session object also
+        # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
+        # not present when a session is updated via the WebSocket API.
+        sig do
+          params(
+            client_secret:
+              OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash,
+            input_audio_format: String,
+            input_audio_transcription:
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash,
+            modalities:
+              T::Array[
+                OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol
+              ],
+            turn_detection:
+              OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Ephemeral key returned by the API. Only present when the session is created on
+          # the server via REST API.
+          client_secret:,
+          # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+          input_audio_format: nil,
+          # Configuration of the transcription model.
+          input_audio_transcription: nil,
+          # The set of modalities the model can respond with. To disable audio, set this to
+          # ["text"].
+          modalities: nil,
+          # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+          # means that the model will detect the start and end of speech based on audio
+          # volume and respond at the end of user speech.
+          turn_detection: nil
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              client_secret:
+                OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret,
+              input_audio_format: String,
+              input_audio_transcription:
+                OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription,
+              modalities:
+                T::Array[
+                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
+                ],
+              turn_detection:
+                OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
+            }
+          )
+        end
+        def to_hash
+        end
+
+        module Modality
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          TEXT =
+            T.let(
+              :text,
+              OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
+            )
+          AUDIO =
+            T.let(
+              :audio,
+              OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+      end
+    end
+  end
+end
```
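For orientation, a minimal usage sketch of the new response model, assuming the gem's usual keyword-argument constructor for `BaseModel` subclasses and the `OpenAI::Realtime` alias declared above. The client-secret field names (`value`, `expires_at`) are assumptions here, not shown in this diff:

```ruby
require "openai"

# Hypothetical values throughout; in practice this object comes back from the
# server when a transcription session is created over REST.
response =
  OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse.new(
    # Assumed field names for the ephemeral key object.
    client_secret: {value: "ek_abc123", expires_at: 1_726_000_000},
    input_audio_format: "pcm16",
    input_audio_transcription: {model: :"gpt-4o-mini-transcribe"},
    modalities: [:text],
    turn_detection: {type: "server_vad"}
  )

response.modalities # => [:text]
response.to_hash    # symbol-keyed hash shaped like the `override.returns` sig
```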
data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi (new file, +144):

```diff
@@ -0,0 +1,144 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    RealtimeTranscriptionSessionInputAudioTranscription =
+      Realtime::RealtimeTranscriptionSessionInputAudioTranscription
+
+    module Realtime
+      class RealtimeTranscriptionSessionInputAudioTranscription < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # The language of the input audio. Supplying the input language in
+        # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+        # format will improve accuracy and latency.
+        sig { returns(T.nilable(String)) }
+        attr_reader :language
+
+        sig { params(language: String).void }
+        attr_writer :language
+
+        # The model to use for transcription. Current options are `whisper-1`,
+        # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+        sig do
+          returns(
+            T.nilable(
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
+            )
+          )
+        end
+        attr_reader :model
+
+        sig do
+          params(
+            model:
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::OrSymbol
+          ).void
+        end
+        attr_writer :model
+
+        # An optional text to guide the model's style or continue a previous audio
+        # segment. For `whisper-1`, the
+        # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+        # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+        # "expect words related to technology".
+        sig { returns(T.nilable(String)) }
+        attr_reader :prompt
+
+        sig { params(prompt: String).void }
+        attr_writer :prompt
+
+        # Configuration of the transcription model.
+        sig do
+          params(
+            language: String,
+            model:
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::OrSymbol,
+            prompt: String
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # The language of the input audio. Supplying the input language in
+          # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+          # format will improve accuracy and latency.
+          language: nil,
+          # The model to use for transcription. Current options are `whisper-1`,
+          # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+          model: nil,
+          # An optional text to guide the model's style or continue a previous audio
+          # segment. For `whisper-1`, the
+          # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+          # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+          # "expect words related to technology".
+          prompt: nil
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              language: String,
+              model:
+                OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol,
+              prompt: String
+            }
+          )
+        end
+        def to_hash
+        end
+
+        # The model to use for transcription. Current options are `whisper-1`,
+        # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+        module Model
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          WHISPER_1 =
+            T.let(
+              :"whisper-1",
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
+            )
+          GPT_4O_TRANSCRIBE_LATEST =
+            T.let(
+              :"gpt-4o-transcribe-latest",
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
+            )
+          GPT_4O_MINI_TRANSCRIBE =
+            T.let(
+              :"gpt-4o-mini-transcribe",
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
+            )
+          GPT_4O_TRANSCRIBE =
+            T.let(
+              :"gpt-4o-transcribe",
+              OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+      end
+    end
+  end
+end
```
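A short, hypothetical sketch of the transcription config above; all three fields are optional, and `model:` takes one of the enum symbols defined in `Model`:

```ruby
require "openai"

# Guide the transcriber with a language hint and a free-text prompt
# (free-text prompts apply to the gpt-4o-transcribe family, per the docs above).
transcription =
  OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription.new(
    language: "en",
    model: :"gpt-4o-mini-transcribe",
    prompt: "expect words related to technology"
  )

transcription.to_hash
# => {language: "en", model: :"gpt-4o-mini-transcribe", prompt: "expect words related to technology"}
```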
data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi (new file, +94):

```diff
@@ -0,0 +1,94 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    RealtimeTranscriptionSessionTurnDetection =
+      Realtime::RealtimeTranscriptionSessionTurnDetection
+
+    module Realtime
+      class RealtimeTranscriptionSessionTurnDetection < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # Amount of audio to include before the VAD detected speech (in milliseconds).
+        # Defaults to 300ms.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :prefix_padding_ms
+
+        sig { params(prefix_padding_ms: Integer).void }
+        attr_writer :prefix_padding_ms
+
+        # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
+        # With shorter values the model will respond more quickly, but may jump in on
+        # short pauses from the user.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :silence_duration_ms
+
+        sig { params(silence_duration_ms: Integer).void }
+        attr_writer :silence_duration_ms
+
+        # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+        # threshold will require louder audio to activate the model, and thus might
+        # perform better in noisy environments.
+        sig { returns(T.nilable(Float)) }
+        attr_reader :threshold
+
+        sig { params(threshold: Float).void }
+        attr_writer :threshold
+
+        # Type of turn detection, only `server_vad` is currently supported.
+        sig { returns(T.nilable(String)) }
+        attr_reader :type
+
+        sig { params(type: String).void }
+        attr_writer :type
+
+        # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+        # means that the model will detect the start and end of speech based on audio
+        # volume and respond at the end of user speech.
+        sig do
+          params(
+            prefix_padding_ms: Integer,
+            silence_duration_ms: Integer,
+            threshold: Float,
+            type: String
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Amount of audio to include before the VAD detected speech (in milliseconds).
+          # Defaults to 300ms.
+          prefix_padding_ms: nil,
+          # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
+          # With shorter values the model will respond more quickly, but may jump in on
+          # short pauses from the user.
+          silence_duration_ms: nil,
+          # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+          # threshold will require louder audio to activate the model, and thus might
+          # perform better in noisy environments.
+          threshold: nil,
+          # Type of turn detection, only `server_vad` is currently supported.
+          type: nil
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              prefix_padding_ms: Integer,
+              silence_duration_ms: Integer,
+              threshold: Float,
+              type: String
+            }
+          )
+        end
+        def to_hash
+        end
+      end
+    end
+  end
+end
```
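The VAD knobs above compose as plain keyword arguments. A hedged sketch (illustrative values) tuning server VAD for a noisy room, per the field docs in the file:

```ruby
require "openai"

turn_detection =
  OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection.new(
    type: "server_vad",       # only supported type, per the field doc
    threshold: 0.7,           # require louder audio than the 0.5 default
    prefix_padding_ms: 300,   # audio kept from before detected speech
    silence_duration_ms: 800  # tolerate longer pauses before ending the turn
  )
```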
data/rbi/openai/models/realtime/realtime_truncation.rbi (+5 -56; the removed line ending in `OpenAI::Realtime::` is truncated in the source and left as-is):

```diff
@@ -4,8 +4,7 @@ module OpenAI
   module Models
     module Realtime
       # Controls how the realtime conversation is truncated prior to model inference.
-      # The default is `auto`.
-      # fraction of the conversation tokens prior to the instructions.
+      # The default is `auto`.
       module RealtimeTruncation
         extend OpenAI::Internal::Type::Union

@@ -13,11 +12,13 @@ module OpenAI
          T.type_alias do
            T.any(
              OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::TaggedSymbol,
-             OpenAI::Realtime::
+             OpenAI::Realtime::RealtimeTruncationRetentionRatio
            )
          end

-      # The truncation strategy to use for the session.
+      # The truncation strategy to use for the session. `auto` is the default truncation
+      # strategy. `disabled` will disable truncation and emit errors when the
+      # conversation exceeds the input token limit.
       module RealtimeTruncationStrategy
         extend OpenAI::Internal::Type::Enum

@@ -52,58 +53,6 @@ module OpenAI
          end
        end

-      class RetentionRatioTruncation < OpenAI::Internal::Type::BaseModel
-        OrHash =
-          T.type_alias do
-            T.any(
-              OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation,
-              OpenAI::Internal::AnyHash
-            )
-          end
-
-        # Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0).
-        sig { returns(Float) }
-        attr_accessor :retention_ratio
-
-        # Use retention ratio truncation.
-        sig { returns(Symbol) }
-        attr_accessor :type
-
-        # Optional cap on tokens allowed after the instructions.
-        sig { returns(T.nilable(Integer)) }
-        attr_accessor :post_instructions_token_limit
-
-        # Retain a fraction of the conversation tokens.
-        sig do
-          params(
-            retention_ratio: Float,
-            post_instructions_token_limit: T.nilable(Integer),
-            type: Symbol
-          ).returns(T.attached_class)
-        end
-        def self.new(
-          # Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0).
-          retention_ratio:,
-          # Optional cap on tokens allowed after the instructions.
-          post_instructions_token_limit: nil,
-          # Use retention ratio truncation.
-          type: :retention_ratio
-        )
-        end
-
-        sig do
-          override.returns(
-            {
-              retention_ratio: Float,
-              type: Symbol,
-              post_instructions_token_limit: T.nilable(Integer)
-            }
-          )
-        end
-        def to_hash
-        end
-      end
-
       sig do
         override.returns(
           T::Array[OpenAI::Realtime::RealtimeTruncation::Variants]
```
data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi (new file, +45):

```diff
@@ -0,0 +1,45 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeTruncationRetentionRatio,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
+        # conversation exceeds the input token limit.
+        sig { returns(Float) }
+        attr_accessor :retention_ratio
+
+        # Use retention ratio truncation.
+        sig { returns(Symbol) }
+        attr_accessor :type
+
+        # Retain a fraction of the conversation tokens when the conversation exceeds the
+        # input token limit. This allows you to amortize truncations across multiple
+        # turns, which can help improve cached token usage.
+        sig do
+          params(retention_ratio: Float, type: Symbol).returns(T.attached_class)
+        end
+        def self.new(
+          # Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
+          # conversation exceeds the input token limit.
+          retention_ratio:,
+          # Use retention ratio truncation.
+          type: :retention_ratio
+        )
+        end
+
+        sig { override.returns({ retention_ratio: Float, type: Symbol }) }
+        def to_hash
+        end
+      end
+    end
+  end
+end
```
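The union in realtime_truncation.rbi now points at this top-level model in place of the removed nested `RetentionRatioTruncation`, which also drops `post_instructions_token_limit`. A minimal sketch, assuming the keyword constructor shown in the sig:

```ruby
require "openai"

# Keep 80% of post-instruction conversation tokens whenever the input token
# limit is exceeded; `type:` defaults to :retention_ratio per the signature.
truncation =
  OpenAI::Realtime::RealtimeTruncationRetentionRatio.new(retention_ratio: 0.8)

truncation.to_hash # => {retention_ratio: 0.8, type: :retention_ratio}
```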
data/rbi/openai/models/realtime/response_cancel_event.rbi (+3 -1):

```diff
@@ -33,7 +33,9 @@ module OpenAI

       # Send this event to cancel an in-progress response. The server will respond with
       # a `response.done` event with a status of `response.status=cancelled`. If there
-      # is no response to cancel, the server will respond with an error.
+      # is no response to cancel, the server will respond with an error. It's safe to
+      # call `response.cancel` even if no response is in progress: an error will be
+      # returned and the session will remain unaffected.
       sig do
         params(event_id: String, response_id: String, type: Symbol).returns(
           T.attached_class
```