openai 0.61.0 → 0.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +1 -1
- data/lib/openai/models/admin/organization/usage_audio_speeches_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_audio_transcriptions_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_code_interpreter_sessions_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_completions_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_costs_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_embeddings_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_images_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_moderations_response.rb +10 -1
- data/lib/openai/models/admin/organization/usage_vector_stores_response.rb +10 -1
- data/lib/openai/models/chat/chat_completion_token_logprob.rb +1 -2
- data/lib/openai/models/chat/completion_create_params.rb +4 -3
- data/lib/openai/models/image_edit_params.rb +85 -31
- data/lib/openai/models/image_generate_params.rb +78 -26
- data/lib/openai/models/image_model.rb +5 -2
- data/lib/openai/models/realtime/audio_transcription.rb +37 -5
- data/lib/openai/models/realtime/client_secret_create_response.rb +1 -2
- data/lib/openai/models/realtime/realtime_audio_config_input.rb +3 -0
- data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +3 -0
- data/lib/openai/models/realtime/realtime_reasoning.rb +24 -0
- data/lib/openai/models/realtime/realtime_reasoning_effort.rb +22 -0
- data/lib/openai/models/realtime/realtime_response_create_params.rb +18 -1
- data/lib/openai/models/realtime/realtime_session.rb +6 -0
- data/lib/openai/models/realtime/realtime_session_create_request.rb +21 -1
- data/lib/openai/models/realtime/realtime_session_create_response.rb +41 -17
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +3 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +3 -0
- data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +9 -5
- data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +2 -1
- data/lib/openai/models/realtime/realtime_translation_client_event.rb +45 -0
- data/lib/openai/models/realtime/realtime_translation_client_secret_create_request.rb +85 -0
- data/lib/openai/models/realtime/realtime_translation_client_secret_create_response.rb +42 -0
- data/lib/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rb +51 -0
- data/lib/openai/models/realtime/realtime_translation_input_transcript_delta_event.rb +55 -0
- data/lib/openai/models/realtime/realtime_translation_output_audio_delta_event.rb +89 -0
- data/lib/openai/models/realtime/realtime_translation_output_transcript_delta_event.rb +54 -0
- data/lib/openai/models/realtime/realtime_translation_server_event.rb +53 -0
- data/lib/openai/models/realtime/realtime_translation_session.rb +158 -0
- data/lib/openai/models/realtime/realtime_translation_session_close_event.rb +30 -0
- data/lib/openai/models/realtime/realtime_translation_session_closed_event.rb +28 -0
- data/lib/openai/models/realtime/realtime_translation_session_create_request.rb +138 -0
- data/lib/openai/models/realtime/realtime_translation_session_created_event.rb +38 -0
- data/lib/openai/models/realtime/realtime_translation_session_update_event.rb +43 -0
- data/lib/openai/models/realtime/realtime_translation_session_update_request.rb +129 -0
- data/lib/openai/models/realtime/realtime_translation_session_updated_event.rb +37 -0
- data/lib/openai/models/realtime/transcription_session_updated_event.rb +1 -2
- data/lib/openai/models/responses/response.rb +4 -3
- data/lib/openai/models/responses/response_create_params.rb +4 -3
- data/lib/openai/models/responses/response_includable.rb +2 -0
- data/lib/openai/models/responses/response_text_delta_event.rb +2 -2
- data/lib/openai/models/responses/response_text_done_event.rb +2 -2
- data/lib/openai/models/responses/responses_client_event.rb +4 -3
- data/lib/openai/models/responses/tool.rb +81 -16
- data/lib/openai/resources/chat/completions.rb +2 -2
- data/lib/openai/resources/images.rb +6 -6
- data/lib/openai/resources/realtime/calls.rb +5 -1
- data/lib/openai/resources/responses.rb +2 -2
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +18 -1
- data/rbi/openai/models/admin/organization/usage_audio_speeches_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_audio_transcriptions_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_code_interpreter_sessions_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_completions_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_costs_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_embeddings_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_images_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_moderations_response.rbi +11 -1
- data/rbi/openai/models/admin/organization/usage_vector_stores_response.rbi +11 -1
- data/rbi/openai/models/chat/chat_completion_token_logprob.rbi +2 -4
- data/rbi/openai/models/chat/completion_create_params.rbi +6 -4
- data/rbi/openai/models/image_edit_params.rbi +102 -45
- data/rbi/openai/models/image_generate_params.rbi +93 -39
- data/rbi/openai/models/image_model.rbi +8 -3
- data/rbi/openai/models/realtime/audio_transcription.rbi +85 -6
- data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +6 -0
- data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +3 -0
- data/rbi/openai/models/realtime/realtime_reasoning.rbi +54 -0
- data/rbi/openai/models/realtime/realtime_reasoning_effort.rbi +44 -0
- data/rbi/openai/models/realtime/realtime_response_create_params.rbi +26 -0
- data/rbi/openai/models/realtime/realtime_session.rbi +9 -0
- data/rbi/openai/models/realtime/realtime_session_create_request.rbi +31 -0
- data/rbi/openai/models/realtime/realtime_session_create_response.rbi +53 -32
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +6 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +3 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +13 -7
- data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +2 -1
- data/rbi/openai/models/realtime/realtime_translation_client_event.rbi +29 -0
- data/rbi/openai/models/realtime/realtime_translation_client_secret_create_request.rbi +193 -0
- data/rbi/openai/models/realtime/realtime_translation_client_secret_create_response.rbi +69 -0
- data/rbi/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rbi +69 -0
- data/rbi/openai/models/realtime/realtime_translation_input_transcript_delta_event.rbi +77 -0
- data/rbi/openai/models/realtime/realtime_translation_output_audio_delta_event.rbi +148 -0
- data/rbi/openai/models/realtime/realtime_translation_output_transcript_delta_event.rbi +76 -0
- data/rbi/openai/models/realtime/realtime_translation_server_event.rbi +33 -0
- data/rbi/openai/models/realtime/realtime_translation_session.rbi +339 -0
- data/rbi/openai/models/realtime/realtime_translation_session_close_event.rbi +44 -0
- data/rbi/openai/models/realtime/realtime_translation_session_closed_event.rbi +39 -0
- data/rbi/openai/models/realtime/realtime_translation_session_create_request.rbi +322 -0
- data/rbi/openai/models/realtime/realtime_translation_session_created_event.rbi +68 -0
- data/rbi/openai/models/realtime/realtime_translation_session_update_event.rbi +78 -0
- data/rbi/openai/models/realtime/realtime_translation_session_update_request.rbi +313 -0
- data/rbi/openai/models/realtime/realtime_translation_session_updated_event.rbi +67 -0
- data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +0 -2
- data/rbi/openai/models/responses/response.rbi +6 -4
- data/rbi/openai/models/responses/response_create_params.rbi +6 -4
- data/rbi/openai/models/responses/response_includable.rbi +2 -0
- data/rbi/openai/models/responses/response_text_delta_event.rbi +2 -2
- data/rbi/openai/models/responses/response_text_done_event.rbi +2 -2
- data/rbi/openai/models/responses/responses_client_event.rbi +6 -4
- data/rbi/openai/models/responses/tool.rbi +122 -27
- data/rbi/openai/resources/chat/completions.rbi +6 -4
- data/rbi/openai/resources/images.rbi +110 -44
- data/rbi/openai/resources/realtime/calls.rbi +7 -0
- data/rbi/openai/resources/responses.rbi +6 -4
- data/sig/openai/models/admin/organization/usage_audio_speeches_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_audio_transcriptions_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_code_interpreter_sessions_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_completions_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_costs_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_embeddings_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_images_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_moderations_response.rbs +7 -2
- data/sig/openai/models/admin/organization/usage_vector_stores_response.rbs +7 -2
- data/sig/openai/models/image_edit_params.rbs +5 -4
- data/sig/openai/models/image_generate_params.rbs +5 -4
- data/sig/openai/models/image_model.rbs +11 -5
- data/sig/openai/models/realtime/audio_transcription.rbs +25 -0
- data/sig/openai/models/realtime/realtime_reasoning.rbs +24 -0
- data/sig/openai/models/realtime/realtime_reasoning_effort.rbs +20 -0
- data/sig/openai/models/realtime/realtime_response_create_params.rbs +16 -0
- data/sig/openai/models/realtime/realtime_session_create_request.rbs +18 -0
- data/sig/openai/models/realtime/realtime_session_create_response.rbs +27 -4
- data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +4 -8
- data/sig/openai/models/realtime/realtime_translation_client_event.rbs +16 -0
- data/sig/openai/models/realtime/realtime_translation_client_secret_create_request.rbs +69 -0
- data/sig/openai/models/realtime/realtime_translation_client_secret_create_response.rbs +32 -0
- data/sig/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rbs +34 -0
- data/sig/openai/models/realtime/realtime_translation_input_transcript_delta_event.rbs +37 -0
- data/sig/openai/models/realtime/realtime_translation_output_audio_delta_event.rbs +70 -0
- data/sig/openai/models/realtime/realtime_translation_output_transcript_delta_event.rbs +37 -0
- data/sig/openai/models/realtime/realtime_translation_server_event.rbs +20 -0
- data/sig/openai/models/realtime/realtime_translation_session.rbs +131 -0
- data/sig/openai/models/realtime/realtime_translation_session_close_event.rbs +20 -0
- data/sig/openai/models/realtime/realtime_translation_session_closed_event.rbs +18 -0
- data/sig/openai/models/realtime/realtime_translation_session_create_request.rbs +120 -0
- data/sig/openai/models/realtime/realtime_translation_session_created_event.rbs +32 -0
- data/sig/openai/models/realtime/realtime_translation_session_update_event.rbs +34 -0
- data/sig/openai/models/realtime/realtime_translation_session_update_request.rbs +115 -0
- data/sig/openai/models/realtime/realtime_translation_session_updated_event.rbs +32 -0
- data/sig/openai/models/responses/tool.rbs +15 -5
- data/sig/openai/resources/realtime/calls.rbs +2 -0
- metadata +56 -5
- data/lib/openai/models/realtime/realtime_session_client_secret.rb +0 -36
- data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +0 -49
- data/sig/openai/models/realtime/realtime_session_client_secret.rbs +0 -20
data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb
CHANGED
|
@@ -16,6 +16,9 @@ module OpenAI
|
|
|
16
16
|
# trails off with "uhhm", the model will score a low probability of turn end and
|
|
17
17
|
# wait longer for the user to continue speaking. This can be useful for more
|
|
18
18
|
# natural conversations, but may have a higher latency.
|
|
19
|
+
#
|
|
20
|
+
# For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
|
|
21
|
+
# `null`; VAD is not supported.
|
|
19
22
|
module RealtimeTranscriptionSessionAudioInputTurnDetection
|
|
20
23
|
extend OpenAI::Internal::Type::Union
|
|
21
24
|
|
|
@@ -91,7 +91,6 @@ module OpenAI
|
|
|
91
91
|
-> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction }
|
|
92
92
|
|
|
93
93
|
# @!attribute transcription
|
|
94
|
-
# Configuration of the transcription model.
|
|
95
94
|
#
|
|
96
95
|
# @return [OpenAI::Models::Realtime::AudioTranscription, nil]
|
|
97
96
|
optional :transcription, -> { OpenAI::Realtime::AudioTranscription }
|
|
@@ -99,10 +98,15 @@ module OpenAI
|
|
|
99
98
|
# @!attribute turn_detection
|
|
100
99
|
# Configuration for turn detection. Can be set to `null` to turn off. Server VAD
|
|
101
100
|
# means that the model will detect the start and end of speech based on audio
|
|
102
|
-
# volume and respond at the end of user speech.
|
|
101
|
+
# volume and respond at the end of user speech. For `gpt-realtime-whisper`, this
|
|
102
|
+
# must be `null`; VAD is not supported.
|
|
103
103
|
#
|
|
104
104
|
# @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection, nil]
|
|
105
|
-
optional :turn_detection,
|
|
105
|
+
optional :turn_detection,
|
|
106
|
+
-> {
|
|
107
|
+
OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
|
|
108
|
+
},
|
|
109
|
+
nil?: true
|
|
106
110
|
|
|
107
111
|
# @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
|
|
108
112
|
# Some parameter documentation has been truncated, see
|
|
@@ -113,9 +117,9 @@ module OpenAI
|
|
|
113
117
|
#
|
|
114
118
|
# @param noise_reduction [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction] Configuration for input audio noise reduction.
|
|
115
119
|
#
|
|
116
|
-
# @param transcription [OpenAI::Models::Realtime::AudioTranscription]
|
|
120
|
+
# @param transcription [OpenAI::Models::Realtime::AudioTranscription]
|
|
117
121
|
#
|
|
118
|
-
# @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server
|
|
122
|
+
# @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection, nil] Configuration for turn detection. Can be set to `null` to turn off. Server
|
|
119
123
|
|
|
120
124
|
# @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input#noise_reduction
|
|
121
125
|
class NoiseReduction < OpenAI::Internal::Type::BaseModel
|
|
@@ -40,7 +40,8 @@ module OpenAI
|
|
|
40
40
|
#
|
|
41
41
|
# Configuration for turn detection. Can be set to `null` to turn off. Server VAD
|
|
42
42
|
# means that the model will detect the start and end of speech based on audio
|
|
43
|
-
# volume and respond at the end of user speech.
|
|
43
|
+
# volume and respond at the end of user speech. For `gpt-realtime-whisper`, this
|
|
44
|
+
# must be `null`; VAD is not supported.
|
|
44
45
|
#
|
|
45
46
|
# @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in
|
|
46
47
|
#
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
# A Realtime translation client event.
|
|
7
|
+
module RealtimeTranslationClientEvent
|
|
8
|
+
extend OpenAI::Internal::Type::Union
|
|
9
|
+
|
|
10
|
+
discriminator :type
|
|
11
|
+
|
|
12
|
+
# Send this event to update the translation session configuration. Translation
|
|
13
|
+
# sessions support updates to `audio.output.language`, `audio.input.transcription`,
|
|
14
|
+
# and `audio.input.noise_reduction`.
|
|
15
|
+
variant :"session.update", -> { OpenAI::Realtime::RealtimeTranslationSessionUpdateEvent }
|
|
16
|
+
|
|
17
|
+
# Send this event to append audio bytes to the translation session input audio buffer.
|
|
18
|
+
#
|
|
19
|
+
# WebSocket translation sessions accept base64-encoded 24 kHz PCM16 mono
|
|
20
|
+
# little-endian raw audio bytes. Unsupported websocket audio formats return a
|
|
21
|
+
# validation error because lower-quality audio materially degrades translation
|
|
22
|
+
# quality.
|
|
23
|
+
#
|
|
24
|
+
# Translation consumes 200 ms engine frames. For best realtime behavior, append
|
|
25
|
+
# audio in 200 ms chunks. If a chunk is shorter, the server buffers it until it
|
|
26
|
+
# has enough audio for one frame. If a chunk is longer, the server splits it into
|
|
27
|
+
# 200 ms frames and enqueues them back-to-back.
|
|
28
|
+
#
|
|
29
|
+
# Keep appending silence while the session is active. If a client stops sending
|
|
30
|
+
# audio and later resumes, model time treats the resumed audio as contiguous with
|
|
31
|
+
# the previous audio rather than as a real-world pause.
|
|
32
|
+
variant :"session.input_audio_buffer.append",
|
|
33
|
+
-> { OpenAI::Realtime::RealtimeTranslationInputAudioBufferAppendEvent }
|
|
34
|
+
|
|
35
|
+
# Gracefully close the realtime translation session. The server flushes pending
|
|
36
|
+
# input audio and emits any remaining translated output before closing the
|
|
37
|
+
# session.
|
|
38
|
+
variant :"session.close", -> { OpenAI::Realtime::RealtimeTranslationSessionCloseEvent }
|
|
39
|
+
|
|
40
|
+
# @!method self.variants
|
|
41
|
+
# @return [Array(OpenAI::Models::Realtime::RealtimeTranslationSessionUpdateEvent, OpenAI::Models::Realtime::RealtimeTranslationInputAudioBufferAppendEvent, OpenAI::Models::Realtime::RealtimeTranslationSessionCloseEvent)]
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
class RealtimeTranslationClientSecretCreateRequest < OpenAI::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute session
|
|
8
|
+
# Realtime translation session configuration. Translation sessions stream source
|
|
9
|
+
# audio in and translated audio plus transcript deltas out continuously.
|
|
10
|
+
#
|
|
11
|
+
# @return [OpenAI::Models::Realtime::RealtimeTranslationSessionCreateRequest]
|
|
12
|
+
required :session, -> { OpenAI::Realtime::RealtimeTranslationSessionCreateRequest }
|
|
13
|
+
|
|
14
|
+
# @!attribute expires_after
|
|
15
|
+
# Configuration for the client secret expiration. Expiration refers to the time
|
|
16
|
+
# after which a client secret will no longer be valid for creating sessions. The
|
|
17
|
+
# session itself may continue after that time once started. A secret can be used
|
|
18
|
+
# to create multiple sessions until it expires.
|
|
19
|
+
#
|
|
20
|
+
# @return [OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter, nil]
|
|
21
|
+
optional :expires_after,
|
|
22
|
+
-> { OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter }
|
|
23
|
+
|
|
24
|
+
# @!method initialize(session:, expires_after: nil)
|
|
25
|
+
# Some parameter documentation has been truncated, see
|
|
26
|
+
# {OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest} for
|
|
27
|
+
# more details.
|
|
28
|
+
#
|
|
29
|
+
# Create a translation session and client secret for the Realtime API.
|
|
30
|
+
#
|
|
31
|
+
# @param session [OpenAI::Models::Realtime::RealtimeTranslationSessionCreateRequest] Realtime translation session configuration. Translation sessions stream source
|
|
32
|
+
#
|
|
33
|
+
# @param expires_after [OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter] Configuration for the client secret expiration. Expiration refers to the time af
|
|
34
|
+
|
|
35
|
+
# @see OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest#expires_after
|
|
36
|
+
class ExpiresAfter < OpenAI::Internal::Type::BaseModel
|
|
37
|
+
# @!attribute anchor
|
|
38
|
+
# The anchor point for the client secret expiration, meaning that `seconds` will
|
|
39
|
+
# be added to the `created_at` time of the client secret to produce an expiration
|
|
40
|
+
# timestamp. Only `created_at` is currently supported.
|
|
41
|
+
#
|
|
42
|
+
# @return [Symbol, OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor, nil]
|
|
43
|
+
optional :anchor,
|
|
44
|
+
enum: -> { OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor }
|
|
45
|
+
|
|
46
|
+
# @!attribute seconds
|
|
47
|
+
# The number of seconds from the anchor point to the expiration. Select a value
|
|
48
|
+
# between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if
|
|
49
|
+
# not specified.
|
|
50
|
+
#
|
|
51
|
+
# @return [Integer, nil]
|
|
52
|
+
optional :seconds, Integer
|
|
53
|
+
|
|
54
|
+
# @!method initialize(anchor: nil, seconds: nil)
|
|
55
|
+
# Some parameter documentation has been truncated, see
|
|
56
|
+
# {OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter}
|
|
57
|
+
# for more details.
|
|
58
|
+
#
|
|
59
|
+
# Configuration for the client secret expiration. Expiration refers to the time
|
|
60
|
+
# after which a client secret will no longer be valid for creating sessions. The
|
|
61
|
+
# session itself may continue after that time once started. A secret can be used
|
|
62
|
+
# to create multiple sessions until it expires.
|
|
63
|
+
#
|
|
64
|
+
# @param anchor [Symbol, OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor] The anchor point for the client secret expiration, meaning that `seconds` will b
|
|
65
|
+
#
|
|
66
|
+
# @param seconds [Integer] The number of seconds from the anchor point to the expiration. Select a value be
|
|
67
|
+
|
|
68
|
+
# The anchor point for the client secret expiration, meaning that `seconds` will
|
|
69
|
+
# be added to the `created_at` time of the client secret to produce an expiration
|
|
70
|
+
# timestamp. Only `created_at` is currently supported.
|
|
71
|
+
#
|
|
72
|
+
# @see OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter#anchor
|
|
73
|
+
module Anchor
|
|
74
|
+
extend OpenAI::Internal::Type::Enum
|
|
75
|
+
|
|
76
|
+
CREATED_AT = :created_at
|
|
77
|
+
|
|
78
|
+
# @!method self.values
|
|
79
|
+
# @return [Array<Symbol>]
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
class RealtimeTranslationClientSecretCreateResponse < OpenAI::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute expires_at
|
|
8
|
+
# Expiration timestamp for the client secret, in seconds since epoch.
|
|
9
|
+
#
|
|
10
|
+
# @return [Integer]
|
|
11
|
+
required :expires_at, Integer
|
|
12
|
+
|
|
13
|
+
# @!attribute session
|
|
14
|
+
# A Realtime translation session. Translation sessions continuously translate
|
|
15
|
+
# input audio into the configured output language.
|
|
16
|
+
#
|
|
17
|
+
# @return [OpenAI::Models::Realtime::RealtimeTranslationSession]
|
|
18
|
+
required :session, -> { OpenAI::Realtime::RealtimeTranslationSession }
|
|
19
|
+
|
|
20
|
+
# @!attribute value
|
|
21
|
+
# The generated client secret value.
|
|
22
|
+
#
|
|
23
|
+
# @return [String]
|
|
24
|
+
required :value, String
|
|
25
|
+
|
|
26
|
+
# @!method initialize(expires_at:, session:, value:)
|
|
27
|
+
# Some parameter documentation has been truncated, see
|
|
28
|
+
# {OpenAI::Models::Realtime::RealtimeTranslationClientSecretCreateResponse} for
|
|
29
|
+
# more details.
|
|
30
|
+
#
|
|
31
|
+
# Response from creating a translation session and client secret for the Realtime
|
|
32
|
+
# API.
|
|
33
|
+
#
|
|
34
|
+
# @param expires_at [Integer] Expiration timestamp for the client secret, in seconds since epoch.
|
|
35
|
+
#
|
|
36
|
+
# @param session [OpenAI::Models::Realtime::RealtimeTranslationSession] A Realtime translation session. Translation sessions continuously translate inpu
|
|
37
|
+
#
|
|
38
|
+
# @param value [String] The generated client secret value.
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
class RealtimeTranslationInputAudioBufferAppendEvent < OpenAI::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute audio
|
|
8
|
+
# Base64-encoded 24 kHz PCM16 mono audio bytes.
|
|
9
|
+
#
|
|
10
|
+
# @return [String]
|
|
11
|
+
required :audio, String
|
|
12
|
+
|
|
13
|
+
# @!attribute type
|
|
14
|
+
# The event type, must be `session.input_audio_buffer.append`.
|
|
15
|
+
#
|
|
16
|
+
# @return [Symbol, :"session.input_audio_buffer.append"]
|
|
17
|
+
required :type, const: :"session.input_audio_buffer.append"
|
|
18
|
+
|
|
19
|
+
# @!attribute event_id
|
|
20
|
+
# Optional client-generated ID used to identify this event.
|
|
21
|
+
#
|
|
22
|
+
# @return [String, nil]
|
|
23
|
+
optional :event_id, String
|
|
24
|
+
|
|
25
|
+
# @!method initialize(audio:, event_id: nil, type: :"session.input_audio_buffer.append")
|
|
26
|
+
# Send this event to append audio bytes to the translation session input audio
|
|
27
|
+
# buffer.
|
|
28
|
+
#
|
|
29
|
+
# WebSocket translation sessions accept base64-encoded 24 kHz PCM16 mono
|
|
30
|
+
# little-endian raw audio bytes. Unsupported websocket audio formats return a
|
|
31
|
+
# validation error because lower-quality audio materially degrades translation
|
|
32
|
+
# quality.
|
|
33
|
+
#
|
|
34
|
+
# Translation consumes 200 ms engine frames. For best realtime behavior, append
|
|
35
|
+
# audio in 200 ms chunks. If a chunk is shorter, the server buffers it until it
|
|
36
|
+
# has enough audio for one frame. If a chunk is longer, the server splits it into
|
|
37
|
+
# 200 ms frames and enqueues them back-to-back.
|
|
38
|
+
#
|
|
39
|
+
# Keep appending silence while the session is active. If a client stops sending
|
|
40
|
+
# audio and later resumes, model time treats the resumed audio as contiguous with
|
|
41
|
+
# the previous audio rather than as a real-world pause.
|
|
42
|
+
#
|
|
43
|
+
# @param audio [String] Base64-encoded 24 kHz PCM16 mono audio bytes.
|
|
44
|
+
#
|
|
45
|
+
# @param event_id [String] Optional client-generated ID used to identify this event.
|
|
46
|
+
#
|
|
47
|
+
# @param type [Symbol, :"session.input_audio_buffer.append"] The event type, must be `session.input_audio_buffer.append`.
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
class RealtimeTranslationInputTranscriptDeltaEvent < OpenAI::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute delta
|
|
8
|
+
# Append-only source-language transcript text.
|
|
9
|
+
#
|
|
10
|
+
# @return [String]
|
|
11
|
+
required :delta, String
|
|
12
|
+
|
|
13
|
+
# @!attribute event_id
|
|
14
|
+
# The unique ID of the server event.
|
|
15
|
+
#
|
|
16
|
+
# @return [String]
|
|
17
|
+
required :event_id, String
|
|
18
|
+
|
|
19
|
+
# @!attribute type
|
|
20
|
+
# The event type, must be `session.input_transcript.delta`.
|
|
21
|
+
#
|
|
22
|
+
# @return [Symbol, :"session.input_transcript.delta"]
|
|
23
|
+
required :type, const: :"session.input_transcript.delta"
|
|
24
|
+
|
|
25
|
+
# @!attribute elapsed_ms
|
|
26
|
+
# Timing metadata for stream alignment, derived from the translation frame when
|
|
27
|
+
# available. It advances in 200 ms increments, but multiple transcript deltas may
|
|
28
|
+
# share the same `elapsed_ms`. Treat it as alignment metadata, not a unique
|
|
29
|
+
# transcript-delta identifier.
|
|
30
|
+
#
|
|
31
|
+
# @return [Integer, nil]
|
|
32
|
+
optional :elapsed_ms, Integer, nil?: true
|
|
33
|
+
|
|
34
|
+
# @!method initialize(delta:, event_id:, elapsed_ms: nil, type: :"session.input_transcript.delta")
|
|
35
|
+
# Some parameter documentation has been truncated, see
|
|
36
|
+
# {OpenAI::Models::Realtime::RealtimeTranslationInputTranscriptDeltaEvent} for
|
|
37
|
+
# more details.
|
|
38
|
+
#
|
|
39
|
+
# Returned when optional source-language transcript text is available. This event
|
|
40
|
+
# is emitted only when `audio.input.transcription` is configured.
|
|
41
|
+
#
|
|
42
|
+
# Transcript deltas are append-only text fragments. Clients should not insert
|
|
43
|
+
# unconditional spaces between deltas.
|
|
44
|
+
#
|
|
45
|
+
# @param delta [String] Append-only source-language transcript text.
|
|
46
|
+
#
|
|
47
|
+
# @param event_id [String] The unique ID of the server event.
|
|
48
|
+
#
|
|
49
|
+
# @param elapsed_ms [Integer, nil] Timing metadata for stream alignment, derived from the translation frame
|
|
50
|
+
#
|
|
51
|
+
# @param type [Symbol, :"session.input_transcript.delta"] The event type, must be `session.input_transcript.delta`.
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
class RealtimeTranslationOutputAudioDeltaEvent < OpenAI::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute delta
|
|
8
|
+
# Base64-encoded translated audio data.
|
|
9
|
+
#
|
|
10
|
+
# @return [String]
|
|
11
|
+
required :delta, String
|
|
12
|
+
|
|
13
|
+
# @!attribute event_id
|
|
14
|
+
# The unique ID of the server event.
|
|
15
|
+
#
|
|
16
|
+
# @return [String]
|
|
17
|
+
required :event_id, String
|
|
18
|
+
|
|
19
|
+
# @!attribute type
|
|
20
|
+
# The event type, must be `session.output_audio.delta`.
|
|
21
|
+
#
|
|
22
|
+
# @return [Symbol, :"session.output_audio.delta"]
|
|
23
|
+
required :type, const: :"session.output_audio.delta"
|
|
24
|
+
|
|
25
|
+
# @!attribute channels
|
|
26
|
+
# Number of audio channels.
|
|
27
|
+
#
|
|
28
|
+
# @return [Integer, nil]
|
|
29
|
+
optional :channels, Integer
|
|
30
|
+
|
|
31
|
+
# @!attribute elapsed_ms
|
|
32
|
+
# Timing metadata for stream alignment, derived from the translation frame when
|
|
33
|
+
# available. Treat `elapsed_ms` as alignment metadata, not a unique event
|
|
34
|
+
# identifier.
|
|
35
|
+
#
|
|
36
|
+
# @return [Integer, nil]
|
|
37
|
+
optional :elapsed_ms, Integer, nil?: true
|
|
38
|
+
|
|
39
|
+
# @!attribute format_
|
|
40
|
+
# Audio encoding for `delta`.
|
|
41
|
+
#
|
|
42
|
+
# @return [Symbol, OpenAI::Models::Realtime::RealtimeTranslationOutputAudioDeltaEvent::Format, nil]
|
|
43
|
+
optional :format_,
|
|
44
|
+
enum: -> { OpenAI::Realtime::RealtimeTranslationOutputAudioDeltaEvent::Format },
|
|
45
|
+
api_name: :format
|
|
46
|
+
|
|
47
|
+
# @!attribute sample_rate
|
|
48
|
+
# Sample rate of the audio delta.
|
|
49
|
+
#
|
|
50
|
+
# @return [Integer, nil]
|
|
51
|
+
optional :sample_rate, Integer
|
|
52
|
+
|
|
53
|
+
# @!method initialize(delta:, event_id:, channels: nil, elapsed_ms: nil, format_: nil, sample_rate: nil, type: :"session.output_audio.delta")
|
|
54
|
+
# Some parameter documentation has been truncated, see
|
|
55
|
+
# {OpenAI::Models::Realtime::RealtimeTranslationOutputAudioDeltaEvent} for more
|
|
56
|
+
# details.
|
|
57
|
+
#
|
|
58
|
+
# Returned when translated output audio is available. Output audio deltas are 200
|
|
59
|
+
# ms frames of PCM16 audio.
|
|
60
|
+
#
|
|
61
|
+
# @param delta [String] Base64-encoded translated audio data.
|
|
62
|
+
#
|
|
63
|
+
# @param event_id [String] The unique ID of the server event.
|
|
64
|
+
#
|
|
65
|
+
# @param channels [Integer] Number of audio channels.
|
|
66
|
+
#
|
|
67
|
+
# @param elapsed_ms [Integer, nil] Timing metadata for stream alignment, derived from the translation frame
|
|
68
|
+
#
|
|
69
|
+
# @param format_ [Symbol, OpenAI::Models::Realtime::RealtimeTranslationOutputAudioDeltaEvent::Format] Audio encoding for `delta`.
|
|
70
|
+
#
|
|
71
|
+
# @param sample_rate [Integer] Sample rate of the audio delta.
|
|
72
|
+
#
|
|
73
|
+
# @param type [Symbol, :"session.output_audio.delta"] The event type, must be `session.output_audio.delta`.
|
|
74
|
+
|
|
75
|
+
# Audio encoding for `delta`.
|
|
76
|
+
#
|
|
77
|
+
# @see OpenAI::Models::Realtime::RealtimeTranslationOutputAudioDeltaEvent#format_
|
|
78
|
+
module Format
|
|
79
|
+
extend OpenAI::Internal::Type::Enum
|
|
80
|
+
|
|
81
|
+
PCM16 = :pcm16
|
|
82
|
+
|
|
83
|
+
# @!method self.values
|
|
84
|
+
# @return [Array<Symbol>]
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
class RealtimeTranslationOutputTranscriptDeltaEvent < OpenAI::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute delta
|
|
8
|
+
# Append-only transcript text for the translated output audio.
|
|
9
|
+
#
|
|
10
|
+
# @return [String]
|
|
11
|
+
required :delta, String
|
|
12
|
+
|
|
13
|
+
# @!attribute event_id
|
|
14
|
+
# The unique ID of the server event.
|
|
15
|
+
#
|
|
16
|
+
# @return [String]
|
|
17
|
+
required :event_id, String
|
|
18
|
+
|
|
19
|
+
# @!attribute type
|
|
20
|
+
# The event type, must be `session.output_transcript.delta`.
|
|
21
|
+
#
|
|
22
|
+
# @return [Symbol, :"session.output_transcript.delta"]
|
|
23
|
+
required :type, const: :"session.output_transcript.delta"
|
|
24
|
+
|
|
25
|
+
# @!attribute elapsed_ms
|
|
26
|
+
# Timing metadata for stream alignment, derived from the translation frame when
|
|
27
|
+
# available. It advances in 200 ms increments, but multiple transcript deltas may
|
|
28
|
+
# share the same `elapsed_ms`. Treat it as alignment metadata, not a unique
|
|
29
|
+
# transcript-delta identifier.
|
|
30
|
+
#
|
|
31
|
+
# @return [Integer, nil]
|
|
32
|
+
optional :elapsed_ms, Integer, nil?: true
|
|
33
|
+
|
|
34
|
+
# @!method initialize(delta:, event_id:, elapsed_ms: nil, type: :"session.output_transcript.delta")
|
|
35
|
+
# Some parameter documentation has been truncated, see
|
|
36
|
+
# {OpenAI::Models::Realtime::RealtimeTranslationOutputTranscriptDeltaEvent} for
|
|
37
|
+
# more details.
|
|
38
|
+
#
|
|
39
|
+
# Returned when translated transcript text is available.
|
|
40
|
+
#
|
|
41
|
+
# Transcript deltas are append-only text fragments. Clients should not insert
|
|
42
|
+
# unconditional spaces between deltas.
|
|
43
|
+
#
|
|
44
|
+
# @param delta [String] Append-only transcript text for the translated output audio.
|
|
45
|
+
#
|
|
46
|
+
# @param event_id [String] The unique ID of the server event.
|
|
47
|
+
#
|
|
48
|
+
# @param elapsed_ms [Integer, nil] Timing metadata for stream alignment, derived from the translation frame when available.
|
|
49
|
+
#
|
|
50
|
+
# @param type [Symbol, :"session.output_transcript.delta"] The event type, must be `session.output_transcript.delta`.
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenAI
|
|
4
|
+
module Models
|
|
5
|
+
module Realtime
|
|
6
|
+
# A Realtime translation server event.
|
|
7
|
+
module RealtimeTranslationServerEvent
|
|
8
|
+
extend OpenAI::Internal::Type::Union
|
|
9
|
+
|
|
10
|
+
discriminator :type
|
|
11
|
+
|
|
12
|
+
# Returned when an error occurs, which could be a client problem or a server
|
|
13
|
+
# problem. Most errors are recoverable and the session will stay open; we
|
|
14
|
+
# recommend that implementors monitor and log error messages by default.
|
|
15
|
+
variant :error, -> { OpenAI::Realtime::RealtimeErrorEvent }
|
|
16
|
+
|
|
17
|
+
# Returned when a translation session is created. Emitted automatically when a
|
|
18
|
+
# new connection is established as the first server event. This event contains
|
|
19
|
+
# the default translation session configuration.
|
|
20
|
+
variant :"session.created", -> { OpenAI::Realtime::RealtimeTranslationSessionCreatedEvent }
|
|
21
|
+
|
|
22
|
+
# Returned when a translation session is updated with a `session.update` event,
|
|
23
|
+
# unless there is an error.
|
|
24
|
+
variant :"session.updated", -> { OpenAI::Realtime::RealtimeTranslationSessionUpdatedEvent }
|
|
25
|
+
|
|
26
|
+
# Returned when a realtime translation session is closed.
|
|
27
|
+
variant :"session.closed", -> { OpenAI::Realtime::RealtimeTranslationSessionClosedEvent }
|
|
28
|
+
|
|
29
|
+
# Returned when optional source-language transcript text is available. This event
|
|
30
|
+
# is emitted only when `audio.input.transcription` is configured.
|
|
31
|
+
#
|
|
32
|
+
# Transcript deltas are append-only text fragments. Clients should not insert
|
|
33
|
+
# unconditional spaces between deltas.
|
|
34
|
+
variant :"session.input_transcript.delta",
|
|
35
|
+
-> { OpenAI::Realtime::RealtimeTranslationInputTranscriptDeltaEvent }
|
|
36
|
+
|
|
37
|
+
# Returned when translated transcript text is available.
|
|
38
|
+
#
|
|
39
|
+
# Transcript deltas are append-only text fragments. Clients should not insert
|
|
40
|
+
# unconditional spaces between deltas.
|
|
41
|
+
variant :"session.output_transcript.delta",
|
|
42
|
+
-> { OpenAI::Realtime::RealtimeTranslationOutputTranscriptDeltaEvent }
|
|
43
|
+
|
|
44
|
+
# Returned when translated output audio is available. Output audio deltas are
|
|
45
|
+
# 200 ms frames of PCM16 audio.
|
|
46
|
+
variant :"session.output_audio.delta", -> { OpenAI::Realtime::RealtimeTranslationOutputAudioDeltaEvent }
|
|
47
|
+
|
|
48
|
+
# @!method self.variants
|
|
49
|
+
# @return [Array(OpenAI::Models::Realtime::RealtimeErrorEvent, OpenAI::Models::Realtime::RealtimeTranslationSessionCreatedEvent, OpenAI::Models::Realtime::RealtimeTranslationSessionUpdatedEvent, OpenAI::Models::Realtime::RealtimeTranslationSessionClosedEvent, OpenAI::Models::Realtime::RealtimeTranslationInputTranscriptDeltaEvent, OpenAI::Models::Realtime::RealtimeTranslationOutputTranscriptDeltaEvent, OpenAI::Models::Realtime::RealtimeTranslationOutputAudioDeltaEvent)]
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|