RubyGems - openai - Versions diffs - 0.22.1 → 0.23.0 - Mend

openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (158) hide show

data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb CHANGED Viewed

@@ -4,14 +4,6 @@ module OpenAI
   module Models
     module Realtime
       class RealtimeTranscriptionSessionCreateRequest < OpenAI::Internal::Type::BaseModel
-        # @!attribute model
-        #   ID of the model to use. The options are `gpt-4o-transcribe`,
-        #   `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
-        #   Whisper V2 model).
-        #
-        #   @return [String, Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model]
-        required :model, union: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model }
         # @!attribute type
         #   The type of session to create. Always `transcription` for transcription
         #   sessions.
@@ -19,106 +11,35 @@ module OpenAI
         #   @return [Symbol, :transcription]
         required :type, const: :transcription
+        # @!attribute audio
+        #   Configuration for input and output audio.
+        #
+        #   @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudio, nil]
+        optional :audio, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudio }
         # @!attribute include
-        #   The set of items to include in the transcription. Current available items are:
+        #   Additional fields to include in server outputs.
         #
-        #   - `item.input_audio_transcription.logprobs`
+        #   `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+        #   transcription.
         #
         #   @return [Array<Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Include>, nil]
         optional :include,
                  -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include] }
-        # @!attribute input_audio_format
-        #   The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
-        #   `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
-        #   (mono), and little-endian byte order.
-        #
-        #   @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat, nil]
-        optional :input_audio_format,
-                 enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat }
-        # @!attribute input_audio_noise_reduction
-        #   Configuration for input audio noise reduction. This can be set to `null` to turn
-        #   off. Noise reduction filters audio added to the input audio buffer before it is
-        #   sent to VAD and the model. Filtering the audio can improve VAD and turn
-        #   detection accuracy (reducing false positives) and model performance by improving
-        #   perception of the input audio.
-        #
-        #   @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction, nil]
-        optional :input_audio_noise_reduction,
-                 -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction }
-        # @!attribute input_audio_transcription
-        #   Configuration for input audio transcription. The client can optionally set the
-        #   language and prompt for transcription, these offer additional guidance to the
-        #   transcription service.
-        #
-        #   @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription, nil]
-        optional :input_audio_transcription,
-                 -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription }
-        # @!attribute turn_detection
-        #   Configuration for turn detection. Can be set to `null` to turn off. Server VAD
-        #   means that the model will detect the start and end of speech based on audio
-        #   volume and respond at the end of user speech.
-        #
-        #   @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection, nil]
-        optional :turn_detection,
-                 -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection }
-        # @!method initialize(model:, include: nil, input_audio_format: nil, input_audio_noise_reduction: nil, input_audio_transcription: nil, turn_detection: nil, type: :transcription)
+        # @!method initialize(audio: nil, include: nil, type: :transcription)
         #   Some parameter documentation has been truncated, see
         #   {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest} for more
         #   details.
         #
         #   Realtime transcription session object configuration.
         #
-        #   @param model [String, Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model] ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transc
-        #
-        #   @param include [Array<Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Include>] The set of items to include in the transcription. Current available items are:
-        #
-        #   @param input_audio_format [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
-        #
-        #   @param input_audio_noise_reduction [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn
+        #   @param audio [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudio] Configuration for input and output audio.
         #
-        #   @param input_audio_transcription [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription] Configuration for input audio transcription. The client can optionally set the l
-        #
-        #   @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server VAD m
+        #   @param include [Array<Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Include>] Additional fields to include in server outputs.
         #
         #   @param type [Symbol, :transcription] The type of session to create. Always `transcription` for transcription sessions
-        # ID of the model to use. The options are `gpt-4o-transcribe`,
-        # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
-        # Whisper V2 model).
-        #
-        # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#model
-        module Model
-          extend OpenAI::Internal::Type::Union
-          variant String
-          variant const: -> { OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::WHISPER_1 }
-          variant const: -> { OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::GPT_4O_TRANSCRIBE }
-          variant const: -> { OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::GPT_4O_MINI_TRANSCRIBE }
-          # @!method self.variants
-          #   @return [Array(String, Symbol)]
-          define_sorbet_constant!(:Variants) do
-            T.type_alias { T.any(String, OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol) }
-          end
-          # @!group
-          WHISPER_1 = :"whisper-1"
-          GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
-          GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
-          # @!endgroup
-        end
         module Include
           extend OpenAI::Internal::Type::Enum
@@ -127,185 +48,6 @@ module OpenAI
           # @!method self.values
           #   @return [Array<Symbol>]
         end
-        # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
-        # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
-        # (mono), and little-endian byte order.
-        #
-        # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#input_audio_format
-        module InputAudioFormat
-          extend OpenAI::Internal::Type::Enum
-          PCM16 = :pcm16
-          G711_ULAW = :g711_ulaw
-          G711_ALAW = :g711_alaw
-          # @!method self.values
-          #   @return [Array<Symbol>]
-        end
-        # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#input_audio_noise_reduction
-        class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel
-          # @!attribute type
-          #   Type of noise reduction. `near_field` is for close-talking microphones such as
-          #   headphones, `far_field` is for far-field microphones such as laptop or
-          #   conference room microphones.
-          #
-          #   @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type, nil]
-          optional :type,
-                   enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type }
-          # @!method initialize(type: nil)
-          #   Some parameter documentations has been truncated, see
-          #   {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction}
-          #   for more details.
-          #
-          #   Configuration for input audio noise reduction. This can be set to `null` to turn
-          #   off. Noise reduction filters audio added to the input audio buffer before it is
-          #   sent to VAD and the model. Filtering the audio can improve VAD and turn
-          #   detection accuracy (reducing false positives) and model performance by improving
-          #   perception of the input audio.
-          #
-          #   @param type [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type] Type of noise reduction. `near_field` is for close-talking microphones such as h
-          # Type of noise reduction. `near_field` is for close-talking microphones such as
-          # headphones, `far_field` is for far-field microphones such as laptop or
-          # conference room microphones.
-          #
-          # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction#type
-          module Type
-            extend OpenAI::Internal::Type::Enum
-            NEAR_FIELD = :near_field
-            FAR_FIELD = :far_field
-            # @!method self.values
-            #   @return [Array<Symbol>]
-          end
-        end
-        # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#input_audio_transcription
-        class InputAudioTranscription < OpenAI::Internal::Type::BaseModel
-          # @!attribute language
-          #   The language of the input audio. Supplying the input language in
-          #   [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
-          #   format will improve accuracy and latency.
-          #
-          #   @return [String, nil]
-          optional :language, String
-          # @!attribute model
-          #   The model to use for transcription, current options are `gpt-4o-transcribe`,
-          #   `gpt-4o-mini-transcribe`, and `whisper-1`.
-          #
-          #   @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model, nil]
-          optional :model,
-                   enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model }
-          # @!attribute prompt
-          #   An optional text to guide the model's style or continue a previous audio
-          #   segment. For `whisper-1`, the
-          #   [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
-          #   For `gpt-4o-transcribe` models, the prompt is a free text string, for example
-          #   "expect words related to technology".
-          #
-          #   @return [String, nil]
-          optional :prompt, String
-          # @!method initialize(language: nil, model: nil, prompt: nil)
-          #   Some parameter documentations has been truncated, see
-          #   {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription}
-          #   for more details.
-          #
-          #   Configuration for input audio transcription. The client can optionally set the
-          #   language and prompt for transcription, these offer additional guidance to the
-          #   transcription service.
-          #
-          #   @param language [String] The language of the input audio. Supplying the input language in
-          #
-          #   @param model [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model] The model to use for transcription, current options are `gpt-4o-transcribe`, `gp
-          #
-          #   @param prompt [String] An optional text to guide the model's style or continue a previous audio
-          # The model to use for transcription, current options are `gpt-4o-transcribe`,
-          # `gpt-4o-mini-transcribe`, and `whisper-1`.
-          #
-          # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription#model
-          module Model
-            extend OpenAI::Internal::Type::Enum
-            GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
-            GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
-            WHISPER_1 = :"whisper-1"
-            # @!method self.values
-            #   @return [Array<Symbol>]
-          end
-        end
-        # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest#turn_detection
-        class TurnDetection < OpenAI::Internal::Type::BaseModel
-          # @!attribute prefix_padding_ms
-          #   Amount of audio to include before the VAD detected speech (in milliseconds).
-          #   Defaults to 300ms.
-          #
-          #   @return [Integer, nil]
-          optional :prefix_padding_ms, Integer
-          # @!attribute silence_duration_ms
-          #   Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
-          #   With shorter values the model will respond more quickly, but may jump in on
-          #   short pauses from the user.
-          #
-          #   @return [Integer, nil]
-          optional :silence_duration_ms, Integer
-          # @!attribute threshold
-          #   Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
-          #   threshold will require louder audio to activate the model, and thus might
-          #   perform better in noisy environments.
-          #
-          #   @return [Float, nil]
-          optional :threshold, Float
-          # @!attribute type
-          #   Type of turn detection. Only `server_vad` is currently supported for
-          #   transcription sessions.
-          #
-          #   @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type, nil]
-          optional :type,
-                   enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type }
-          # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
-          #   Some parameter documentations has been truncated, see
-          #   {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection}
-          #   for more details.
-          #
-          #   Configuration for turn detection. Can be set to `null` to turn off. Server VAD
-          #   means that the model will detect the start and end of speech based on audio
-          #   volume and respond at the end of user speech.
-          #
-          #   @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in
-          #
-          #   @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults
-          #
-          #   @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A
-          #
-          #   @param type [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type] Type of turn detection. Only `server_vad` is currently supported for transcripti
-          # Type of turn detection. Only `server_vad` is currently supported for
-          # transcription sessions.
-          #
-          # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection#type
-          module Type
-            extend OpenAI::Internal::Type::Enum
-            SERVER_VAD = :server_vad
-            # @!method self.values
-            #   @return [Array<Symbol>]
-          end
-        end
       end
     end
   end

data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb ADDED Viewed

@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel
+        # @!attribute client_secret
+        #   Ephemeral key returned by the API. Only present when the session is created on
+        #   the server via REST API.
+        #
+        #   @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionClientSecret]
+        required :client_secret, -> { OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret }
+        # @!attribute input_audio_format
+        #   The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+        #
+        #   @return [String, nil]
+        optional :input_audio_format, String
+        # @!attribute input_audio_transcription
+        #   Configuration of the transcription model.
+        #
+        #   @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription, nil]
+        optional :input_audio_transcription,
+                 -> { OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription }
+        # @!attribute modalities
+        #   The set of modalities the model can respond with. To disable audio, set this to
+        #   ["text"].
+        #
+        #   @return [Array<Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality>, nil]
+        optional :modalities,
+                 -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality] }
+        # @!attribute turn_detection
+        #   Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+        #   means that the model will detect the start and end of speech based on audio
+        #   volume and respond at the end of user speech.
+        #
+        #   @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection, nil]
+        optional :turn_detection, -> { OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection }
+        # @!method initialize(client_secret:, input_audio_format: nil, input_audio_transcription: nil, modalities: nil, turn_detection: nil)
+        #   Some parameter documentation has been truncated, see
+        #   {OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse} for more
+        #   details.
+        #
+        #   A new Realtime transcription session configuration.
+        #
+        #   When a session is created on the server via REST API, the session object also
+        #   contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
+        #   not present when a session is updated via the WebSocket API.
+        #
+        #   @param client_secret [OpenAI::Models::Realtime::RealtimeTranscriptionSessionClientSecret] Ephemeral key returned by the API. Only present when the session is
+        #
+        #   @param input_audio_format [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+        #
+        #   @param input_audio_transcription [OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription] Configuration of the transcription model.
+        #
+        #   @param modalities [Array<Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality>] The set of modalities the model can respond with. To disable audio,
+        #
+        #   @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server
+        module Modality
+          extend OpenAI::Internal::Type::Enum
+          TEXT = :text
+          AUDIO = :audio
+          # @!method self.values
+          #   @return [Array<Symbol>]
+        end
+      end
+    end
+    RealtimeTranscriptionSessionCreateResponse = Realtime::RealtimeTranscriptionSessionCreateResponse
+  end
+end

data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb ADDED Viewed

@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeTranscriptionSessionInputAudioTranscription < OpenAI::Internal::Type::BaseModel
+        # @!attribute language
+        #   The language of the input audio. Supplying the input language in
+        #   [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+        #   format will improve accuracy and latency.
+        #
+        #   @return [String, nil]
+        optional :language, String
+        # @!attribute model
+        #   The model to use for transcription. Current options are `whisper-1`,
+        #   `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+        #
+        #   @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model, nil]
+        optional :model, enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model }
+        # @!attribute prompt
+        #   An optional text to guide the model's style or continue a previous audio
+        #   segment. For `whisper-1`, the
+        #   [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+        #   For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+        #   "expect words related to technology".
+        #
+        #   @return [String, nil]
+        optional :prompt, String
+        # @!method initialize(language: nil, model: nil, prompt: nil)
+        #   Some parameter documentation has been truncated, see
+        #   {OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription}
+        #   for more details.
+        #
+        #   Configuration of the transcription model.
+        #
+        #   @param language [String] The language of the input audio. Supplying the input language in
+        #
+        #   @param model [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-tra
+        #
+        #   @param prompt [String] An optional text to guide the model's style or continue a previous audio
+        # The model to use for transcription. Current options are `whisper-1`,
+        # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+        #
+        # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionInputAudioTranscription#model
+        module Model
+          extend OpenAI::Internal::Type::Enum
+          WHISPER_1 = :"whisper-1"
+          GPT_4O_TRANSCRIBE_LATEST = :"gpt-4o-transcribe-latest"
+          GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
+          GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
+          # @!method self.values
+          #   @return [Array<Symbol>]
+        end
+      end
+    end
+    RealtimeTranscriptionSessionInputAudioTranscription =
+      Realtime::RealtimeTranscriptionSessionInputAudioTranscription
+  end
+end

data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb ADDED Viewed

@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeTranscriptionSessionTurnDetection < OpenAI::Internal::Type::BaseModel
+        # @!attribute prefix_padding_ms
+        #   Amount of audio to include before the VAD detected speech (in milliseconds).
+        #   Defaults to 300ms.
+        #
+        #   @return [Integer, nil]
+        optional :prefix_padding_ms, Integer
+        # @!attribute silence_duration_ms
+        #   Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
+        #   With shorter values the model will respond more quickly, but may jump in on
+        #   short pauses from the user.
+        #
+        #   @return [Integer, nil]
+        optional :silence_duration_ms, Integer
+        # @!attribute threshold
+        #   Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
+        #   threshold will require louder audio to activate the model, and thus might
+        #   perform better in noisy environments.
+        #
+        #   @return [Float, nil]
+        optional :threshold, Float
+        # @!attribute type
+        #   Type of turn detection, only `server_vad` is currently supported.
+        #
+        #   @return [String, nil]
+        optional :type, String
+        # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
+        #   Some parameter documentation has been truncated, see
+        #   {OpenAI::Models::Realtime::RealtimeTranscriptionSessionTurnDetection} for more
+        #   details.
+        #
+        #   Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+        #   means that the model will detect the start and end of speech based on audio
+        #   volume and respond at the end of user speech.
+        #
+        #   @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in
+        #
+        #   @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults
+        #
+        #   @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A
+        #
+        #   @param type [String] Type of turn detection, only `server_vad` is currently supported.
+      end
+    end
+    RealtimeTranscriptionSessionTurnDetection = Realtime::RealtimeTranscriptionSessionTurnDetection
+  end
+end

data/lib/openai/models/realtime/realtime_truncation.rb CHANGED Viewed

@@ -4,18 +4,19 @@ module OpenAI
   module Models
     module Realtime
       # Controls how the realtime conversation is truncated prior to model inference.
-      # The default is `auto`. When set to `retention_ratio`, the server retains a
-      # fraction of the conversation tokens prior to the instructions.
+      # The default is `auto`.
       module RealtimeTruncation
         extend OpenAI::Internal::Type::Union
-        # The truncation strategy to use for the session.
+        # The truncation strategy to use for the session. `auto` is the default truncation strategy. `disabled` will disable truncation and emit errors when the conversation exceeds the input token limit.
         variant enum: -> { OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy }
-        # Retain a fraction of the conversation tokens.
-        variant -> { OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation }
+        # Retain a fraction of the conversation tokens when the conversation exceeds the input token limit. This allows you to amortize truncations across multiple turns, which can help improve cached token usage.
+        variant -> { OpenAI::Realtime::RealtimeTruncationRetentionRatio }
-        # The truncation strategy to use for the session.
+        # The truncation strategy to use for the session. `auto` is the default truncation
+        # strategy. `disabled` will disable truncation and emit errors when the
+        # conversation exceeds the input token limit.
         module RealtimeTruncationStrategy
           extend OpenAI::Internal::Type::Enum
@@ -26,41 +27,8 @@ module OpenAI
           #   @return [Array<Symbol>]
         end
-        class RetentionRatioTruncation < OpenAI::Internal::Type::BaseModel
-          # @!attribute retention_ratio
-          #   Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0).
-          #
-          #   @return [Float]
-          required :retention_ratio, Float
-          # @!attribute type
-          #   Use retention ratio truncation.
-          #
-          #   @return [Symbol, :retention_ratio]
-          required :type, const: :retention_ratio
-          # @!attribute post_instructions_token_limit
-          #   Optional cap on tokens allowed after the instructions.
-          #
-          #   @return [Integer, nil]
-          optional :post_instructions_token_limit, Integer, nil?: true
-          # @!method initialize(retention_ratio:, post_instructions_token_limit: nil, type: :retention_ratio)
-          #   Some parameter documentations has been truncated, see
-          #   {OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation} for
-          #   more details.
-          #
-          #   Retain a fraction of the conversation tokens.
-          #
-          #   @param retention_ratio [Float] Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0).
-          #
-          #   @param post_instructions_token_limit [Integer, nil] Optional cap on tokens allowed after the instructions.
-          #
-          #   @param type [Symbol, :retention_ratio] Use retention ratio truncation.
-        end
         # @!method self.variants
-        #   @return [Array(Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncation::RetentionRatioTruncation)]
+        #   @return [Array(Symbol, OpenAI::Models::Realtime::RealtimeTruncation::RealtimeTruncationStrategy, OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio)]
       end
     end
   end

data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb ADDED Viewed

@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel
+        # @!attribute retention_ratio
+        #   Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
+        #   conversation exceeds the input token limit.
+        #
+        #   @return [Float]
+        required :retention_ratio, Float
+        # @!attribute type
+        #   Use retention ratio truncation.
+        #
+        #   @return [Symbol, :retention_ratio]
+        required :type, const: :retention_ratio
+        # @!method initialize(retention_ratio:, type: :retention_ratio)
+        #   Some parameter documentation has been truncated, see
+        #   {OpenAI::Models::Realtime::RealtimeTruncationRetentionRatio} for more details.
+        #
+        #   Retain a fraction of the conversation tokens when the conversation exceeds the
+        #   input token limit. This allows you to amortize truncations across multiple
+        #   turns, which can help improve cached token usage.
+        #
+        #   @param retention_ratio [Float] Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
+        #
+        #   @param type [Symbol, :retention_ratio] Use retention ratio truncation.
+      end
+    end
+  end
+end

data/lib/openai/models/realtime/response_cancel_event.rb CHANGED Viewed

@@ -29,7 +29,9 @@ module OpenAI
         #
         #   Send this event to cancel an in-progress response. The server will respond with
         #   a `response.done` event with a status of `response.status=cancelled`. If there
-        #   is no response to cancel, the server will respond with an error.
+        #   is no response to cancel, the server will respond with an error. It's safe to
+        #   call `response.cancel` even if no response is in progress: an error will be
+        #   returned, but the session will remain unaffected.
         #
         #   @param event_id [String] Optional client-generated ID used to identify this event.
         #