openai 0.22.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/lib/openai/models/realtime/audio_transcription.rb +60 -0
- data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
- data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
- data/lib/openai/models/realtime/conversation_item.rb +1 -1
- data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
- data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
- data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
- data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
- data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
- data/lib/openai/models/realtime/models.rb +58 -0
- data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
- data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
- data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
- data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
- data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
- data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
- data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
- data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
- data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
- data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
- data/lib/openai/models/realtime/realtime_response.rb +117 -107
- data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
- data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
- data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
- data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
- data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
- data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
- data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
- data/lib/openai/models/realtime/realtime_session.rb +9 -125
- data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
- data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
- data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
- data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
- data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
- data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
- data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
- data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
- data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
- data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
- data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
- data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
- data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
- data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
- data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
- data/lib/openai/models/realtime/response_create_event.rb +18 -348
- data/lib/openai/models/realtime/response_done_event.rb +7 -0
- data/lib/openai/models/realtime/session_created_event.rb +20 -4
- data/lib/openai/models/realtime/session_update_event.rb +36 -12
- data/lib/openai/models/realtime/session_updated_event.rb +20 -4
- data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
- data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
- data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
- data/lib/openai/resources/realtime/client_secrets.rb +2 -3
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +19 -1
- data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
- data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
- data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
- data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
- data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
- data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
- data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
- data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
- data/rbi/openai/models/realtime/models.rbi +97 -0
- data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
- data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
- data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
- data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
- data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
- data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
- data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
- data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
- data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
- data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
- data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
- data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
- data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
- data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
- data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
- data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
- data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
- data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
- data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
- data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
- data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
- data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
- data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
- data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
- data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
- data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
- data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
- data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
- data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
- data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
- data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
- data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
- data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
- data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
- data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
- data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
- data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
- data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
- data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
- data/sig/openai/models/realtime/models.rbs +57 -0
- data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
- data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
- data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
- data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
- data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
- data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
- data/sig/openai/models/realtime/realtime_response.rbs +103 -82
- data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
- data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
- data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
- data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
- data/sig/openai/models/realtime/realtime_session.rbs +16 -106
- data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
- data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
- data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
- data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
- data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
- data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
- data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
- data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
- data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
- data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
- data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
- data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
- data/sig/openai/models/realtime/response_create_event.rbs +6 -249
- data/sig/openai/models/realtime/session_created_event.rbs +14 -4
- data/sig/openai/models/realtime/session_update_event.rbs +14 -4
- data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
- data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
- data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
- data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
- metadata +59 -5
- data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
- data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
- data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
|
@@ -11,10 +11,14 @@ module OpenAI
|
|
|
11
11
|
required :event_id, String
|
|
12
12
|
|
|
13
13
|
# @!attribute session
|
|
14
|
-
# A Realtime transcription session configuration
|
|
14
|
+
# A new Realtime transcription session configuration.
|
|
15
15
|
#
|
|
16
|
-
#
|
|
17
|
-
|
|
16
|
+
# When a session is created on the server via REST API, the session object also
|
|
17
|
+
# contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
|
|
18
|
+
# not present when a session is updated via the WebSocket API.
|
|
19
|
+
#
|
|
20
|
+
# @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse]
|
|
21
|
+
required :session, -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse }
|
|
18
22
|
|
|
19
23
|
# @!attribute type
|
|
20
24
|
# The event type, must be `transcription_session.created`.
|
|
@@ -30,248 +34,9 @@ module OpenAI
|
|
|
30
34
|
#
|
|
31
35
|
# @param event_id [String] The unique ID of the server event.
|
|
32
36
|
#
|
|
33
|
-
# @param session [OpenAI::Models::Realtime::
|
|
37
|
+
# @param session [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] A new Realtime transcription session configuration.
|
|
34
38
|
#
|
|
35
39
|
# @param type [Symbol, :"transcription_session.created"] The event type, must be `transcription_session.created`.
|
|
36
|
-
|
|
37
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated#session
|
|
38
|
-
class Session < OpenAI::Internal::Type::BaseModel
|
|
39
|
-
# @!attribute id
|
|
40
|
-
# Unique identifier for the session that looks like `sess_1234567890abcdef`.
|
|
41
|
-
#
|
|
42
|
-
# @return [String, nil]
|
|
43
|
-
optional :id, String
|
|
44
|
-
|
|
45
|
-
# @!attribute audio
|
|
46
|
-
# Configuration for input audio for the session.
|
|
47
|
-
#
|
|
48
|
-
# @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio, nil]
|
|
49
|
-
optional :audio, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio }
|
|
50
|
-
|
|
51
|
-
# @!attribute expires_at
|
|
52
|
-
# Expiration timestamp for the session, in seconds since epoch.
|
|
53
|
-
#
|
|
54
|
-
# @return [Integer, nil]
|
|
55
|
-
optional :expires_at, Integer
|
|
56
|
-
|
|
57
|
-
# @!attribute include
|
|
58
|
-
# Additional fields to include in server outputs.
|
|
59
|
-
#
|
|
60
|
-
# - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
|
|
61
|
-
# transcription.
|
|
62
|
-
#
|
|
63
|
-
# @return [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Include>, nil]
|
|
64
|
-
optional :include,
|
|
65
|
-
-> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::TranscriptionSessionCreated::Session::Include] }
|
|
66
|
-
|
|
67
|
-
# @!attribute object
|
|
68
|
-
# The object type. Always `realtime.transcription_session`.
|
|
69
|
-
#
|
|
70
|
-
# @return [String, nil]
|
|
71
|
-
optional :object, String
|
|
72
|
-
|
|
73
|
-
# @!method initialize(id: nil, audio: nil, expires_at: nil, include: nil, object: nil)
|
|
74
|
-
# Some parameter documentations has been truncated, see
|
|
75
|
-
# {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session} for more
|
|
76
|
-
# details.
|
|
77
|
-
#
|
|
78
|
-
# A Realtime transcription session configuration object.
|
|
79
|
-
#
|
|
80
|
-
# @param id [String] Unique identifier for the session that looks like `sess_1234567890abcdef`.
|
|
81
|
-
#
|
|
82
|
-
# @param audio [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio] Configuration for input audio for the session.
|
|
83
|
-
#
|
|
84
|
-
# @param expires_at [Integer] Expiration timestamp for the session, in seconds since epoch.
|
|
85
|
-
#
|
|
86
|
-
# @param include [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Include>] Additional fields to include in server outputs.
|
|
87
|
-
#
|
|
88
|
-
# @param object [String] The object type. Always `realtime.transcription_session`.
|
|
89
|
-
|
|
90
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session#audio
|
|
91
|
-
class Audio < OpenAI::Internal::Type::BaseModel
|
|
92
|
-
# @!attribute input
|
|
93
|
-
#
|
|
94
|
-
# @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input, nil]
|
|
95
|
-
optional :input, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input }
|
|
96
|
-
|
|
97
|
-
# @!method initialize(input: nil)
|
|
98
|
-
# Configuration for input audio for the session.
|
|
99
|
-
#
|
|
100
|
-
# @param input [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input]
|
|
101
|
-
|
|
102
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio#input
|
|
103
|
-
class Input < OpenAI::Internal::Type::BaseModel
|
|
104
|
-
# @!attribute format_
|
|
105
|
-
# The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
|
|
106
|
-
#
|
|
107
|
-
# @return [String, nil]
|
|
108
|
-
optional :format_, String, api_name: :format
|
|
109
|
-
|
|
110
|
-
# @!attribute noise_reduction
|
|
111
|
-
# Configuration for input audio noise reduction.
|
|
112
|
-
#
|
|
113
|
-
# @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, nil]
|
|
114
|
-
optional :noise_reduction,
|
|
115
|
-
-> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction }
|
|
116
|
-
|
|
117
|
-
# @!attribute transcription
|
|
118
|
-
# Configuration of the transcription model.
|
|
119
|
-
#
|
|
120
|
-
# @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, nil]
|
|
121
|
-
optional :transcription,
|
|
122
|
-
-> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription }
|
|
123
|
-
|
|
124
|
-
# @!attribute turn_detection
|
|
125
|
-
# Configuration for turn detection.
|
|
126
|
-
#
|
|
127
|
-
# @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection, nil]
|
|
128
|
-
optional :turn_detection,
|
|
129
|
-
-> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection }
|
|
130
|
-
|
|
131
|
-
# @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
|
|
132
|
-
# Some parameter documentations has been truncated, see
|
|
133
|
-
# {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input}
|
|
134
|
-
# for more details.
|
|
135
|
-
#
|
|
136
|
-
# @param format_ [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
|
|
137
|
-
#
|
|
138
|
-
# @param noise_reduction [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction] Configuration for input audio noise reduction.
|
|
139
|
-
#
|
|
140
|
-
# @param transcription [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription] Configuration of the transcription model.
|
|
141
|
-
#
|
|
142
|
-
# @param turn_detection [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection] Configuration for turn detection.
|
|
143
|
-
|
|
144
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#noise_reduction
|
|
145
|
-
class NoiseReduction < OpenAI::Internal::Type::BaseModel
|
|
146
|
-
# @!attribute type
|
|
147
|
-
#
|
|
148
|
-
# @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type, nil]
|
|
149
|
-
optional :type,
|
|
150
|
-
enum: -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type }
|
|
151
|
-
|
|
152
|
-
# @!method initialize(type: nil)
|
|
153
|
-
# Configuration for input audio noise reduction.
|
|
154
|
-
#
|
|
155
|
-
# @param type [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type]
|
|
156
|
-
|
|
157
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction#type
|
|
158
|
-
module Type
|
|
159
|
-
extend OpenAI::Internal::Type::Enum
|
|
160
|
-
|
|
161
|
-
NEAR_FIELD = :near_field
|
|
162
|
-
FAR_FIELD = :far_field
|
|
163
|
-
|
|
164
|
-
# @!method self.values
|
|
165
|
-
# @return [Array<Symbol>]
|
|
166
|
-
end
|
|
167
|
-
end
|
|
168
|
-
|
|
169
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#transcription
|
|
170
|
-
class Transcription < OpenAI::Internal::Type::BaseModel
|
|
171
|
-
# @!attribute language
|
|
172
|
-
# The language of the input audio. Supplying the input language in
|
|
173
|
-
# [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
|
|
174
|
-
# format will improve accuracy and latency.
|
|
175
|
-
#
|
|
176
|
-
# @return [String, nil]
|
|
177
|
-
optional :language, String
|
|
178
|
-
|
|
179
|
-
# @!attribute model
|
|
180
|
-
# The model to use for transcription. Can be `gpt-4o-transcribe`,
|
|
181
|
-
# `gpt-4o-mini-transcribe`, or `whisper-1`.
|
|
182
|
-
#
|
|
183
|
-
# @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model, nil]
|
|
184
|
-
optional :model,
|
|
185
|
-
enum: -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model }
|
|
186
|
-
|
|
187
|
-
# @!attribute prompt
|
|
188
|
-
# An optional text to guide the model's style or continue a previous audio
|
|
189
|
-
# segment. The
|
|
190
|
-
# [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
|
|
191
|
-
# should match the audio language.
|
|
192
|
-
#
|
|
193
|
-
# @return [String, nil]
|
|
194
|
-
optional :prompt, String
|
|
195
|
-
|
|
196
|
-
# @!method initialize(language: nil, model: nil, prompt: nil)
|
|
197
|
-
# Some parameter documentations has been truncated, see
|
|
198
|
-
# {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription}
|
|
199
|
-
# for more details.
|
|
200
|
-
#
|
|
201
|
-
# Configuration of the transcription model.
|
|
202
|
-
#
|
|
203
|
-
# @param language [String] The language of the input audio. Supplying the input language in
|
|
204
|
-
#
|
|
205
|
-
# @param model [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model] The model to use for transcription. Can be `gpt-4o-transcribe`, `gpt-4o-mini-tra
|
|
206
|
-
#
|
|
207
|
-
# @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
|
|
208
|
-
|
|
209
|
-
# The model to use for transcription. Can be `gpt-4o-transcribe`,
|
|
210
|
-
# `gpt-4o-mini-transcribe`, or `whisper-1`.
|
|
211
|
-
#
|
|
212
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription#model
|
|
213
|
-
module Model
|
|
214
|
-
extend OpenAI::Internal::Type::Enum
|
|
215
|
-
|
|
216
|
-
GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
|
|
217
|
-
GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
|
|
218
|
-
WHISPER_1 = :"whisper-1"
|
|
219
|
-
|
|
220
|
-
# @!method self.values
|
|
221
|
-
# @return [Array<Symbol>]
|
|
222
|
-
end
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
# @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#turn_detection
|
|
226
|
-
class TurnDetection < OpenAI::Internal::Type::BaseModel
|
|
227
|
-
# @!attribute prefix_padding_ms
|
|
228
|
-
#
|
|
229
|
-
# @return [Integer, nil]
|
|
230
|
-
optional :prefix_padding_ms, Integer
|
|
231
|
-
|
|
232
|
-
# @!attribute silence_duration_ms
|
|
233
|
-
#
|
|
234
|
-
# @return [Integer, nil]
|
|
235
|
-
optional :silence_duration_ms, Integer
|
|
236
|
-
|
|
237
|
-
# @!attribute threshold
|
|
238
|
-
#
|
|
239
|
-
# @return [Float, nil]
|
|
240
|
-
optional :threshold, Float
|
|
241
|
-
|
|
242
|
-
# @!attribute type
|
|
243
|
-
# Type of turn detection, only `server_vad` is currently supported.
|
|
244
|
-
#
|
|
245
|
-
# @return [String, nil]
|
|
246
|
-
optional :type, String
|
|
247
|
-
|
|
248
|
-
# @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
|
|
249
|
-
# Some parameter documentations has been truncated, see
|
|
250
|
-
# {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection}
|
|
251
|
-
# for more details.
|
|
252
|
-
#
|
|
253
|
-
# Configuration for turn detection.
|
|
254
|
-
#
|
|
255
|
-
# @param prefix_padding_ms [Integer]
|
|
256
|
-
#
|
|
257
|
-
# @param silence_duration_ms [Integer]
|
|
258
|
-
#
|
|
259
|
-
# @param threshold [Float]
|
|
260
|
-
#
|
|
261
|
-
# @param type [String] Type of turn detection, only `server_vad` is currently supported.
|
|
262
|
-
end
|
|
263
|
-
end
|
|
264
|
-
end
|
|
265
|
-
|
|
266
|
-
module Include
|
|
267
|
-
extend OpenAI::Internal::Type::Enum
|
|
268
|
-
|
|
269
|
-
ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs"
|
|
270
|
-
|
|
271
|
-
# @!method self.values
|
|
272
|
-
# @return [Array<Symbol>]
|
|
273
|
-
end
|
|
274
|
-
end
|
|
275
40
|
end
|
|
276
41
|
end
|
|
277
42
|
end
|
|
@@ -7,8 +7,8 @@ module OpenAI
|
|
|
7
7
|
# @!attribute session
|
|
8
8
|
# Realtime transcription session object configuration.
|
|
9
9
|
#
|
|
10
|
-
# @return [OpenAI::Models::Realtime::
|
|
11
|
-
required :session, -> { OpenAI::Realtime::
|
|
10
|
+
# @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session]
|
|
11
|
+
required :session, -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session }
|
|
12
12
|
|
|
13
13
|
# @!attribute type
|
|
14
14
|
# The event type, must be `transcription_session.update`.
|
|
@@ -25,11 +25,187 @@ module OpenAI
|
|
|
25
25
|
# @!method initialize(session:, event_id: nil, type: :"transcription_session.update")
|
|
26
26
|
# Send this event to update a transcription session.
|
|
27
27
|
#
|
|
28
|
-
# @param session [OpenAI::Models::Realtime::
|
|
28
|
+
# @param session [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session] Realtime transcription session object configuration.
|
|
29
29
|
#
|
|
30
30
|
# @param event_id [String] Optional client-generated ID used to identify this event.
|
|
31
31
|
#
|
|
32
32
|
# @param type [Symbol, :"transcription_session.update"] The event type, must be `transcription_session.update`.
|
|
33
|
+
|
|
34
|
+
# @see OpenAI::Models::Realtime::TranscriptionSessionUpdate#session
|
|
35
|
+
class Session < OpenAI::Internal::Type::BaseModel
|
|
36
|
+
# @!attribute include
|
|
37
|
+
# The set of items to include in the transcription. Current available items are:
|
|
38
|
+
# `item.input_audio_transcription.logprobs`
|
|
39
|
+
#
|
|
40
|
+
# @return [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::Include>, nil]
|
|
41
|
+
optional :include,
|
|
42
|
+
-> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include] }
|
|
43
|
+
|
|
44
|
+
# @!attribute input_audio_format
|
|
45
|
+
# The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
|
|
46
|
+
# `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
|
|
47
|
+
# (mono), and little-endian byte order.
|
|
48
|
+
#
|
|
49
|
+
# @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat, nil]
|
|
50
|
+
optional :input_audio_format,
|
|
51
|
+
enum: -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat }
|
|
52
|
+
|
|
53
|
+
# @!attribute input_audio_noise_reduction
|
|
54
|
+
# Configuration for input audio noise reduction. This can be set to `null` to turn
|
|
55
|
+
# off. Noise reduction filters audio added to the input audio buffer before it is
|
|
56
|
+
# sent to VAD and the model. Filtering the audio can improve VAD and turn
|
|
57
|
+
# detection accuracy (reducing false positives) and model performance by improving
|
|
58
|
+
# perception of the input audio.
|
|
59
|
+
#
|
|
60
|
+
# @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, nil]
|
|
61
|
+
optional :input_audio_noise_reduction,
|
|
62
|
+
-> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction }
|
|
63
|
+
|
|
64
|
+
# @!attribute input_audio_transcription
|
|
65
|
+
# Configuration for input audio transcription. The client can optionally set the
|
|
66
|
+
# language and prompt for transcription, these offer additional guidance to the
|
|
67
|
+
# transcription service.
|
|
68
|
+
#
|
|
69
|
+
# @return [OpenAI::Models::Realtime::AudioTranscription, nil]
|
|
70
|
+
optional :input_audio_transcription, -> { OpenAI::Realtime::AudioTranscription }
|
|
71
|
+
|
|
72
|
+
# @!attribute turn_detection
|
|
73
|
+
# Configuration for turn detection. Can be set to `null` to turn off. Server VAD
|
|
74
|
+
# means that the model will detect the start and end of speech based on audio
|
|
75
|
+
# volume and respond at the end of user speech.
|
|
76
|
+
#
|
|
77
|
+
# @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection, nil]
|
|
78
|
+
optional :turn_detection, -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection }
|
|
79
|
+
|
|
80
|
+
# @!method initialize(include: nil, input_audio_format: nil, input_audio_noise_reduction: nil, input_audio_transcription: nil, turn_detection: nil)
|
|
81
|
+
# Some parameter documentations has been truncated, see
|
|
82
|
+
# {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session} for more
|
|
83
|
+
# details.
|
|
84
|
+
#
|
|
85
|
+
# Realtime transcription session object configuration.
|
|
86
|
+
#
|
|
87
|
+
# @param include [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::Include>] The set of items to include in the transcription. Current available items are:
|
|
88
|
+
#
|
|
89
|
+
# @param input_audio_format [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
|
|
90
|
+
#
|
|
91
|
+
# @param input_audio_noise_reduction [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn
|
|
92
|
+
#
|
|
93
|
+
# @param input_audio_transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription. The client can optionally set the l
|
|
94
|
+
#
|
|
95
|
+
# @param turn_detection [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server VAD m
|
|
96
|
+
|
|
97
|
+
module Include
|
|
98
|
+
extend OpenAI::Internal::Type::Enum
|
|
99
|
+
|
|
100
|
+
ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs"
|
|
101
|
+
|
|
102
|
+
# @!method self.values
|
|
103
|
+
# @return [Array<Symbol>]
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
|
|
107
|
+
# `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
|
|
108
|
+
# (mono), and little-endian byte order.
|
|
109
|
+
#
|
|
110
|
+
# @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#input_audio_format
|
|
111
|
+
module InputAudioFormat
|
|
112
|
+
extend OpenAI::Internal::Type::Enum
|
|
113
|
+
|
|
114
|
+
PCM16 = :pcm16
|
|
115
|
+
G711_ULAW = :g711_ulaw
|
|
116
|
+
G711_ALAW = :g711_alaw
|
|
117
|
+
|
|
118
|
+
# @!method self.values
|
|
119
|
+
# @return [Array<Symbol>]
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#input_audio_noise_reduction
class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel
  # @!attribute type
  #   Type of noise reduction. `near_field` is for close-talking microphones such as
  #   headphones, `far_field` is for far-field microphones such as laptop or
  #   conference room microphones.
  #
  #   @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil]
  optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType }

  # @!method initialize(type: nil)
  #   Some parameter documentations has been truncated, see
  #   {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction}
  #   for more details.
  #
  #   Configuration for input audio noise reduction. This can be set to `null` to turn
  #   off. Noise reduction filters audio added to the input audio buffer before it is
  #   sent to VAD and the model. Filtering the audio can improve VAD and turn
  #   detection accuracy (reducing false positives) and model performance by improving
  #   perception of the input audio.
  #
  #   @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones.
end
# @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#turn_detection
class TurnDetection < OpenAI::Internal::Type::BaseModel
  # @!attribute prefix_padding_ms
  #   Amount of audio to include before the VAD detected speech (in milliseconds).
  #   Defaults to 300ms.
  #
  #   @return [Integer, nil]
  optional :prefix_padding_ms, Integer

  # @!attribute silence_duration_ms
  #   Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
  #   With shorter values the model will respond more quickly, but may jump in on
  #   short pauses from the user.
  #
  #   @return [Integer, nil]
  optional :silence_duration_ms, Integer

  # @!attribute threshold
  #   Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
  #   threshold will require louder audio to activate the model, and thus might
  #   perform better in noisy environments.
  #
  #   @return [Float, nil]
  optional :threshold, Float

  # @!attribute type
  #   Type of turn detection. Only `server_vad` is currently supported for
  #   transcription sessions.
  #
  #   @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type, nil]
  optional :type, enum: -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type }

  # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
  #   Some parameter documentations has been truncated, see
  #   {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection}
  #   for more details.
  #
  #   Configuration for turn detection. Can be set to `null` to turn off. Server VAD
  #   means that the model will detect the start and end of speech based on audio
  #   volume and respond at the end of user speech.
  #
  #   @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in milliseconds). Defaults to 300ms.
  #
  #   @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
  #
  #   @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5.
  #
  #   @param type [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type] Type of turn detection. Only `server_vad` is currently supported for transcription sessions.

  # Type of turn detection. Only `server_vad` is currently supported for
  # transcription sessions.
  #
  # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection#type
  module Type
    extend OpenAI::Internal::Type::Enum

    SERVER_VAD = :server_vad

    # @!method self.values
    #   @return [Array<Symbol>]
  end
end
end
|
|
33
209
|
end
|
|
34
210
|
end
|
|
35
211
|
end
|