openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute format_
8
+ # The format of the output audio.
9
+ #
10
+ # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil]
11
+ optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format
12
+
13
+ # @!attribute speed
14
+ # The speed of the model's spoken response as a multiple of the original speed.
15
+ # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
16
+ # This value can only be changed in between model turns, not while a response is
17
+ # in progress.
18
+ #
19
+ # This parameter is a post-processing adjustment to the audio after it is
20
+ # generated, it's also possible to prompt the model to speak faster or slower.
21
+ #
22
+ # @return [Float, nil]
23
+ optional :speed, Float
24
+
25
+ # @!attribute voice
26
+ # The voice the model uses to respond. Voice cannot be changed during the session
27
+ # once the model has responded with audio at least once. Current voice options are
28
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
29
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
30
+ #
31
+ # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice, nil]
32
+ optional :voice, union: -> { OpenAI::Realtime::RealtimeAudioConfigOutput::Voice }
33
+
34
+ # @!method initialize(format_: nil, speed: nil, voice: nil)
35
+ # Some parameter documentation has been truncated; see
36
+ # {OpenAI::Models::Realtime::RealtimeAudioConfigOutput} for more details.
37
+ #
38
+ # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the output audio.
39
+ #
40
+ # @param speed [Float] The speed of the model's spoken response as a multiple of the original speed.
41
+ #
42
+ # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice] The voice the model uses to respond. Voice cannot be changed during the
43
+
44
+ # The voice the model uses to respond. Voice cannot be changed during the session
45
+ # once the model has responded with audio at least once. Current voice options are
46
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
47
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
48
+ #
49
+ # @see OpenAI::Models::Realtime::RealtimeAudioConfigOutput#voice
50
+ module Voice
51
+ extend OpenAI::Internal::Type::Union
52
+
53
+ variant String
54
+
55
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ALLOY }
56
+
57
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ASH }
58
+
59
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::BALLAD }
60
+
61
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::CORAL }
62
+
63
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ECHO }
64
+
65
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::SAGE }
66
+
67
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::SHIMMER }
68
+
69
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::VERSE }
70
+
71
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::MARIN }
72
+
73
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::CEDAR }
74
+
75
+ # @!method self.variants
76
+ # @return [Array(String, Symbol)]
77
+
78
+ define_sorbet_constant!(:Variants) do
79
+ T.type_alias { T.any(String, OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol) }
80
+ end
81
+
82
+ # @!group
83
+
84
+ ALLOY = :alloy
85
+ ASH = :ash
86
+ BALLAD = :ballad
87
+ CORAL = :coral
88
+ ECHO = :echo
89
+ SAGE = :sage
90
+ SHIMMER = :shimmer
91
+ VERSE = :verse
92
+ MARIN = :marin
93
+ CEDAR = :cedar
94
+
95
+ # @!endgroup
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ # The PCM audio format. Only a 24kHz sample rate is supported.
7
+ module RealtimeAudioFormats
8
+ extend OpenAI::Internal::Type::Union
9
+
10
+ discriminator :type
11
+
12
+ # The PCM audio format. Only a 24kHz sample rate is supported.
13
+ variant :"audio/pcm", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM }
14
+
15
+ # The G.711 μ-law format.
16
+ variant :"audio/pcmu", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU }
17
+
18
+ # The G.711 A-law format.
19
+ variant :"audio/pcma", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA }
20
+
21
+ class AudioPCM < OpenAI::Internal::Type::BaseModel
22
+ # @!attribute rate
23
+ # The sample rate of the audio. Always `24000`.
24
+ #
25
+ # @return [Integer, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Rate, nil]
26
+ optional :rate, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate }
27
+
28
+ # @!attribute type
29
+ # The audio format. Always `audio/pcm`.
30
+ #
31
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Type, nil]
32
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type }
33
+
34
+ # @!method initialize(rate: nil, type: nil)
35
+ # The PCM audio format. Only a 24kHz sample rate is supported.
36
+ #
37
+ # @param rate [Integer, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Rate] The sample rate of the audio. Always `24000`.
38
+ #
39
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Type] The audio format. Always `audio/pcm`.
40
+
41
+ # The sample rate of the audio. Always `24000`.
42
+ #
43
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM#rate
44
+ module Rate
45
+ extend OpenAI::Internal::Type::Enum
46
+
47
+ RATE_24000 = 24_000
48
+
49
+ # @!method self.values
50
+ # @return [Array<Integer>]
51
+ end
52
+
53
+ # The audio format. Always `audio/pcm`.
54
+ #
55
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM#type
56
+ module Type
57
+ extend OpenAI::Internal::Type::Enum
58
+
59
+ AUDIO_PCM = :"audio/pcm"
60
+
61
+ # @!method self.values
62
+ # @return [Array<Symbol>]
63
+ end
64
+ end
65
+
66
+ class AudioPCMU < OpenAI::Internal::Type::BaseModel
67
+ # @!attribute type
68
+ # The audio format. Always `audio/pcmu`.
69
+ #
70
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::Type, nil]
71
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type }
72
+
73
+ # @!method initialize(type: nil)
74
+ # The G.711 μ-law format.
75
+ #
76
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::Type] The audio format. Always `audio/pcmu`.
77
+
78
+ # The audio format. Always `audio/pcmu`.
79
+ #
80
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU#type
81
+ module Type
82
+ extend OpenAI::Internal::Type::Enum
83
+
84
+ AUDIO_PCMU = :"audio/pcmu"
85
+
86
+ # @!method self.values
87
+ # @return [Array<Symbol>]
88
+ end
89
+ end
90
+
91
+ class AudioPCMA < OpenAI::Internal::Type::BaseModel
92
+ # @!attribute type
93
+ # The audio format. Always `audio/pcma`.
94
+ #
95
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::Type, nil]
96
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type }
97
+
98
+ # @!method initialize(type: nil)
99
+ # The G.711 A-law format.
100
+ #
101
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::Type] The audio format. Always `audio/pcma`.
102
+
103
+ # The audio format. Always `audio/pcma`.
104
+ #
105
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA#type
106
+ module Type
107
+ extend OpenAI::Internal::Type::Enum
108
+
109
+ AUDIO_PCMA = :"audio/pcma"
110
+
111
+ # @!method self.values
112
+ # @return [Array<Symbol>]
113
+ end
114
+ end
115
+
116
+ # @!method self.variants
117
+ # @return [Array(OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA)]
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute create_response
8
+ # Whether or not to automatically generate a response when a VAD stop event
9
+ # occurs.
10
+ #
11
+ # @return [Boolean, nil]
12
+ optional :create_response, OpenAI::Internal::Type::Boolean
13
+
14
+ # @!attribute eagerness
15
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
16
+ # will wait longer for the user to continue speaking, `high` will respond more
17
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
18
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
19
+ #
20
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness, nil]
21
+ optional :eagerness, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness }
22
+
23
+ # @!attribute idle_timeout_ms
24
+ # Optional idle timeout after which turn detection will auto-timeout when no
25
+ # additional audio is received.
26
+ #
27
+ # @return [Integer, nil]
28
+ optional :idle_timeout_ms, Integer, nil?: true
29
+
30
+ # @!attribute interrupt_response
31
+ # Whether or not to automatically interrupt any ongoing response with output to
32
+ # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
33
+ # occurs.
34
+ #
35
+ # @return [Boolean, nil]
36
+ optional :interrupt_response, OpenAI::Internal::Type::Boolean
37
+
38
+ # @!attribute prefix_padding_ms
39
+ # Used only for `server_vad` mode. Amount of audio to include before the VAD
40
+ # detected speech (in milliseconds). Defaults to 300ms.
41
+ #
42
+ # @return [Integer, nil]
43
+ optional :prefix_padding_ms, Integer
44
+
45
+ # @!attribute silence_duration_ms
46
+ # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
47
+ # milliseconds). Defaults to 500ms. With shorter values the model will respond
48
+ # more quickly, but may jump in on short pauses from the user.
49
+ #
50
+ # @return [Integer, nil]
51
+ optional :silence_duration_ms, Integer
52
+
53
+ # @!attribute threshold
54
+ # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
55
+ # defaults to 0.5. A higher threshold will require louder audio to activate the
56
+ # model, and thus might perform better in noisy environments.
57
+ #
58
+ # @return [Float, nil]
59
+ optional :threshold, Float
60
+
61
+ # @!attribute type
62
+ # Type of turn detection.
63
+ #
64
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type, nil]
65
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type }
66
+
67
+ # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
68
+ # Some parameter documentation has been truncated; see
69
+ # {OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection} for more details.
70
+ #
71
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
72
+ # set to `null` to turn off, in which case the client must manually trigger model
73
+ # response. Server VAD means that the model will detect the start and end of
74
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
75
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
76
+ # semantically estimate whether the user has finished speaking, then dynamically
77
+ # sets a timeout based on this probability. For example, if user audio trails off
78
+ # with "uhhm", the model will score a low probability of turn end and wait longer
79
+ # for the user to continue speaking. This can be useful for more natural
80
+ # conversations, but may have a higher latency.
81
+ #
82
+ # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs
83
+ #
84
+ # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
85
+ #
86
+ # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when
87
+ #
88
+ # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th
89
+ #
90
+ # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec
91
+ #
92
+ # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m
93
+ #
94
+ # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
95
+ #
96
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type] Type of turn detection.
97
+
98
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
99
+ # will wait longer for the user to continue speaking, `high` will respond more
100
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
101
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
102
+ #
103
+ # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#eagerness
104
+ module Eagerness
105
+ extend OpenAI::Internal::Type::Enum
106
+
107
+ LOW = :low
108
+ MEDIUM = :medium
109
+ HIGH = :high
110
+ AUTO = :auto
111
+
112
+ # @!method self.values
113
+ # @return [Array<Symbol>]
114
+ end
115
+
116
+ # Type of turn detection.
117
+ #
118
+ # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#type
119
+ module Type
120
+ extend OpenAI::Internal::Type::Enum
121
+
122
+ SERVER_VAD = :server_vad
123
+ SEMANTIC_VAD = :semantic_vad
124
+
125
+ # @!method self.values
126
+ # @return [Array<Symbol>]
127
+ end
128
+ end
129
+ end
130
+ end
131
+ end
@@ -44,14 +44,17 @@ module OpenAI
44
44
  variant :"conversation.item.truncate", -> { OpenAI::Realtime::ConversationItemTruncateEvent }
45
45
 
46
46
  # Send this event to append audio bytes to the input audio buffer. The audio
47
- # buffer is temporary storage you can write to and later commit. In Server VAD
48
- # mode, the audio buffer is used to detect speech and the server will decide
47
+ # buffer is temporary storage you can write to and later commit. A "commit" will create a new
48
+ # user message item in the conversation history from the buffer content and clear the buffer.
49
+ # Input audio transcription (if enabled) will be generated when the buffer is committed.
50
+ #
51
+ # If VAD is enabled the audio buffer is used to detect speech and the server will decide
49
52
  # when to commit. When Server VAD is disabled, you must commit the audio buffer
50
- # manually.
53
+ # manually. Input audio noise reduction operates on writes to the audio buffer.
51
54
  #
52
55
  # The client may choose how much audio to place in each event up to a maximum
53
56
  # of 15 MiB, for example streaming smaller chunks from the client may allow the
54
- # VAD to be more responsive. Unlike made other client events, the server will
57
+ # VAD to be more responsive. Unlike most other client events, the server will
55
58
  # not send a confirmation response to this event.
56
59
  variant :"input_audio_buffer.append", -> { OpenAI::Realtime::InputAudioBufferAppendEvent }
57
60
 
@@ -66,21 +69,16 @@ module OpenAI
66
69
  # [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
67
70
  variant :"output_audio_buffer.clear", -> { OpenAI::Realtime::OutputAudioBufferClearEvent }
68
71
 
69
- # Send this event to commit the user input audio buffer, which will create a
70
- # new user message item in the conversation. This event will produce an error
71
- # if the input audio buffer is empty. When in Server VAD mode, the client does
72
- # not need to send this event, the server will commit the audio buffer
73
- # automatically.
72
+ # Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.
74
73
  #
75
- # Committing the input audio buffer will trigger input audio transcription
76
- # (if enabled in session configuration), but it will not create a response
77
- # from the model. The server will respond with an `input_audio_buffer.committed`
78
- # event.
74
+ # Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
79
75
  variant :"input_audio_buffer.commit", -> { OpenAI::Realtime::InputAudioBufferCommitEvent }
80
76
 
81
77
  # Send this event to cancel an in-progress response. The server will respond
82
78
  # with a `response.done` event with a status of `response.status=cancelled`. If
83
- # there is no response to cancel, the server will respond with an error.
79
+ # there is no response to cancel, the server will respond with an error. It's safe
80
+ # to call `response.cancel` even if no response is in progress, an error will be
81
+ returned and the session will remain unaffected.
84
82
  variant :"response.cancel", -> { OpenAI::Realtime::ResponseCancelEvent }
85
83
 
86
84
  # This event instructs the server to create a Response, which means triggering
@@ -89,27 +87,37 @@ module OpenAI
89
87
  #
90
88
  # A Response will include at least one Item, and may have two, in which case
91
89
  # the second will be a function call. These Items will be appended to the
92
- # conversation history.
90
+ # conversation history by default.
93
91
  #
94
92
  # The server will respond with a `response.created` event, events for Items
95
93
  # and content created, and finally a `response.done` event to indicate the
96
94
  # Response is complete.
97
95
  #
98
96
  # The `response.create` event includes inference configuration like
99
- # `instructions`, and `temperature`. These fields will override the Session's
97
+ # `instructions` and `tools`. If these are set, they will override the Session's
100
98
  # configuration for this Response only.
99
+ #
100
+ # Responses can be created out-of-band of the default Conversation, meaning that they can
101
+ # have arbitrary input, and it's possible to disable writing the output to the Conversation.
102
+ # Only one Response can write to the default Conversation at a time, but otherwise multiple
103
+ # Responses can be created in parallel. The `metadata` field is a good way to disambiguate
104
+ # multiple simultaneous Responses.
105
+ #
106
+ # Clients can set `conversation` to `none` to create a Response that does not write to the default
107
+ # Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
108
+ # raw Items and references to existing Items.
101
109
  variant :"response.create", -> { OpenAI::Realtime::ResponseCreateEvent }
102
110
 
103
- # Send this event to update the session’s default configuration.
104
- # The client may send this event at any time to update any field,
105
- # except for `voice`. However, note that once a session has been
106
- # initialized with a particular `model`, it can’t be changed to
107
- # another model using `session.update`.
111
+ # Send this event to update the session’s configuration.
112
+ # The client may send this event at any time to update any field
113
+ # except for `voice` and `model`. `voice` can be updated only if there have been no other
114
+ # audio outputs yet.
108
115
  #
109
116
  # When the server receives a `session.update`, it will respond
110
117
  # with a `session.updated` event showing the full, effective configuration.
111
- # Only the fields that are present are updated. To clear a field like
112
- # `instructions`, pass an empty string.
118
+ # Only the fields that are present in the `session.update` are updated. To clear a field like
119
+ # `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
120
+ # To clear a field like `turn_detection`, pass `null`.
113
121
  variant :"session.update", -> { OpenAI::Realtime::SessionUpdateEvent }
114
122
 
115
123
  # Send this event to update a transcription session.
@@ -24,13 +24,15 @@ module OpenAI
24
24
  required :type, const: :message
25
25
 
26
26
  # @!attribute id
27
- # The unique ID of the item.
27
+ # The unique ID of the item. This may be provided by the client or generated by
28
+ # the server.
28
29
  #
29
30
  # @return [String, nil]
30
31
  optional :id, String
31
32
 
32
33
  # @!attribute object
33
- # Identifier for the API object being returned - always `realtime.item`.
34
+ # Identifier for the API object being returned - always `realtime.item`. Optional
35
+ # when creating a new item.
34
36
  #
35
37
  # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object, nil]
36
38
  optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Object }
@@ -42,13 +44,17 @@ module OpenAI
42
44
  optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Status }
43
45
 
44
46
  # @!method initialize(content:, id: nil, object: nil, status: nil, role: :assistant, type: :message)
47
+ # Some parameter documentations has been truncated, see
48
+ # {OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage} for more
49
+ # details.
50
+ #
45
51
  # An assistant message item in a Realtime conversation.
46
52
  #
47
53
  # @param content [Array<OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content>] The content of the message.
48
54
  #
49
- # @param id [String] The unique ID of the item.
55
+ # @param id [String] The unique ID of the item. This may be provided by the client or generated by th
50
56
  #
51
- # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object] Identifier for the API object being returned - always `realtime.item`.
57
+ # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object] Identifier for the API object being returned - always `realtime.item`. Optional
52
58
  #
53
59
  # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Status] The status of the item. Has no effect on the conversation.
54
60
  #
@@ -57,37 +63,64 @@ module OpenAI
57
63
  # @param type [Symbol, :message] The type of the item. Always `message`.
58
64
 
59
65
  class Content < OpenAI::Internal::Type::BaseModel
66
+ # @!attribute audio
67
+ # Base64-encoded audio bytes, these will be parsed as the format specified in the
68
+ # session output audio type configuration. This defaults to PCM 16-bit 24kHz mono
69
+ # if not specified.
70
+ #
71
+ # @return [String, nil]
72
+ optional :audio, String
73
+
60
74
  # @!attribute text
61
75
  # The text content.
62
76
  #
63
77
  # @return [String, nil]
64
78
  optional :text, String
65
79
 
80
+ # @!attribute transcript
81
+ # The transcript of the audio content, this will always be present if the output
82
+ # type is `audio`.
83
+ #
84
+ # @return [String, nil]
85
+ optional :transcript, String
86
+
66
87
  # @!attribute type
67
- # The content type. Always `text` for assistant messages.
88
+ # The content type, `output_text` or `output_audio` depending on the session
89
+ # `output_modalities` configuration.
68
90
  #
69
91
  # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type, nil]
70
92
  optional :type, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type }
71
93
 
72
- # @!method initialize(text: nil, type: nil)
94
+ # @!method initialize(audio: nil, text: nil, transcript: nil, type: nil)
95
+ # Some parameter documentations has been truncated, see
96
+ # {OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content}
97
+ # for more details.
98
+ #
99
+ # @param audio [String] Base64-encoded audio bytes, these will be parsed as the format specified in the
100
+ #
73
101
  # @param text [String] The text content.
74
102
  #
75
- # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type] The content type. Always `text` for assistant messages.
103
+ # @param transcript [String] The transcript of the audio content, this will always be present if the output t
104
+ #
105
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type] The content type, `output_text` or `output_audio` depending on the session `outp
76
106
 
77
- # The content type. Always `text` for assistant messages.
107
+ # The content type, `output_text` or `output_audio` depending on the session
108
+ # `output_modalities` configuration.
78
109
  #
79
110
  # @see OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content#type
80
111
  module Type
81
112
  extend OpenAI::Internal::Type::Enum
82
113
 
83
- TEXT = :text
114
+ OUTPUT_TEXT = :output_text
115
+ OUTPUT_AUDIO = :output_audio
84
116
 
85
117
  # @!method self.values
86
118
  # @return [Array<Symbol>]
87
119
  end
88
120
  end
89
121
 
90
- # Identifier for the API object being returned - always `realtime.item`.
122
+ # Identifier for the API object being returned - always `realtime.item`. Optional
123
+ # when creating a new item.
91
124
  #
92
125
  # @see OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage#object
93
126
  module Object
@@ -5,7 +5,9 @@ module OpenAI
5
5
  module Realtime
6
6
  class RealtimeConversationItemFunctionCall < OpenAI::Internal::Type::BaseModel
7
7
  # @!attribute arguments
8
- # The arguments of the function call.
8
+ # The arguments of the function call. This is a JSON-encoded string representing
9
+ # the arguments passed to the function, for example
10
+ # `{"arg1": "value1", "arg2": 42}`.
9
11
  #
10
12
  # @return [String]
11
13
  required :arguments, String
@@ -23,7 +25,8 @@ module OpenAI
23
25
  required :type, const: :function_call
24
26
 
25
27
  # @!attribute id
26
- # The unique ID of the item.
28
+ # The unique ID of the item. This may be provided by the client or generated by
29
+ # the server.
27
30
  #
28
31
  # @return [String, nil]
29
32
  optional :id, String
@@ -35,7 +38,8 @@ module OpenAI
35
38
  optional :call_id, String
36
39
 
37
40
  # @!attribute object
38
- # Identifier for the API object being returned - always `realtime.item`.
41
+ # Identifier for the API object being returned - always `realtime.item`. Optional
42
+ # when creating a new item.
39
43
  #
40
44
  # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object, nil]
41
45
  optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCall::Object }
@@ -47,23 +51,28 @@ module OpenAI
47
51
  optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCall::Status }
48
52
 
49
53
  # @!method initialize(arguments:, name:, id: nil, call_id: nil, object: nil, status: nil, type: :function_call)
54
+ # Some parameter documentations has been truncated, see
55
+ # {OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall} for more
56
+ # details.
57
+ #
50
58
  # A function call item in a Realtime conversation.
51
59
  #
52
- # @param arguments [String] The arguments of the function call.
60
+ # @param arguments [String] The arguments of the function call. This is a JSON-encoded string representing t
53
61
  #
54
62
  # @param name [String] The name of the function being called.
55
63
  #
56
- # @param id [String] The unique ID of the item.
64
+ # @param id [String] The unique ID of the item. This may be provided by the client or generated by th
57
65
  #
58
66
  # @param call_id [String] The ID of the function call.
59
67
  #
60
- # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object] Identifier for the API object being returned - always `realtime.item`.
68
+ # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object] Identifier for the API object being returned - always `realtime.item`. Optional
61
69
  #
62
70
  # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Status] The status of the item. Has no effect on the conversation.
63
71
  #
64
72
  # @param type [Symbol, :function_call] The type of the item. Always `function_call`.
65
73
 
66
- # Identifier for the API object being returned - always `realtime.item`.
74
+ # Identifier for the API object being returned - always `realtime.item`. Optional
75
+ # when creating a new item.
67
76
  #
68
77
  # @see OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall#object
69
78
  module Object