openai 0.30.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +28 -0
  3. data/README.md +1 -1
  4. data/lib/openai/internal/util.rb +5 -5
  5. data/lib/openai/models/audio/transcription_create_params.rb +42 -11
  6. data/lib/openai/models/audio/transcription_create_response.rb +4 -1
  7. data/lib/openai/models/audio/transcription_diarized.rb +160 -0
  8. data/lib/openai/models/audio/transcription_diarized_segment.rb +65 -0
  9. data/lib/openai/models/audio/transcription_stream_event.rb +7 -4
  10. data/lib/openai/models/audio/transcription_text_delta_event.rb +10 -1
  11. data/lib/openai/models/audio/transcription_text_segment_event.rb +63 -0
  12. data/lib/openai/models/audio_model.rb +1 -0
  13. data/lib/openai/models/audio_response_format.rb +5 -2
  14. data/lib/openai/models/beta/assistant_create_params.rb +3 -0
  15. data/lib/openai/models/beta/assistant_update_params.rb +3 -0
  16. data/lib/openai/models/beta/threads/run_create_params.rb +3 -0
  17. data/lib/openai/models/chat/completion_create_params.rb +3 -0
  18. data/lib/openai/models/comparison_filter.rb +29 -6
  19. data/lib/openai/models/evals/create_eval_completions_run_data_source.rb +3 -0
  20. data/lib/openai/models/evals/run_cancel_response.rb +6 -0
  21. data/lib/openai/models/evals/run_create_params.rb +6 -0
  22. data/lib/openai/models/evals/run_create_response.rb +6 -0
  23. data/lib/openai/models/evals/run_list_response.rb +6 -0
  24. data/lib/openai/models/evals/run_retrieve_response.rb +6 -0
  25. data/lib/openai/models/graders/score_model_grader.rb +3 -0
  26. data/lib/openai/models/realtime/audio_transcription.rb +8 -6
  27. data/lib/openai/models/reasoning.rb +3 -0
  28. data/lib/openai/models/reasoning_effort.rb +3 -0
  29. data/lib/openai/models/vector_store_create_params.rb +10 -1
  30. data/lib/openai/models/vector_stores/vector_store_file.rb +3 -3
  31. data/lib/openai/resources/audio/transcriptions.rb +12 -4
  32. data/lib/openai/resources/files.rb +1 -1
  33. data/lib/openai/resources/vector_stores.rb +3 -1
  34. data/lib/openai/version.rb +1 -1
  35. data/lib/openai.rb +3 -0
  36. data/rbi/openai/models/audio/transcription_create_params.rbi +66 -16
  37. data/rbi/openai/models/audio/transcription_create_response.rbi +1 -0
  38. data/rbi/openai/models/audio/transcription_diarized.rbi +281 -0
  39. data/rbi/openai/models/audio/transcription_diarized_segment.rbi +87 -0
  40. data/rbi/openai/models/audio/transcription_stream_event.rbi +4 -3
  41. data/rbi/openai/models/audio/transcription_text_delta_event.rbi +14 -1
  42. data/rbi/openai/models/audio/transcription_text_segment_event.rbi +86 -0
  43. data/rbi/openai/models/audio_model.rbi +2 -0
  44. data/rbi/openai/models/audio_response_format.rbi +6 -2
  45. data/rbi/openai/models/beta/assistant_create_params.rbi +6 -0
  46. data/rbi/openai/models/beta/assistant_update_params.rbi +6 -0
  47. data/rbi/openai/models/beta/threads/run_create_params.rbi +6 -0
  48. data/rbi/openai/models/chat/completion_create_params.rbi +6 -0
  49. data/rbi/openai/models/comparison_filter.rbi +43 -4
  50. data/rbi/openai/models/eval_create_response.rbi +4 -4
  51. data/rbi/openai/models/eval_list_response.rbi +4 -4
  52. data/rbi/openai/models/eval_retrieve_response.rbi +4 -4
  53. data/rbi/openai/models/eval_update_response.rbi +4 -4
  54. data/rbi/openai/models/evals/create_eval_completions_run_data_source.rbi +6 -0
  55. data/rbi/openai/models/evals/run_cancel_response.rbi +12 -0
  56. data/rbi/openai/models/evals/run_create_params.rbi +12 -0
  57. data/rbi/openai/models/evals/run_create_response.rbi +12 -0
  58. data/rbi/openai/models/evals/run_list_response.rbi +12 -0
  59. data/rbi/openai/models/evals/run_retrieve_response.rbi +12 -0
  60. data/rbi/openai/models/graders/score_model_grader.rbi +6 -0
  61. data/rbi/openai/models/realtime/audio_transcription.rbi +15 -12
  62. data/rbi/openai/models/reasoning.rbi +6 -0
  63. data/rbi/openai/models/reasoning_effort.rbi +3 -0
  64. data/rbi/openai/models/vector_store_create_params.rbi +13 -0
  65. data/rbi/openai/models/vector_stores/vector_store_file.rbi +3 -3
  66. data/rbi/openai/resources/audio/transcriptions.rbi +52 -14
  67. data/rbi/openai/resources/beta/assistants.rbi +6 -0
  68. data/rbi/openai/resources/beta/threads/runs.rbi +6 -0
  69. data/rbi/openai/resources/chat/completions.rbi +6 -0
  70. data/rbi/openai/resources/files.rbi +1 -1
  71. data/rbi/openai/resources/vector_stores.rbi +4 -0
  72. data/sig/openai/models/audio/transcription_create_params.rbs +14 -0
  73. data/sig/openai/models/audio/transcription_create_response.rbs +3 -1
  74. data/sig/openai/models/audio/transcription_diarized.rbs +129 -0
  75. data/sig/openai/models/audio/transcription_diarized_segment.rbs +47 -0
  76. data/sig/openai/models/audio/transcription_stream_event.rbs +2 -1
  77. data/sig/openai/models/audio/transcription_text_delta_event.rbs +9 -2
  78. data/sig/openai/models/audio/transcription_text_segment_event.rbs +47 -0
  79. data/sig/openai/models/audio_model.rbs +5 -1
  80. data/sig/openai/models/audio_response_format.rbs +3 -1
  81. data/sig/openai/models/comparison_filter.rbs +15 -1
  82. data/sig/openai/models/eval_create_response.rbs +2 -2
  83. data/sig/openai/models/eval_list_response.rbs +2 -2
  84. data/sig/openai/models/eval_retrieve_response.rbs +2 -2
  85. data/sig/openai/models/eval_update_response.rbs +2 -2
  86. data/sig/openai/models/realtime/audio_transcription.rbs +2 -2
  87. data/sig/openai/models/vector_store_create_params.rbs +7 -0
  88. data/sig/openai/resources/audio/transcriptions.rbs +4 -0
  89. data/sig/openai/resources/vector_stores.rbs +1 -0
  90. metadata +11 -2
@@ -10,7 +10,8 @@ module OpenAI
10
10
  required :key, String
11
11
 
12
12
  # @!attribute type
13
- # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
13
+ # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`,
14
+ # `nin`.
14
15
  #
15
16
  # - `eq`: equals
16
17
  # - `ne`: not equal
@@ -18,6 +19,8 @@ module OpenAI
18
19
  # - `gte`: greater than or equal
19
20
  # - `lt`: less than
20
21
  # - `lte`: less than or equal
22
+ # - `in`: in
23
+ # - `nin`: not in
21
24
  #
22
25
  # @return [Symbol, OpenAI::Models::ComparisonFilter::Type]
23
26
  required :type, enum: -> { OpenAI::ComparisonFilter::Type }
@@ -26,7 +29,7 @@ module OpenAI
26
29
  # The value to compare against the attribute key; supports string, number, or
27
30
  # boolean types.
28
31
  #
29
- # @return [String, Float, Boolean]
32
+ # @return [String, Float, Boolean, Array<String, Float>]
30
33
  required :value, union: -> { OpenAI::ComparisonFilter::Value }
31
34
 
32
35
  # @!method initialize(key:, type:, value:)
@@ -38,11 +41,12 @@ module OpenAI
38
41
  #
39
42
  # @param key [String] The key to compare against the value.
40
43
  #
41
- # @param type [Symbol, OpenAI::Models::ComparisonFilter::Type] Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
44
+ # @param type [Symbol, OpenAI::Models::ComparisonFilter::Type] Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, `
42
45
  #
43
- # @param value [String, Float, Boolean] The value to compare against the attribute key; supports string, number, or bool
46
+ # @param value [String, Float, Boolean, Array<String, Float>] The value to compare against the attribute key; supports string, number, or bool
44
47
 
45
- # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
48
+ # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`,
49
+ # `nin`.
46
50
  #
47
51
  # - `eq`: equals
48
52
  # - `ne`: not equal
@@ -50,6 +54,8 @@ module OpenAI
50
54
  # - `gte`: greater than or equal
51
55
  # - `lt`: less than
52
56
  # - `lte`: less than or equal
57
+ # - `in`: in
58
+ # - `nin`: not in
53
59
  #
54
60
  # @see OpenAI::Models::ComparisonFilter#type
55
61
  module Type
@@ -79,8 +85,25 @@ module OpenAI
79
85
 
80
86
  variant OpenAI::Internal::Type::Boolean
81
87
 
88
+ variant -> { OpenAI::Models::ComparisonFilter::Value::UnionMember3Array }
89
+
90
+ module UnionMember3
91
+ extend OpenAI::Internal::Type::Union
92
+
93
+ variant String
94
+
95
+ variant Float
96
+
97
+ # @!method self.variants
98
+ # @return [Array(String, Float)]
99
+ end
100
+
82
101
  # @!method self.variants
83
- # @return [Array(String, Float, Boolean)]
102
+ # @return [Array(String, Float, Boolean, Array<String, Float>)]
103
+
104
+ # @type [OpenAI::Internal::Type::Converter]
105
+ UnionMember3Array =
106
+ OpenAI::Internal::Type::ArrayOf[union: -> { OpenAI::ComparisonFilter::Value::UnionMember3 }]
84
107
  end
85
108
  end
86
109
  end
@@ -466,6 +466,9 @@ module OpenAI
466
466
  # effort can result in faster responses and fewer tokens used on reasoning in a
467
467
  # response.
468
468
  #
469
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
470
+ # effort.
471
+ #
469
472
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
470
473
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
471
474
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -661,6 +664,9 @@ module OpenAI
661
664
  # effort can result in faster responses and fewer tokens used on reasoning in a
662
665
  # response.
663
666
  #
667
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
668
+ # effort.
669
+ #
664
670
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
665
671
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
666
672
 
@@ -232,6 +232,9 @@ module OpenAI
232
232
  # effort can result in faster responses and fewer tokens used on reasoning in a
233
233
  # response.
234
234
  #
235
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
236
+ # effort.
237
+ #
235
238
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
236
239
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
237
240
 
@@ -589,6 +592,9 @@ module OpenAI
589
592
  # effort can result in faster responses and fewer tokens used on reasoning in a
590
593
  # response.
591
594
  #
595
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
596
+ # effort.
597
+ #
592
598
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
593
599
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
594
600
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -661,6 +664,9 @@ module OpenAI
661
664
  # effort can result in faster responses and fewer tokens used on reasoning in a
662
665
  # response.
663
666
  #
667
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
668
+ # effort.
669
+ #
664
670
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
665
671
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
666
672
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -661,6 +664,9 @@ module OpenAI
661
664
  # effort can result in faster responses and fewer tokens used on reasoning in a
662
665
  # response.
663
666
  #
667
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
668
+ # effort.
669
+ #
664
670
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
665
671
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
666
672
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -665,6 +668,9 @@ module OpenAI
665
668
  # effort can result in faster responses and fewer tokens used on reasoning in a
666
669
  # response.
667
670
  #
671
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
672
+ # effort.
673
+ #
668
674
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
669
675
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
670
676
 
@@ -226,6 +226,9 @@ module OpenAI
226
226
  # effort can result in faster responses and fewer tokens used on reasoning in a
227
227
  # response.
228
228
  #
229
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
230
+ # effort.
231
+ #
229
232
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
230
233
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
231
234
 
@@ -14,7 +14,8 @@ module OpenAI
14
14
 
15
15
  # @!attribute model
16
16
  # The model to use for transcription. Current options are `whisper-1`,
17
- # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
17
+ # `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`.
18
+ # Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
18
19
  #
19
20
  # @return [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model, nil]
20
21
  optional :model, enum: -> { OpenAI::Realtime::AudioTranscription::Model }
@@ -23,8 +24,8 @@ module OpenAI
23
24
  # An optional text to guide the model's style or continue a previous audio
24
25
  # segment. For `whisper-1`, the
25
26
  # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
26
- # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
27
- # "expect words related to technology".
27
+ # For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the
28
+ # prompt is a free text string, for example "expect words related to technology".
28
29
  #
29
30
  # @return [String, nil]
30
31
  optional :prompt, String
@@ -35,21 +36,22 @@ module OpenAI
35
36
  #
36
37
  # @param language [String] The language of the input audio. Supplying the input language in
37
38
  #
38
- # @param model [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-tra
39
+ # @param model [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-min
39
40
  #
40
41
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio
41
42
 
42
43
  # The model to use for transcription. Current options are `whisper-1`,
43
- # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
44
+ # `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`.
45
+ # Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
44
46
  #
45
47
  # @see OpenAI::Models::Realtime::AudioTranscription#model
46
48
  module Model
47
49
  extend OpenAI::Internal::Type::Enum
48
50
 
49
51
  WHISPER_1 = :"whisper-1"
50
- GPT_4O_TRANSCRIBE_LATEST = :"gpt-4o-transcribe-latest"
51
52
  GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
52
53
  GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
54
+ GPT_4O_TRANSCRIBE_DIARIZE = :"gpt-4o-transcribe-diarize"
53
55
 
54
56
  # @!method self.values
55
57
  # @return [Array<Symbol>]
@@ -10,6 +10,9 @@ module OpenAI
10
10
  # effort can result in faster responses and fewer tokens used on reasoning in a
11
11
  # response.
12
12
  #
13
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
14
+ # effort.
15
+ #
13
16
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
14
17
  optional :effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
15
18
 
@@ -7,6 +7,9 @@ module OpenAI
7
7
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
8
8
  # effort can result in faster responses and fewer tokens used on reasoning in a
9
9
  # response.
10
+ #
11
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
12
+ # effort.
10
13
  module ReasoningEffort
11
14
  extend OpenAI::Internal::Type::Enum
12
15
 
@@ -14,6 +14,13 @@ module OpenAI
14
14
  # @return [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam, nil]
15
15
  optional :chunking_strategy, union: -> { OpenAI::FileChunkingStrategyParam }
16
16
 
17
+ # @!attribute description
18
+ # A description for the vector store. Can be used to describe the vector store's
19
+ # purpose.
20
+ #
21
+ # @return [String, nil]
22
+ optional :description, String
23
+
17
24
  # @!attribute expires_after
18
25
  # The expiration policy for a vector store.
19
26
  #
@@ -45,12 +52,14 @@ module OpenAI
45
52
  # @return [String, nil]
46
53
  optional :name, String
47
54
 
48
- # @!method initialize(chunking_strategy: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
55
+ # @!method initialize(chunking_strategy: nil, description: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
49
56
  # Some parameter documentations has been truncated, see
50
57
  # {OpenAI::Models::VectorStoreCreateParams} for more details.
51
58
  #
52
59
  # @param chunking_strategy [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam] The chunking strategy used to chunk the file(s). If not set, will use the `auto`
53
60
  #
61
+ # @param description [String] A description for the vector store. Can be used to describe the vector store's p
62
+ #
54
63
  # @param expires_after [OpenAI::Models::VectorStoreCreateParams::ExpiresAfter] The expiration policy for a vector store.
55
64
  #
56
65
  # @param file_ids [Array<String>] A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
@@ -101,7 +101,7 @@ module OpenAI
101
101
  # @see OpenAI::Models::VectorStores::VectorStoreFile#last_error
102
102
  class LastError < OpenAI::Internal::Type::BaseModel
103
103
  # @!attribute code
104
- # One of `server_error` or `rate_limit_exceeded`.
104
+ # One of `server_error`, `unsupported_file`, or `invalid_file`.
105
105
  #
106
106
  # @return [Symbol, OpenAI::Models::VectorStores::VectorStoreFile::LastError::Code]
107
107
  required :code, enum: -> { OpenAI::VectorStores::VectorStoreFile::LastError::Code }
@@ -116,11 +116,11 @@ module OpenAI
116
116
  # The last error associated with this vector store file. Will be `null` if there
117
117
  # are no errors.
118
118
  #
119
- # @param code [Symbol, OpenAI::Models::VectorStores::VectorStoreFile::LastError::Code] One of `server_error` or `rate_limit_exceeded`.
119
+ # @param code [Symbol, OpenAI::Models::VectorStores::VectorStoreFile::LastError::Code] One of `server_error`, `unsupported_file`, or `invalid_file`.
120
120
  #
121
121
  # @param message [String] A human-readable description of the error.
122
122
 
123
- # One of `server_error` or `rate_limit_exceeded`.
123
+ # One of `server_error`, `unsupported_file`, or `invalid_file`.
124
124
  #
125
125
  # @see OpenAI::Models::VectorStores::VectorStoreFile::LastError#code
126
126
  module Code
@@ -12,7 +12,7 @@ module OpenAI
12
12
  #
13
13
  # Transcribes audio into the input language.
14
14
  #
15
- # @overload create(file:, model:, chunking_strategy: nil, include: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
15
+ # @overload create(file:, model:, chunking_strategy: nil, include: nil, known_speaker_names: nil, known_speaker_references: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
16
16
  #
17
17
  # @param file [Pathname, StringIO, IO, String, OpenAI::FilePart] The audio file object (not file name) to transcribe, in one of these formats: fl
18
18
  #
@@ -22,6 +22,10 @@ module OpenAI
22
22
  #
23
23
  # @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>] Additional information to include in the transcription response.
24
24
  #
25
+ # @param known_speaker_names [Array<String>] Optional list of speaker names that correspond to the audio samples provided in
26
+ #
27
+ # @param known_speaker_references [Array<String>] Optional list of audio samples (as [data URLs](https://developer.mozilla.org/en-
28
+ #
25
29
  # @param language [String] The language of the input audio. Supplying the input language in [ISO-639-1](htt
26
30
  #
27
31
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
@@ -34,7 +38,7 @@ module OpenAI
34
38
  #
35
39
  # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil]
36
40
  #
37
- # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose]
41
+ # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionDiarized, OpenAI::Models::Audio::TranscriptionVerbose]
38
42
  #
39
43
  # @see OpenAI::Models::Audio::TranscriptionCreateParams
40
44
  def create(params)
@@ -61,7 +65,7 @@ module OpenAI
61
65
  #
62
66
  # Transcribes audio into the input language.
63
67
  #
64
- # @overload create_streaming(file:, model:, chunking_strategy: nil, include: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
68
+ # @overload create_streaming(file:, model:, chunking_strategy: nil, include: nil, known_speaker_names: nil, known_speaker_references: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
65
69
  #
66
70
  # @param file [Pathname, StringIO, IO, String, OpenAI::FilePart] The audio file object (not file name) to transcribe, in one of these formats: fl
67
71
  #
@@ -71,6 +75,10 @@ module OpenAI
71
75
  #
72
76
  # @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>] Additional information to include in the transcription response.
73
77
  #
78
+ # @param known_speaker_names [Array<String>] Optional list of speaker names that correspond to the audio samples provided in
79
+ #
80
+ # @param known_speaker_references [Array<String>] Optional list of audio samples (as [data URLs](https://developer.mozilla.org/en-
81
+ #
74
82
  # @param language [String] The language of the input audio. Supplying the input language in [ISO-639-1](htt
75
83
  #
76
84
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
@@ -83,7 +91,7 @@ module OpenAI
83
91
  #
84
92
  # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil]
85
93
  #
86
- # @return [OpenAI::Internal::Stream<OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent>]
94
+ # @return [OpenAI::Internal::Stream<OpenAI::Models::Audio::TranscriptionTextSegmentEvent, OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent>]
87
95
  #
88
96
  # @see OpenAI::Models::Audio::TranscriptionCreateParams
89
97
  def create_streaming(params)
@@ -105,7 +105,7 @@ module OpenAI
105
105
  )
106
106
  end
107
107
 
108
- # Delete a file.
108
+ # Delete a file and remove it from all vector stores.
109
109
  #
110
110
  # @overload delete(file_id, request_options: {})
111
111
  #
@@ -14,10 +14,12 @@ module OpenAI
14
14
  #
15
15
  # Create a vector store.
16
16
  #
17
- # @overload create(chunking_strategy: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
17
+ # @overload create(chunking_strategy: nil, description: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
18
18
  #
19
19
  # @param chunking_strategy [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam] The chunking strategy used to chunk the file(s). If not set, will use the `auto`
20
20
  #
21
+ # @param description [String] A description for the vector store. Can be used to describe the vector store's p
22
+ #
21
23
  # @param expires_after [OpenAI::Models::VectorStoreCreateParams::ExpiresAfter] The expiration policy for a vector store.
22
24
  #
23
25
  # @param file_ids [Array<String>] A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OpenAI
4
- VERSION = "0.30.0"
4
+ VERSION = "0.32.0"
5
5
  end
data/lib/openai.rb CHANGED
@@ -79,11 +79,14 @@ require_relative "openai/models/audio/speech_model"
79
79
  require_relative "openai/models/audio/transcription"
80
80
  require_relative "openai/models/audio/transcription_create_params"
81
81
  require_relative "openai/models/audio/transcription_create_response"
82
+ require_relative "openai/models/audio/transcription_diarized"
83
+ require_relative "openai/models/audio/transcription_diarized_segment"
82
84
  require_relative "openai/models/audio/transcription_include"
83
85
  require_relative "openai/models/audio/transcription_segment"
84
86
  require_relative "openai/models/audio/transcription_stream_event"
85
87
  require_relative "openai/models/audio/transcription_text_delta_event"
86
88
  require_relative "openai/models/audio/transcription_text_done_event"
89
+ require_relative "openai/models/audio/transcription_text_segment_event"
87
90
  require_relative "openai/models/audio/transcription_verbose"
88
91
  require_relative "openai/models/audio/transcription_word"
89
92
  require_relative "openai/models/audio/translation"
@@ -21,8 +21,8 @@ module OpenAI
21
21
  attr_accessor :file
22
22
 
23
23
  # ID of the model to use. The options are `gpt-4o-transcribe`,
24
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
25
- # Whisper V2 model).
24
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
25
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
26
26
  sig { returns(T.any(String, OpenAI::AudioModel::OrSymbol)) }
27
27
  attr_accessor :model
28
28
 
@@ -30,6 +30,8 @@ module OpenAI
30
30
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
31
31
  # boundaries. `server_vad` object can be provided to tweak VAD detection
32
32
  # parameters manually. If unset, the audio is transcribed as a single block.
33
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
34
+ # seconds.
33
35
  sig do
34
36
  returns(
35
37
  T.nilable(
@@ -46,7 +48,8 @@ module OpenAI
46
48
  # return the log probabilities of the tokens in the response to understand the
47
49
  # model's confidence in the transcription. `logprobs` only works with
48
50
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
49
- # `gpt-4o-mini-transcribe`.
51
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
52
+ # `gpt-4o-transcribe-diarize`.
50
53
  sig do
51
54
  returns(
52
55
  T.nilable(T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol])
@@ -61,6 +64,26 @@ module OpenAI
61
64
  end
62
65
  attr_writer :include
63
66
 
67
+ # Optional list of speaker names that correspond to the audio samples provided in
68
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
69
+ # example `customer` or `agent`). Up to 4 speakers are supported.
70
+ sig { returns(T.nilable(T::Array[String])) }
71
+ attr_reader :known_speaker_names
72
+
73
+ sig { params(known_speaker_names: T::Array[String]).void }
74
+ attr_writer :known_speaker_names
75
+
76
+ # Optional list of audio samples (as
77
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
78
+ # that contain known speaker references matching `known_speaker_names[]`. Each
79
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
80
+ # formats supported by `file`.
81
+ sig { returns(T.nilable(T::Array[String])) }
82
+ attr_reader :known_speaker_references
83
+
84
+ sig { params(known_speaker_references: T::Array[String]).void }
85
+ attr_writer :known_speaker_references
86
+
64
87
  # The language of the input audio. Supplying the input language in
65
88
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
66
89
  # format will improve accuracy and latency.
@@ -73,7 +96,8 @@ module OpenAI
73
96
  # An optional text to guide the model's style or continue a previous audio
74
97
  # segment. The
75
98
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
76
- # should match the audio language.
99
+ # should match the audio language. This field is not supported when using
100
+ # `gpt-4o-transcribe-diarize`.
77
101
  sig { returns(T.nilable(String)) }
78
102
  attr_reader :prompt
79
103
 
@@ -81,8 +105,10 @@ module OpenAI
81
105
  attr_writer :prompt
82
106
 
83
107
  # The format of the output, in one of these options: `json`, `text`, `srt`,
84
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
85
- # the only supported format is `json`.
108
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
109
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
110
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
111
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
86
112
  sig { returns(T.nilable(OpenAI::AudioResponseFormat::OrSymbol)) }
87
113
  attr_reader :response_format
88
114
 
@@ -106,7 +132,8 @@ module OpenAI
106
132
  # `response_format` must be set `verbose_json` to use timestamp granularities.
107
133
  # Either or both of these options are supported: `word`, or `segment`. Note: There
108
134
  # is no additional latency for segment timestamps, but generating word timestamps
109
- # incurs additional latency.
135
+ # incurs additional latency. This option is not available for
136
+ # `gpt-4o-transcribe-diarize`.
110
137
  sig do
111
138
  returns(
112
139
  T.nilable(
@@ -140,6 +167,8 @@ module OpenAI
140
167
  )
141
168
  ),
142
169
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
170
+ known_speaker_names: T::Array[String],
171
+ known_speaker_references: T::Array[String],
143
172
  language: String,
144
173
  prompt: String,
145
174
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -156,20 +185,33 @@ module OpenAI
156
185
  # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
157
186
  file:,
158
187
  # ID of the model to use. The options are `gpt-4o-transcribe`,
159
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
160
- # Whisper V2 model).
188
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
189
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
161
190
  model:,
162
191
  # Controls how the audio is cut into chunks. When set to `"auto"`, the server
163
192
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
164
193
  # boundaries. `server_vad` object can be provided to tweak VAD detection
165
194
  # parameters manually. If unset, the audio is transcribed as a single block.
195
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
196
+ # seconds.
166
197
  chunking_strategy: nil,
167
198
  # Additional information to include in the transcription response. `logprobs` will
168
199
  # return the log probabilities of the tokens in the response to understand the
169
200
  # model's confidence in the transcription. `logprobs` only works with
170
201
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
171
- # `gpt-4o-mini-transcribe`.
202
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
203
+ # `gpt-4o-transcribe-diarize`.
172
204
  include: nil,
205
+ # Optional list of speaker names that correspond to the audio samples provided in
206
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
207
+ # example `customer` or `agent`). Up to 4 speakers are supported.
208
+ known_speaker_names: nil,
209
+ # Optional list of audio samples (as
210
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
211
+ # that contain known speaker references matching `known_speaker_names[]`. Each
212
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
213
+ # formats supported by `file`.
214
+ known_speaker_references: nil,
173
215
  # The language of the input audio. Supplying the input language in
174
216
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
175
217
  # format will improve accuracy and latency.
@@ -177,11 +219,14 @@ module OpenAI
177
219
  # An optional text to guide the model's style or continue a previous audio
178
220
  # segment. The
179
221
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
180
- # should match the audio language.
222
+ # should match the audio language. This field is not supported when using
223
+ # `gpt-4o-transcribe-diarize`.
181
224
  prompt: nil,
182
225
  # The format of the output, in one of these options: `json`, `text`, `srt`,
183
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
184
- # the only supported format is `json`.
226
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
227
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
228
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
229
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
185
230
  response_format: nil,
186
231
  # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
187
232
  # output more random, while lower values like 0.2 will make it more focused and
@@ -193,7 +238,8 @@ module OpenAI
193
238
  # `response_format` must be set `verbose_json` to use timestamp granularities.
194
239
  # Either or both of these options are supported: `word`, or `segment`. Note: There
195
240
  # is no additional latency for segment timestamps, but generating word timestamps
196
- # incurs additional latency.
241
+ # incurs additional latency. This option is not available for
242
+ # `gpt-4o-transcribe-diarize`.
197
243
  timestamp_granularities: nil,
198
244
  request_options: {}
199
245
  )
@@ -212,6 +258,8 @@ module OpenAI
212
258
  )
213
259
  ),
214
260
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
261
+ known_speaker_names: T::Array[String],
262
+ known_speaker_references: T::Array[String],
215
263
  language: String,
216
264
  prompt: String,
217
265
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -228,8 +276,8 @@ module OpenAI
228
276
  end
229
277
 
230
278
  # ID of the model to use. The options are `gpt-4o-transcribe`,
231
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
232
- # Whisper V2 model).
279
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
280
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
233
281
  module Model
234
282
  extend OpenAI::Internal::Type::Union
235
283
 
@@ -251,6 +299,8 @@ module OpenAI
251
299
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
252
300
  # boundaries. `server_vad` object can be provided to tweak VAD detection
253
301
  # parameters manually. If unset, the audio is transcribed as a single block.
302
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
303
+ # seconds.
254
304
  module ChunkingStrategy
255
305
  extend OpenAI::Internal::Type::Union
256
306
 
@@ -12,6 +12,7 @@ module OpenAI
12
12
  T.type_alias do
13
13
  T.any(
14
14
  OpenAI::Audio::Transcription,
15
+ OpenAI::Audio::TranscriptionDiarized,
15
16
  OpenAI::Audio::TranscriptionVerbose
16
17
  )
17
18
  end