openai 0.31.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/README.md +1 -1
  4. data/lib/openai/internal/util.rb +5 -5
  5. data/lib/openai/models/audio/transcription_create_params.rb +42 -11
  6. data/lib/openai/models/audio/transcription_create_response.rb +4 -1
  7. data/lib/openai/models/audio/transcription_diarized.rb +160 -0
  8. data/lib/openai/models/audio/transcription_diarized_segment.rb +65 -0
  9. data/lib/openai/models/audio/transcription_stream_event.rb +7 -4
  10. data/lib/openai/models/audio/transcription_text_delta_event.rb +10 -1
  11. data/lib/openai/models/audio/transcription_text_segment_event.rb +63 -0
  12. data/lib/openai/models/audio_model.rb +1 -0
  13. data/lib/openai/models/audio_response_format.rb +5 -2
  14. data/lib/openai/models/realtime/audio_transcription.rb +8 -6
  15. data/lib/openai/models/vector_store_create_params.rb +10 -1
  16. data/lib/openai/resources/audio/transcriptions.rb +12 -4
  17. data/lib/openai/resources/vector_stores.rb +3 -1
  18. data/lib/openai/version.rb +1 -1
  19. data/lib/openai.rb +3 -0
  20. data/rbi/openai/models/audio/transcription_create_params.rbi +66 -16
  21. data/rbi/openai/models/audio/transcription_create_response.rbi +1 -0
  22. data/rbi/openai/models/audio/transcription_diarized.rbi +281 -0
  23. data/rbi/openai/models/audio/transcription_diarized_segment.rbi +87 -0
  24. data/rbi/openai/models/audio/transcription_stream_event.rbi +4 -3
  25. data/rbi/openai/models/audio/transcription_text_delta_event.rbi +14 -1
  26. data/rbi/openai/models/audio/transcription_text_segment_event.rbi +86 -0
  27. data/rbi/openai/models/audio_model.rbi +2 -0
  28. data/rbi/openai/models/audio_response_format.rbi +6 -2
  29. data/rbi/openai/models/realtime/audio_transcription.rbi +15 -12
  30. data/rbi/openai/models/vector_store_create_params.rbi +13 -0
  31. data/rbi/openai/resources/audio/transcriptions.rbi +52 -14
  32. data/rbi/openai/resources/vector_stores.rbi +4 -0
  33. data/sig/openai/models/audio/transcription_create_params.rbs +14 -0
  34. data/sig/openai/models/audio/transcription_create_response.rbs +3 -1
  35. data/sig/openai/models/audio/transcription_diarized.rbs +129 -0
  36. data/sig/openai/models/audio/transcription_diarized_segment.rbs +47 -0
  37. data/sig/openai/models/audio/transcription_stream_event.rbs +2 -1
  38. data/sig/openai/models/audio/transcription_text_delta_event.rbs +9 -2
  39. data/sig/openai/models/audio/transcription_text_segment_event.rbs +47 -0
  40. data/sig/openai/models/audio_model.rbs +5 -1
  41. data/sig/openai/models/audio_response_format.rbs +3 -1
  42. data/sig/openai/models/realtime/audio_transcription.rbs +2 -2
  43. data/sig/openai/models/vector_store_create_params.rbs +7 -0
  44. data/sig/openai/resources/audio/transcriptions.rbs +4 -0
  45. data/sig/openai/resources/vector_stores.rbs +1 -0
  46. metadata +11 -2
data/lib/openai/models/vector_store_create_params.rb CHANGED
@@ -14,6 +14,13 @@ module OpenAI
  # @return [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam, nil]
  optional :chunking_strategy, union: -> { OpenAI::FileChunkingStrategyParam }

+ # @!attribute description
+ # A description for the vector store. Can be used to describe the vector store's
+ # purpose.
+ #
+ # @return [String, nil]
+ optional :description, String
+
  # @!attribute expires_after
  # The expiration policy for a vector store.
  #
@@ -45,12 +52,14 @@ module OpenAI
  # @return [String, nil]
  optional :name, String

- # @!method initialize(chunking_strategy: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
+ # @!method initialize(chunking_strategy: nil, description: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
  # Some parameter documentations has been truncated, see
  # {OpenAI::Models::VectorStoreCreateParams} for more details.
  #
  # @param chunking_strategy [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam] The chunking strategy used to chunk the file(s). If not set, will use the `auto`
  #
+ # @param description [String] A description for the vector store. Can be used to describe the vector store's p
+ #
  # @param expires_after [OpenAI::Models::VectorStoreCreateParams::ExpiresAfter] The expiration policy for a vector store.
  #
  # @param file_ids [Array<String>] A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
data/lib/openai/resources/audio/transcriptions.rb CHANGED
@@ -12,7 +12,7 @@ module OpenAI
  #
  # Transcribes audio into the input language.
  #
- # @overload create(file:, model:, chunking_strategy: nil, include: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
+ # @overload create(file:, model:, chunking_strategy: nil, include: nil, known_speaker_names: nil, known_speaker_references: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
  #
  # @param file [Pathname, StringIO, IO, String, OpenAI::FilePart] The audio file object (not file name) to transcribe, in one of these formats: fl
  #
@@ -22,6 +22,10 @@ module OpenAI
  #
  # @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>] Additional information to include in the transcription response.
  #
+ # @param known_speaker_names [Array<String>] Optional list of speaker names that correspond to the audio samples provided in
+ #
+ # @param known_speaker_references [Array<String>] Optional list of audio samples (as [data URLs](https://developer.mozilla.org/en-
+ #
  # @param language [String] The language of the input audio. Supplying the input language in [ISO-639-1](htt
  #
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
@@ -34,7 +38,7 @@ module OpenAI
  #
  # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil]
  #
- # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose]
+ # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionDiarized, OpenAI::Models::Audio::TranscriptionVerbose]
  #
  # @see OpenAI::Models::Audio::TranscriptionCreateParams
  def create(params)
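To make the new parameters concrete, here is a minimal, hedged sketch of a diarized transcription request against this `create` method. The file path and speaker reference clips are hypothetical; per the docs above, `known_speaker_references` entries must be 2-10 second data-URL audio clips matching `known_speaker_names` one-to-one:

```ruby
require "openai"
require "pathname"

client = OpenAI::Client.new # reads OPENAI_API_KEY from the environment

# Hypothetical speaker reference clips, truncated for brevity.
agent_clip    = "data:audio/wav;base64,..."
customer_clip = "data:audio/wav;base64,..."

transcription = client.audio.transcriptions.create(
  file: Pathname("call.wav"),            # hypothetical input file
  model: :"gpt-4o-transcribe-diarize",
  response_format: :diarized_json,       # required to receive speaker annotations
  chunking_strategy: :auto,              # required for diarize inputs over 30 seconds
  known_speaker_names: %w[agent customer],
  known_speaker_references: [agent_clip, customer_clip]
)

# With `diarized_json`, the return value is an
# OpenAI::Models::Audio::TranscriptionDiarized (new model in this release).
puts transcription.text
```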
@@ -61,7 +65,7 @@ module OpenAI
  #
  # Transcribes audio into the input language.
  #
- # @overload create_streaming(file:, model:, chunking_strategy: nil, include: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
+ # @overload create_streaming(file:, model:, chunking_strategy: nil, include: nil, known_speaker_names: nil, known_speaker_references: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
  #
  # @param file [Pathname, StringIO, IO, String, OpenAI::FilePart] The audio file object (not file name) to transcribe, in one of these formats: fl
  #
@@ -71,6 +75,10 @@ module OpenAI
  #
  # @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>] Additional information to include in the transcription response.
  #
+ # @param known_speaker_names [Array<String>] Optional list of speaker names that correspond to the audio samples provided in
+ #
+ # @param known_speaker_references [Array<String>] Optional list of audio samples (as [data URLs](https://developer.mozilla.org/en-
+ #
  # @param language [String] The language of the input audio. Supplying the input language in [ISO-639-1](htt
  #
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
@@ -83,7 +91,7 @@ module OpenAI
  #
  # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil]
  #
- # @return [OpenAI::Internal::Stream<OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent>]
+ # @return [OpenAI::Internal::Stream<OpenAI::Models::Audio::TranscriptionTextSegmentEvent, OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent>]
  #
  # @see OpenAI::Models::Audio::TranscriptionCreateParams
  def create_streaming(params)
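The widened stream type means consumers should now expect segment events interleaved with deltas. A sketch of handling all three event types; the `speaker` and `text` readers on the segment event are assumptions based on the new `transcription_text_segment_event` model added in this release:

```ruby
stream = client.audio.transcriptions.create_streaming(
  file: Pathname("call.wav"),          # hypothetical input file
  model: :"gpt-4o-transcribe-diarize",
  chunking_strategy: :auto             # required for diarize inputs over 30 seconds
)

stream.each do |event|
  case event
  when OpenAI::Models::Audio::TranscriptionTextSegmentEvent
    puts "#{event.speaker}: #{event.text}" # completed, speaker-labelled segment
  when OpenAI::Models::Audio::TranscriptionTextDeltaEvent
    print event.delta                      # incremental transcript text
  when OpenAI::Models::Audio::TranscriptionTextDoneEvent
    puts "\n[transcription complete]"
  end
end
```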
data/lib/openai/resources/vector_stores.rb CHANGED
@@ -14,10 +14,12 @@ module OpenAI
  #
  # Create a vector store.
  #
- # @overload create(chunking_strategy: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
+ # @overload create(chunking_strategy: nil, description: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
  #
  # @param chunking_strategy [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam] The chunking strategy used to chunk the file(s). If not set, will use the `auto`
  #
+ # @param description [String] A description for the vector store. Can be used to describe the vector store's p
+ #
  # @param expires_after [OpenAI::Models::VectorStoreCreateParams::ExpiresAfter] The expiration policy for a vector store.
  #
  # @param file_ids [Array<String>] A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
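The new `description` parameter slots straight into the existing create call. A minimal sketch (store name and description are illustrative):

```ruby
vector_store = client.vector_stores.create(
  name: "support-articles",
  description: "Help-center articles used to ground support-bot answers."
)
puts vector_store.id
```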
data/lib/openai/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module OpenAI
-   VERSION = "0.31.0"
+   VERSION = "0.32.0"
  end
data/lib/openai.rb CHANGED
@@ -79,11 +79,14 @@ require_relative "openai/models/audio/speech_model"
  require_relative "openai/models/audio/transcription"
  require_relative "openai/models/audio/transcription_create_params"
  require_relative "openai/models/audio/transcription_create_response"
+ require_relative "openai/models/audio/transcription_diarized"
+ require_relative "openai/models/audio/transcription_diarized_segment"
  require_relative "openai/models/audio/transcription_include"
  require_relative "openai/models/audio/transcription_segment"
  require_relative "openai/models/audio/transcription_stream_event"
  require_relative "openai/models/audio/transcription_text_delta_event"
  require_relative "openai/models/audio/transcription_text_done_event"
+ require_relative "openai/models/audio/transcription_text_segment_event"
  require_relative "openai/models/audio/transcription_verbose"
  require_relative "openai/models/audio/transcription_word"
  require_relative "openai/models/audio/translation"
data/rbi/openai/models/audio/transcription_create_params.rbi CHANGED
@@ -21,8 +21,8 @@ module OpenAI
  attr_accessor :file

  # ID of the model to use. The options are `gpt-4o-transcribe`,
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
- # Whisper V2 model).
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
  sig { returns(T.any(String, OpenAI::AudioModel::OrSymbol)) }
  attr_accessor :model

@@ -30,6 +30,8 @@ module OpenAI
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
  # boundaries. `server_vad` object can be provided to tweak VAD detection
  # parameters manually. If unset, the audio is transcribed as a single block.
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
+ # seconds.
  sig do
  returns(
  T.nilable(
@@ -46,7 +48,8 @@ module OpenAI
  # return the log probabilities of the tokens in the response to understand the
  # model's confidence in the transcription. `logprobs` only works with
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
- # `gpt-4o-mini-transcribe`.
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  sig do
  returns(
  T.nilable(T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol])
@@ -61,6 +64,26 @@ module OpenAI
  end
  attr_writer :include

+ # Optional list of speaker names that correspond to the audio samples provided in
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
+ # example `customer` or `agent`). Up to 4 speakers are supported.
+ sig { returns(T.nilable(T::Array[String])) }
+ attr_reader :known_speaker_names
+
+ sig { params(known_speaker_names: T::Array[String]).void }
+ attr_writer :known_speaker_names
+
+ # Optional list of audio samples (as
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+ # that contain known speaker references matching `known_speaker_names[]`. Each
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
+ # formats supported by `file`.
+ sig { returns(T.nilable(T::Array[String])) }
+ attr_reader :known_speaker_references
+
+ sig { params(known_speaker_references: T::Array[String]).void }
+ attr_writer :known_speaker_references
+
  # The language of the input audio. Supplying the input language in
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
  # format will improve accuracy and latency.
@@ -73,7 +96,8 @@ module OpenAI
  # An optional text to guide the model's style or continue a previous audio
  # segment. The
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
- # should match the audio language.
+ # should match the audio language. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  sig { returns(T.nilable(String)) }
  attr_reader :prompt

@@ -81,8 +105,10 @@ module OpenAI
  attr_writer :prompt

  # The format of the output, in one of these options: `json`, `text`, `srt`,
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
- # the only supported format is `json`.
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
  sig { returns(T.nilable(OpenAI::AudioResponseFormat::OrSymbol)) }
  attr_reader :response_format

@@ -106,7 +132,8 @@ module OpenAI
  # `response_format` must be set `verbose_json` to use timestamp granularities.
  # Either or both of these options are supported: `word`, or `segment`. Note: There
  # is no additional latency for segment timestamps, but generating word timestamps
- # incurs additional latency.
+ # incurs additional latency. This option is not available for
+ # `gpt-4o-transcribe-diarize`.
  sig do
  returns(
  T.nilable(
@@ -140,6 +167,8 @@ module OpenAI
  )
  ),
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
+ known_speaker_names: T::Array[String],
+ known_speaker_references: T::Array[String],
  language: String,
  prompt: String,
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -156,20 +185,33 @@ module OpenAI
  # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
  file:,
  # ID of the model to use. The options are `gpt-4o-transcribe`,
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
- # Whisper V2 model).
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
  model:,
  # Controls how the audio is cut into chunks. When set to `"auto"`, the server
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
  # boundaries. `server_vad` object can be provided to tweak VAD detection
  # parameters manually. If unset, the audio is transcribed as a single block.
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
+ # seconds.
  chunking_strategy: nil,
  # Additional information to include in the transcription response. `logprobs` will
  # return the log probabilities of the tokens in the response to understand the
  # model's confidence in the transcription. `logprobs` only works with
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
- # `gpt-4o-mini-transcribe`.
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  include: nil,
+ # Optional list of speaker names that correspond to the audio samples provided in
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
+ # example `customer` or `agent`). Up to 4 speakers are supported.
+ known_speaker_names: nil,
+ # Optional list of audio samples (as
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+ # that contain known speaker references matching `known_speaker_names[]`. Each
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
+ # formats supported by `file`.
+ known_speaker_references: nil,
  # The language of the input audio. Supplying the input language in
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
  # format will improve accuracy and latency.
@@ -177,11 +219,14 @@ module OpenAI
  # An optional text to guide the model's style or continue a previous audio
  # segment. The
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
- # should match the audio language.
+ # should match the audio language. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  prompt: nil,
  # The format of the output, in one of these options: `json`, `text`, `srt`,
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
- # the only supported format is `json`.
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
  response_format: nil,
  # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
  # output more random, while lower values like 0.2 will make it more focused and
@@ -193,7 +238,8 @@ module OpenAI
  # `response_format` must be set `verbose_json` to use timestamp granularities.
  # Either or both of these options are supported: `word`, or `segment`. Note: There
  # is no additional latency for segment timestamps, but generating word timestamps
- # incurs additional latency.
+ # incurs additional latency. This option is not available for
+ # `gpt-4o-transcribe-diarize`.
  timestamp_granularities: nil,
  request_options: {}
  )
@@ -212,6 +258,8 @@ module OpenAI
  )
  ),
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
+ known_speaker_names: T::Array[String],
+ known_speaker_references: T::Array[String],
  language: String,
  prompt: String,
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -228,8 +276,8 @@ module OpenAI
  end

  # ID of the model to use. The options are `gpt-4o-transcribe`,
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
- # Whisper V2 model).
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
  module Model
  extend OpenAI::Internal::Type::Union

@@ -251,6 +299,8 @@ module OpenAI
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
  # boundaries. `server_vad` object can be provided to tweak VAD detection
  # parameters manually. If unset, the audio is transcribed as a single block.
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
+ # seconds.
  module ChunkingStrategy
  extend OpenAI::Internal::Type::Union

data/rbi/openai/models/audio/transcription_create_response.rbi CHANGED
@@ -12,6 +12,7 @@ module OpenAI
  T.type_alias do
  T.any(
  OpenAI::Audio::Transcription,
+ OpenAI::Audio::TranscriptionDiarized,
  OpenAI::Audio::TranscriptionVerbose
  )
  end
data/rbi/openai/models/audio/transcription_diarized.rbi ADDED
@@ -0,0 +1,281 @@
+ # typed: strong
+
+ module OpenAI
+ module Models
+ module Audio
+ class TranscriptionDiarized < OpenAI::Internal::Type::BaseModel
+ OrHash =
+ T.type_alias do
+ T.any(
+ OpenAI::Audio::TranscriptionDiarized,
+ OpenAI::Internal::AnyHash
+ )
+ end
+
+ # Duration of the input audio in seconds.
+ sig { returns(Float) }
+ attr_accessor :duration
+
+ # Segments of the transcript annotated with timestamps and speaker labels.
+ sig { returns(T::Array[OpenAI::Audio::TranscriptionDiarizedSegment]) }
+ attr_accessor :segments
+
+ # The type of task that was run. Always `transcribe`.
+ sig { returns(Symbol) }
+ attr_accessor :task
+
+ # The concatenated transcript text for the entire audio input.
+ sig { returns(String) }
+ attr_accessor :text
+
+ # Token or duration usage statistics for the request.
+ sig do
+ returns(
+ T.nilable(OpenAI::Audio::TranscriptionDiarized::Usage::Variants)
+ )
+ end
+ attr_reader :usage
+
+ sig do
+ params(
+ usage:
+ T.any(
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::OrHash,
+ OpenAI::Audio::TranscriptionDiarized::Usage::Duration::OrHash
+ )
+ ).void
+ end
+ attr_writer :usage
+
+ # Represents a diarized transcription response returned by the model, including
+ # the combined transcript and speaker-segment annotations.
+ sig do
+ params(
+ duration: Float,
+ segments:
+ T::Array[OpenAI::Audio::TranscriptionDiarizedSegment::OrHash],
+ text: String,
+ usage:
+ T.any(
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::OrHash,
+ OpenAI::Audio::TranscriptionDiarized::Usage::Duration::OrHash
+ ),
+ task: Symbol
+ ).returns(T.attached_class)
+ end
+ def self.new(
+ # Duration of the input audio in seconds.
+ duration:,
+ # Segments of the transcript annotated with timestamps and speaker labels.
+ segments:,
+ # The concatenated transcript text for the entire audio input.
+ text:,
+ # Token or duration usage statistics for the request.
+ usage: nil,
+ # The type of task that was run. Always `transcribe`.
+ task: :transcribe
+ )
+ end
+
+ sig do
+ override.returns(
+ {
+ duration: Float,
+ segments: T::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+ task: Symbol,
+ text: String,
+ usage: OpenAI::Audio::TranscriptionDiarized::Usage::Variants
+ }
+ )
+ end
+ def to_hash
+ end
+
+ # Token or duration usage statistics for the request.
+ module Usage
+ extend OpenAI::Internal::Type::Union
+
+ Variants =
+ T.type_alias do
+ T.any(
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens,
+ OpenAI::Audio::TranscriptionDiarized::Usage::Duration
+ )
+ end
+
+ class Tokens < OpenAI::Internal::Type::BaseModel
+ OrHash =
+ T.type_alias do
+ T.any(
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens,
+ OpenAI::Internal::AnyHash
+ )
+ end
+
+ # Number of input tokens billed for this request.
+ sig { returns(Integer) }
+ attr_accessor :input_tokens
+
+ # Number of output tokens generated.
+ sig { returns(Integer) }
+ attr_accessor :output_tokens
+
+ # Total number of tokens used (input + output).
+ sig { returns(Integer) }
+ attr_accessor :total_tokens
+
+ # The type of the usage object. Always `tokens` for this variant.
+ sig { returns(Symbol) }
+ attr_accessor :type
+
+ # Details about the input tokens billed for this request.
+ sig do
+ returns(
+ T.nilable(
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+ )
+ )
+ end
+ attr_reader :input_token_details
+
+ sig do
+ params(
+ input_token_details:
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails::OrHash
+ ).void
+ end
+ attr_writer :input_token_details
+
+ # Usage statistics for models billed by token usage.
+ sig do
+ params(
+ input_tokens: Integer,
+ output_tokens: Integer,
+ total_tokens: Integer,
+ input_token_details:
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails::OrHash,
+ type: Symbol
+ ).returns(T.attached_class)
+ end
+ def self.new(
+ # Number of input tokens billed for this request.
+ input_tokens:,
+ # Number of output tokens generated.
+ output_tokens:,
+ # Total number of tokens used (input + output).
+ total_tokens:,
+ # Details about the input tokens billed for this request.
+ input_token_details: nil,
+ # The type of the usage object. Always `tokens` for this variant.
+ type: :tokens
+ )
+ end
+
+ sig do
+ override.returns(
+ {
+ input_tokens: Integer,
+ output_tokens: Integer,
+ total_tokens: Integer,
+ type: Symbol,
+ input_token_details:
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+ }
+ )
+ end
+ def to_hash
+ end
+
+ class InputTokenDetails < OpenAI::Internal::Type::BaseModel
+ OrHash =
+ T.type_alias do
+ T.any(
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails,
+ OpenAI::Internal::AnyHash
+ )
+ end
+
+ # Number of audio tokens billed for this request.
+ sig { returns(T.nilable(Integer)) }
+ attr_reader :audio_tokens
+
+ sig { params(audio_tokens: Integer).void }
+ attr_writer :audio_tokens
+
+ # Number of text tokens billed for this request.
+ sig { returns(T.nilable(Integer)) }
+ attr_reader :text_tokens
+
+ sig { params(text_tokens: Integer).void }
+ attr_writer :text_tokens
+
+ # Details about the input tokens billed for this request.
+ sig do
+ params(audio_tokens: Integer, text_tokens: Integer).returns(
+ T.attached_class
+ )
+ end
+ def self.new(
+ # Number of audio tokens billed for this request.
+ audio_tokens: nil,
+ # Number of text tokens billed for this request.
+ text_tokens: nil
+ )
+ end
+
+ sig do
+ override.returns(
+ { audio_tokens: Integer, text_tokens: Integer }
+ )
+ end
+ def to_hash
+ end
+ end
+ end
+
+ class Duration < OpenAI::Internal::Type::BaseModel
+ OrHash =
+ T.type_alias do
+ T.any(
+ OpenAI::Audio::TranscriptionDiarized::Usage::Duration,
+ OpenAI::Internal::AnyHash
+ )
+ end
+
+ # Duration of the input audio in seconds.
+ sig { returns(Float) }
+ attr_accessor :seconds
+
+ # The type of the usage object. Always `duration` for this variant.
+ sig { returns(Symbol) }
+ attr_accessor :type
+
+ # Usage statistics for models billed by audio input duration.
+ sig do
+ params(seconds: Float, type: Symbol).returns(T.attached_class)
+ end
+ def self.new(
+ # Duration of the input audio in seconds.
+ seconds:,
+ # The type of the usage object. Always `duration` for this variant.
+ type: :duration
+ )
+ end
+
+ sig { override.returns({ seconds: Float, type: Symbol }) }
+ def to_hash
+ end
+ end
+
+ sig do
+ override.returns(
+ T::Array[OpenAI::Audio::TranscriptionDiarized::Usage::Variants]
+ )
+ end
+ def self.variants
+ end
+ end
+ end
+ end
+ end
+ end
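For completeness, a sketch of consuming a `TranscriptionDiarized` response, including the `Usage` union defined above. The segment readers (`speaker`, `text`) are assumptions based on the companion `TranscriptionDiarizedSegment` model added in this release:

```ruby
# `transcription` is an OpenAI::Models::Audio::TranscriptionDiarized,
# e.g. the return value of the diarized create call sketched earlier.
transcription.segments.each do |segment|
  puts "#{segment.speaker}: #{segment.text}" # one speaker-labelled segment
end

# The usage field is a union: token-billed or duration-billed.
case transcription.usage
when OpenAI::Audio::TranscriptionDiarized::Usage::Tokens
  puts "billed tokens: #{transcription.usage.total_tokens}"
when OpenAI::Audio::TranscriptionDiarized::Usage::Duration
  puts "billed seconds: #{transcription.usage.seconds}"
end
```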