openai 0.30.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +28 -0
  3. data/README.md +1 -1
  4. data/lib/openai/internal/util.rb +5 -5
  5. data/lib/openai/models/audio/transcription_create_params.rb +42 -11
  6. data/lib/openai/models/audio/transcription_create_response.rb +4 -1
  7. data/lib/openai/models/audio/transcription_diarized.rb +160 -0
  8. data/lib/openai/models/audio/transcription_diarized_segment.rb +65 -0
  9. data/lib/openai/models/audio/transcription_stream_event.rb +7 -4
  10. data/lib/openai/models/audio/transcription_text_delta_event.rb +10 -1
  11. data/lib/openai/models/audio/transcription_text_segment_event.rb +63 -0
  12. data/lib/openai/models/audio_model.rb +1 -0
  13. data/lib/openai/models/audio_response_format.rb +5 -2
  14. data/lib/openai/models/beta/assistant_create_params.rb +3 -0
  15. data/lib/openai/models/beta/assistant_update_params.rb +3 -0
  16. data/lib/openai/models/beta/threads/run_create_params.rb +3 -0
  17. data/lib/openai/models/chat/completion_create_params.rb +3 -0
  18. data/lib/openai/models/comparison_filter.rb +29 -6
  19. data/lib/openai/models/evals/create_eval_completions_run_data_source.rb +3 -0
  20. data/lib/openai/models/evals/run_cancel_response.rb +6 -0
  21. data/lib/openai/models/evals/run_create_params.rb +6 -0
  22. data/lib/openai/models/evals/run_create_response.rb +6 -0
  23. data/lib/openai/models/evals/run_list_response.rb +6 -0
  24. data/lib/openai/models/evals/run_retrieve_response.rb +6 -0
  25. data/lib/openai/models/graders/score_model_grader.rb +3 -0
  26. data/lib/openai/models/realtime/audio_transcription.rb +8 -6
  27. data/lib/openai/models/reasoning.rb +3 -0
  28. data/lib/openai/models/reasoning_effort.rb +3 -0
  29. data/lib/openai/models/vector_store_create_params.rb +10 -1
  30. data/lib/openai/models/vector_stores/vector_store_file.rb +3 -3
  31. data/lib/openai/resources/audio/transcriptions.rb +12 -4
  32. data/lib/openai/resources/files.rb +1 -1
  33. data/lib/openai/resources/vector_stores.rb +3 -1
  34. data/lib/openai/version.rb +1 -1
  35. data/lib/openai.rb +3 -0
  36. data/rbi/openai/models/audio/transcription_create_params.rbi +66 -16
  37. data/rbi/openai/models/audio/transcription_create_response.rbi +1 -0
  38. data/rbi/openai/models/audio/transcription_diarized.rbi +281 -0
  39. data/rbi/openai/models/audio/transcription_diarized_segment.rbi +87 -0
  40. data/rbi/openai/models/audio/transcription_stream_event.rbi +4 -3
  41. data/rbi/openai/models/audio/transcription_text_delta_event.rbi +14 -1
  42. data/rbi/openai/models/audio/transcription_text_segment_event.rbi +86 -0
  43. data/rbi/openai/models/audio_model.rbi +2 -0
  44. data/rbi/openai/models/audio_response_format.rbi +6 -2
  45. data/rbi/openai/models/beta/assistant_create_params.rbi +6 -0
  46. data/rbi/openai/models/beta/assistant_update_params.rbi +6 -0
  47. data/rbi/openai/models/beta/threads/run_create_params.rbi +6 -0
  48. data/rbi/openai/models/chat/completion_create_params.rbi +6 -0
  49. data/rbi/openai/models/comparison_filter.rbi +43 -4
  50. data/rbi/openai/models/eval_create_response.rbi +4 -4
  51. data/rbi/openai/models/eval_list_response.rbi +4 -4
  52. data/rbi/openai/models/eval_retrieve_response.rbi +4 -4
  53. data/rbi/openai/models/eval_update_response.rbi +4 -4
  54. data/rbi/openai/models/evals/create_eval_completions_run_data_source.rbi +6 -0
  55. data/rbi/openai/models/evals/run_cancel_response.rbi +12 -0
  56. data/rbi/openai/models/evals/run_create_params.rbi +12 -0
  57. data/rbi/openai/models/evals/run_create_response.rbi +12 -0
  58. data/rbi/openai/models/evals/run_list_response.rbi +12 -0
  59. data/rbi/openai/models/evals/run_retrieve_response.rbi +12 -0
  60. data/rbi/openai/models/graders/score_model_grader.rbi +6 -0
  61. data/rbi/openai/models/realtime/audio_transcription.rbi +15 -12
  62. data/rbi/openai/models/reasoning.rbi +6 -0
  63. data/rbi/openai/models/reasoning_effort.rbi +3 -0
  64. data/rbi/openai/models/vector_store_create_params.rbi +13 -0
  65. data/rbi/openai/models/vector_stores/vector_store_file.rbi +3 -3
  66. data/rbi/openai/resources/audio/transcriptions.rbi +52 -14
  67. data/rbi/openai/resources/beta/assistants.rbi +6 -0
  68. data/rbi/openai/resources/beta/threads/runs.rbi +6 -0
  69. data/rbi/openai/resources/chat/completions.rbi +6 -0
  70. data/rbi/openai/resources/files.rbi +1 -1
  71. data/rbi/openai/resources/vector_stores.rbi +4 -0
  72. data/sig/openai/models/audio/transcription_create_params.rbs +14 -0
  73. data/sig/openai/models/audio/transcription_create_response.rbs +3 -1
  74. data/sig/openai/models/audio/transcription_diarized.rbs +129 -0
  75. data/sig/openai/models/audio/transcription_diarized_segment.rbs +47 -0
  76. data/sig/openai/models/audio/transcription_stream_event.rbs +2 -1
  77. data/sig/openai/models/audio/transcription_text_delta_event.rbs +9 -2
  78. data/sig/openai/models/audio/transcription_text_segment_event.rbs +47 -0
  79. data/sig/openai/models/audio_model.rbs +5 -1
  80. data/sig/openai/models/audio_response_format.rbs +3 -1
  81. data/sig/openai/models/comparison_filter.rbs +15 -1
  82. data/sig/openai/models/eval_create_response.rbs +2 -2
  83. data/sig/openai/models/eval_list_response.rbs +2 -2
  84. data/sig/openai/models/eval_retrieve_response.rbs +2 -2
  85. data/sig/openai/models/eval_update_response.rbs +2 -2
  86. data/sig/openai/models/realtime/audio_transcription.rbs +2 -2
  87. data/sig/openai/models/vector_store_create_params.rbs +7 -0
  88. data/sig/openai/resources/audio/transcriptions.rbs +4 -0
  89. data/sig/openai/resources/vector_stores.rbs +1 -0
  90. metadata +11 -2
data/rbi/openai/resources/audio/transcriptions.rbi
@@ -20,6 +20,8 @@ module OpenAI
  )
  ),
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
+ known_speaker_names: T::Array[String],
+ known_speaker_references: T::Array[String],
  language: String,
  prompt: String,
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -39,20 +41,33 @@ module OpenAI
  # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
  file:,
  # ID of the model to use. The options are `gpt-4o-transcribe`,
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
- # Whisper V2 model).
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
  model:,
  # Controls how the audio is cut into chunks. When set to `"auto"`, the server
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
  # boundaries. `server_vad` object can be provided to tweak VAD detection
  # parameters manually. If unset, the audio is transcribed as a single block.
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
+ # seconds.
  chunking_strategy: nil,
  # Additional information to include in the transcription response. `logprobs` will
  # return the log probabilities of the tokens in the response to understand the
  # model's confidence in the transcription. `logprobs` only works with
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
- # `gpt-4o-mini-transcribe`.
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  include: nil,
+ # Optional list of speaker names that correspond to the audio samples provided in
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
+ # example `customer` or `agent`). Up to 4 speakers are supported.
+ known_speaker_names: nil,
+ # Optional list of audio samples (as
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+ # that contain known speaker references matching `known_speaker_names[]`. Each
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
+ # formats supported by `file`.
+ known_speaker_references: nil,
  # The language of the input audio. Supplying the input language in
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
  # format will improve accuracy and latency.
@@ -60,11 +75,14 @@ module OpenAI
  # An optional text to guide the model's style or continue a previous audio
  # segment. The
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
- # should match the audio language.
+ # should match the audio language. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  prompt: nil,
  # The format of the output, in one of these options: `json`, `text`, `srt`,
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
- # the only supported format is `json`.
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
  response_format: nil,
  # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
  # output more random, while lower values like 0.2 will make it more focused and
@@ -76,7 +94,8 @@ module OpenAI
  # `response_format` must be set `verbose_json` to use timestamp granularities.
  # Either or both of these options are supported: `word`, or `segment`. Note: There
  # is no additional latency for segment timestamps, but generating word timestamps
- # incurs additional latency.
+ # incurs additional latency. This option is not available for
+ # `gpt-4o-transcribe-diarize`.
  timestamp_granularities: nil,
  # There is no need to provide `stream:`. Instead, use `#create_streaming` or
  # `#create` for streaming and non-streaming use cases, respectively.
@@ -101,6 +120,8 @@ module OpenAI
  )
  ),
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
+ known_speaker_names: T::Array[String],
+ known_speaker_references: T::Array[String],
  language: String,
  prompt: String,
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -122,20 +143,33 @@ module OpenAI
  # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
  file:,
  # ID of the model to use. The options are `gpt-4o-transcribe`,
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
- # Whisper V2 model).
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
  model:,
  # Controls how the audio is cut into chunks. When set to `"auto"`, the server
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
  # boundaries. `server_vad` object can be provided to tweak VAD detection
  # parameters manually. If unset, the audio is transcribed as a single block.
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
+ # seconds.
  chunking_strategy: nil,
  # Additional information to include in the transcription response. `logprobs` will
  # return the log probabilities of the tokens in the response to understand the
  # model's confidence in the transcription. `logprobs` only works with
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
- # `gpt-4o-mini-transcribe`.
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  include: nil,
+ # Optional list of speaker names that correspond to the audio samples provided in
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
+ # example `customer` or `agent`). Up to 4 speakers are supported.
+ known_speaker_names: nil,
+ # Optional list of audio samples (as
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+ # that contain known speaker references matching `known_speaker_names[]`. Each
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
+ # formats supported by `file`.
+ known_speaker_references: nil,
  # The language of the input audio. Supplying the input language in
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
  # format will improve accuracy and latency.
@@ -143,11 +177,14 @@ module OpenAI
  # An optional text to guide the model's style or continue a previous audio
  # segment. The
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
- # should match the audio language.
+ # should match the audio language. This field is not supported when using
+ # `gpt-4o-transcribe-diarize`.
  prompt: nil,
  # The format of the output, in one of these options: `json`, `text`, `srt`,
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
- # the only supported format is `json`.
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
  response_format: nil,
  # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
  # output more random, while lower values like 0.2 will make it more focused and
@@ -159,7 +196,8 @@ module OpenAI
  # `response_format` must be set `verbose_json` to use timestamp granularities.
  # Either or both of these options are supported: `word`, or `segment`. Note: There
  # is no additional latency for segment timestamps, but generating word timestamps
- # incurs additional latency.
+ # incurs additional latency. This option is not available for
+ # `gpt-4o-transcribe-diarize`.
  timestamp_granularities: nil,
  # There is no need to provide `stream:`. Instead, use `#create_streaming` or
  # `#create` for streaming and non-streaming use cases, respectively.
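
Taken together, the new diarization parameters compose as in the following sketch. It assumes a configured client and a local audio file; the `*_data_url` variables are hypothetical placeholders for the 2-10 second data-URL speaker samples described above.

```ruby
require "openai"
require "pathname"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

# agent_sample_data_url / customer_sample_data_url are assumed to be short
# audio clips encoded as data URLs, matching known_speaker_names by position.
transcription = client.audio.transcriptions.create(
  file: Pathname("support_call.wav"),
  model: :"gpt-4o-transcribe-diarize",
  response_format: :diarized_json, # required to receive speaker annotations
  known_speaker_names: ["agent", "customer"],
  known_speaker_references: [agent_sample_data_url, customer_sample_data_url]
)
puts transcription.text
```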
data/rbi/openai/resources/beta/assistants.rbi
@@ -65,6 +65,9 @@ module OpenAI
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
  # effort can result in faster responses and fewer tokens used on reasoning in a
  # response.
+ #
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+ # effort.
  reasoning_effort: nil,
  # Specifies the format that the model must output. Compatible with
  # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -192,6 +195,9 @@ module OpenAI
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
  # effort can result in faster responses and fewer tokens used on reasoning in a
  # response.
+ #
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+ # effort.
  reasoning_effort: nil,
  # Specifies the format that the model must output. Compatible with
  # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
data/rbi/openai/resources/beta/threads/runs.rbi
@@ -128,6 +128,9 @@ module OpenAI
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
  # effort can result in faster responses and fewer tokens used on reasoning in a
  # response.
+ #
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+ # effort.
  reasoning_effort: nil,
  # Body param: Specifies the format that the model must output. Compatible with
  # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -307,6 +310,9 @@ module OpenAI
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
  # effort can result in faster responses and fewer tokens used on reasoning in a
  # response.
+ #
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+ # effort.
  reasoning_effort: nil,
  # Body param: Specifies the format that the model must output. Compatible with
  # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
data/rbi/openai/resources/chat/completions.rbi
@@ -222,6 +222,9 @@ module OpenAI
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
  # effort can result in faster responses and fewer tokens used on reasoning in a
  # response.
+ #
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+ # effort.
  reasoning_effort: nil,
  # An object specifying the format that the model must output.
  #
@@ -539,6 +542,9 @@ module OpenAI
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
  # effort can result in faster responses and fewer tokens used on reasoning in a
  # response.
+ #
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+ # effort.
  reasoning_effort: nil,
  # An object specifying the format that the model must output.
  #
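
A brief sketch of how the note reads in practice for `chat.completions.create`, reusing the `client` from the earlier example; the `gpt-5-pro` model name is taken from the note above, and availability depends on the account.

```ruby
completion = client.chat.completions.create(
  model: "gpt-5-pro",
  messages: [{role: :user, content: "Summarize this diff in one sentence."}],
  reasoning_effort: :high # gpt-5-pro defaults to, and only supports, :high
)
puts completion.choices.first.message.content
```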
data/rbi/openai/resources/files.rbi
@@ -89,7 +89,7 @@ module OpenAI
  )
  end

- # Delete a file.
+ # Delete a file and remove it from all vector stores.
  sig do
  params(
  file_id: String,
data/rbi/openai/resources/vector_stores.rbi
@@ -17,6 +17,7 @@ module OpenAI
  OpenAI::AutoFileChunkingStrategyParam::OrHash,
  OpenAI::StaticFileChunkingStrategyObjectParam::OrHash
  ),
+ description: String,
  expires_after: OpenAI::VectorStoreCreateParams::ExpiresAfter::OrHash,
  file_ids: T::Array[String],
  metadata: T.nilable(T::Hash[Symbol, String]),
@@ -28,6 +29,9 @@ module OpenAI
  # The chunking strategy used to chunk the file(s). If not set, will use the `auto`
  # strategy. Only applicable if `file_ids` is non-empty.
  chunking_strategy: nil,
+ # A description for the vector store. Can be used to describe the vector store's
+ # purpose.
+ description: nil,
  # The expiration policy for a vector store.
  expires_after: nil,
  # A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
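
A minimal sketch of the new `description` field on vector store creation; the store name and description text are illustrative.

```ruby
vector_store = client.vector_stores.create(
  name: "support-docs",
  description: "Product manuals and FAQ pages used by the support agent."
)
puts vector_store.id
```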
data/sig/openai/models/audio/transcription_create_params.rbs
@@ -7,6 +7,8 @@ module OpenAI
  model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
  chunking_strategy: OpenAI::Models::Audio::TranscriptionCreateParams::chunking_strategy?,
  include: ::Array[OpenAI::Models::Audio::transcription_include],
+ known_speaker_names: ::Array[String],
+ known_speaker_references: ::Array[String],
  language: String,
  prompt: String,
  response_format: OpenAI::Models::audio_response_format,
@@ -31,6 +33,14 @@ module OpenAI
  ::Array[OpenAI::Models::Audio::transcription_include]
  ) -> ::Array[OpenAI::Models::Audio::transcription_include]

+ attr_reader known_speaker_names: ::Array[String]?
+
+ def known_speaker_names=: (::Array[String]) -> ::Array[String]
+
+ attr_reader known_speaker_references: ::Array[String]?
+
+ def known_speaker_references=: (::Array[String]) -> ::Array[String]
+
  attr_reader language: String?

  def language=: (String) -> String
@@ -60,6 +70,8 @@ module OpenAI
  model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
  ?chunking_strategy: OpenAI::Models::Audio::TranscriptionCreateParams::chunking_strategy?,
  ?include: ::Array[OpenAI::Models::Audio::transcription_include],
+ ?known_speaker_names: ::Array[String],
+ ?known_speaker_references: ::Array[String],
  ?language: String,
  ?prompt: String,
  ?response_format: OpenAI::Models::audio_response_format,
@@ -73,6 +85,8 @@ module OpenAI
  model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
  chunking_strategy: OpenAI::Models::Audio::TranscriptionCreateParams::chunking_strategy?,
  include: ::Array[OpenAI::Models::Audio::transcription_include],
+ known_speaker_names: ::Array[String],
+ known_speaker_references: ::Array[String],
  language: String,
  prompt: String,
  response_format: OpenAI::Models::audio_response_format,
data/sig/openai/models/audio/transcription_create_response.rbs
@@ -2,7 +2,9 @@ module OpenAI
  module Models
  module Audio
  type transcription_create_response =
- OpenAI::Audio::Transcription | OpenAI::Audio::TranscriptionVerbose
+ OpenAI::Audio::Transcription
+ | OpenAI::Audio::TranscriptionDiarized
+ | OpenAI::Audio::TranscriptionVerbose

  module TranscriptionCreateResponse
  extend OpenAI::Internal::Type::Union
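
Since `#create` can now return any member of this widened union, result handling may need a class check. A sketch, reusing the `client` from the earlier example and the aliased model classes shown in these signatures:

```ruby
response = client.audio.transcriptions.create(
  file: Pathname("meeting.m4a"),
  model: :"gpt-4o-transcribe-diarize",
  response_format: :diarized_json
)

case response
when OpenAI::Audio::TranscriptionDiarized
  # Only the diarized variant carries per-speaker segments.
  response.segments.each { |seg| puts "#{seg.speaker}: #{seg.text}" }
when OpenAI::Audio::Transcription, OpenAI::Audio::TranscriptionVerbose
  puts response.text
end
```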
data/sig/openai/models/audio/transcription_diarized.rbs
@@ -0,0 +1,129 @@
+ module OpenAI
+ module Models
+ module Audio
+ type transcription_diarized =
+ {
+ duration: Float,
+ segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+ task: :transcribe,
+ text: String,
+ usage: OpenAI::Models::Audio::TranscriptionDiarized::usage
+ }
+
+ class TranscriptionDiarized < OpenAI::Internal::Type::BaseModel
+ attr_accessor duration: Float
+
+ attr_accessor segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment]
+
+ attr_accessor task: :transcribe
+
+ attr_accessor text: String
+
+ attr_reader usage: OpenAI::Models::Audio::TranscriptionDiarized::usage?
+
+ def usage=: (
+ OpenAI::Models::Audio::TranscriptionDiarized::usage
+ ) -> OpenAI::Models::Audio::TranscriptionDiarized::usage
+
+ def initialize: (
+ duration: Float,
+ segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+ text: String,
+ ?usage: OpenAI::Models::Audio::TranscriptionDiarized::usage,
+ ?task: :transcribe
+ ) -> void
+
+ def to_hash: -> {
+ duration: Float,
+ segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+ task: :transcribe,
+ text: String,
+ usage: OpenAI::Models::Audio::TranscriptionDiarized::usage
+ }
+
+ type usage =
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens
+ | OpenAI::Audio::TranscriptionDiarized::Usage::Duration
+
+ module Usage
+ extend OpenAI::Internal::Type::Union
+
+ type tokens =
+ {
+ input_tokens: Integer,
+ output_tokens: Integer,
+ total_tokens: Integer,
+ type: :tokens,
+ input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+ }
+
+ class Tokens < OpenAI::Internal::Type::BaseModel
+ attr_accessor input_tokens: Integer
+
+ attr_accessor output_tokens: Integer
+
+ attr_accessor total_tokens: Integer
+
+ attr_accessor type: :tokens
+
+ attr_reader input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails?
+
+ def input_token_details=: (
+ OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+ ) -> OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+
+ def initialize: (
+ input_tokens: Integer,
+ output_tokens: Integer,
+ total_tokens: Integer,
+ ?input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails,
+ ?type: :tokens
+ ) -> void
+
+ def to_hash: -> {
+ input_tokens: Integer,
+ output_tokens: Integer,
+ total_tokens: Integer,
+ type: :tokens,
+ input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+ }
+
+ type input_token_details =
+ { audio_tokens: Integer, text_tokens: Integer }
+
+ class InputTokenDetails < OpenAI::Internal::Type::BaseModel
+ attr_reader audio_tokens: Integer?
+
+ def audio_tokens=: (Integer) -> Integer
+
+ attr_reader text_tokens: Integer?
+
+ def text_tokens=: (Integer) -> Integer
+
+ def initialize: (
+ ?audio_tokens: Integer,
+ ?text_tokens: Integer
+ ) -> void
+
+ def to_hash: -> { audio_tokens: Integer, text_tokens: Integer }
+ end
+ end
+
+ type duration = { seconds: Float, type: :duration }
+
+ class Duration < OpenAI::Internal::Type::BaseModel
+ attr_accessor seconds: Float
+
+ attr_accessor type: :duration
+
+ def initialize: (seconds: Float, ?type: :duration) -> void
+
+ def to_hash: -> { seconds: Float, type: :duration }
+ end
+
+ def self?.variants: -> ::Array[OpenAI::Models::Audio::TranscriptionDiarized::usage]
+ end
+ end
+ end
+ end
+ end
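
The `usage` field above is itself a union (token counts vs. billed duration). A sketch of unpacking it from a `transcription` returned by `#create`:

```ruby
case (usage = transcription.usage)
when OpenAI::Audio::TranscriptionDiarized::Usage::Tokens
  details = usage.input_token_details
  puts "tokens: #{usage.total_tokens} (audio input: #{details&.audio_tokens})"
when OpenAI::Audio::TranscriptionDiarized::Usage::Duration
  puts "billed duration: #{usage.seconds}s"
end
```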
data/sig/openai/models/audio/transcription_diarized_segment.rbs
@@ -0,0 +1,47 @@
+ module OpenAI
+ module Models
+ module Audio
+ type transcription_diarized_segment =
+ {
+ id: String,
+ end_: Float,
+ speaker: String,
+ start: Float,
+ text: String,
+ type: :"transcript.text.segment"
+ }
+
+ class TranscriptionDiarizedSegment < OpenAI::Internal::Type::BaseModel
+ attr_accessor id: String
+
+ attr_accessor end_: Float
+
+ attr_accessor speaker: String
+
+ attr_accessor start: Float
+
+ attr_accessor text: String
+
+ attr_accessor type: :"transcript.text.segment"
+
+ def initialize: (
+ id: String,
+ end_: Float,
+ speaker: String,
+ start: Float,
+ text: String,
+ ?type: :"transcript.text.segment"
+ ) -> void
+
+ def to_hash: -> {
+ id: String,
+ end_: Float,
+ speaker: String,
+ start: Float,
+ text: String,
+ type: :"transcript.text.segment"
+ }
+ end
+ end
+ end
+ end
data/sig/openai/models/audio/transcription_stream_event.rbs
@@ -2,7 +2,8 @@ module OpenAI
  module Models
  module Audio
  type transcription_stream_event =
- OpenAI::Audio::TranscriptionTextDeltaEvent
+ OpenAI::Audio::TranscriptionTextSegmentEvent
+ | OpenAI::Audio::TranscriptionTextDeltaEvent
  | OpenAI::Audio::TranscriptionTextDoneEvent

  module TranscriptionStreamEvent
data/sig/openai/models/audio/transcription_text_delta_event.rbs
@@ -5,7 +5,8 @@ module OpenAI
  {
  delta: String,
  type: :"transcript.text.delta",
- logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]
+ logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob],
+ segment_id: String
  }

  class TranscriptionTextDeltaEvent < OpenAI::Internal::Type::BaseModel
@@ -19,16 +20,22 @@ module OpenAI
  ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]
  ) -> ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]

+ attr_reader segment_id: String?
+
+ def segment_id=: (String) -> String
+
  def initialize: (
  delta: String,
  ?logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob],
+ ?segment_id: String,
  ?type: :"transcript.text.delta"
  ) -> void

  def to_hash: -> {
  delta: String,
  type: :"transcript.text.delta",
- logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]
+ logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob],
+ segment_id: String
  }

  type logprob =
data/sig/openai/models/audio/transcription_text_segment_event.rbs
@@ -0,0 +1,47 @@
+ module OpenAI
+ module Models
+ module Audio
+ type transcription_text_segment_event =
+ {
+ id: String,
+ end_: Float,
+ speaker: String,
+ start: Float,
+ text: String,
+ type: :"transcript.text.segment"
+ }
+
+ class TranscriptionTextSegmentEvent < OpenAI::Internal::Type::BaseModel
+ attr_accessor id: String
+
+ attr_accessor end_: Float
+
+ attr_accessor speaker: String
+
+ attr_accessor start: Float
+
+ attr_accessor text: String
+
+ attr_accessor type: :"transcript.text.segment"
+
+ def initialize: (
+ id: String,
+ end_: Float,
+ speaker: String,
+ start: Float,
+ text: String,
+ ?type: :"transcript.text.segment"
+ ) -> void
+
+ def to_hash: -> {
+ id: String,
+ end_: Float,
+ speaker: String,
+ start: Float,
+ text: String,
+ type: :"transcript.text.segment"
+ }
+ end
+ end
+ end
+ end
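
A hedged sketch of consuming the widened stream: finished segments arrive as `transcript.text.segment` events, and text deltas now carry an optional `segment_id` linking them to a segment. It again assumes the `client` from the earlier examples.

```ruby
stream = client.audio.transcriptions.create_streaming(
  file: Pathname("standup.mp3"),
  model: :"gpt-4o-transcribe-diarize"
)

stream.each do |event|
  case event
  when OpenAI::Audio::TranscriptionTextSegmentEvent
    puts "[#{event.speaker}] #{event.start}-#{event.end_}s: #{event.text}"
  when OpenAI::Audio::TranscriptionTextDeltaEvent
    print event.delta # event.segment_id ties the delta to its segment
  when OpenAI::Audio::TranscriptionTextDoneEvent
    puts "\n(transcript complete)"
  end
end
```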
data/sig/openai/models/audio_model.rbs
@@ -1,7 +1,10 @@
  module OpenAI
  module Models
  type audio_model =
- :"whisper-1" | :"gpt-4o-transcribe" | :"gpt-4o-mini-transcribe"
+ :"whisper-1"
+ | :"gpt-4o-transcribe"
+ | :"gpt-4o-mini-transcribe"
+ | :"gpt-4o-transcribe-diarize"

  module AudioModel
  extend OpenAI::Internal::Type::Enum
@@ -9,6 +12,7 @@ module OpenAI
  WHISPER_1: :"whisper-1"
  GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe"
  GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe"
+ GPT_4O_TRANSCRIBE_DIARIZE: :"gpt-4o-transcribe-diarize"

  def self?.values: -> ::Array[OpenAI::Models::audio_model]
  end
data/sig/openai/models/audio_response_format.rbs
@@ -1,6 +1,7 @@
  module OpenAI
  module Models
- type audio_response_format = :json | :text | :srt | :verbose_json | :vtt
+ type audio_response_format =
+ :json | :text | :srt | :verbose_json | :vtt | :diarized_json

  module AudioResponseFormat
  extend OpenAI::Internal::Type::Enum
@@ -10,6 +11,7 @@ module OpenAI
  SRT: :srt
  VERBOSE_JSON: :verbose_json
  VTT: :vtt
+ DIARIZED_JSON: :diarized_json

  def self?.values: -> ::Array[OpenAI::Models::audio_response_format]
  end
data/sig/openai/models/comparison_filter.rbs
@@ -41,12 +41,26 @@ module OpenAI
  def self?.values: -> ::Array[OpenAI::Models::ComparisonFilter::type_]
  end

- type value = String | Float | bool
+ type value =
+ String
+ | Float
+ | bool
+ | ::Array[OpenAI::Models::ComparisonFilter::Value::union_member3]

  module Value
  extend OpenAI::Internal::Type::Union

+ type union_member3 = String | Float
+
+ module UnionMember3
+ extend OpenAI::Internal::Type::Union
+
+ def self?.variants: -> ::Array[OpenAI::Models::ComparisonFilter::Value::union_member3]
+ end
+
  def self?.variants: -> ::Array[OpenAI::Models::ComparisonFilter::value]
+
+ UnionMember3Array: OpenAI::Internal::Type::Converter
  end
  end
  end
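
With array values allowed, membership-style filters become expressible. The sketch below targets vector store search; the `:in` operator is an assumption inferred from this release's comparison-filter changes, not something the hunk above confirms, and the store ID is hypothetical.

```ruby
results = client.vector_stores.search(
  "vs_abc123", # hypothetical vector store ID
  query: "refund policy",
  filters: {key: "region", type: :in, value: ["us", "eu"]} # value may now be an array
)
results.each { |hit| puts "#{hit.filename} (#{hit.score})" }
```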
data/sig/openai/models/eval_create_response.rbs
@@ -85,8 +85,8 @@ module OpenAI
  end

  type testing_criterion =
- OpenAI::Models::Graders::LabelModelGrader
- | OpenAI::Models::Graders::StringCheckGrader
+ OpenAI::Graders::LabelModelGrader
+ | OpenAI::Graders::StringCheckGrader
  | OpenAI::Models::EvalCreateResponse::TestingCriterion::EvalGraderTextSimilarity
  | OpenAI::Models::EvalCreateResponse::TestingCriterion::EvalGraderPython
  | OpenAI::Models::EvalCreateResponse::TestingCriterion::EvalGraderScoreModel

data/sig/openai/models/eval_list_response.rbs
@@ -85,8 +85,8 @@ module OpenAI
  end

  type testing_criterion =
- OpenAI::Models::Graders::LabelModelGrader
- | OpenAI::Models::Graders::StringCheckGrader
+ OpenAI::Graders::LabelModelGrader
+ | OpenAI::Graders::StringCheckGrader
  | OpenAI::Models::EvalListResponse::TestingCriterion::EvalGraderTextSimilarity
  | OpenAI::Models::EvalListResponse::TestingCriterion::EvalGraderPython
  | OpenAI::Models::EvalListResponse::TestingCriterion::EvalGraderScoreModel

data/sig/openai/models/eval_retrieve_response.rbs
@@ -85,8 +85,8 @@ module OpenAI
  end

  type testing_criterion =
- OpenAI::Models::Graders::LabelModelGrader
- | OpenAI::Models::Graders::StringCheckGrader
+ OpenAI::Graders::LabelModelGrader
+ | OpenAI::Graders::StringCheckGrader
  | OpenAI::Models::EvalRetrieveResponse::TestingCriterion::EvalGraderTextSimilarity
  | OpenAI::Models::EvalRetrieveResponse::TestingCriterion::EvalGraderPython
  | OpenAI::Models::EvalRetrieveResponse::TestingCriterion::EvalGraderScoreModel