openai 0.30.0 → 0.32.0
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/README.md +1 -1
- data/lib/openai/internal/util.rb +5 -5
- data/lib/openai/models/audio/transcription_create_params.rb +42 -11
- data/lib/openai/models/audio/transcription_create_response.rb +4 -1
- data/lib/openai/models/audio/transcription_diarized.rb +160 -0
- data/lib/openai/models/audio/transcription_diarized_segment.rb +65 -0
- data/lib/openai/models/audio/transcription_stream_event.rb +7 -4
- data/lib/openai/models/audio/transcription_text_delta_event.rb +10 -1
- data/lib/openai/models/audio/transcription_text_segment_event.rb +63 -0
- data/lib/openai/models/audio_model.rb +1 -0
- data/lib/openai/models/audio_response_format.rb +5 -2
- data/lib/openai/models/beta/assistant_create_params.rb +3 -0
- data/lib/openai/models/beta/assistant_update_params.rb +3 -0
- data/lib/openai/models/beta/threads/run_create_params.rb +3 -0
- data/lib/openai/models/chat/completion_create_params.rb +3 -0
- data/lib/openai/models/comparison_filter.rb +29 -6
- data/lib/openai/models/evals/create_eval_completions_run_data_source.rb +3 -0
- data/lib/openai/models/evals/run_cancel_response.rb +6 -0
- data/lib/openai/models/evals/run_create_params.rb +6 -0
- data/lib/openai/models/evals/run_create_response.rb +6 -0
- data/lib/openai/models/evals/run_list_response.rb +6 -0
- data/lib/openai/models/evals/run_retrieve_response.rb +6 -0
- data/lib/openai/models/graders/score_model_grader.rb +3 -0
- data/lib/openai/models/realtime/audio_transcription.rb +8 -6
- data/lib/openai/models/reasoning.rb +3 -0
- data/lib/openai/models/reasoning_effort.rb +3 -0
- data/lib/openai/models/vector_store_create_params.rb +10 -1
- data/lib/openai/models/vector_stores/vector_store_file.rb +3 -3
- data/lib/openai/resources/audio/transcriptions.rb +12 -4
- data/lib/openai/resources/files.rb +1 -1
- data/lib/openai/resources/vector_stores.rb +3 -1
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +3 -0
- data/rbi/openai/models/audio/transcription_create_params.rbi +66 -16
- data/rbi/openai/models/audio/transcription_create_response.rbi +1 -0
- data/rbi/openai/models/audio/transcription_diarized.rbi +281 -0
- data/rbi/openai/models/audio/transcription_diarized_segment.rbi +87 -0
- data/rbi/openai/models/audio/transcription_stream_event.rbi +4 -3
- data/rbi/openai/models/audio/transcription_text_delta_event.rbi +14 -1
- data/rbi/openai/models/audio/transcription_text_segment_event.rbi +86 -0
- data/rbi/openai/models/audio_model.rbi +2 -0
- data/rbi/openai/models/audio_response_format.rbi +6 -2
- data/rbi/openai/models/beta/assistant_create_params.rbi +6 -0
- data/rbi/openai/models/beta/assistant_update_params.rbi +6 -0
- data/rbi/openai/models/beta/threads/run_create_params.rbi +6 -0
- data/rbi/openai/models/chat/completion_create_params.rbi +6 -0
- data/rbi/openai/models/comparison_filter.rbi +43 -4
- data/rbi/openai/models/eval_create_response.rbi +4 -4
- data/rbi/openai/models/eval_list_response.rbi +4 -4
- data/rbi/openai/models/eval_retrieve_response.rbi +4 -4
- data/rbi/openai/models/eval_update_response.rbi +4 -4
- data/rbi/openai/models/evals/create_eval_completions_run_data_source.rbi +6 -0
- data/rbi/openai/models/evals/run_cancel_response.rbi +12 -0
- data/rbi/openai/models/evals/run_create_params.rbi +12 -0
- data/rbi/openai/models/evals/run_create_response.rbi +12 -0
- data/rbi/openai/models/evals/run_list_response.rbi +12 -0
- data/rbi/openai/models/evals/run_retrieve_response.rbi +12 -0
- data/rbi/openai/models/graders/score_model_grader.rbi +6 -0
- data/rbi/openai/models/realtime/audio_transcription.rbi +15 -12
- data/rbi/openai/models/reasoning.rbi +6 -0
- data/rbi/openai/models/reasoning_effort.rbi +3 -0
- data/rbi/openai/models/vector_store_create_params.rbi +13 -0
- data/rbi/openai/models/vector_stores/vector_store_file.rbi +3 -3
- data/rbi/openai/resources/audio/transcriptions.rbi +52 -14
- data/rbi/openai/resources/beta/assistants.rbi +6 -0
- data/rbi/openai/resources/beta/threads/runs.rbi +6 -0
- data/rbi/openai/resources/chat/completions.rbi +6 -0
- data/rbi/openai/resources/files.rbi +1 -1
- data/rbi/openai/resources/vector_stores.rbi +4 -0
- data/sig/openai/models/audio/transcription_create_params.rbs +14 -0
- data/sig/openai/models/audio/transcription_create_response.rbs +3 -1
- data/sig/openai/models/audio/transcription_diarized.rbs +129 -0
- data/sig/openai/models/audio/transcription_diarized_segment.rbs +47 -0
- data/sig/openai/models/audio/transcription_stream_event.rbs +2 -1
- data/sig/openai/models/audio/transcription_text_delta_event.rbs +9 -2
- data/sig/openai/models/audio/transcription_text_segment_event.rbs +47 -0
- data/sig/openai/models/audio_model.rbs +5 -1
- data/sig/openai/models/audio_response_format.rbs +3 -1
- data/sig/openai/models/comparison_filter.rbs +15 -1
- data/sig/openai/models/eval_create_response.rbs +2 -2
- data/sig/openai/models/eval_list_response.rbs +2 -2
- data/sig/openai/models/eval_retrieve_response.rbs +2 -2
- data/sig/openai/models/eval_update_response.rbs +2 -2
- data/sig/openai/models/realtime/audio_transcription.rbs +2 -2
- data/sig/openai/models/vector_store_create_params.rbs +7 -0
- data/sig/openai/resources/audio/transcriptions.rbs +4 -0
- data/sig/openai/resources/vector_stores.rbs +1 -0
- metadata +11 -2
@@ -20,6 +20,8 @@ module OpenAI
   )
   ),
   include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
+  known_speaker_names: T::Array[String],
+  known_speaker_references: T::Array[String],
   language: String,
   prompt: String,
   response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -39,20 +41,33 @@ module OpenAI
   # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
   file:,
   # ID of the model to use. The options are `gpt-4o-transcribe`,
-  # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
-  # Whisper V2 model).
+  # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
+  # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
   model:,
   # Controls how the audio is cut into chunks. When set to `"auto"`, the server
   # first normalizes loudness and then uses voice activity detection (VAD) to choose
   # boundaries. `server_vad` object can be provided to tweak VAD detection
   # parameters manually. If unset, the audio is transcribed as a single block.
+  # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
+  # seconds.
   chunking_strategy: nil,
   # Additional information to include in the transcription response. `logprobs` will
   # return the log probabilities of the tokens in the response to understand the
   # model's confidence in the transcription. `logprobs` only works with
   # response_format set to `json` and only with the models `gpt-4o-transcribe` and
-  # `gpt-4o-mini-transcribe`.
+  # `gpt-4o-mini-transcribe`. This field is not supported when using
+  # `gpt-4o-transcribe-diarize`.
   include: nil,
+  # Optional list of speaker names that correspond to the audio samples provided in
+  # `known_speaker_references[]`. Each entry should be a short identifier (for
+  # example `customer` or `agent`). Up to 4 speakers are supported.
+  known_speaker_names: nil,
+  # Optional list of audio samples (as
+  # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+  # that contain known speaker references matching `known_speaker_names[]`. Each
+  # sample must be between 2 and 10 seconds, and can use any of the same input audio
+  # formats supported by `file`.
+  known_speaker_references: nil,
   # The language of the input audio. Supplying the input language in
   # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
   # format will improve accuracy and latency.
@@ -60,11 +75,14 @@ module OpenAI
   # An optional text to guide the model's style or continue a previous audio
   # segment. The
   # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
-  # should match the audio language.
+  # should match the audio language. This field is not supported when using
+  # `gpt-4o-transcribe-diarize`.
   prompt: nil,
   # The format of the output, in one of these options: `json`, `text`, `srt`,
-  # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
-  # the only supported format is `json`.
+  # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+  # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+  # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+  # `diarized_json`, with `diarized_json` required to receive speaker annotations.
   response_format: nil,
   # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
   # output more random, while lower values like 0.2 will make it more focused and
@@ -76,7 +94,8 @@
   # `response_format` must be set `verbose_json` to use timestamp granularities.
   # Either or both of these options are supported: `word`, or `segment`. Note: There
   # is no additional latency for segment timestamps, but generating word timestamps
-  # incurs additional latency.
+  # incurs additional latency. This option is not available for
+  # `gpt-4o-transcribe-diarize`.
   timestamp_granularities: nil,
   # There is no need to provide `stream:`. Instead, use `#create_streaming` or
   # `#create` for streaming and non-streaming use cases, respectively.
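Taken together, these doc-comment changes describe the new diarization flow end to end. A minimal non-streaming sketch against this version of the gem, assuming `OPENAI_API_KEY` is set; the file name and speaker-sample data URLs are hypothetical placeholders:

```ruby
require "openai"
require "pathname"

client = OpenAI::Client.new # reads ENV["OPENAI_API_KEY"] by default

# Hypothetical data URLs, each a 2-10 second sample of a known speaker.
agent_sample    = "data:audio/wav;base64,..." # placeholder
customer_sample = "data:audio/wav;base64,..." # placeholder

transcription = client.audio.transcriptions.create(
  file: Pathname("meeting.wav"),               # hypothetical local file
  model: :"gpt-4o-transcribe-diarize",
  response_format: :diarized_json,             # required to get speaker annotations
  chunking_strategy: :auto,                    # required for inputs over 30 seconds
  known_speaker_names: %w[agent customer],     # up to 4 speakers
  known_speaker_references: [agent_sample, customer_sample]
)

puts transcription.text
```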
@@ -101,6 +120,8 @@ module OpenAI
   )
   ),
   include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
+  known_speaker_names: T::Array[String],
+  known_speaker_references: T::Array[String],
   language: String,
   prompt: String,
   response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -122,20 +143,33 @@ module OpenAI
   # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
   file:,
   # ID of the model to use. The options are `gpt-4o-transcribe`,
-  # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
-  # Whisper V2 model).
+  # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
+  # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
   model:,
   # Controls how the audio is cut into chunks. When set to `"auto"`, the server
   # first normalizes loudness and then uses voice activity detection (VAD) to choose
   # boundaries. `server_vad` object can be provided to tweak VAD detection
   # parameters manually. If unset, the audio is transcribed as a single block.
+  # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
+  # seconds.
   chunking_strategy: nil,
   # Additional information to include in the transcription response. `logprobs` will
   # return the log probabilities of the tokens in the response to understand the
   # model's confidence in the transcription. `logprobs` only works with
   # response_format set to `json` and only with the models `gpt-4o-transcribe` and
-  # `gpt-4o-mini-transcribe`.
+  # `gpt-4o-mini-transcribe`. This field is not supported when using
+  # `gpt-4o-transcribe-diarize`.
   include: nil,
+  # Optional list of speaker names that correspond to the audio samples provided in
+  # `known_speaker_references[]`. Each entry should be a short identifier (for
+  # example `customer` or `agent`). Up to 4 speakers are supported.
+  known_speaker_names: nil,
+  # Optional list of audio samples (as
+  # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
+  # that contain known speaker references matching `known_speaker_names[]`. Each
+  # sample must be between 2 and 10 seconds, and can use any of the same input audio
+  # formats supported by `file`.
+  known_speaker_references: nil,
   # The language of the input audio. Supplying the input language in
   # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
   # format will improve accuracy and latency.
@@ -143,11 +177,14 @@ module OpenAI
   # An optional text to guide the model's style or continue a previous audio
   # segment. The
   # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
-  # should match the audio language.
+  # should match the audio language. This field is not supported when using
+  # `gpt-4o-transcribe-diarize`.
   prompt: nil,
   # The format of the output, in one of these options: `json`, `text`, `srt`,
-  # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
-  # the only supported format is `json`.
+  # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+  # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+  # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+  # `diarized_json`, with `diarized_json` required to receive speaker annotations.
   response_format: nil,
   # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
   # output more random, while lower values like 0.2 will make it more focused and
@@ -159,7 +196,8 @@
   # `response_format` must be set `verbose_json` to use timestamp granularities.
   # Either or both of these options are supported: `word`, or `segment`. Note: There
   # is no additional latency for segment timestamps, but generating word timestamps
-  # incurs additional latency.
+  # incurs additional latency. This option is not available for
+  # `gpt-4o-transcribe-diarize`.
   timestamp_granularities: nil,
   # There is no need to provide `stream:`. Instead, use `#create_streaming` or
   # `#create` for streaming and non-streaming use cases, respectively.
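The same parameters apply to the streaming variant. A sketch of `#create_streaming` handling the new segment events, using the event model classes added in this diff (the file path is again hypothetical):

```ruby
stream = client.audio.transcriptions.create_streaming(
  file: Pathname("meeting.wav"), # hypothetical local file
  model: :"gpt-4o-transcribe-diarize",
  chunking_strategy: :auto
)

stream.each do |event|
  case event
  when OpenAI::Audio::TranscriptionTextDeltaEvent
    print event.delta # deltas may now carry a segment_id tying them to a segment
  when OpenAI::Audio::TranscriptionTextSegmentEvent
    puts "\n[#{event.speaker}] #{event.text} (#{event.start}s-#{event.end_}s)"
  end
end
```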
@@ -65,6 +65,9 @@ module OpenAI
   # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
   # effort can result in faster responses and fewer tokens used on reasoning in a
   # response.
+  #
+  # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+  # effort.
   reasoning_effort: nil,
   # Specifies the format that the model must output. Compatible with
   # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
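This same `gpt-5-pro` note is stamped into every surface that accepts `reasoning_effort` (assistants, runs, chat completions, evals, graders). To illustrate it with chat completions: pass `:high` or omit the field entirely, since lower values are not supported by that model. A sketch (model availability is account-dependent):

```ruby
response = client.chat.completions.create(
  model: :"gpt-5-pro",
  reasoning_effort: :high, # the only supported value; omitting it defaults to :high
  messages: [{role: :user, content: "Summarize the trade-offs of speaker diarization."}]
)
puts response.choices.first.message.content
```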
@@ -192,6 +195,9 @@ module OpenAI
   # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
   # effort can result in faster responses and fewer tokens used on reasoning in a
   # response.
+  #
+  # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+  # effort.
   reasoning_effort: nil,
   # Specifies the format that the model must output. Compatible with
   # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -128,6 +128,9 @@ module OpenAI
   # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
   # effort can result in faster responses and fewer tokens used on reasoning in a
   # response.
+  #
+  # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+  # effort.
   reasoning_effort: nil,
   # Body param: Specifies the format that the model must output. Compatible with
   # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -307,6 +310,9 @@ module OpenAI
   # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
   # effort can result in faster responses and fewer tokens used on reasoning in a
   # response.
+  #
+  # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+  # effort.
   reasoning_effort: nil,
   # Body param: Specifies the format that the model must output. Compatible with
   # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -222,6 +222,9 @@ module OpenAI
   # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
   # effort can result in faster responses and fewer tokens used on reasoning in a
   # response.
+  #
+  # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+  # effort.
   reasoning_effort: nil,
   # An object specifying the format that the model must output.
   #
@@ -539,6 +542,9 @@ module OpenAI
   # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
   # effort can result in faster responses and fewer tokens used on reasoning in a
   # response.
+  #
+  # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+  # effort.
   reasoning_effort: nil,
   # An object specifying the format that the model must output.
   #
@@ -17,6 +17,7 @@ module OpenAI
   OpenAI::AutoFileChunkingStrategyParam::OrHash,
   OpenAI::StaticFileChunkingStrategyObjectParam::OrHash
   ),
+  description: String,
   expires_after: OpenAI::VectorStoreCreateParams::ExpiresAfter::OrHash,
   file_ids: T::Array[String],
   metadata: T.nilable(T::Hash[Symbol, String]),
@@ -28,6 +29,9 @@ module OpenAI
   # The chunking strategy used to chunk the file(s). If not set, will use the `auto`
   # strategy. Only applicable if `file_ids` is non-empty.
   chunking_strategy: nil,
+  # A description for the vector store. Can be used to describe the vector store's
+  # purpose.
+  description: nil,
   # The expiration policy for a vector store.
   expires_after: nil,
   # A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
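A sketch of the new `description` parameter on vector store creation; the name, description, and `uploaded_file_ids` variable are illustrative:

```ruby
vector_store = client.vector_stores.create(
  name: "support-kb",                                       # illustrative
  description: "Docs the support assistant searches when answering tickets.",
  file_ids: uploaded_file_ids                               # hypothetical: IDs from prior file uploads
)
```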
@@ -7,6 +7,8 @@ module OpenAI
   model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
   chunking_strategy: OpenAI::Models::Audio::TranscriptionCreateParams::chunking_strategy?,
   include: ::Array[OpenAI::Models::Audio::transcription_include],
+  known_speaker_names: ::Array[String],
+  known_speaker_references: ::Array[String],
   language: String,
   prompt: String,
   response_format: OpenAI::Models::audio_response_format,
@@ -31,6 +33,14 @@ module OpenAI
     ::Array[OpenAI::Models::Audio::transcription_include]
   ) -> ::Array[OpenAI::Models::Audio::transcription_include]

+  attr_reader known_speaker_names: ::Array[String]?
+
+  def known_speaker_names=: (::Array[String]) -> ::Array[String]
+
+  attr_reader known_speaker_references: ::Array[String]?
+
+  def known_speaker_references=: (::Array[String]) -> ::Array[String]
+
   attr_reader language: String?

   def language=: (String) -> String
@@ -60,6 +70,8 @@ module OpenAI
   model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
   ?chunking_strategy: OpenAI::Models::Audio::TranscriptionCreateParams::chunking_strategy?,
   ?include: ::Array[OpenAI::Models::Audio::transcription_include],
+  ?known_speaker_names: ::Array[String],
+  ?known_speaker_references: ::Array[String],
   ?language: String,
   ?prompt: String,
   ?response_format: OpenAI::Models::audio_response_format,
@@ -73,6 +85,8 @@ module OpenAI
   model: OpenAI::Models::Audio::TranscriptionCreateParams::model,
   chunking_strategy: OpenAI::Models::Audio::TranscriptionCreateParams::chunking_strategy?,
   include: ::Array[OpenAI::Models::Audio::transcription_include],
+  known_speaker_names: ::Array[String],
+  known_speaker_references: ::Array[String],
   language: String,
   prompt: String,
   response_format: OpenAI::Models::audio_response_format,
@@ -2,7 +2,9 @@ module OpenAI
   module Models
     module Audio
       type transcription_create_response =
-        OpenAI::Audio::Transcription | OpenAI::Audio::TranscriptionVerbose
+        OpenAI::Audio::Transcription
+        | OpenAI::Audio::TranscriptionDiarized
+        | OpenAI::Audio::TranscriptionVerbose

      module TranscriptionCreateResponse
        extend OpenAI::Internal::Type::Union
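Because `#create` now returns a three-way union, callers that inspect the response should branch on the concrete class. A minimal sketch:

```ruby
case transcription
when OpenAI::Audio::TranscriptionDiarized
  transcription.segments.each { |s| puts "#{s.speaker}: #{s.text}" }
when OpenAI::Audio::TranscriptionVerbose
  puts "#{transcription.duration}s of audio: #{transcription.text}"
when OpenAI::Audio::Transcription
  puts transcription.text
end
```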
@@ -0,0 +1,129 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_diarized =
+        {
+          duration: Float,
+          segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+          task: :transcribe,
+          text: String,
+          usage: OpenAI::Models::Audio::TranscriptionDiarized::usage
+        }
+
+      class TranscriptionDiarized < OpenAI::Internal::Type::BaseModel
+        attr_accessor duration: Float
+
+        attr_accessor segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment]
+
+        attr_accessor task: :transcribe
+
+        attr_accessor text: String
+
+        attr_reader usage: OpenAI::Models::Audio::TranscriptionDiarized::usage?
+
+        def usage=: (
+          OpenAI::Models::Audio::TranscriptionDiarized::usage
+        ) -> OpenAI::Models::Audio::TranscriptionDiarized::usage
+
+        def initialize: (
+          duration: Float,
+          segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+          text: String,
+          ?usage: OpenAI::Models::Audio::TranscriptionDiarized::usage,
+          ?task: :transcribe
+        ) -> void
+
+        def to_hash: -> {
+          duration: Float,
+          segments: ::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+          task: :transcribe,
+          text: String,
+          usage: OpenAI::Models::Audio::TranscriptionDiarized::usage
+        }
+
+        type usage =
+          OpenAI::Audio::TranscriptionDiarized::Usage::Tokens
+          | OpenAI::Audio::TranscriptionDiarized::Usage::Duration
+
+        module Usage
+          extend OpenAI::Internal::Type::Union
+
+          type tokens =
+            {
+              input_tokens: Integer,
+              output_tokens: Integer,
+              total_tokens: Integer,
+              type: :tokens,
+              input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+            }
+
+          class Tokens < OpenAI::Internal::Type::BaseModel
+            attr_accessor input_tokens: Integer
+
+            attr_accessor output_tokens: Integer
+
+            attr_accessor total_tokens: Integer
+
+            attr_accessor type: :tokens
+
+            attr_reader input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails?
+
+            def input_token_details=: (
+              OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+            ) -> OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+
+            def initialize: (
+              input_tokens: Integer,
+              output_tokens: Integer,
+              total_tokens: Integer,
+              ?input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails,
+              ?type: :tokens
+            ) -> void
+
+            def to_hash: -> {
+              input_tokens: Integer,
+              output_tokens: Integer,
+              total_tokens: Integer,
+              type: :tokens,
+              input_token_details: OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+            }
+
+            type input_token_details =
+              { audio_tokens: Integer, text_tokens: Integer }
+
+            class InputTokenDetails < OpenAI::Internal::Type::BaseModel
+              attr_reader audio_tokens: Integer?
+
+              def audio_tokens=: (Integer) -> Integer
+
+              attr_reader text_tokens: Integer?
+
+              def text_tokens=: (Integer) -> Integer
+
+              def initialize: (
+                ?audio_tokens: Integer,
+                ?text_tokens: Integer
+              ) -> void
+
+              def to_hash: -> { audio_tokens: Integer, text_tokens: Integer }
+            end
+          end
+
+          type duration = { seconds: Float, type: :duration }
+
+          class Duration < OpenAI::Internal::Type::BaseModel
+            attr_accessor seconds: Float
+
+            attr_accessor type: :duration
+
+            def initialize: (seconds: Float, ?type: :duration) -> void
+
+            def to_hash: -> { seconds: Float, type: :duration }
+          end
+
+          def self?.variants: -> ::Array[OpenAI::Models::Audio::TranscriptionDiarized::usage]
+        end
+      end
+    end
+  end
+end
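The `usage` union in this new model means billing information arrives either as token counts or as seconds of audio. A sketch of handling both variants:

```ruby
usage = transcription.usage
case usage
when OpenAI::Audio::TranscriptionDiarized::Usage::Tokens
  puts "billed #{usage.total_tokens} tokens (#{usage.input_tokens} in / #{usage.output_tokens} out)"
when OpenAI::Audio::TranscriptionDiarized::Usage::Duration
  puts "billed #{usage.seconds} seconds of audio"
end
```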
@@ -0,0 +1,47 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_diarized_segment =
+        {
+          id: String,
+          end_: Float,
+          speaker: String,
+          start: Float,
+          text: String,
+          type: :"transcript.text.segment"
+        }
+
+      class TranscriptionDiarizedSegment < OpenAI::Internal::Type::BaseModel
+        attr_accessor id: String
+
+        attr_accessor end_: Float
+
+        attr_accessor speaker: String
+
+        attr_accessor start: Float
+
+        attr_accessor text: String
+
+        attr_accessor type: :"transcript.text.segment"
+
+        def initialize: (
+          id: String,
+          end_: Float,
+          speaker: String,
+          start: Float,
+          text: String,
+          ?type: :"transcript.text.segment"
+        ) -> void
+
+        def to_hash: -> {
+          id: String,
+          end_: Float,
+          speaker: String,
+          start: Float,
+          text: String,
+          type: :"transcript.text.segment"
+        }
+      end
+    end
+  end
+end
@@ -2,7 +2,8 @@ module OpenAI
   module Models
     module Audio
       type transcription_stream_event =
-        OpenAI::Audio::TranscriptionTextDeltaEvent
+        OpenAI::Audio::TranscriptionTextSegmentEvent
+        | OpenAI::Audio::TranscriptionTextDeltaEvent
         | OpenAI::Audio::TranscriptionTextDoneEvent

      module TranscriptionStreamEvent
@@ -5,7 +5,8 @@ module OpenAI
   {
     delta: String,
     type: :"transcript.text.delta",
-    logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]
+    logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob],
+    segment_id: String
   }

   class TranscriptionTextDeltaEvent < OpenAI::Internal::Type::BaseModel
@@ -19,16 +20,22 @@ module OpenAI
     ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]
   ) -> ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]

+  attr_reader segment_id: String?
+
+  def segment_id=: (String) -> String
+
   def initialize: (
     delta: String,
     ?logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob],
+    ?segment_id: String,
     ?type: :"transcript.text.delta"
   ) -> void

   def to_hash: -> {
     delta: String,
     type: :"transcript.text.delta",
-    logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]
+    logprobs: ::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob],
+    segment_id: String
   }

   type logprob =
@@ -0,0 +1,47 @@
+module OpenAI
+  module Models
+    module Audio
+      type transcription_text_segment_event =
+        {
+          id: String,
+          end_: Float,
+          speaker: String,
+          start: Float,
+          text: String,
+          type: :"transcript.text.segment"
+        }
+
+      class TranscriptionTextSegmentEvent < OpenAI::Internal::Type::BaseModel
+        attr_accessor id: String
+
+        attr_accessor end_: Float
+
+        attr_accessor speaker: String
+
+        attr_accessor start: Float
+
+        attr_accessor text: String
+
+        attr_accessor type: :"transcript.text.segment"
+
+        def initialize: (
+          id: String,
+          end_: Float,
+          speaker: String,
+          start: Float,
+          text: String,
+          ?type: :"transcript.text.segment"
+        ) -> void
+
+        def to_hash: -> {
+          id: String,
+          end_: Float,
+          speaker: String,
+          start: Float,
+          text: String,
+          type: :"transcript.text.segment"
+        }
+      end
+    end
+  end
+end
@@ -1,7 +1,10 @@
 module OpenAI
   module Models
     type audio_model =
-      :"whisper-1" | :"gpt-4o-transcribe" | :"gpt-4o-mini-transcribe"
+      :"whisper-1"
+      | :"gpt-4o-transcribe"
+      | :"gpt-4o-mini-transcribe"
+      | :"gpt-4o-transcribe-diarize"

    module AudioModel
      extend OpenAI::Internal::Type::Enum
@@ -9,6 +12,7 @@ module OpenAI
      WHISPER_1: :"whisper-1"
      GPT_4O_TRANSCRIBE: :"gpt-4o-transcribe"
      GPT_4O_MINI_TRANSCRIBE: :"gpt-4o-mini-transcribe"
+     GPT_4O_TRANSCRIBE_DIARIZE: :"gpt-4o-transcribe-diarize"

      def self?.values: -> ::Array[OpenAI::Models::audio_model]
    end
@@ -1,6 +1,7 @@
 module OpenAI
   module Models
-    type audio_response_format = :json | :text | :srt | :verbose_json | :vtt
+    type audio_response_format =
+      :json | :text | :srt | :verbose_json | :vtt | :diarized_json

    module AudioResponseFormat
      extend OpenAI::Internal::Type::Enum
@@ -10,6 +11,7 @@ module OpenAI
      SRT: :srt
      VERBOSE_JSON: :verbose_json
      VTT: :vtt
+     DIARIZED_JSON: :diarized_json

      def self?.values: -> ::Array[OpenAI::Models::audio_response_format]
    end
@@ -41,12 +41,26 @@ module OpenAI
   def self?.values: -> ::Array[OpenAI::Models::ComparisonFilter::type_]
 end

-type value = String | Float | bool
+type value =
+  String
+  | Float
+  | bool
+  | ::Array[OpenAI::Models::ComparisonFilter::Value::union_member3]

 module Value
   extend OpenAI::Internal::Type::Union

+  type union_member3 = String | Float
+
+  module UnionMember3
+    extend OpenAI::Internal::Type::Union
+
+    def self?.variants: -> ::Array[OpenAI::Models::ComparisonFilter::Value::union_member3]
+  end
+
   def self?.variants: -> ::Array[OpenAI::Models::ComparisonFilter::value]
+
+  UnionMember3Array: OpenAI::Internal::Type::Converter
 end
 end
 end
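The widened `value` union admits arrays of strings or numbers, which is what membership-style comparisons need. A hedged sketch of building such a filter for file search; the attribute key and values are illustrative, and `:in` assumes this gem version's expanded operator set (check the `type_` enum in your installed version):

```ruby
filter = OpenAI::ComparisonFilter.new(
  key: "region",   # illustrative attribute key
  type: :in,       # assumption: the operator that pairs with array values
  value: %w[us eu] # now a legal value thanks to the widened union
)
```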
@@ -85,8 +85,8 @@ module OpenAI
   end

   type testing_criterion =
-    OpenAI::
-    | OpenAI::
+    OpenAI::Graders::LabelModelGrader
+    | OpenAI::Graders::StringCheckGrader
     | OpenAI::Models::EvalCreateResponse::TestingCriterion::EvalGraderTextSimilarity
     | OpenAI::Models::EvalCreateResponse::TestingCriterion::EvalGraderPython
     | OpenAI::Models::EvalCreateResponse::TestingCriterion::EvalGraderScoreModel
@@ -85,8 +85,8 @@ module OpenAI
   end

   type testing_criterion =
-    OpenAI::
-    | OpenAI::
+    OpenAI::Graders::LabelModelGrader
+    | OpenAI::Graders::StringCheckGrader
     | OpenAI::Models::EvalListResponse::TestingCriterion::EvalGraderTextSimilarity
     | OpenAI::Models::EvalListResponse::TestingCriterion::EvalGraderPython
     | OpenAI::Models::EvalListResponse::TestingCriterion::EvalGraderScoreModel
@@ -85,8 +85,8 @@ module OpenAI
   end

   type testing_criterion =
-    OpenAI::
-    | OpenAI::
+    OpenAI::Graders::LabelModelGrader
+    | OpenAI::Graders::StringCheckGrader
     | OpenAI::Models::EvalRetrieveResponse::TestingCriterion::EvalGraderTextSimilarity
     | OpenAI::Models::EvalRetrieveResponse::TestingCriterion::EvalGraderPython
     | OpenAI::Models::EvalRetrieveResponse::TestingCriterion::EvalGraderScoreModel