openai 0.30.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/README.md +1 -1
- data/lib/openai/internal/util.rb +5 -5
- data/lib/openai/models/audio/transcription_create_params.rb +42 -11
- data/lib/openai/models/audio/transcription_create_response.rb +4 -1
- data/lib/openai/models/audio/transcription_diarized.rb +160 -0
- data/lib/openai/models/audio/transcription_diarized_segment.rb +65 -0
- data/lib/openai/models/audio/transcription_stream_event.rb +7 -4
- data/lib/openai/models/audio/transcription_text_delta_event.rb +10 -1
- data/lib/openai/models/audio/transcription_text_segment_event.rb +63 -0
- data/lib/openai/models/audio_model.rb +1 -0
- data/lib/openai/models/audio_response_format.rb +5 -2
- data/lib/openai/models/beta/assistant_create_params.rb +3 -0
- data/lib/openai/models/beta/assistant_update_params.rb +3 -0
- data/lib/openai/models/beta/threads/run_create_params.rb +3 -0
- data/lib/openai/models/chat/completion_create_params.rb +3 -0
- data/lib/openai/models/comparison_filter.rb +29 -6
- data/lib/openai/models/evals/create_eval_completions_run_data_source.rb +3 -0
- data/lib/openai/models/evals/run_cancel_response.rb +6 -0
- data/lib/openai/models/evals/run_create_params.rb +6 -0
- data/lib/openai/models/evals/run_create_response.rb +6 -0
- data/lib/openai/models/evals/run_list_response.rb +6 -0
- data/lib/openai/models/evals/run_retrieve_response.rb +6 -0
- data/lib/openai/models/graders/score_model_grader.rb +3 -0
- data/lib/openai/models/realtime/audio_transcription.rb +8 -6
- data/lib/openai/models/reasoning.rb +3 -0
- data/lib/openai/models/reasoning_effort.rb +3 -0
- data/lib/openai/models/vector_store_create_params.rb +10 -1
- data/lib/openai/models/vector_stores/vector_store_file.rb +3 -3
- data/lib/openai/resources/audio/transcriptions.rb +12 -4
- data/lib/openai/resources/files.rb +1 -1
- data/lib/openai/resources/vector_stores.rb +3 -1
- data/lib/openai/version.rb +1 -1
- data/lib/openai.rb +3 -0
- data/rbi/openai/models/audio/transcription_create_params.rbi +66 -16
- data/rbi/openai/models/audio/transcription_create_response.rbi +1 -0
- data/rbi/openai/models/audio/transcription_diarized.rbi +281 -0
- data/rbi/openai/models/audio/transcription_diarized_segment.rbi +87 -0
- data/rbi/openai/models/audio/transcription_stream_event.rbi +4 -3
- data/rbi/openai/models/audio/transcription_text_delta_event.rbi +14 -1
- data/rbi/openai/models/audio/transcription_text_segment_event.rbi +86 -0
- data/rbi/openai/models/audio_model.rbi +2 -0
- data/rbi/openai/models/audio_response_format.rbi +6 -2
- data/rbi/openai/models/beta/assistant_create_params.rbi +6 -0
- data/rbi/openai/models/beta/assistant_update_params.rbi +6 -0
- data/rbi/openai/models/beta/threads/run_create_params.rbi +6 -0
- data/rbi/openai/models/chat/completion_create_params.rbi +6 -0
- data/rbi/openai/models/comparison_filter.rbi +43 -4
- data/rbi/openai/models/eval_create_response.rbi +4 -4
- data/rbi/openai/models/eval_list_response.rbi +4 -4
- data/rbi/openai/models/eval_retrieve_response.rbi +4 -4
- data/rbi/openai/models/eval_update_response.rbi +4 -4
- data/rbi/openai/models/evals/create_eval_completions_run_data_source.rbi +6 -0
- data/rbi/openai/models/evals/run_cancel_response.rbi +12 -0
- data/rbi/openai/models/evals/run_create_params.rbi +12 -0
- data/rbi/openai/models/evals/run_create_response.rbi +12 -0
- data/rbi/openai/models/evals/run_list_response.rbi +12 -0
- data/rbi/openai/models/evals/run_retrieve_response.rbi +12 -0
- data/rbi/openai/models/graders/score_model_grader.rbi +6 -0
- data/rbi/openai/models/realtime/audio_transcription.rbi +15 -12
- data/rbi/openai/models/reasoning.rbi +6 -0
- data/rbi/openai/models/reasoning_effort.rbi +3 -0
- data/rbi/openai/models/vector_store_create_params.rbi +13 -0
- data/rbi/openai/models/vector_stores/vector_store_file.rbi +3 -3
- data/rbi/openai/resources/audio/transcriptions.rbi +52 -14
- data/rbi/openai/resources/beta/assistants.rbi +6 -0
- data/rbi/openai/resources/beta/threads/runs.rbi +6 -0
- data/rbi/openai/resources/chat/completions.rbi +6 -0
- data/rbi/openai/resources/files.rbi +1 -1
- data/rbi/openai/resources/vector_stores.rbi +4 -0
- data/sig/openai/models/audio/transcription_create_params.rbs +14 -0
- data/sig/openai/models/audio/transcription_create_response.rbs +3 -1
- data/sig/openai/models/audio/transcription_diarized.rbs +129 -0
- data/sig/openai/models/audio/transcription_diarized_segment.rbs +47 -0
- data/sig/openai/models/audio/transcription_stream_event.rbs +2 -1
- data/sig/openai/models/audio/transcription_text_delta_event.rbs +9 -2
- data/sig/openai/models/audio/transcription_text_segment_event.rbs +47 -0
- data/sig/openai/models/audio_model.rbs +5 -1
- data/sig/openai/models/audio_response_format.rbs +3 -1
- data/sig/openai/models/comparison_filter.rbs +15 -1
- data/sig/openai/models/eval_create_response.rbs +2 -2
- data/sig/openai/models/eval_list_response.rbs +2 -2
- data/sig/openai/models/eval_retrieve_response.rbs +2 -2
- data/sig/openai/models/eval_update_response.rbs +2 -2
- data/sig/openai/models/realtime/audio_transcription.rbs +2 -2
- data/sig/openai/models/vector_store_create_params.rbs +7 -0
- data/sig/openai/resources/audio/transcriptions.rbs +4 -0
- data/sig/openai/resources/vector_stores.rbs +1 -0
- metadata +11 -2
data/rbi/openai/models/audio/transcription_diarized.rbi
@@ -0,0 +1,281 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Audio
+      class TranscriptionDiarized < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Audio::TranscriptionDiarized,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # Duration of the input audio in seconds.
+        sig { returns(Float) }
+        attr_accessor :duration
+
+        # Segments of the transcript annotated with timestamps and speaker labels.
+        sig { returns(T::Array[OpenAI::Audio::TranscriptionDiarizedSegment]) }
+        attr_accessor :segments
+
+        # The type of task that was run. Always `transcribe`.
+        sig { returns(Symbol) }
+        attr_accessor :task
+
+        # The concatenated transcript text for the entire audio input.
+        sig { returns(String) }
+        attr_accessor :text
+
+        # Token or duration usage statistics for the request.
+        sig do
+          returns(
+            T.nilable(OpenAI::Audio::TranscriptionDiarized::Usage::Variants)
+          )
+        end
+        attr_reader :usage
+
+        sig do
+          params(
+            usage:
+              T.any(
+                OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::OrHash,
+                OpenAI::Audio::TranscriptionDiarized::Usage::Duration::OrHash
+              )
+          ).void
+        end
+        attr_writer :usage
+
+        # Represents a diarized transcription response returned by the model, including
+        # the combined transcript and speaker-segment annotations.
+        sig do
+          params(
+            duration: Float,
+            segments:
+              T::Array[OpenAI::Audio::TranscriptionDiarizedSegment::OrHash],
+            text: String,
+            usage:
+              T.any(
+                OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::OrHash,
+                OpenAI::Audio::TranscriptionDiarized::Usage::Duration::OrHash
+              ),
+            task: Symbol
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Duration of the input audio in seconds.
+          duration:,
+          # Segments of the transcript annotated with timestamps and speaker labels.
+          segments:,
+          # The concatenated transcript text for the entire audio input.
+          text:,
+          # Token or duration usage statistics for the request.
+          usage: nil,
+          # The type of task that was run. Always `transcribe`.
+          task: :transcribe
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              duration: Float,
+              segments: T::Array[OpenAI::Audio::TranscriptionDiarizedSegment],
+              task: Symbol,
+              text: String,
+              usage: OpenAI::Audio::TranscriptionDiarized::Usage::Variants
+            }
+          )
+        end
+        def to_hash
+        end
+
+        # Token or duration usage statistics for the request.
+        module Usage
+          extend OpenAI::Internal::Type::Union
+
+          Variants =
+            T.type_alias do
+              T.any(
+                OpenAI::Audio::TranscriptionDiarized::Usage::Tokens,
+                OpenAI::Audio::TranscriptionDiarized::Usage::Duration
+              )
+            end
+
+          class Tokens < OpenAI::Internal::Type::BaseModel
+            OrHash =
+              T.type_alias do
+                T.any(
+                  OpenAI::Audio::TranscriptionDiarized::Usage::Tokens,
+                  OpenAI::Internal::AnyHash
+                )
+              end
+
+            # Number of input tokens billed for this request.
+            sig { returns(Integer) }
+            attr_accessor :input_tokens
+
+            # Number of output tokens generated.
+            sig { returns(Integer) }
+            attr_accessor :output_tokens
+
+            # Total number of tokens used (input + output).
+            sig { returns(Integer) }
+            attr_accessor :total_tokens
+
+            # The type of the usage object. Always `tokens` for this variant.
+            sig { returns(Symbol) }
+            attr_accessor :type
+
+            # Details about the input tokens billed for this request.
+            sig do
+              returns(
+                T.nilable(
+                  OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+                )
+              )
+            end
+            attr_reader :input_token_details
+
+            sig do
+              params(
+                input_token_details:
+                  OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails::OrHash
+              ).void
+            end
+            attr_writer :input_token_details
+
+            # Usage statistics for models billed by token usage.
+            sig do
+              params(
+                input_tokens: Integer,
+                output_tokens: Integer,
+                total_tokens: Integer,
+                input_token_details:
+                  OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails::OrHash,
+                type: Symbol
+              ).returns(T.attached_class)
+            end
+            def self.new(
+              # Number of input tokens billed for this request.
+              input_tokens:,
+              # Number of output tokens generated.
+              output_tokens:,
+              # Total number of tokens used (input + output).
+              total_tokens:,
+              # Details about the input tokens billed for this request.
+              input_token_details: nil,
+              # The type of the usage object. Always `tokens` for this variant.
+              type: :tokens
+            )
+            end
+
+            sig do
+              override.returns(
+                {
+                  input_tokens: Integer,
+                  output_tokens: Integer,
+                  total_tokens: Integer,
+                  type: Symbol,
+                  input_token_details:
+                    OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails
+                }
+              )
+            end
+            def to_hash
+            end
+
+            class InputTokenDetails < OpenAI::Internal::Type::BaseModel
+              OrHash =
+                T.type_alias do
+                  T.any(
+                    OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails,
+                    OpenAI::Internal::AnyHash
+                  )
+                end
+
+              # Number of audio tokens billed for this request.
+              sig { returns(T.nilable(Integer)) }
+              attr_reader :audio_tokens
+
+              sig { params(audio_tokens: Integer).void }
+              attr_writer :audio_tokens
+
+              # Number of text tokens billed for this request.
+              sig { returns(T.nilable(Integer)) }
+              attr_reader :text_tokens
+
+              sig { params(text_tokens: Integer).void }
+              attr_writer :text_tokens
+
+              # Details about the input tokens billed for this request.
+              sig do
+                params(audio_tokens: Integer, text_tokens: Integer).returns(
+                  T.attached_class
+                )
+              end
+              def self.new(
+                # Number of audio tokens billed for this request.
+                audio_tokens: nil,
+                # Number of text tokens billed for this request.
+                text_tokens: nil
+              )
+              end
+
+              sig do
+                override.returns(
+                  { audio_tokens: Integer, text_tokens: Integer }
+                )
+              end
+              def to_hash
+              end
+            end
+          end
+
+          class Duration < OpenAI::Internal::Type::BaseModel
+            OrHash =
+              T.type_alias do
+                T.any(
+                  OpenAI::Audio::TranscriptionDiarized::Usage::Duration,
+                  OpenAI::Internal::AnyHash
+                )
+              end
+
+            # Duration of the input audio in seconds.
+            sig { returns(Float) }
+            attr_accessor :seconds
+
+            # The type of the usage object. Always `duration` for this variant.
+            sig { returns(Symbol) }
+            attr_accessor :type
+
+            # Usage statistics for models billed by audio input duration.
+            sig do
+              params(seconds: Float, type: Symbol).returns(T.attached_class)
+            end
+            def self.new(
+              # Duration of the input audio in seconds.
+              seconds:,
+              # The type of the usage object. Always `duration` for this variant.
+              type: :duration
+            )
+            end
+
+            sig { override.returns({ seconds: Float, type: Symbol }) }
+            def to_hash
+            end
+          end
+
+          sig do
+            override.returns(
+              T::Array[OpenAI::Audio::TranscriptionDiarized::Usage::Variants]
+            )
+          end
+          def self.variants
+          end
+        end
+      end
+    end
+  end
+end
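
The RBI above types the new diarized response end to end. As a quick orientation, here is a minimal sketch of requesting and reading one; the audio path and API-key handling are placeholders, and the call shape follows the gem's existing `client.audio.transcriptions.create` surface rather than anything shown in this excerpt:

```ruby
require "openai"
require "pathname"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

# `response_format: :diarized_json` should yield an
# OpenAI::Audio::TranscriptionDiarized, as typed above.
transcription = client.audio.transcriptions.create(
  file: Pathname("meeting.wav"), # placeholder path
  model: :"gpt-4o-transcribe-diarize",
  response_format: :diarized_json
)

puts transcription.text # combined transcript for the whole input
transcription.segments.each do |seg|
  # `end` is a Ruby keyword, hence the SDK's `end_` accessor.
  puts "#{seg.speaker} [#{seg.start}s-#{seg.end_}s] #{seg.text}"
end

# `usage` is a union: token-billed or duration-billed.
case transcription.usage
when OpenAI::Audio::TranscriptionDiarized::Usage::Tokens
  puts "billed tokens: #{transcription.usage.total_tokens}"
when OpenAI::Audio::TranscriptionDiarized::Usage::Duration
  puts "billed seconds: #{transcription.usage.seconds}"
end
```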
data/rbi/openai/models/audio/transcription_diarized_segment.rbi
@@ -0,0 +1,87 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Audio
+      class TranscriptionDiarizedSegment < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Audio::TranscriptionDiarizedSegment,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # Unique identifier for the segment.
+        sig { returns(String) }
+        attr_accessor :id
+
+        # End timestamp of the segment in seconds.
+        sig { returns(Float) }
+        attr_accessor :end_
+
+        # Speaker label for this segment. When known speakers are provided, the label
+        # matches `known_speaker_names[]`. Otherwise speakers are labeled sequentially
+        # using capital letters (`A`, `B`, ...).
+        sig { returns(String) }
+        attr_accessor :speaker
+
+        # Start timestamp of the segment in seconds.
+        sig { returns(Float) }
+        attr_accessor :start
+
+        # Transcript text for this segment.
+        sig { returns(String) }
+        attr_accessor :text
+
+        # The type of the segment. Always `transcript.text.segment`.
+        sig { returns(Symbol) }
+        attr_accessor :type
+
+        # A segment of diarized transcript text with speaker metadata.
+        sig do
+          params(
+            id: String,
+            end_: Float,
+            speaker: String,
+            start: Float,
+            text: String,
+            type: Symbol
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Unique identifier for the segment.
+          id:,
+          # End timestamp of the segment in seconds.
+          end_:,
+          # Speaker label for this segment. When known speakers are provided, the label
+          # matches `known_speaker_names[]`. Otherwise speakers are labeled sequentially
+          # using capital letters (`A`, `B`, ...).
+          speaker:,
+          # Start timestamp of the segment in seconds.
+          start:,
+          # Transcript text for this segment.
+          text:,
+          # The type of the segment. Always `transcript.text.segment`.
+          type: :"transcript.text.segment"
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              id: String,
+              end_: Float,
+              speaker: String,
+              start: Float,
+              text: String,
+              type: Symbol
+            }
+          )
+        end
+        def to_hash
+        end
+      end
+    end
+  end
+end
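
The `speaker` documentation above refers to a `known_speaker_names[]` request parameter, which lands in `transcription_create_params.rb` (changed in this diff but not shown in this excerpt). A hypothetical sketch of that flow follows; the two keyword names and the encoding of the reference samples are assumptions to verify against the params file:

```ruby
require "openai"
require "pathname"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

# Placeholder reference clips; the exact expected encoding (e.g. data: URLs
# of short voice samples) is defined in transcription_create_params.rb.
alice_voice_sample = "data:audio/wav;base64,..." # hypothetical
bob_voice_sample   = "data:audio/wav;base64,..." # hypothetical

transcription = client.audio.transcriptions.create(
  file: Pathname("standup.wav"), # placeholder path
  model: :"gpt-4o-transcribe-diarize",
  response_format: :diarized_json,
  # Hypothetical keywords mirroring the API's `known_speaker_names[]` /
  # `known_speaker_references[]` parameters.
  known_speaker_names: %w[alice bob],
  known_speaker_references: [alice_voice_sample, bob_voice_sample]
)

# Segments are labeled "alice"/"bob" where recognized; otherwise the
# sequential capital letters ("A", "B", ...) described above.
transcription.segments.each { |seg| puts "#{seg.speaker}: #{seg.text}" }
```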
data/rbi/openai/models/audio/transcription_stream_event.rbi
@@ -3,16 +3,17 @@
 module OpenAI
   module Models
     module Audio
-      # Emitted when
-      #
+      # Emitted when a diarized transcription returns a completed segment with speaker
+      # information. Only emitted when you
       # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
-      # with
+      # with `stream` set to `true` and `response_format` set to `diarized_json`.
       module TranscriptionStreamEvent
         extend OpenAI::Internal::Type::Union

         Variants =
           T.type_alias do
             T.any(
+              OpenAI::Audio::TranscriptionTextSegmentEvent,
               OpenAI::Audio::TranscriptionTextDeltaEvent,
               OpenAI::Audio::TranscriptionTextDoneEvent
             )
data/rbi/openai/models/audio/transcription_text_delta_event.rbi
@@ -42,6 +42,14 @@ module OpenAI
         end
         attr_writer :logprobs

+        # Identifier of the diarized segment that this delta belongs to. Only present when
+        # using `gpt-4o-transcribe-diarize`.
+        sig { returns(T.nilable(String)) }
+        attr_reader :segment_id
+
+        sig { params(segment_id: String).void }
+        attr_writer :segment_id
+
         # Emitted when there is an additional text delta. This is also the first event
         # emitted when the transcription starts. Only emitted when you
         # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
@@ -53,6 +61,7 @@ module OpenAI
             T::Array[
               OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob::OrHash
             ],
+            segment_id: String,
             type: Symbol
           ).returns(T.attached_class)
         end
@@ -63,6 +72,9 @@ module OpenAI
           # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
           # with the `include[]` parameter set to `logprobs`.
           logprobs: nil,
+          # Identifier of the diarized segment that this delta belongs to. Only present when
+          # using `gpt-4o-transcribe-diarize`.
+          segment_id: nil,
           # The type of the event. Always `transcript.text.delta`.
           type: :"transcript.text.delta"
         )
@@ -74,7 +86,8 @@ module OpenAI
             delta: String,
             type: Symbol,
             logprobs:
-              T::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob]
+              T::Array[OpenAI::Audio::TranscriptionTextDeltaEvent::Logprob],
+            segment_id: String
           }
         )
       end
data/rbi/openai/models/audio/transcription_text_segment_event.rbi
@@ -0,0 +1,86 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Audio
+      class TranscriptionTextSegmentEvent < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Audio::TranscriptionTextSegmentEvent,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        # Unique identifier for the segment.
+        sig { returns(String) }
+        attr_accessor :id
+
+        # End timestamp of the segment in seconds.
+        sig { returns(Float) }
+        attr_accessor :end_
+
+        # Speaker label for this segment.
+        sig { returns(String) }
+        attr_accessor :speaker
+
+        # Start timestamp of the segment in seconds.
+        sig { returns(Float) }
+        attr_accessor :start
+
+        # Transcript text for this segment.
+        sig { returns(String) }
+        attr_accessor :text
+
+        # The type of the event. Always `transcript.text.segment`.
+        sig { returns(Symbol) }
+        attr_accessor :type
+
+        # Emitted when a diarized transcription returns a completed segment with speaker
+        # information. Only emitted when you
+        # [create a transcription](https://platform.openai.com/docs/api-reference/audio/create-transcription)
+        # with `stream` set to `true` and `response_format` set to `diarized_json`.
+        sig do
+          params(
+            id: String,
+            end_: Float,
+            speaker: String,
+            start: Float,
+            text: String,
+            type: Symbol
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Unique identifier for the segment.
+          id:,
+          # End timestamp of the segment in seconds.
+          end_:,
+          # Speaker label for this segment.
+          speaker:,
+          # Start timestamp of the segment in seconds.
+          start:,
+          # Transcript text for this segment.
+          text:,
+          # The type of the event. Always `transcript.text.segment`.
+          type: :"transcript.text.segment"
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              id: String,
+              end_: Float,
+              speaker: String,
+              start: Float,
+              text: String,
+              type: Symbol
+            }
+          )
+        end
+        def to_hash
+        end
+      end
+    end
+  end
+end
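
With this event in place, a diarized stream interleaves text deltas (now tagged with `segment_id`) and completed segments. A hedged sketch of consuming it, assuming the resource's streaming variant keeps its existing `create_streaming` name and that the audio path is a placeholder:

```ruby
require "openai"
require "pathname"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

stream = client.audio.transcriptions.create_streaming(
  file: Pathname("interview.wav"), # placeholder path
  model: :"gpt-4o-transcribe-diarize",
  response_format: :diarized_json
)

stream.each do |event|
  case event
  when OpenAI::Audio::TranscriptionTextDeltaEvent
    # Deltas now carry the id of the diarized segment they belong to.
    print "(#{event.segment_id}) #{event.delta}"
  when OpenAI::Audio::TranscriptionTextSegmentEvent
    puts "\n#{event.speaker} [#{event.start}s-#{event.end_}s]: #{event.text}"
  when OpenAI::Audio::TranscriptionTextDoneEvent
    puts "\n-- transcript complete --"
  end
end
```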
data/rbi/openai/models/audio_model.rbi
@@ -13,6 +13,8 @@ module OpenAI
        T.let(:"gpt-4o-transcribe", OpenAI::AudioModel::TaggedSymbol)
      GPT_4O_MINI_TRANSCRIBE =
        T.let(:"gpt-4o-mini-transcribe", OpenAI::AudioModel::TaggedSymbol)
+     GPT_4O_TRANSCRIBE_DIARIZE =
+       T.let(:"gpt-4o-transcribe-diarize", OpenAI::AudioModel::TaggedSymbol)

      sig { override.returns(T::Array[OpenAI::AudioModel::TaggedSymbol]) }
      def self.values
data/rbi/openai/models/audio_response_format.rbi
@@ -3,8 +3,10 @@
 module OpenAI
   module Models
     # The format of the output, in one of these options: `json`, `text`, `srt`,
-    # `verbose_json`, or `
-    # the only supported format is `json`.
+    # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
+    # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
+    # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
+    # `diarized_json`, with `diarized_json` required to receive speaker annotations.
     module AudioResponseFormat
       extend OpenAI::Internal::Type::Enum

@@ -17,6 +19,8 @@ module OpenAI
       VERBOSE_JSON =
         T.let(:verbose_json, OpenAI::AudioResponseFormat::TaggedSymbol)
       VTT = T.let(:vtt, OpenAI::AudioResponseFormat::TaggedSymbol)
+      DIARIZED_JSON =
+        T.let(:diarized_json, OpenAI::AudioResponseFormat::TaggedSymbol)

      sig do
        override.returns(T::Array[OpenAI::AudioResponseFormat::TaggedSymbol])
data/rbi/openai/models/beta/assistant_create_params.rbi
@@ -50,6 +50,9 @@ module OpenAI
       # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
       # effort can result in faster responses and fewer tokens used on reasoning in a
       # response.
+      #
+      # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+      # effort.
       sig { returns(T.nilable(OpenAI::ReasoningEffort::OrSymbol)) }
       attr_accessor :reasoning_effort

@@ -212,6 +215,9 @@ module OpenAI
        # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
        # effort can result in faster responses and fewer tokens used on reasoning in a
        # response.
+       #
+       # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+       # effort.
        reasoning_effort: nil,
        # Specifies the format that the model must output. Compatible with
        # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
data/rbi/openai/models/beta/assistant_update_params.rbi
@@ -70,6 +70,9 @@ module OpenAI
       # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
       # effort can result in faster responses and fewer tokens used on reasoning in a
       # response.
+      #
+      # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+      # effort.
       sig { returns(T.nilable(OpenAI::ReasoningEffort::OrSymbol)) }
       attr_accessor :reasoning_effort

@@ -236,6 +239,9 @@ module OpenAI
        # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
        # effort can result in faster responses and fewer tokens used on reasoning in a
        # response.
+       #
+       # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+       # effort.
        reasoning_effort: nil,
        # Specifies the format that the model must output. Compatible with
        # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
data/rbi/openai/models/beta/threads/run_create_params.rbi
@@ -116,6 +116,9 @@ module OpenAI
         # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
         # effort can result in faster responses and fewer tokens used on reasoning in a
         # response.
+        #
+        # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+        # effort.
         sig { returns(T.nilable(OpenAI::ReasoningEffort::OrSymbol)) }
         attr_accessor :reasoning_effort

@@ -334,6 +337,9 @@ module OpenAI
          # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
          # effort can result in faster responses and fewer tokens used on reasoning in a
          # response.
+         #
+         # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+         # effort.
          reasoning_effort: nil,
          # Specifies the format that the model must output. Compatible with
          # [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
data/rbi/openai/models/chat/completion_create_params.rbi
@@ -230,6 +230,9 @@ module OpenAI
       # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
       # effort can result in faster responses and fewer tokens used on reasoning in a
       # response.
+      #
+      # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+      # effort.
       sig { returns(T.nilable(OpenAI::ReasoningEffort::OrSymbol)) }
       attr_accessor :reasoning_effort

@@ -667,6 +670,9 @@ module OpenAI
        # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
        # effort can result in faster responses and fewer tokens used on reasoning in a
        # response.
+       #
+       # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
+       # effort.
        reasoning_effort: nil,
        # An object specifying the format that the model must output.
        #
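
The same note is stamped into each of the reasoning-effort surfaces above: `gpt-5-pro` defaults to, and only accepts, `high`. A small sketch against the chat completions params this hunk documents; model access and the prompt are illustrative:

```ruby
require "openai"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

completion = client.chat.completions.create(
  model: :"gpt-5-pro",
  reasoning_effort: :high, # per the note, any other value should be rejected
  messages: [{role: :user, content: "Summarize speaker diarization in one paragraph."}]
)

puts completion.choices.first.message.content
```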