openai 0.22.1 → 0.23.0

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
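The hunks excerpted below are from entry 101, data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi (+25 -597): the required top-level `model` union and the inline `InputAudioFormat`, `InputAudioNoiseReduction`, `InputAudioTranscription`, and `TurnDetection` definitions are removed, and a single optional `audio` attribute typed `OpenAI::Realtime::RealtimeTranscriptionSessionAudio` takes their place. A minimal sketch of the new request shape, using only the parameters the hunks confirm (the keys accepted inside the `audio` hash are defined by the newly added RealtimeTranscriptionSessionAudio model, entry 41, and are not visible in this diff):

    require "openai"

    request = OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest.new(
      # `audio` replaces the removed `input_audio_format`, `input_audio_noise_reduction`,
      # `input_audio_transcription`, and `turn_detection` keywords; a plain hash is
      # accepted via the RealtimeTranscriptionSessionAudio::OrHash params signature.
      audio: {},
      include: ["item.input_audio_transcription.logprobs"]
      # `type:` defaults to :transcription; there is no longer a required `model:`.
    )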
@@ -12,27 +12,30 @@ module OpenAI
             )
           end

-        # ID of the model to use. The options are `gpt-4o-transcribe`,
-        # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
-        # Whisper V2 model).
+        # The type of session to create. Always `transcription` for transcription
+        # sessions.
+        sig { returns(Symbol) }
+        attr_accessor :type
+
+        # Configuration for input and output audio.
         sig do
           returns(
-            T.any(
-              String,
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol
-            )
+            T.nilable(OpenAI::Realtime::RealtimeTranscriptionSessionAudio)
           )
         end
-        attr_accessor :model
+        attr_reader :audio

-        # The type of session to create. Always `transcription` for transcription
-        # sessions.
-        sig { returns(Symbol) }
-        attr_accessor :type
+        sig do
+          params(
+            audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio::OrHash
+          ).void
+        end
+        attr_writer :audio

-        # The set of items to include in the transcription. Current available items are:
+        # Additional fields to include in server outputs.
         #
-        # - `item.input_audio_transcription.logprobs`
+        # `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+        # transcription.
         sig do
           returns(
             T.nilable(
@@ -54,138 +57,25 @@ module OpenAI
         end
         attr_writer :include

-        # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
-        # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
-        # (mono), and little-endian byte order.
-        sig do
-          returns(
-            T.nilable(
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol
-            )
-          )
-        end
-        attr_reader :input_audio_format
-
-        sig do
-          params(
-            input_audio_format:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol
-          ).void
-        end
-        attr_writer :input_audio_format
-
-        # Configuration for input audio noise reduction. This can be set to `null` to turn
-        # off. Noise reduction filters audio added to the input audio buffer before it is
-        # sent to VAD and the model. Filtering the audio can improve VAD and turn
-        # detection accuracy (reducing false positives) and model performance by improving
-        # perception of the input audio.
-        sig do
-          returns(
-            T.nilable(
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction
-            )
-          )
-        end
-        attr_reader :input_audio_noise_reduction
-
-        sig do
-          params(
-            input_audio_noise_reduction:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::OrHash
-          ).void
-        end
-        attr_writer :input_audio_noise_reduction
-
-        # Configuration for input audio transcription. The client can optionally set the
-        # language and prompt for transcription, these offer additional guidance to the
-        # transcription service.
-        sig do
-          returns(
-            T.nilable(
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription
-            )
-          )
-        end
-        attr_reader :input_audio_transcription
-
-        sig do
-          params(
-            input_audio_transcription:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::OrHash
-          ).void
-        end
-        attr_writer :input_audio_transcription
-
-        # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
-        # means that the model will detect the start and end of speech based on audio
-        # volume and respond at the end of user speech.
-        sig do
-          returns(
-            T.nilable(
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection
-            )
-          )
-        end
-        attr_reader :turn_detection
-
-        sig do
-          params(
-            turn_detection:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::OrHash
-          ).void
-        end
-        attr_writer :turn_detection
-
         # Realtime transcription session object configuration.
         sig do
           params(
-            model:
-              T.any(
-                String,
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol
-              ),
+            audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio::OrHash,
             include:
               T::Array[
                 OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol
               ],
-            input_audio_format:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol,
-            input_audio_noise_reduction:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::OrHash,
-            input_audio_transcription:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::OrHash,
-            turn_detection:
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::OrHash,
             type: Symbol
           ).returns(T.attached_class)
         end
         def self.new(
-          # ID of the model to use. The options are `gpt-4o-transcribe`,
-          # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
-          # Whisper V2 model).
-          model:,
-          # The set of items to include in the transcription. Current available items are:
+          # Configuration for input and output audio.
+          audio: nil,
+          # Additional fields to include in server outputs.
           #
-          # - `item.input_audio_transcription.logprobs`
+          # `item.input_audio_transcription.logprobs`: Include logprobs for input audio
+          # transcription.
           include: nil,
-          # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
-          # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
-          # (mono), and little-endian byte order.
-          input_audio_format: nil,
-          # Configuration for input audio noise reduction. This can be set to `null` to turn
-          # off. Noise reduction filters audio added to the input audio buffer before it is
-          # sent to VAD and the model. Filtering the audio can improve VAD and turn
-          # detection accuracy (reducing false positives) and model performance by improving
-          # perception of the input audio.
-          input_audio_noise_reduction: nil,
-          # Configuration for input audio transcription. The client can optionally set the
-          # language and prompt for transcription, these offer additional guidance to the
-          # transcription service.
-          input_audio_transcription: nil,
-          # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
-          # means that the model will detect the start and end of speech based on audio
-          # volume and respond at the end of user speech.
-          turn_detection: nil,
           # The type of session to create. Always `transcription` for transcription
           # sessions.
           type: :transcription
@@ -195,80 +85,18 @@ module OpenAI
         sig do
           override.returns(
             {
-              model:
-                T.any(
-                  String,
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol
-                ),
               type: Symbol,
+              audio: OpenAI::Realtime::RealtimeTranscriptionSessionAudio,
               include:
                 T::Array[
                   OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol
-                ],
-              input_audio_format:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol,
-              input_audio_noise_reduction:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction,
-              input_audio_transcription:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription,
-              turn_detection:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection
+                ]
             }
           )
         end
         def to_hash
         end

-        # ID of the model to use. The options are `gpt-4o-transcribe`,
-        # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
-        # Whisper V2 model).
-        module Model
-          extend OpenAI::Internal::Type::Union
-
-          Variants =
-            T.type_alias do
-              T.any(
-                String,
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
-              )
-            end
-
-          sig do
-            override.returns(
-              T::Array[
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::Variants
-              ]
-            )
-          end
-          def self.variants
-          end
-
-          TaggedSymbol =
-            T.type_alias do
-              T.all(
-                Symbol,
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model
-              )
-            end
-          OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-          WHISPER_1 =
-            T.let(
-              :"whisper-1",
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
-            )
-          GPT_4O_TRANSCRIBE =
-            T.let(
-              :"gpt-4o-transcribe",
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
-            )
-          GPT_4O_MINI_TRANSCRIBE =
-            T.let(
-              :"gpt-4o-mini-transcribe",
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
-            )
-        end
-
         module Include
           extend OpenAI::Internal::Type::Enum

@@ -297,406 +125,6 @@ module OpenAI
           def self.values
           end
         end
-
-        # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
-        # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
-        # (mono), and little-endian byte order.
-        module InputAudioFormat
-          extend OpenAI::Internal::Type::Enum
-
-          TaggedSymbol =
-            T.type_alias do
-              T.all(
-                Symbol,
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat
-              )
-            end
-          OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-          PCM16 =
-            T.let(
-              :pcm16,
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
-            )
-          G711_ULAW =
-            T.let(
-              :g711_ulaw,
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
-            )
-          G711_ALAW =
-            T.let(
-              :g711_alaw,
-              OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
-            )
-
-          sig do
-            override.returns(
-              T::Array[
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
-              ]
-            )
-          end
-          def self.values
-          end
-        end
-
-        class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel
-          OrHash =
-            T.type_alias do
-              T.any(
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction,
-                OpenAI::Internal::AnyHash
-              )
-            end
-
-          # Type of noise reduction. `near_field` is for close-talking microphones such as
-          # headphones, `far_field` is for far-field microphones such as laptop or
-          # conference room microphones.
-          sig do
-            returns(
-              T.nilable(
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
-              )
-            )
-          end
-          attr_reader :type
-
-          sig do
-            params(
-              type:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
-            ).void
-          end
-          attr_writer :type
-
-          # Configuration for input audio noise reduction. This can be set to `null` to turn
-          # off. Noise reduction filters audio added to the input audio buffer before it is
-          # sent to VAD and the model. Filtering the audio can improve VAD and turn
-          # detection accuracy (reducing false positives) and model performance by improving
-          # perception of the input audio.
-          sig do
-            params(
-              type:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
-            ).returns(T.attached_class)
-          end
-          def self.new(
-            # Type of noise reduction. `near_field` is for close-talking microphones such as
-            # headphones, `far_field` is for far-field microphones such as laptop or
-            # conference room microphones.
-            type: nil
-          )
-          end
-
-          sig do
-            override.returns(
-              {
-                type:
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
-              }
-            )
-          end
-          def to_hash
-          end
-
-          # Type of noise reduction. `near_field` is for close-talking microphones such as
-          # headphones, `far_field` is for far-field microphones such as laptop or
-          # conference room microphones.
-          module Type
-            extend OpenAI::Internal::Type::Enum
-
-            TaggedSymbol =
-              T.type_alias do
-                T.all(
-                  Symbol,
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type
-                )
-              end
-            OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-            NEAR_FIELD =
-              T.let(
-                :near_field,
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol
-              )
-            FAR_FIELD =
-              T.let(
-                :far_field,
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol
-              )
-
-            sig do
-              override.returns(
-                T::Array[
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol
-                ]
-              )
-            end
-            def self.values
-            end
-          end
-        end
-
-        class InputAudioTranscription < OpenAI::Internal::Type::BaseModel
-          OrHash =
-            T.type_alias do
-              T.any(
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription,
-                OpenAI::Internal::AnyHash
-              )
-            end
-
-          # The language of the input audio. Supplying the input language in
-          # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
-          # format will improve accuracy and latency.
-          sig { returns(T.nilable(String)) }
-          attr_reader :language
-
-          sig { params(language: String).void }
-          attr_writer :language
-
-          # The model to use for transcription, current options are `gpt-4o-transcribe`,
-          # `gpt-4o-mini-transcribe`, and `whisper-1`.
-          sig do
-            returns(
-              T.nilable(
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol
-              )
-            )
-          end
-          attr_reader :model
-
-          sig do
-            params(
-              model:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol
-            ).void
-          end
-          attr_writer :model
-
-          # An optional text to guide the model's style or continue a previous audio
-          # segment. For `whisper-1`, the
-          # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
-          # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
-          # "expect words related to technology".
-          sig { returns(T.nilable(String)) }
-          attr_reader :prompt
-
-          sig { params(prompt: String).void }
-          attr_writer :prompt
-
-          # Configuration for input audio transcription. The client can optionally set the
-          # language and prompt for transcription, these offer additional guidance to the
-          # transcription service.
-          sig do
-            params(
-              language: String,
-              model:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol,
-              prompt: String
-            ).returns(T.attached_class)
-          end
-          def self.new(
-            # The language of the input audio. Supplying the input language in
-            # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
-            # format will improve accuracy and latency.
-            language: nil,
-            # The model to use for transcription, current options are `gpt-4o-transcribe`,
-            # `gpt-4o-mini-transcribe`, and `whisper-1`.
-            model: nil,
-            # An optional text to guide the model's style or continue a previous audio
-            # segment. For `whisper-1`, the
-            # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
-            # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
-            # "expect words related to technology".
-            prompt: nil
-          )
-          end
-
-          sig do
-            override.returns(
-              {
-                language: String,
-                model:
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol,
-                prompt: String
-              }
-            )
-          end
-          def to_hash
-          end
-
-          # The model to use for transcription, current options are `gpt-4o-transcribe`,
-          # `gpt-4o-mini-transcribe`, and `whisper-1`.
-          module Model
-            extend OpenAI::Internal::Type::Enum
-
-            TaggedSymbol =
-              T.type_alias do
-                T.all(
-                  Symbol,
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model
-                )
-              end
-            OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-            GPT_4O_TRANSCRIBE =
-              T.let(
-                :"gpt-4o-transcribe",
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
-              )
-            GPT_4O_MINI_TRANSCRIBE =
-              T.let(
-                :"gpt-4o-mini-transcribe",
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
-              )
-            WHISPER_1 =
-              T.let(
-                :"whisper-1",
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
-              )
-
-            sig do
-              override.returns(
-                T::Array[
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
-                ]
-              )
-            end
-            def self.values
-            end
-          end
-        end
-
-        class TurnDetection < OpenAI::Internal::Type::BaseModel
-          OrHash =
-            T.type_alias do
-              T.any(
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection,
-                OpenAI::Internal::AnyHash
-              )
-            end
-
-          # Amount of audio to include before the VAD detected speech (in milliseconds).
-          # Defaults to 300ms.
-          sig { returns(T.nilable(Integer)) }
-          attr_reader :prefix_padding_ms
-
-          sig { params(prefix_padding_ms: Integer).void }
-          attr_writer :prefix_padding_ms
-
-          # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
-          # With shorter values the model will respond more quickly, but may jump in on
-          # short pauses from the user.
-          sig { returns(T.nilable(Integer)) }
-          attr_reader :silence_duration_ms
-
-          sig { params(silence_duration_ms: Integer).void }
-          attr_writer :silence_duration_ms
-
-          # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
-          # threshold will require louder audio to activate the model, and thus might
-          # perform better in noisy environments.
-          sig { returns(T.nilable(Float)) }
-          attr_reader :threshold
-
-          sig { params(threshold: Float).void }
-          attr_writer :threshold
-
-          # Type of turn detection. Only `server_vad` is currently supported for
-          # transcription sessions.
-          sig do
-            returns(
-              T.nilable(
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
-              )
-            )
-          end
-          attr_reader :type
-
-          sig do
-            params(
-              type:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
-            ).void
-          end
-          attr_writer :type
-
-          # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
-          # means that the model will detect the start and end of speech based on audio
-          # volume and respond at the end of user speech.
-          sig do
-            params(
-              prefix_padding_ms: Integer,
-              silence_duration_ms: Integer,
-              threshold: Float,
-              type:
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
-            ).returns(T.attached_class)
-          end
-          def self.new(
-            # Amount of audio to include before the VAD detected speech (in milliseconds).
-            # Defaults to 300ms.
-            prefix_padding_ms: nil,
-            # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
-            # With shorter values the model will respond more quickly, but may jump in on
-            # short pauses from the user.
-            silence_duration_ms: nil,
-            # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
-            # threshold will require louder audio to activate the model, and thus might
-            # perform better in noisy environments.
-            threshold: nil,
-            # Type of turn detection. Only `server_vad` is currently supported for
-            # transcription sessions.
-            type: nil
-          )
-          end
-
-          sig do
-            override.returns(
-              {
-                prefix_padding_ms: Integer,
-                silence_duration_ms: Integer,
-                threshold: Float,
-                type:
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
-              }
-            )
-          end
-          def to_hash
-          end
-
-          # Type of turn detection. Only `server_vad` is currently supported for
-          # transcription sessions.
-          module Type
-            extend OpenAI::Internal::Type::Enum
-
-            TaggedSymbol =
-              T.type_alias do
-                T.all(
-                  Symbol,
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type
-                )
-              end
-            OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-            SERVER_VAD =
-              T.let(
-                :server_vad,
-                OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::TaggedSymbol
-              )
-
-            sig do
-              override.returns(
-                T::Array[
-                  OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::TaggedSymbol
-                ]
-              )
-            end
-            def self.values
-            end
-          end
-        end
       end
     end
   end
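For upgraders, the change amounts to nesting the old flat audio keywords under the new `audio` object. A hedged before/after sketch: the 0.22.1 call below is taken directly from the signatures removed above, while the nested key names in the 0.23.0 call (`input`, `noise_reduction`, `transcription`, `turn_detection`) are inferred from the newly added RealtimeTranscriptionSessionAudioInput and related model files in the list above, not confirmed by these hunks.

    # openai 0.22.1 -- flat keywords; `model:` was the only required argument.
    OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest.new(
      model: :"gpt-4o-transcribe",
      input_audio_format: :pcm16,
      input_audio_noise_reduction: { type: :near_field },
      input_audio_transcription: { model: :"gpt-4o-transcribe", language: "en" },
      turn_detection: { type: :server_vad, silence_duration_ms: 500 }
    )

    # openai 0.23.0 -- the audio-related settings move under `audio:` and every
    # keyword is optional. Nested key names are inferred (see note above).
    OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest.new(
      audio: {
        input: {
          noise_reduction: { type: :near_field },
          transcription: { model: "gpt-4o-transcribe", language: "en" },
          turn_detection: { type: :server_vad, silence_duration_ms: 500 }
        }
      },
      include: ["item.input_audio_transcription.logprobs"]
    )

Note that this class's `Model` enum (`whisper-1`, `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`) is gone entirely; transcription model selection presumably now lives in the new RealtimeTranscriptionSessionInputAudioTranscription model (entry 47).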