openai 0.61.0 → 0.63.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/README.md +2 -5
  4. data/lib/openai/auth/workload_identity.rb +2 -2
  5. data/lib/openai/auth/workload_identity_auth.rb +4 -3
  6. data/lib/openai/models/admin/organization/usage_audio_speeches_response.rb +10 -1
  7. data/lib/openai/models/admin/organization/usage_audio_transcriptions_response.rb +10 -1
  8. data/lib/openai/models/admin/organization/usage_code_interpreter_sessions_response.rb +10 -1
  9. data/lib/openai/models/admin/organization/usage_completions_response.rb +10 -1
  10. data/lib/openai/models/admin/organization/usage_costs_response.rb +10 -1
  11. data/lib/openai/models/admin/organization/usage_embeddings_response.rb +10 -1
  12. data/lib/openai/models/admin/organization/usage_images_response.rb +10 -1
  13. data/lib/openai/models/admin/organization/usage_moderations_response.rb +10 -1
  14. data/lib/openai/models/admin/organization/usage_vector_stores_response.rb +10 -1
  15. data/lib/openai/models/chat/chat_completion_token_logprob.rb +1 -2
  16. data/lib/openai/models/chat/completion_create_params.rb +4 -3
  17. data/lib/openai/models/image_edit_params.rb +85 -31
  18. data/lib/openai/models/image_generate_params.rb +78 -26
  19. data/lib/openai/models/image_model.rb +5 -2
  20. data/lib/openai/models/realtime/audio_transcription.rb +37 -5
  21. data/lib/openai/models/realtime/client_secret_create_response.rb +1 -2
  22. data/lib/openai/models/realtime/realtime_audio_config_input.rb +3 -0
  23. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +3 -0
  24. data/lib/openai/models/realtime/realtime_reasoning.rb +24 -0
  25. data/lib/openai/models/realtime/realtime_reasoning_effort.rb +22 -0
  26. data/lib/openai/models/realtime/realtime_response_create_params.rb +18 -1
  27. data/lib/openai/models/realtime/realtime_session.rb +6 -0
  28. data/lib/openai/models/realtime/realtime_session_create_request.rb +21 -1
  29. data/lib/openai/models/realtime/realtime_session_create_response.rb +41 -17
  30. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +3 -0
  31. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +3 -0
  32. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +9 -5
  33. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +2 -1
  34. data/lib/openai/models/realtime/realtime_translation_client_event.rb +45 -0
  35. data/lib/openai/models/realtime/realtime_translation_client_secret_create_request.rb +85 -0
  36. data/lib/openai/models/realtime/realtime_translation_client_secret_create_response.rb +42 -0
  37. data/lib/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rb +51 -0
  38. data/lib/openai/models/realtime/realtime_translation_input_transcript_delta_event.rb +55 -0
  39. data/lib/openai/models/realtime/realtime_translation_output_audio_delta_event.rb +89 -0
  40. data/lib/openai/models/realtime/realtime_translation_output_transcript_delta_event.rb +54 -0
  41. data/lib/openai/models/realtime/realtime_translation_server_event.rb +53 -0
  42. data/lib/openai/models/realtime/realtime_translation_session.rb +158 -0
  43. data/lib/openai/models/realtime/realtime_translation_session_close_event.rb +30 -0
  44. data/lib/openai/models/realtime/realtime_translation_session_closed_event.rb +28 -0
  45. data/lib/openai/models/realtime/realtime_translation_session_create_request.rb +138 -0
  46. data/lib/openai/models/realtime/realtime_translation_session_created_event.rb +38 -0
  47. data/lib/openai/models/realtime/realtime_translation_session_update_event.rb +43 -0
  48. data/lib/openai/models/realtime/realtime_translation_session_update_request.rb +129 -0
  49. data/lib/openai/models/realtime/realtime_translation_session_updated_event.rb +37 -0
  50. data/lib/openai/models/realtime/transcription_session_updated_event.rb +1 -2
  51. data/lib/openai/models/responses/response.rb +4 -3
  52. data/lib/openai/models/responses/response_compact_params.rb +22 -1
  53. data/lib/openai/models/responses/response_create_params.rb +4 -3
  54. data/lib/openai/models/responses/response_includable.rb +2 -0
  55. data/lib/openai/models/responses/response_text_delta_event.rb +2 -2
  56. data/lib/openai/models/responses/response_text_done_event.rb +2 -2
  57. data/lib/openai/models/responses/responses_client_event.rb +4 -3
  58. data/lib/openai/models/responses/tool.rb +81 -16
  59. data/lib/openai/resources/chat/completions.rb +2 -2
  60. data/lib/openai/resources/images.rb +6 -6
  61. data/lib/openai/resources/realtime/calls.rb +5 -1
  62. data/lib/openai/resources/responses.rb +5 -3
  63. data/lib/openai/version.rb +1 -1
  64. data/lib/openai.rb +18 -1
  65. data/rbi/openai/auth.rbi +3 -3
  66. data/rbi/openai/models/admin/organization/usage_audio_speeches_response.rbi +11 -1
  67. data/rbi/openai/models/admin/organization/usage_audio_transcriptions_response.rbi +11 -1
  68. data/rbi/openai/models/admin/organization/usage_code_interpreter_sessions_response.rbi +11 -1
  69. data/rbi/openai/models/admin/organization/usage_completions_response.rbi +11 -1
  70. data/rbi/openai/models/admin/organization/usage_costs_response.rbi +11 -1
  71. data/rbi/openai/models/admin/organization/usage_embeddings_response.rbi +11 -1
  72. data/rbi/openai/models/admin/organization/usage_images_response.rbi +11 -1
  73. data/rbi/openai/models/admin/organization/usage_moderations_response.rbi +11 -1
  74. data/rbi/openai/models/admin/organization/usage_vector_stores_response.rbi +11 -1
  75. data/rbi/openai/models/chat/chat_completion_token_logprob.rbi +2 -4
  76. data/rbi/openai/models/chat/completion_create_params.rbi +6 -4
  77. data/rbi/openai/models/image_edit_params.rbi +102 -45
  78. data/rbi/openai/models/image_generate_params.rbi +93 -39
  79. data/rbi/openai/models/image_model.rbi +8 -3
  80. data/rbi/openai/models/realtime/audio_transcription.rbi +85 -6
  81. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +6 -0
  82. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +3 -0
  83. data/rbi/openai/models/realtime/realtime_reasoning.rbi +54 -0
  84. data/rbi/openai/models/realtime/realtime_reasoning_effort.rbi +44 -0
  85. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +26 -0
  86. data/rbi/openai/models/realtime/realtime_session.rbi +9 -0
  87. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +31 -0
  88. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +53 -32
  89. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +6 -0
  90. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +3 -0
  91. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +13 -7
  92. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +2 -1
  93. data/rbi/openai/models/realtime/realtime_translation_client_event.rbi +29 -0
  94. data/rbi/openai/models/realtime/realtime_translation_client_secret_create_request.rbi +193 -0
  95. data/rbi/openai/models/realtime/realtime_translation_client_secret_create_response.rbi +69 -0
  96. data/rbi/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rbi +69 -0
  97. data/rbi/openai/models/realtime/realtime_translation_input_transcript_delta_event.rbi +77 -0
  98. data/rbi/openai/models/realtime/realtime_translation_output_audio_delta_event.rbi +148 -0
  99. data/rbi/openai/models/realtime/realtime_translation_output_transcript_delta_event.rbi +76 -0
  100. data/rbi/openai/models/realtime/realtime_translation_server_event.rbi +33 -0
  101. data/rbi/openai/models/realtime/realtime_translation_session.rbi +339 -0
  102. data/rbi/openai/models/realtime/realtime_translation_session_close_event.rbi +44 -0
  103. data/rbi/openai/models/realtime/realtime_translation_session_closed_event.rbi +39 -0
  104. data/rbi/openai/models/realtime/realtime_translation_session_create_request.rbi +322 -0
  105. data/rbi/openai/models/realtime/realtime_translation_session_created_event.rbi +68 -0
  106. data/rbi/openai/models/realtime/realtime_translation_session_update_event.rbi +78 -0
  107. data/rbi/openai/models/realtime/realtime_translation_session_update_request.rbi +313 -0
  108. data/rbi/openai/models/realtime/realtime_translation_session_updated_event.rbi +67 -0
  109. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +0 -2
  110. data/rbi/openai/models/responses/response.rbi +6 -4
  111. data/rbi/openai/models/responses/response_compact_params.rbi +65 -0
  112. data/rbi/openai/models/responses/response_create_params.rbi +6 -4
  113. data/rbi/openai/models/responses/response_includable.rbi +2 -0
  114. data/rbi/openai/models/responses/response_text_delta_event.rbi +2 -2
  115. data/rbi/openai/models/responses/response_text_done_event.rbi +2 -2
  116. data/rbi/openai/models/responses/responses_client_event.rbi +6 -4
  117. data/rbi/openai/models/responses/tool.rbi +122 -27
  118. data/rbi/openai/resources/chat/completions.rbi +6 -4
  119. data/rbi/openai/resources/images.rbi +110 -44
  120. data/rbi/openai/resources/realtime/calls.rbi +7 -0
  121. data/rbi/openai/resources/responses.rbi +12 -4
  122. data/sig/openai/models/admin/organization/usage_audio_speeches_response.rbs +7 -2
  123. data/sig/openai/models/admin/organization/usage_audio_transcriptions_response.rbs +7 -2
  124. data/sig/openai/models/admin/organization/usage_code_interpreter_sessions_response.rbs +7 -2
  125. data/sig/openai/models/admin/organization/usage_completions_response.rbs +7 -2
  126. data/sig/openai/models/admin/organization/usage_costs_response.rbs +7 -2
  127. data/sig/openai/models/admin/organization/usage_embeddings_response.rbs +7 -2
  128. data/sig/openai/models/admin/organization/usage_images_response.rbs +7 -2
  129. data/sig/openai/models/admin/organization/usage_moderations_response.rbs +7 -2
  130. data/sig/openai/models/admin/organization/usage_vector_stores_response.rbs +7 -2
  131. data/sig/openai/models/image_edit_params.rbs +5 -4
  132. data/sig/openai/models/image_generate_params.rbs +5 -4
  133. data/sig/openai/models/image_model.rbs +11 -5
  134. data/sig/openai/models/realtime/audio_transcription.rbs +25 -0
  135. data/sig/openai/models/realtime/realtime_reasoning.rbs +24 -0
  136. data/sig/openai/models/realtime/realtime_reasoning_effort.rbs +20 -0
  137. data/sig/openai/models/realtime/realtime_response_create_params.rbs +16 -0
  138. data/sig/openai/models/realtime/realtime_session_create_request.rbs +18 -0
  139. data/sig/openai/models/realtime/realtime_session_create_response.rbs +27 -4
  140. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +4 -8
  141. data/sig/openai/models/realtime/realtime_translation_client_event.rbs +16 -0
  142. data/sig/openai/models/realtime/realtime_translation_client_secret_create_request.rbs +69 -0
  143. data/sig/openai/models/realtime/realtime_translation_client_secret_create_response.rbs +32 -0
  144. data/sig/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rbs +34 -0
  145. data/sig/openai/models/realtime/realtime_translation_input_transcript_delta_event.rbs +37 -0
  146. data/sig/openai/models/realtime/realtime_translation_output_audio_delta_event.rbs +70 -0
  147. data/sig/openai/models/realtime/realtime_translation_output_transcript_delta_event.rbs +37 -0
  148. data/sig/openai/models/realtime/realtime_translation_server_event.rbs +20 -0
  149. data/sig/openai/models/realtime/realtime_translation_session.rbs +131 -0
  150. data/sig/openai/models/realtime/realtime_translation_session_close_event.rbs +20 -0
  151. data/sig/openai/models/realtime/realtime_translation_session_closed_event.rbs +18 -0
  152. data/sig/openai/models/realtime/realtime_translation_session_create_request.rbs +120 -0
  153. data/sig/openai/models/realtime/realtime_translation_session_created_event.rbs +32 -0
  154. data/sig/openai/models/realtime/realtime_translation_session_update_event.rbs +34 -0
  155. data/sig/openai/models/realtime/realtime_translation_session_update_request.rbs +115 -0
  156. data/sig/openai/models/realtime/realtime_translation_session_updated_event.rbs +32 -0
  157. data/sig/openai/models/responses/response_compact_params.rbs +19 -1
  158. data/sig/openai/models/responses/tool.rbs +15 -5
  159. data/sig/openai/resources/realtime/calls.rbs +2 -0
  160. data/sig/openai/resources/responses.rbs +1 -0
  161. metadata +56 -5
  162. data/lib/openai/models/realtime/realtime_session_client_secret.rb +0 -36
  163. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +0 -49
  164. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +0 -20
@@ -125,6 +125,14 @@ module OpenAI
125
125
  end
126
126
  attr_writer :output_modalities
127
127
 
128
+ # Whether the model may call multiple tools in parallel. Only supported by
129
+ # reasoning Realtime models such as `gpt-realtime-2`.
130
+ sig { returns(T.nilable(T::Boolean)) }
131
+ attr_reader :parallel_tool_calls
132
+
133
+ sig { params(parallel_tool_calls: T::Boolean).void }
134
+ attr_writer :parallel_tool_calls
135
+
128
136
  # Reference to a prompt template and its variables.
129
137
  # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
130
138
  sig { returns(T.nilable(OpenAI::Responses::ResponsePrompt)) }
@@ -137,6 +145,15 @@ module OpenAI
137
145
  end
138
146
  attr_writer :prompt
139
147
 
148
+ # Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
149
+ sig { returns(T.nilable(OpenAI::Realtime::RealtimeReasoning)) }
150
+ attr_reader :reasoning
151
+
152
+ sig do
153
+ params(reasoning: OpenAI::Realtime::RealtimeReasoning::OrHash).void
154
+ end
155
+ attr_writer :reasoning
156
+
140
157
  # How the model chooses tools. Provide one of the string modes or force a specific
141
158
  # function/MCP tool.
142
159
  sig do
@@ -271,7 +288,9 @@ module OpenAI
271
288
  T::Array[
272
289
  OpenAI::Realtime::RealtimeSessionCreateRequest::OutputModality::OrSymbol
273
290
  ],
291
+ parallel_tool_calls: T::Boolean,
274
292
  prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash),
293
+ reasoning: OpenAI::Realtime::RealtimeReasoning::OrHash,
275
294
  tool_choice:
276
295
  T.any(
277
296
  OpenAI::Responses::ToolChoiceOptions::OrSymbol,
@@ -331,9 +350,14 @@ module OpenAI
331
350
  # can be used to make the model respond with text only. It is not possible to
332
351
  # request both `text` and `audio` at the same time.
333
352
  output_modalities: nil,
353
+ # Whether the model may call multiple tools in parallel. Only supported by
354
+ # reasoning Realtime models such as `gpt-realtime-2`.
355
+ parallel_tool_calls: nil,
334
356
  # Reference to a prompt template and its variables.
335
357
  # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
336
358
  prompt: nil,
359
+ # Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
360
+ reasoning: nil,
337
361
  # How the model chooses tools. Provide one of the string modes or force a specific
338
362
  # function/MCP tool.
339
363
  tool_choice: nil,
@@ -391,7 +415,9 @@ module OpenAI
391
415
  T::Array[
392
416
  OpenAI::Realtime::RealtimeSessionCreateRequest::OutputModality::OrSymbol
393
417
  ],
418
+ parallel_tool_calls: T::Boolean,
394
419
  prompt: T.nilable(OpenAI::Responses::ResponsePrompt),
420
+ reasoning: OpenAI::Realtime::RealtimeReasoning,
395
421
  tool_choice:
396
422
  T.any(
397
423
  OpenAI::Responses::ToolChoiceOptions::OrSymbol,
@@ -512,6 +538,11 @@ module OpenAI
512
538
  :"gpt-realtime-1.5",
513
539
  OpenAI::Realtime::RealtimeSessionCreateRequest::Model::TaggedSymbol
514
540
  )
541
+ GPT_REALTIME_2 =
542
+ T.let(
543
+ :"gpt-realtime-2",
544
+ OpenAI::Realtime::RealtimeSessionCreateRequest::Model::TaggedSymbol
545
+ )
515
546
  GPT_REALTIME_2025_08_28 =
516
547
  T.let(
517
548
  :"gpt-realtime-2025-08-28",
@@ -14,16 +14,13 @@ module OpenAI
14
14
  )
15
15
  end
16
16
 
17
- # Ephemeral key returned by the API.
18
- sig { returns(OpenAI::Realtime::RealtimeSessionClientSecret) }
19
- attr_reader :client_secret
17
+ # Unique identifier for the session that looks like `sess_1234567890abcdef`.
18
+ sig { returns(String) }
19
+ attr_accessor :id
20
20
 
21
- sig do
22
- params(
23
- client_secret: OpenAI::Realtime::RealtimeSessionClientSecret::OrHash
24
- ).void
25
- end
26
- attr_writer :client_secret
21
+ # The object type. Always `realtime.session`.
22
+ sig { returns(Symbol) }
23
+ attr_accessor :object
27
24
 
28
25
  # The type of session to create. Always `realtime` for the Realtime API.
29
26
  sig { returns(Symbol) }
@@ -45,6 +42,13 @@ module OpenAI
45
42
  end
46
43
  attr_writer :audio
47
44
 
45
+ # Expiration timestamp for the session, in seconds since epoch.
46
+ sig { returns(T.nilable(Integer)) }
47
+ attr_reader :expires_at
48
+
49
+ sig { params(expires_at: Integer).void }
50
+ attr_writer :expires_at
51
+
48
52
  # Additional fields to include in server outputs.
49
53
  #
50
54
  # `item.input_audio_transcription.logprobs`: Include logprobs for input audio
@@ -160,6 +164,15 @@ module OpenAI
160
164
  end
161
165
  attr_writer :prompt
162
166
 
167
+ # Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
168
+ sig { returns(T.nilable(OpenAI::Realtime::RealtimeReasoning)) }
169
+ attr_reader :reasoning
170
+
171
+ sig do
172
+ params(reasoning: OpenAI::Realtime::RealtimeReasoning::OrHash).void
173
+ end
174
+ attr_writer :reasoning
175
+
163
176
  # How the model chooses tools. Provide one of the string modes or force a specific
164
177
  # function/MCP tool.
165
178
  sig do
@@ -258,14 +271,13 @@ module OpenAI
258
271
  end
259
272
  attr_writer :truncation
260
273
 
261
- # A new Realtime session configuration, with an ephemeral key. Default TTL for
262
- # keys is one minute.
274
+ # A Realtime session configuration object.
263
275
  sig do
264
276
  params(
265
- client_secret:
266
- OpenAI::Realtime::RealtimeSessionClientSecret::OrHash,
277
+ id: String,
267
278
  audio:
268
279
  OpenAI::Realtime::RealtimeSessionCreateResponse::Audio::OrHash,
280
+ expires_at: Integer,
269
281
  include:
270
282
  T::Array[
271
283
  OpenAI::Realtime::RealtimeSessionCreateResponse::Include::OrSymbol
@@ -282,6 +294,7 @@ module OpenAI
282
294
  OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::OrSymbol
283
295
  ],
284
296
  prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash),
297
+ reasoning: OpenAI::Realtime::RealtimeReasoning::OrHash,
285
298
  tool_choice:
286
299
  T.any(
287
300
  OpenAI::Responses::ToolChoiceOptions::OrSymbol,
@@ -307,14 +320,17 @@ module OpenAI
307
320
  OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::OrSymbol,
308
321
  OpenAI::Realtime::RealtimeTruncationRetentionRatio::OrHash
309
322
  ),
323
+ object: Symbol,
310
324
  type: Symbol
311
325
  ).returns(T.attached_class)
312
326
  end
313
327
  def self.new(
314
- # Ephemeral key returned by the API.
315
- client_secret:,
328
+ # Unique identifier for the session that looks like `sess_1234567890abcdef`.
329
+ id:,
316
330
  # Configuration for input and output audio.
317
331
  audio: nil,
332
+ # Expiration timestamp for the session, in seconds since epoch.
333
+ expires_at: nil,
318
334
  # Additional fields to include in server outputs.
319
335
  #
320
336
  # `item.input_audio_transcription.logprobs`: Include logprobs for input audio
@@ -346,6 +362,8 @@ module OpenAI
346
362
  # Reference to a prompt template and its variables.
347
363
  # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
348
364
  prompt: nil,
365
+ # Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
366
+ reasoning: nil,
349
367
  # How the model chooses tools. Provide one of the string modes or force a specific
350
368
  # function/MCP tool.
351
369
  tool_choice: nil,
@@ -378,6 +396,8 @@ module OpenAI
378
396
  # but would instead return an error if the conversation exceeds the model's input
379
397
  # token limit.
380
398
  truncation: nil,
399
+ # The object type. Always `realtime.session`.
400
+ object: :"realtime.session",
381
401
  # The type of session to create. Always `realtime` for the Realtime API.
382
402
  type: :realtime
383
403
  )
@@ -386,9 +406,11 @@ module OpenAI
386
406
  sig do
387
407
  override.returns(
388
408
  {
389
- client_secret: OpenAI::Realtime::RealtimeSessionClientSecret,
409
+ id: String,
410
+ object: Symbol,
390
411
  type: Symbol,
391
412
  audio: OpenAI::Realtime::RealtimeSessionCreateResponse::Audio,
413
+ expires_at: Integer,
392
414
  include:
393
415
  T::Array[
394
416
  OpenAI::Realtime::RealtimeSessionCreateResponse::Include::TaggedSymbol
@@ -403,6 +425,7 @@ module OpenAI
403
425
  OpenAI::Realtime::RealtimeSessionCreateResponse::OutputModality::TaggedSymbol
404
426
  ],
405
427
  prompt: T.nilable(OpenAI::Responses::ResponsePrompt),
428
+ reasoning: OpenAI::Realtime::RealtimeReasoning,
406
429
  tool_choice:
407
430
  OpenAI::Realtime::RealtimeSessionCreateResponse::ToolChoice::Variants,
408
431
  tools:
@@ -539,14 +562,6 @@ module OpenAI
539
562
  end
540
563
  attr_writer :noise_reduction
541
564
 
542
- # Configuration for input audio transcription, defaults to off and can be set to
543
- # `null` to turn off once on. Input audio transcription is not native to the
544
- # model, since the model consumes audio directly. Transcription runs
545
- # asynchronously through
546
- # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
547
- # and should be treated as guidance of input audio content rather than precisely
548
- # what the model heard. The client can optionally set the language and prompt for
549
- # transcription, these offer additional guidance to the transcription service.
550
565
  sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
551
566
  attr_reader :transcription
552
567
 
@@ -570,6 +585,9 @@ module OpenAI
570
585
  # trails off with "uhhm", the model will score a low probability of turn end and
571
586
  # wait longer for the user to continue speaking. This can be useful for more
572
587
  # natural conversations, but may have a higher latency.
588
+ #
589
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
590
+ # `null`; VAD is not supported.
573
591
  sig do
574
592
  returns(
575
593
  T.nilable(
@@ -608,14 +626,6 @@ module OpenAI
608
626
  # detection accuracy (reducing false positives) and model performance by improving
609
627
  # perception of the input audio.
610
628
  noise_reduction: nil,
611
- # Configuration for input audio transcription, defaults to off and can be set to
612
- # `null` to turn off once on. Input audio transcription is not native to the
613
- # model, since the model consumes audio directly. Transcription runs
614
- # asynchronously through
615
- # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
616
- # and should be treated as guidance of input audio content rather than precisely
617
- # what the model heard. The client can optionally set the language and prompt for
618
- # transcription, these offer additional guidance to the transcription service.
619
629
  transcription: nil,
620
630
  # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
621
631
  # set to `null` to turn off, in which case the client must manually trigger model
@@ -630,6 +640,9 @@ module OpenAI
630
640
  # trails off with "uhhm", the model will score a low probability of turn end and
631
641
  # wait longer for the user to continue speaking. This can be useful for more
632
642
  # natural conversations, but may have a higher latency.
643
+ #
644
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
645
+ # `null`; VAD is not supported.
633
646
  turn_detection: nil
634
647
  )
635
648
  end
@@ -717,6 +730,9 @@ module OpenAI
717
730
  # trails off with "uhhm", the model will score a low probability of turn end and
718
731
  # wait longer for the user to continue speaking. This can be useful for more
719
732
  # natural conversations, but may have a higher latency.
733
+ #
734
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
735
+ # `null`; VAD is not supported.
720
736
  module TurnDetection
721
737
  extend OpenAI::Internal::Type::Union
722
738
 
@@ -1328,6 +1344,11 @@ module OpenAI
1328
1344
  :"gpt-realtime-1.5",
1329
1345
  OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1330
1346
  )
1347
+ GPT_REALTIME_2 =
1348
+ T.let(
1349
+ :"gpt-realtime-2",
1350
+ OpenAI::Realtime::RealtimeSessionCreateResponse::Model::TaggedSymbol
1351
+ )
1331
1352
  GPT_REALTIME_2025_08_28 =
1332
1353
  T.let(
1333
1354
  :"gpt-realtime-2025-08-28",
@@ -91,6 +91,9 @@ module OpenAI
91
91
  # trails off with "uhhm", the model will score a low probability of turn end and
92
92
  # wait longer for the user to continue speaking. This can be useful for more
93
93
  # natural conversations, but may have a higher latency.
94
+ #
95
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
96
+ # `null`; VAD is not supported.
94
97
  sig do
95
98
  returns(
96
99
  T.nilable(
@@ -154,6 +157,9 @@ module OpenAI
154
157
  # trails off with "uhhm", the model will score a low probability of turn end and
155
158
  # wait longer for the user to continue speaking. This can be useful for more
156
159
  # natural conversations, but may have a higher latency.
160
+ #
161
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
162
+ # `null`; VAD is not supported.
157
163
  turn_detection: nil
158
164
  )
159
165
  end
@@ -16,6 +16,9 @@ module OpenAI
16
16
  # trails off with "uhhm", the model will score a low probability of turn end and
17
17
  # wait longer for the user to continue speaking. This can be useful for more
18
18
  # natural conversations, but may have a higher latency.
19
+ #
20
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
21
+ # `null`; VAD is not supported.
19
22
  module RealtimeTranscriptionSessionAudioInputTurnDetection
20
23
  extend OpenAI::Internal::Type::Union
21
24
 
@@ -224,7 +224,6 @@ module OpenAI
224
224
  end
225
225
  attr_writer :noise_reduction
226
226
 
227
- # Configuration of the transcription model.
228
227
  sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
229
228
  attr_reader :transcription
230
229
 
@@ -237,7 +236,8 @@ module OpenAI
237
236
 
238
237
  # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
239
238
  # means that the model will detect the start and end of speech based on audio
240
- # volume and respond at the end of user speech.
239
+ # volume and respond at the end of user speech. For `gpt-realtime-whisper`, this
240
+ # must be `null`; VAD is not supported.
241
241
  sig do
242
242
  returns(
243
243
  T.nilable(
@@ -250,7 +250,9 @@ module OpenAI
250
250
  sig do
251
251
  params(
252
252
  turn_detection:
253
- OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
253
+ T.nilable(
254
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
255
+ )
254
256
  ).void
255
257
  end
256
258
  attr_writer :turn_detection
@@ -267,7 +269,9 @@ module OpenAI
267
269
  OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::OrHash,
268
270
  transcription: OpenAI::Realtime::AudioTranscription::OrHash,
269
271
  turn_detection:
270
- OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
272
+ T.nilable(
273
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
274
+ )
271
275
  ).returns(T.attached_class)
272
276
  end
273
277
  def self.new(
@@ -275,11 +279,11 @@ module OpenAI
275
279
  format_: nil,
276
280
  # Configuration for input audio noise reduction.
277
281
  noise_reduction: nil,
278
- # Configuration of the transcription model.
279
282
  transcription: nil,
280
283
  # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
281
284
  # means that the model will detect the start and end of speech based on audio
282
- # volume and respond at the end of user speech.
285
+ # volume and respond at the end of user speech. For `gpt-realtime-whisper`, this
286
+ # must be `null`; VAD is not supported.
283
287
  turn_detection: nil
284
288
  )
285
289
  end
@@ -292,7 +296,9 @@ module OpenAI
292
296
  OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction,
293
297
  transcription: OpenAI::Realtime::AudioTranscription,
294
298
  turn_detection:
295
- OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
299
+ T.nilable(
300
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
301
+ )
296
302
  }
297
303
  )
298
304
  end
@@ -50,7 +50,8 @@ module OpenAI
50
50
 
51
51
  # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
52
52
  # means that the model will detect the start and end of speech based on audio
53
- # volume and respond at the end of user speech.
53
+ # volume and respond at the end of user speech. For `gpt-realtime-whisper`, this
54
+ # must be `null`; VAD is not supported.
54
55
  sig do
55
56
  params(
56
57
  prefix_padding_ms: Integer,
@@ -0,0 +1,29 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ # A Realtime translation client event.
7
+ module RealtimeTranslationClientEvent
8
+ extend OpenAI::Internal::Type::Union
9
+
10
+ Variants =
11
+ T.type_alias do
12
+ T.any(
13
+ OpenAI::Realtime::RealtimeTranslationSessionUpdateEvent,
14
+ OpenAI::Realtime::RealtimeTranslationInputAudioBufferAppendEvent,
15
+ OpenAI::Realtime::RealtimeTranslationSessionCloseEvent
16
+ )
17
+ end
18
+
19
+ sig do
20
+ override.returns(
21
+ T::Array[OpenAI::Realtime::RealtimeTranslationClientEvent::Variants]
22
+ )
23
+ end
24
+ def self.variants
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,193 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranslationClientSecretCreateRequest < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(
10
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest,
11
+ OpenAI::Internal::AnyHash
12
+ )
13
+ end
14
+
15
+ # Realtime translation session configuration. Translation sessions stream source
16
+ # audio in and translated audio plus transcript deltas out continuously.
17
+ sig do
18
+ returns(OpenAI::Realtime::RealtimeTranslationSessionCreateRequest)
19
+ end
20
+ attr_reader :session
21
+
22
+ sig do
23
+ params(
24
+ session:
25
+ OpenAI::Realtime::RealtimeTranslationSessionCreateRequest::OrHash
26
+ ).void
27
+ end
28
+ attr_writer :session
29
+
30
+ # Configuration for the client secret expiration. Expiration refers to the time
31
+ # after which a client secret will no longer be valid for creating sessions. The
32
+ # session itself may continue after that time once started. A secret can be used
33
+ # to create multiple sessions until it expires.
34
+ sig do
35
+ returns(
36
+ T.nilable(
37
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter
38
+ )
39
+ )
40
+ end
41
+ attr_reader :expires_after
42
+
43
+ sig do
44
+ params(
45
+ expires_after:
46
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::OrHash
47
+ ).void
48
+ end
49
+ attr_writer :expires_after
50
+
51
+ # Create a translation session and client secret for the Realtime API.
52
+ sig do
53
+ params(
54
+ session:
55
+ OpenAI::Realtime::RealtimeTranslationSessionCreateRequest::OrHash,
56
+ expires_after:
57
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::OrHash
58
+ ).returns(T.attached_class)
59
+ end
60
+ def self.new(
61
+ # Realtime translation session configuration. Translation sessions stream source
62
+ # audio in and translated audio plus transcript deltas out continuously.
63
+ session:,
64
+ # Configuration for the client secret expiration. Expiration refers to the time
65
+ # after which a client secret will no longer be valid for creating sessions. The
66
+ # session itself may continue after that time once started. A secret can be used
67
+ # to create multiple sessions until it expires.
68
+ expires_after: nil
69
+ )
70
+ end
71
+
72
+ sig do
73
+ override.returns(
74
+ {
75
+ session:
76
+ OpenAI::Realtime::RealtimeTranslationSessionCreateRequest,
77
+ expires_after:
78
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter
79
+ }
80
+ )
81
+ end
82
+ def to_hash
83
+ end
84
+
85
+ class ExpiresAfter < OpenAI::Internal::Type::BaseModel
86
+ OrHash =
87
+ T.type_alias do
88
+ T.any(
89
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter,
90
+ OpenAI::Internal::AnyHash
91
+ )
92
+ end
93
+
94
+ # The anchor point for the client secret expiration, meaning that `seconds` will
95
+ # be added to the `created_at` time of the client secret to produce an expiration
96
+ # timestamp. Only `created_at` is currently supported.
97
+ sig do
98
+ returns(
99
+ T.nilable(
100
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor::OrSymbol
101
+ )
102
+ )
103
+ end
104
+ attr_reader :anchor
105
+
106
+ sig do
107
+ params(
108
+ anchor:
109
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor::OrSymbol
110
+ ).void
111
+ end
112
+ attr_writer :anchor
113
+
114
+ # The number of seconds from the anchor point to the expiration. Select a value
115
+ # between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if
116
+ # not specified.
117
+ sig { returns(T.nilable(Integer)) }
118
+ attr_reader :seconds
119
+
120
+ sig { params(seconds: Integer).void }
121
+ attr_writer :seconds
122
+
123
+ # Configuration for the client secret expiration. Expiration refers to the time
124
+ # after which a client secret will no longer be valid for creating sessions. The
125
+ # session itself may continue after that time once started. A secret can be used
126
+ # to create multiple sessions until it expires.
127
+ sig do
128
+ params(
129
+ anchor:
130
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor::OrSymbol,
131
+ seconds: Integer
132
+ ).returns(T.attached_class)
133
+ end
134
+ def self.new(
135
+ # The anchor point for the client secret expiration, meaning that `seconds` will
136
+ # be added to the `created_at` time of the client secret to produce an expiration
137
+ # timestamp. Only `created_at` is currently supported.
138
+ anchor: nil,
139
+ # The number of seconds from the anchor point to the expiration. Select a value
140
+ # between `10` and `7200` (2 hours). This defaults to 600 seconds (10 minutes) if
141
+ # not specified.
142
+ seconds: nil
143
+ )
144
+ end
145
+
146
+ sig do
147
+ override.returns(
148
+ {
149
+ anchor:
150
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor::OrSymbol,
151
+ seconds: Integer
152
+ }
153
+ )
154
+ end
155
+ def to_hash
156
+ end
157
+
158
+ # The anchor point for the client secret expiration, meaning that `seconds` will
159
+ # be added to the `created_at` time of the client secret to produce an expiration
160
+ # timestamp. Only `created_at` is currently supported.
161
+ module Anchor
162
+ extend OpenAI::Internal::Type::Enum
163
+
164
+ TaggedSymbol =
165
+ T.type_alias do
166
+ T.all(
167
+ Symbol,
168
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor
169
+ )
170
+ end
171
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
172
+
173
+ CREATED_AT =
174
+ T.let(
175
+ :created_at,
176
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor::TaggedSymbol
177
+ )
178
+
179
+ sig do
180
+ override.returns(
181
+ T::Array[
182
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateRequest::ExpiresAfter::Anchor::TaggedSymbol
183
+ ]
184
+ )
185
+ end
186
+ def self.values
187
+ end
188
+ end
189
+ end
190
+ end
191
+ end
192
+ end
193
+ end
@@ -0,0 +1,69 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranslationClientSecretCreateResponse < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(
10
+ OpenAI::Realtime::RealtimeTranslationClientSecretCreateResponse,
11
+ OpenAI::Internal::AnyHash
12
+ )
13
+ end
14
+
15
+ # Expiration timestamp for the client secret, in seconds since epoch.
16
+ sig { returns(Integer) }
17
+ attr_accessor :expires_at
18
+
19
+ # A Realtime translation session. Translation sessions continuously translate
20
+ # input audio into the configured output language.
21
+ sig { returns(OpenAI::Realtime::RealtimeTranslationSession) }
22
+ attr_reader :session
23
+
24
+ sig do
25
+ params(
26
+ session: OpenAI::Realtime::RealtimeTranslationSession::OrHash
27
+ ).void
28
+ end
29
+ attr_writer :session
30
+
31
+ # The generated client secret value.
32
+ sig { returns(String) }
33
+ attr_accessor :value
34
+
35
+ # Response from creating a translation session and client secret for the Realtime
36
+ # API.
37
+ sig do
38
+ params(
39
+ expires_at: Integer,
40
+ session: OpenAI::Realtime::RealtimeTranslationSession::OrHash,
41
+ value: String
42
+ ).returns(T.attached_class)
43
+ end
44
+ def self.new(
45
+ # Expiration timestamp for the client secret, in seconds since epoch.
46
+ expires_at:,
47
+ # A Realtime translation session. Translation sessions continuously translate
48
+ # input audio into the configured output language.
49
+ session:,
50
+ # The generated client secret value.
51
+ value:
52
+ )
53
+ end
54
+
55
+ sig do
56
+ override.returns(
57
+ {
58
+ expires_at: Integer,
59
+ session: OpenAI::Realtime::RealtimeTranslationSession,
60
+ value: String
61
+ }
62
+ )
63
+ end
64
+ def to_hash
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end