openai 0.61.0 → 0.63.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/README.md +2 -5
  4. data/lib/openai/auth/workload_identity.rb +2 -2
  5. data/lib/openai/auth/workload_identity_auth.rb +4 -3
  6. data/lib/openai/models/admin/organization/usage_audio_speeches_response.rb +10 -1
  7. data/lib/openai/models/admin/organization/usage_audio_transcriptions_response.rb +10 -1
  8. data/lib/openai/models/admin/organization/usage_code_interpreter_sessions_response.rb +10 -1
  9. data/lib/openai/models/admin/organization/usage_completions_response.rb +10 -1
  10. data/lib/openai/models/admin/organization/usage_costs_response.rb +10 -1
  11. data/lib/openai/models/admin/organization/usage_embeddings_response.rb +10 -1
  12. data/lib/openai/models/admin/organization/usage_images_response.rb +10 -1
  13. data/lib/openai/models/admin/organization/usage_moderations_response.rb +10 -1
  14. data/lib/openai/models/admin/organization/usage_vector_stores_response.rb +10 -1
  15. data/lib/openai/models/chat/chat_completion_token_logprob.rb +1 -2
  16. data/lib/openai/models/chat/completion_create_params.rb +4 -3
  17. data/lib/openai/models/image_edit_params.rb +85 -31
  18. data/lib/openai/models/image_generate_params.rb +78 -26
  19. data/lib/openai/models/image_model.rb +5 -2
  20. data/lib/openai/models/realtime/audio_transcription.rb +37 -5
  21. data/lib/openai/models/realtime/client_secret_create_response.rb +1 -2
  22. data/lib/openai/models/realtime/realtime_audio_config_input.rb +3 -0
  23. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +3 -0
  24. data/lib/openai/models/realtime/realtime_reasoning.rb +24 -0
  25. data/lib/openai/models/realtime/realtime_reasoning_effort.rb +22 -0
  26. data/lib/openai/models/realtime/realtime_response_create_params.rb +18 -1
  27. data/lib/openai/models/realtime/realtime_session.rb +6 -0
  28. data/lib/openai/models/realtime/realtime_session_create_request.rb +21 -1
  29. data/lib/openai/models/realtime/realtime_session_create_response.rb +41 -17
  30. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +3 -0
  31. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +3 -0
  32. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +9 -5
  33. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +2 -1
  34. data/lib/openai/models/realtime/realtime_translation_client_event.rb +45 -0
  35. data/lib/openai/models/realtime/realtime_translation_client_secret_create_request.rb +85 -0
  36. data/lib/openai/models/realtime/realtime_translation_client_secret_create_response.rb +42 -0
  37. data/lib/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rb +51 -0
  38. data/lib/openai/models/realtime/realtime_translation_input_transcript_delta_event.rb +55 -0
  39. data/lib/openai/models/realtime/realtime_translation_output_audio_delta_event.rb +89 -0
  40. data/lib/openai/models/realtime/realtime_translation_output_transcript_delta_event.rb +54 -0
  41. data/lib/openai/models/realtime/realtime_translation_server_event.rb +53 -0
  42. data/lib/openai/models/realtime/realtime_translation_session.rb +158 -0
  43. data/lib/openai/models/realtime/realtime_translation_session_close_event.rb +30 -0
  44. data/lib/openai/models/realtime/realtime_translation_session_closed_event.rb +28 -0
  45. data/lib/openai/models/realtime/realtime_translation_session_create_request.rb +138 -0
  46. data/lib/openai/models/realtime/realtime_translation_session_created_event.rb +38 -0
  47. data/lib/openai/models/realtime/realtime_translation_session_update_event.rb +43 -0
  48. data/lib/openai/models/realtime/realtime_translation_session_update_request.rb +129 -0
  49. data/lib/openai/models/realtime/realtime_translation_session_updated_event.rb +37 -0
  50. data/lib/openai/models/realtime/transcription_session_updated_event.rb +1 -2
  51. data/lib/openai/models/responses/response.rb +4 -3
  52. data/lib/openai/models/responses/response_compact_params.rb +22 -1
  53. data/lib/openai/models/responses/response_create_params.rb +4 -3
  54. data/lib/openai/models/responses/response_includable.rb +2 -0
  55. data/lib/openai/models/responses/response_text_delta_event.rb +2 -2
  56. data/lib/openai/models/responses/response_text_done_event.rb +2 -2
  57. data/lib/openai/models/responses/responses_client_event.rb +4 -3
  58. data/lib/openai/models/responses/tool.rb +81 -16
  59. data/lib/openai/resources/chat/completions.rb +2 -2
  60. data/lib/openai/resources/images.rb +6 -6
  61. data/lib/openai/resources/realtime/calls.rb +5 -1
  62. data/lib/openai/resources/responses.rb +5 -3
  63. data/lib/openai/version.rb +1 -1
  64. data/lib/openai.rb +18 -1
  65. data/rbi/openai/auth.rbi +3 -3
  66. data/rbi/openai/models/admin/organization/usage_audio_speeches_response.rbi +11 -1
  67. data/rbi/openai/models/admin/organization/usage_audio_transcriptions_response.rbi +11 -1
  68. data/rbi/openai/models/admin/organization/usage_code_interpreter_sessions_response.rbi +11 -1
  69. data/rbi/openai/models/admin/organization/usage_completions_response.rbi +11 -1
  70. data/rbi/openai/models/admin/organization/usage_costs_response.rbi +11 -1
  71. data/rbi/openai/models/admin/organization/usage_embeddings_response.rbi +11 -1
  72. data/rbi/openai/models/admin/organization/usage_images_response.rbi +11 -1
  73. data/rbi/openai/models/admin/organization/usage_moderations_response.rbi +11 -1
  74. data/rbi/openai/models/admin/organization/usage_vector_stores_response.rbi +11 -1
  75. data/rbi/openai/models/chat/chat_completion_token_logprob.rbi +2 -4
  76. data/rbi/openai/models/chat/completion_create_params.rbi +6 -4
  77. data/rbi/openai/models/image_edit_params.rbi +102 -45
  78. data/rbi/openai/models/image_generate_params.rbi +93 -39
  79. data/rbi/openai/models/image_model.rbi +8 -3
  80. data/rbi/openai/models/realtime/audio_transcription.rbi +85 -6
  81. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +6 -0
  82. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +3 -0
  83. data/rbi/openai/models/realtime/realtime_reasoning.rbi +54 -0
  84. data/rbi/openai/models/realtime/realtime_reasoning_effort.rbi +44 -0
  85. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +26 -0
  86. data/rbi/openai/models/realtime/realtime_session.rbi +9 -0
  87. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +31 -0
  88. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +53 -32
  89. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +6 -0
  90. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +3 -0
  91. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +13 -7
  92. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +2 -1
  93. data/rbi/openai/models/realtime/realtime_translation_client_event.rbi +29 -0
  94. data/rbi/openai/models/realtime/realtime_translation_client_secret_create_request.rbi +193 -0
  95. data/rbi/openai/models/realtime/realtime_translation_client_secret_create_response.rbi +69 -0
  96. data/rbi/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rbi +69 -0
  97. data/rbi/openai/models/realtime/realtime_translation_input_transcript_delta_event.rbi +77 -0
  98. data/rbi/openai/models/realtime/realtime_translation_output_audio_delta_event.rbi +148 -0
  99. data/rbi/openai/models/realtime/realtime_translation_output_transcript_delta_event.rbi +76 -0
  100. data/rbi/openai/models/realtime/realtime_translation_server_event.rbi +33 -0
  101. data/rbi/openai/models/realtime/realtime_translation_session.rbi +339 -0
  102. data/rbi/openai/models/realtime/realtime_translation_session_close_event.rbi +44 -0
  103. data/rbi/openai/models/realtime/realtime_translation_session_closed_event.rbi +39 -0
  104. data/rbi/openai/models/realtime/realtime_translation_session_create_request.rbi +322 -0
  105. data/rbi/openai/models/realtime/realtime_translation_session_created_event.rbi +68 -0
  106. data/rbi/openai/models/realtime/realtime_translation_session_update_event.rbi +78 -0
  107. data/rbi/openai/models/realtime/realtime_translation_session_update_request.rbi +313 -0
  108. data/rbi/openai/models/realtime/realtime_translation_session_updated_event.rbi +67 -0
  109. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +0 -2
  110. data/rbi/openai/models/responses/response.rbi +6 -4
  111. data/rbi/openai/models/responses/response_compact_params.rbi +65 -0
  112. data/rbi/openai/models/responses/response_create_params.rbi +6 -4
  113. data/rbi/openai/models/responses/response_includable.rbi +2 -0
  114. data/rbi/openai/models/responses/response_text_delta_event.rbi +2 -2
  115. data/rbi/openai/models/responses/response_text_done_event.rbi +2 -2
  116. data/rbi/openai/models/responses/responses_client_event.rbi +6 -4
  117. data/rbi/openai/models/responses/tool.rbi +122 -27
  118. data/rbi/openai/resources/chat/completions.rbi +6 -4
  119. data/rbi/openai/resources/images.rbi +110 -44
  120. data/rbi/openai/resources/realtime/calls.rbi +7 -0
  121. data/rbi/openai/resources/responses.rbi +12 -4
  122. data/sig/openai/models/admin/organization/usage_audio_speeches_response.rbs +7 -2
  123. data/sig/openai/models/admin/organization/usage_audio_transcriptions_response.rbs +7 -2
  124. data/sig/openai/models/admin/organization/usage_code_interpreter_sessions_response.rbs +7 -2
  125. data/sig/openai/models/admin/organization/usage_completions_response.rbs +7 -2
  126. data/sig/openai/models/admin/organization/usage_costs_response.rbs +7 -2
  127. data/sig/openai/models/admin/organization/usage_embeddings_response.rbs +7 -2
  128. data/sig/openai/models/admin/organization/usage_images_response.rbs +7 -2
  129. data/sig/openai/models/admin/organization/usage_moderations_response.rbs +7 -2
  130. data/sig/openai/models/admin/organization/usage_vector_stores_response.rbs +7 -2
  131. data/sig/openai/models/image_edit_params.rbs +5 -4
  132. data/sig/openai/models/image_generate_params.rbs +5 -4
  133. data/sig/openai/models/image_model.rbs +11 -5
  134. data/sig/openai/models/realtime/audio_transcription.rbs +25 -0
  135. data/sig/openai/models/realtime/realtime_reasoning.rbs +24 -0
  136. data/sig/openai/models/realtime/realtime_reasoning_effort.rbs +20 -0
  137. data/sig/openai/models/realtime/realtime_response_create_params.rbs +16 -0
  138. data/sig/openai/models/realtime/realtime_session_create_request.rbs +18 -0
  139. data/sig/openai/models/realtime/realtime_session_create_response.rbs +27 -4
  140. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +4 -8
  141. data/sig/openai/models/realtime/realtime_translation_client_event.rbs +16 -0
  142. data/sig/openai/models/realtime/realtime_translation_client_secret_create_request.rbs +69 -0
  143. data/sig/openai/models/realtime/realtime_translation_client_secret_create_response.rbs +32 -0
  144. data/sig/openai/models/realtime/realtime_translation_input_audio_buffer_append_event.rbs +34 -0
  145. data/sig/openai/models/realtime/realtime_translation_input_transcript_delta_event.rbs +37 -0
  146. data/sig/openai/models/realtime/realtime_translation_output_audio_delta_event.rbs +70 -0
  147. data/sig/openai/models/realtime/realtime_translation_output_transcript_delta_event.rbs +37 -0
  148. data/sig/openai/models/realtime/realtime_translation_server_event.rbs +20 -0
  149. data/sig/openai/models/realtime/realtime_translation_session.rbs +131 -0
  150. data/sig/openai/models/realtime/realtime_translation_session_close_event.rbs +20 -0
  151. data/sig/openai/models/realtime/realtime_translation_session_closed_event.rbs +18 -0
  152. data/sig/openai/models/realtime/realtime_translation_session_create_request.rbs +120 -0
  153. data/sig/openai/models/realtime/realtime_translation_session_created_event.rbs +32 -0
  154. data/sig/openai/models/realtime/realtime_translation_session_update_event.rbs +34 -0
  155. data/sig/openai/models/realtime/realtime_translation_session_update_request.rbs +115 -0
  156. data/sig/openai/models/realtime/realtime_translation_session_updated_event.rbs +32 -0
  157. data/sig/openai/models/responses/response_compact_params.rbs +19 -1
  158. data/sig/openai/models/responses/tool.rbs +15 -5
  159. data/sig/openai/resources/realtime/calls.rbs +2 -0
  160. data/sig/openai/resources/responses.rbs +1 -0
  161. metadata +56 -5
  162. data/lib/openai/models/realtime/realtime_session_client_secret.rb +0 -36
  163. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +0 -49
  164. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +0 -20
@@ -18,9 +18,14 @@ module OpenAI
18
18
  attr_accessor :prompt
19
19
 
20
20
  # Allows to set transparency for the background of the generated image(s). This
21
- # parameter is only supported for the GPT image models. Must be one of
22
- # `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
23
- # model will automatically determine the best background for the image.
21
+ # parameter is only supported for GPT image models that support transparent
22
+ # backgrounds. Must be one of `transparent`, `opaque`, or `auto` (default value).
23
+ # When `auto` is used, the model will automatically determine the best background
24
+ # for the image.
25
+ #
26
+ # `gpt-image-2` and `gpt-image-2-2026-04-21` do not support transparent
27
+ # backgrounds. Requests with `background` set to `transparent` will return an
28
+ # error for these models; use `opaque` or `auto` instead.
24
29
  #
25
30
  # If `transparent`, the output format needs to support transparency, so it should
26
31
  # be set to either `png` (default value) or `webp`.
@@ -30,8 +35,9 @@ module OpenAI
30
35
  attr_accessor :background
31
36
 
32
37
  # The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or a GPT
33
- # image model (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`). Defaults to
34
- # `dall-e-2` unless a parameter specific to the GPT image models is used.
38
+ # image model (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`, `gpt-image-2`,
39
+ # or `gpt-image-2-2026-04-21`). Defaults to `dall-e-2` unless a parameter specific
40
+ # to the GPT image models is used.
35
41
  sig { returns(T.nilable(T.any(String, OpenAI::ImageModel::OrSymbol))) }
36
42
  attr_accessor :model
37
43
 
@@ -91,11 +97,22 @@ module OpenAI
91
97
  end
92
98
  attr_accessor :response_format
93
99
 
94
- # The size of the generated images. Must be one of `1024x1024`, `1536x1024`
95
- # (landscape), `1024x1536` (portrait), or `auto` (default value) for the GPT image
96
- # models, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and one of
97
- # `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
98
- sig { returns(T.nilable(OpenAI::ImageGenerateParams::Size::OrSymbol)) }
100
+ # The size of the generated images. For `gpt-image-2` and
101
+ # `gpt-image-2-2026-04-21`, arbitrary resolutions are supported as `WIDTHxHEIGHT`
102
+ # strings, for example `1536x864`. Width and height must both be divisible by 16
103
+ # and the requested aspect ratio must be between 1:3 and 3:1. Resolutions above
104
+ # `2560x1440` are experimental, and the maximum supported resolution is
105
+ # `3840x2160`. The requested size must also satisfy the model's current pixel and
106
+ # edge limits. The standard sizes `1024x1024`, `1536x1024`, and `1024x1536` are
107
+ # supported by the GPT image models; `auto` is supported for models that allow
108
+ # automatic sizing. For `dall-e-2`, use one of `256x256`, `512x512`, or
109
+ # `1024x1024`. For `dall-e-3`, use one of `1024x1024`, `1792x1024`, or
110
+ # `1024x1792`.
111
+ sig do
112
+ returns(
113
+ T.nilable(T.any(String, OpenAI::ImageGenerateParams::Size::OrSymbol))
114
+ )
115
+ end
99
116
  attr_accessor :size
100
117
 
101
118
  # The style of the generated images. This parameter is only supported for
@@ -130,7 +147,10 @@ module OpenAI
130
147
  quality: T.nilable(OpenAI::ImageGenerateParams::Quality::OrSymbol),
131
148
  response_format:
132
149
  T.nilable(OpenAI::ImageGenerateParams::ResponseFormat::OrSymbol),
133
- size: T.nilable(OpenAI::ImageGenerateParams::Size::OrSymbol),
150
+ size:
151
+ T.nilable(
152
+ T.any(String, OpenAI::ImageGenerateParams::Size::OrSymbol)
153
+ ),
134
154
  style: T.nilable(OpenAI::ImageGenerateParams::Style::OrSymbol),
135
155
  user: String,
136
156
  request_options: OpenAI::RequestOptions::OrHash
@@ -142,16 +162,22 @@ module OpenAI
142
162
  # characters for `dall-e-3`.
143
163
  prompt:,
144
164
  # Allows to set transparency for the background of the generated image(s). This
145
- # parameter is only supported for the GPT image models. Must be one of
146
- # `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
147
- # model will automatically determine the best background for the image.
165
+ # parameter is only supported for GPT image models that support transparent
166
+ # backgrounds. Must be one of `transparent`, `opaque`, or `auto` (default value).
167
+ # When `auto` is used, the model will automatically determine the best background
168
+ # for the image.
169
+ #
170
+ # `gpt-image-2` and `gpt-image-2-2026-04-21` do not support transparent
171
+ # backgrounds. Requests with `background` set to `transparent` will return an
172
+ # error for these models; use `opaque` or `auto` instead.
148
173
  #
149
174
  # If `transparent`, the output format needs to support transparency, so it should
150
175
  # be set to either `png` (default value) or `webp`.
151
176
  background: nil,
152
177
  # The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or a GPT
153
- # image model (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`). Defaults to
154
- # `dall-e-2` unless a parameter specific to the GPT image models is used.
178
+ # image model (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`, `gpt-image-2`,
179
+ # or `gpt-image-2-2026-04-21`). Defaults to `dall-e-2` unless a parameter specific
180
+ # to the GPT image models is used.
155
181
  model: nil,
156
182
  # Control the content-moderation level for images generated by the GPT image
157
183
  # models. Must be either `low` for less restrictive filtering or `auto` (default
@@ -187,10 +213,17 @@ module OpenAI
187
213
  # after the image has been generated. This parameter isn't supported for the GPT
188
214
  # image models, which always return base64-encoded images.
189
215
  response_format: nil,
190
- # The size of the generated images. Must be one of `1024x1024`, `1536x1024`
191
- # (landscape), `1024x1536` (portrait), or `auto` (default value) for the GPT image
192
- # models, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and one of
193
- # `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
216
+ # The size of the generated images. For `gpt-image-2` and
217
+ # `gpt-image-2-2026-04-21`, arbitrary resolutions are supported as `WIDTHxHEIGHT`
218
+ # strings, for example `1536x864`. Width and height must both be divisible by 16
219
+ # and the requested aspect ratio must be between 1:3 and 3:1. Resolutions above
220
+ # `2560x1440` are experimental, and the maximum supported resolution is
221
+ # `3840x2160`. The requested size must also satisfy the model's current pixel and
222
+ # edge limits. The standard sizes `1024x1024`, `1536x1024`, and `1024x1536` are
223
+ # supported by the GPT image models; `auto` is supported for models that allow
224
+ # automatic sizing. For `dall-e-2`, use one of `256x256`, `512x512`, or
225
+ # `1024x1024`. For `dall-e-3`, use one of `1024x1024`, `1792x1024`, or
226
+ # `1024x1792`.
194
227
  size: nil,
195
228
  # The style of the generated images. This parameter is only supported for
196
229
  # `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean
@@ -222,7 +255,10 @@ module OpenAI
222
255
  quality: T.nilable(OpenAI::ImageGenerateParams::Quality::OrSymbol),
223
256
  response_format:
224
257
  T.nilable(OpenAI::ImageGenerateParams::ResponseFormat::OrSymbol),
225
- size: T.nilable(OpenAI::ImageGenerateParams::Size::OrSymbol),
258
+ size:
259
+ T.nilable(
260
+ T.any(String, OpenAI::ImageGenerateParams::Size::OrSymbol)
261
+ ),
226
262
  style: T.nilable(OpenAI::ImageGenerateParams::Style::OrSymbol),
227
263
  user: String,
228
264
  request_options: OpenAI::RequestOptions
@@ -233,9 +269,14 @@ module OpenAI
233
269
  end
234
270
 
235
271
  # Allows to set transparency for the background of the generated image(s). This
236
- # parameter is only supported for the GPT image models. Must be one of
237
- # `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
238
- # model will automatically determine the best background for the image.
272
+ # parameter is only supported for GPT image models that support transparent
273
+ # backgrounds. Must be one of `transparent`, `opaque`, or `auto` (default value).
274
+ # When `auto` is used, the model will automatically determine the best background
275
+ # for the image.
276
+ #
277
+ # `gpt-image-2` and `gpt-image-2-2026-04-21` do not support transparent
278
+ # backgrounds. Requests with `background` set to `transparent` will return an
279
+ # error for these models; use `opaque` or `auto` instead.
239
280
  #
240
281
  # If `transparent`, the output format needs to support transparency, so it should
241
282
  # be set to either `png` (default value) or `webp`.
@@ -268,8 +309,9 @@ module OpenAI
268
309
  end
269
310
 
270
311
  # The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or a GPT
271
- # image model (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`). Defaults to
272
- # `dall-e-2` unless a parameter specific to the GPT image models is used.
312
+ # image model (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`, `gpt-image-2`,
313
+ # or `gpt-image-2-2026-04-21`). Defaults to `dall-e-2` unless a parameter specific
314
+ # to the GPT image models is used.
273
315
  module Model
274
316
  extend OpenAI::Internal::Type::Union
275
317
 
@@ -399,12 +441,32 @@ module OpenAI
399
441
  end
400
442
  end
401
443
 
402
- # The size of the generated images. Must be one of `1024x1024`, `1536x1024`
403
- # (landscape), `1024x1536` (portrait), or `auto` (default value) for the GPT image
404
- # models, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and one of
405
- # `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
444
+ # The size of the generated images. For `gpt-image-2` and
445
+ # `gpt-image-2-2026-04-21`, arbitrary resolutions are supported as `WIDTHxHEIGHT`
446
+ # strings, for example `1536x864`. Width and height must both be divisible by 16
447
+ # and the requested aspect ratio must be between 1:3 and 3:1. Resolutions above
448
+ # `2560x1440` are experimental, and the maximum supported resolution is
449
+ # `3840x2160`. The requested size must also satisfy the model's current pixel and
450
+ # edge limits. The standard sizes `1024x1024`, `1536x1024`, and `1024x1536` are
451
+ # supported by the GPT image models; `auto` is supported for models that allow
452
+ # automatic sizing. For `dall-e-2`, use one of `256x256`, `512x512`, or
453
+ # `1024x1024`. For `dall-e-3`, use one of `1024x1024`, `1792x1024`, or
454
+ # `1024x1792`.
406
455
  module Size
407
- extend OpenAI::Internal::Type::Enum
456
+ extend OpenAI::Internal::Type::Union
457
+
458
+ Variants =
459
+ T.type_alias do
460
+ T.any(String, OpenAI::ImageGenerateParams::Size::TaggedSymbol)
461
+ end
462
+
463
+ sig do
464
+ override.returns(
465
+ T::Array[OpenAI::ImageGenerateParams::Size::Variants]
466
+ )
467
+ end
468
+ def self.variants
469
+ end
408
470
 
409
471
  TaggedSymbol =
410
472
  T.type_alias { T.all(Symbol, OpenAI::ImageGenerateParams::Size) }
@@ -425,14 +487,6 @@ module OpenAI
425
487
  T.let(:"1792x1024", OpenAI::ImageGenerateParams::Size::TaggedSymbol)
426
488
  SIZE_1024X1792 =
427
489
  T.let(:"1024x1792", OpenAI::ImageGenerateParams::Size::TaggedSymbol)
428
-
429
- sig do
430
- override.returns(
431
- T::Array[OpenAI::ImageGenerateParams::Size::TaggedSymbol]
432
- )
433
- end
434
- def self.values
435
- end
436
490
  end
437
491
 
438
492
  # The style of the generated images. This parameter is only supported for
@@ -8,12 +8,17 @@ module OpenAI
8
8
  TaggedSymbol = T.type_alias { T.all(Symbol, OpenAI::ImageModel) }
9
9
  OrSymbol = T.type_alias { T.any(Symbol, String) }
10
10
 
11
- GPT_IMAGE_1_5 = T.let(:"gpt-image-1.5", OpenAI::ImageModel::TaggedSymbol)
12
- DALL_E_2 = T.let(:"dall-e-2", OpenAI::ImageModel::TaggedSymbol)
13
- DALL_E_3 = T.let(:"dall-e-3", OpenAI::ImageModel::TaggedSymbol)
14
11
  GPT_IMAGE_1 = T.let(:"gpt-image-1", OpenAI::ImageModel::TaggedSymbol)
15
12
  GPT_IMAGE_1_MINI =
16
13
  T.let(:"gpt-image-1-mini", OpenAI::ImageModel::TaggedSymbol)
14
+ GPT_IMAGE_2 = T.let(:"gpt-image-2", OpenAI::ImageModel::TaggedSymbol)
15
+ GPT_IMAGE_2_2026_04_21 =
16
+ T.let(:"gpt-image-2-2026-04-21", OpenAI::ImageModel::TaggedSymbol)
17
+ GPT_IMAGE_1_5 = T.let(:"gpt-image-1.5", OpenAI::ImageModel::TaggedSymbol)
18
+ CHATGPT_IMAGE_LATEST =
19
+ T.let(:"chatgpt-image-latest", OpenAI::ImageModel::TaggedSymbol)
20
+ DALL_E_2 = T.let(:"dall-e-2", OpenAI::ImageModel::TaggedSymbol)
21
+ DALL_E_3 = T.let(:"dall-e-3", OpenAI::ImageModel::TaggedSymbol)
17
22
 
18
23
  sig { override.returns(T::Array[OpenAI::ImageModel::TaggedSymbol]) }
19
24
  def self.values
@@ -12,6 +12,23 @@ module OpenAI
12
12
  )
13
13
  end
14
14
 
15
+ # Controls how long the model waits before emitting transcription text. Higher
16
+ # values can improve transcription accuracy at the cost of latency. Only supported
17
+ # with `gpt-realtime-whisper` in GA Realtime sessions.
18
+ sig do
19
+ returns(
20
+ T.nilable(OpenAI::Realtime::AudioTranscription::Delay::OrSymbol)
21
+ )
22
+ end
23
+ attr_reader :delay
24
+
25
+ sig do
26
+ params(
27
+ delay: OpenAI::Realtime::AudioTranscription::Delay::OrSymbol
28
+ ).void
29
+ end
30
+ attr_writer :delay
31
+
15
32
  # The language of the input audio. Supplying the input language in
16
33
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
17
34
  # format will improve accuracy and latency.
@@ -23,8 +40,8 @@ module OpenAI
23
40
 
24
41
  # The model to use for transcription. Current options are `whisper-1`,
25
42
  # `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`,
26
- # `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use
27
- # `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
43
+ # `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.
44
+ # Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
28
45
  sig do
29
46
  returns(
30
47
  T.nilable(
@@ -53,6 +70,7 @@ module OpenAI
53
70
  # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
54
71
  # For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the
55
72
  # prompt is a free text string, for example "expect words related to technology".
73
+ # Prompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.
56
74
  sig { returns(T.nilable(String)) }
57
75
  attr_reader :prompt
58
76
 
@@ -61,6 +79,7 @@ module OpenAI
61
79
 
62
80
  sig do
63
81
  params(
82
+ delay: OpenAI::Realtime::AudioTranscription::Delay::OrSymbol,
64
83
  language: String,
65
84
  model:
66
85
  T.any(
@@ -71,20 +90,25 @@ module OpenAI
71
90
  ).returns(T.attached_class)
72
91
  end
73
92
  def self.new(
93
+ # Controls how long the model waits before emitting transcription text. Higher
94
+ # values can improve transcription accuracy at the cost of latency. Only supported
95
+ # with `gpt-realtime-whisper` in GA Realtime sessions.
96
+ delay: nil,
74
97
  # The language of the input audio. Supplying the input language in
75
98
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
76
99
  # format will improve accuracy and latency.
77
100
  language: nil,
78
101
  # The model to use for transcription. Current options are `whisper-1`,
79
102
  # `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`,
80
- # `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use
81
- # `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
103
+ # `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.
104
+ # Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
82
105
  model: nil,
83
106
  # An optional text to guide the model's style or continue a previous audio
84
107
  # segment. For `whisper-1`, the
85
108
  # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
86
109
  # For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the
87
110
  # prompt is a free text string, for example "expect words related to technology".
111
+ # Prompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.
88
112
  prompt: nil
89
113
  )
90
114
  end
@@ -92,6 +116,7 @@ module OpenAI
92
116
  sig do
93
117
  override.returns(
94
118
  {
119
+ delay: OpenAI::Realtime::AudioTranscription::Delay::OrSymbol,
95
120
  language: String,
96
121
  model:
97
122
  T.any(
@@ -105,10 +130,59 @@ module OpenAI
105
130
  def to_hash
106
131
  end
107
132
 
133
+ # Controls how long the model waits before emitting transcription text. Higher
134
+ # values can improve transcription accuracy at the cost of latency. Only supported
135
+ # with `gpt-realtime-whisper` in GA Realtime sessions.
136
+ module Delay
137
+ extend OpenAI::Internal::Type::Enum
138
+
139
+ TaggedSymbol =
140
+ T.type_alias do
141
+ T.all(Symbol, OpenAI::Realtime::AudioTranscription::Delay)
142
+ end
143
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
144
+
145
+ MINIMAL =
146
+ T.let(
147
+ :minimal,
148
+ OpenAI::Realtime::AudioTranscription::Delay::TaggedSymbol
149
+ )
150
+ LOW =
151
+ T.let(
152
+ :low,
153
+ OpenAI::Realtime::AudioTranscription::Delay::TaggedSymbol
154
+ )
155
+ MEDIUM =
156
+ T.let(
157
+ :medium,
158
+ OpenAI::Realtime::AudioTranscription::Delay::TaggedSymbol
159
+ )
160
+ HIGH =
161
+ T.let(
162
+ :high,
163
+ OpenAI::Realtime::AudioTranscription::Delay::TaggedSymbol
164
+ )
165
+ XHIGH =
166
+ T.let(
167
+ :xhigh,
168
+ OpenAI::Realtime::AudioTranscription::Delay::TaggedSymbol
169
+ )
170
+
171
+ sig do
172
+ override.returns(
173
+ T::Array[
174
+ OpenAI::Realtime::AudioTranscription::Delay::TaggedSymbol
175
+ ]
176
+ )
177
+ end
178
+ def self.values
179
+ end
180
+ end
181
+
108
182
  # The model to use for transcription. Current options are `whisper-1`,
109
183
  # `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`,
110
- # `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use
111
- # `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
184
+ # `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.
185
+ # Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
112
186
  module Model
113
187
  extend OpenAI::Internal::Type::Union
114
188
 
@@ -159,6 +233,11 @@ module OpenAI
159
233
  :"gpt-4o-transcribe-diarize",
160
234
  OpenAI::Realtime::AudioTranscription::Model::TaggedSymbol
161
235
  )
236
+ GPT_REALTIME_WHISPER =
237
+ T.let(
238
+ :"gpt-realtime-whisper",
239
+ OpenAI::Realtime::AudioTranscription::Model::TaggedSymbol
240
+ )
162
241
  end
163
242
  end
164
243
  end
@@ -91,6 +91,9 @@ module OpenAI
91
91
  # trails off with "uhhm", the model will score a low probability of turn end and
92
92
  # wait longer for the user to continue speaking. This can be useful for more
93
93
  # natural conversations, but may have a higher latency.
94
+ #
95
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
96
+ # `null`; VAD is not supported.
94
97
  sig do
95
98
  returns(
96
99
  T.nilable(
@@ -154,6 +157,9 @@ module OpenAI
154
157
  # trails off with "uhhm", the model will score a low probability of turn end and
155
158
  # wait longer for the user to continue speaking. This can be useful for more
156
159
  # natural conversations, but may have a higher latency.
160
+ #
161
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
162
+ # `null`; VAD is not supported.
157
163
  turn_detection: nil
158
164
  )
159
165
  end
@@ -16,6 +16,9 @@ module OpenAI
16
16
  # trails off with "uhhm", the model will score a low probability of turn end and
17
17
  # wait longer for the user to continue speaking. This can be useful for more
18
18
  # natural conversations, but may have a higher latency.
19
+ #
20
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
21
+ # `null`; VAD is not supported.
19
22
  module RealtimeAudioInputTurnDetection
20
23
  extend OpenAI::Internal::Type::Union
21
24
 
@@ -0,0 +1,54 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeReasoning < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(
10
+ OpenAI::Realtime::RealtimeReasoning,
11
+ OpenAI::Internal::AnyHash
12
+ )
13
+ end
14
+
15
+ # Constrains effort on reasoning for reasoning-capable Realtime models such as
16
+ # `gpt-realtime-2`.
17
+ sig do
18
+ returns(
19
+ T.nilable(OpenAI::Realtime::RealtimeReasoningEffort::OrSymbol)
20
+ )
21
+ end
22
+ attr_reader :effort
23
+
24
+ sig do
25
+ params(
26
+ effort: OpenAI::Realtime::RealtimeReasoningEffort::OrSymbol
27
+ ).void
28
+ end
29
+ attr_writer :effort
30
+
31
+ # Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
32
+ sig do
33
+ params(
34
+ effort: OpenAI::Realtime::RealtimeReasoningEffort::OrSymbol
35
+ ).returns(T.attached_class)
36
+ end
37
+ def self.new(
38
+ # Constrains effort on reasoning for reasoning-capable Realtime models such as
39
+ # `gpt-realtime-2`.
40
+ effort: nil
41
+ )
42
+ end
43
+
44
+ sig do
45
+ override.returns(
46
+ { effort: OpenAI::Realtime::RealtimeReasoningEffort::OrSymbol }
47
+ )
48
+ end
49
+ def to_hash
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,44 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ # Constrains effort on reasoning for reasoning-capable Realtime models such as
7
+ # `gpt-realtime-2`.
8
+ module RealtimeReasoningEffort
9
+ extend OpenAI::Internal::Type::Enum
10
+
11
+ TaggedSymbol =
12
+ T.type_alias do
13
+ T.all(Symbol, OpenAI::Realtime::RealtimeReasoningEffort)
14
+ end
15
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
16
+
17
+ MINIMAL =
18
+ T.let(
19
+ :minimal,
20
+ OpenAI::Realtime::RealtimeReasoningEffort::TaggedSymbol
21
+ )
22
+ LOW =
23
+ T.let(:low, OpenAI::Realtime::RealtimeReasoningEffort::TaggedSymbol)
24
+ MEDIUM =
25
+ T.let(
26
+ :medium,
27
+ OpenAI::Realtime::RealtimeReasoningEffort::TaggedSymbol
28
+ )
29
+ HIGH =
30
+ T.let(:high, OpenAI::Realtime::RealtimeReasoningEffort::TaggedSymbol)
31
+ XHIGH =
32
+ T.let(:xhigh, OpenAI::Realtime::RealtimeReasoningEffort::TaggedSymbol)
33
+
34
+ sig do
35
+ override.returns(
36
+ T::Array[OpenAI::Realtime::RealtimeReasoningEffort::TaggedSymbol]
37
+ )
38
+ end
39
+ def self.values
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -159,6 +159,14 @@ module OpenAI
159
159
  end
160
160
  attr_writer :output_modalities
161
161
 
162
+ # Whether the model may call multiple tools in parallel. Only supported by
163
+ # reasoning Realtime models such as `gpt-realtime-2`.
164
+ sig { returns(T.nilable(T::Boolean)) }
165
+ attr_reader :parallel_tool_calls
166
+
167
+ sig { params(parallel_tool_calls: T::Boolean).void }
168
+ attr_writer :parallel_tool_calls
169
+
162
170
  # Reference to a prompt template and its variables.
163
171
  # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
164
172
  sig { returns(T.nilable(OpenAI::Responses::ResponsePrompt)) }
@@ -171,6 +179,15 @@ module OpenAI
171
179
  end
172
180
  attr_writer :prompt
173
181
 
182
+ # Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
183
+ sig { returns(T.nilable(OpenAI::Realtime::RealtimeReasoning)) }
184
+ attr_reader :reasoning
185
+
186
+ sig do
187
+ params(reasoning: OpenAI::Realtime::RealtimeReasoning::OrHash).void
188
+ end
189
+ attr_writer :reasoning
190
+
174
191
  # How the model chooses tools. Provide one of the string modes or force a specific
175
192
  # function/MCP tool.
176
193
  sig do
@@ -256,7 +273,9 @@ module OpenAI
256
273
  T::Array[
257
274
  OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::OrSymbol
258
275
  ],
276
+ parallel_tool_calls: T::Boolean,
259
277
  prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash),
278
+ reasoning: OpenAI::Realtime::RealtimeReasoning::OrHash,
260
279
  tool_choice:
261
280
  T.any(
262
281
  OpenAI::Responses::ToolChoiceOptions::OrSymbol,
@@ -313,9 +332,14 @@ module OpenAI
313
332
  # transcript. Setting the output to mode `text` will disable audio output from the
314
333
  # model.
315
334
  output_modalities: nil,
335
+ # Whether the model may call multiple tools in parallel. Only supported by
336
+ # reasoning Realtime models such as `gpt-realtime-2`.
337
+ parallel_tool_calls: nil,
316
338
  # Reference to a prompt template and its variables.
317
339
  # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
318
340
  prompt: nil,
341
+ # Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
342
+ reasoning: nil,
319
343
  # How the model chooses tools. Provide one of the string modes or force a specific
320
344
  # function/MCP tool.
321
345
  tool_choice: nil,
@@ -354,7 +378,9 @@ module OpenAI
354
378
  T::Array[
355
379
  OpenAI::Realtime::RealtimeResponseCreateParams::OutputModality::OrSymbol
356
380
  ],
381
+ parallel_tool_calls: T::Boolean,
357
382
  prompt: T.nilable(OpenAI::Responses::ResponsePrompt),
383
+ reasoning: OpenAI::Realtime::RealtimeReasoning,
358
384
  tool_choice:
359
385
  T.any(
360
386
  OpenAI::Responses::ToolChoiceOptions::OrSymbol,
@@ -274,6 +274,9 @@ module OpenAI
274
274
  # trails off with "uhhm", the model will score a low probability of turn end and
275
275
  # wait longer for the user to continue speaking. This can be useful for more
276
276
  # natural conversations, but may have a higher latency.
277
+ #
278
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
279
+ # `null`; VAD is not supported.
277
280
  sig do
278
281
  returns(
279
282
  T.nilable(
@@ -442,6 +445,9 @@ module OpenAI
442
445
  # trails off with "uhhm", the model will score a low probability of turn end and
443
446
  # wait longer for the user to continue speaking. This can be useful for more
444
447
  # natural conversations, but may have a higher latency.
448
+ #
449
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
450
+ # `null`; VAD is not supported.
445
451
  turn_detection: nil,
446
452
  # The voice the model uses to respond. Voice cannot be changed during the session
447
453
  # once the model has responded with audio at least once. Current voice options are
@@ -948,6 +954,9 @@ module OpenAI
948
954
  # trails off with "uhhm", the model will score a low probability of turn end and
949
955
  # wait longer for the user to continue speaking. This can be useful for more
950
956
  # natural conversations, but may have a higher latency.
957
+ #
958
+ # For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
959
+ # `null`; VAD is not supported.
951
960
  module TurnDetection
952
961
  extend OpenAI::Internal::Type::Union
953
962