openai 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +1 -1
  4. data/lib/openai/client.rb +4 -0
  5. data/lib/openai/internal/stream.rb +3 -2
  6. data/lib/openai/models/audio/speech_create_params.rb +6 -0
  7. data/lib/openai/models/chat/chat_completion_audio_param.rb +6 -0
  8. data/lib/openai/models/evals/run_cancel_response.rb +2 -2
  9. data/lib/openai/models/evals/run_create_params.rb +2 -2
  10. data/lib/openai/models/evals/run_create_response.rb +2 -2
  11. data/lib/openai/models/evals/run_list_response.rb +2 -2
  12. data/lib/openai/models/evals/run_retrieve_response.rb +2 -2
  13. data/lib/openai/models/realtime/client_secret_create_params.rb +93 -0
  14. data/lib/openai/models/realtime/client_secret_create_response.rb +300 -0
  15. data/lib/openai/models/realtime/conversation_created_event.rb +70 -0
  16. data/lib/openai/models/realtime/conversation_item.rb +44 -0
  17. data/lib/openai/models/realtime/conversation_item_added.rb +48 -0
  18. data/lib/openai/models/realtime/conversation_item_create_event.rb +57 -0
  19. data/lib/openai/models/realtime/conversation_item_created_event.rb +59 -0
  20. data/lib/openai/models/realtime/conversation_item_delete_event.rb +39 -0
  21. data/lib/openai/models/realtime/conversation_item_deleted_event.rb +38 -0
  22. data/lib/openai/models/realtime/conversation_item_done.rb +48 -0
  23. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +189 -0
  24. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +63 -0
  25. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rb +96 -0
  26. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_segment.rb +84 -0
  27. data/lib/openai/models/realtime/conversation_item_retrieve_event.rb +40 -0
  28. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +68 -0
  29. data/lib/openai/models/realtime/conversation_item_truncated_event.rb +60 -0
  30. data/lib/openai/models/realtime/conversation_item_with_reference.rb +235 -0
  31. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +49 -0
  32. data/lib/openai/models/realtime/input_audio_buffer_clear_event.rb +29 -0
  33. data/lib/openai/models/realtime/input_audio_buffer_cleared_event.rb +29 -0
  34. data/lib/openai/models/realtime/input_audio_buffer_commit_event.rb +35 -0
  35. data/lib/openai/models/realtime/input_audio_buffer_committed_event.rb +51 -0
  36. data/lib/openai/models/realtime/input_audio_buffer_speech_started_event.rb +59 -0
  37. data/lib/openai/models/realtime/input_audio_buffer_speech_stopped_event.rb +51 -0
  38. data/lib/openai/models/realtime/input_audio_buffer_timeout_triggered.rb +52 -0
  39. data/lib/openai/models/realtime/log_prob_properties.rb +39 -0
  40. data/lib/openai/models/realtime/mcp_list_tools_completed.rb +36 -0
  41. data/lib/openai/models/realtime/mcp_list_tools_failed.rb +36 -0
  42. data/lib/openai/models/realtime/mcp_list_tools_in_progress.rb +36 -0
  43. data/lib/openai/models/realtime/output_audio_buffer_clear_event.rb +32 -0
  44. data/lib/openai/models/realtime/rate_limits_updated_event.rb +91 -0
  45. data/lib/openai/models/realtime/realtime_audio_config.rb +446 -0
  46. data/lib/openai/models/realtime/realtime_client_event.rb +123 -0
  47. data/lib/openai/models/realtime/realtime_client_secret_config.rb +64 -0
  48. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +118 -0
  49. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +94 -0
  50. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +86 -0
  51. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +118 -0
  52. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +135 -0
  53. data/lib/openai/models/realtime/realtime_error.rb +55 -0
  54. data/lib/openai/models/realtime/realtime_error_event.rb +38 -0
  55. data/lib/openai/models/realtime/realtime_mcp_approval_request.rb +52 -0
  56. data/lib/openai/models/realtime/realtime_mcp_approval_response.rb +52 -0
  57. data/lib/openai/models/realtime/realtime_mcp_list_tools.rb +84 -0
  58. data/lib/openai/models/realtime/realtime_mcp_protocol_error.rb +29 -0
  59. data/lib/openai/models/realtime/realtime_mcp_tool_call.rb +94 -0
  60. data/lib/openai/models/realtime/realtime_mcp_tool_execution_error.rb +23 -0
  61. data/lib/openai/models/realtime/realtime_mcphttp_error.rb +29 -0
  62. data/lib/openai/models/realtime/realtime_response.rb +259 -0
  63. data/lib/openai/models/realtime/realtime_response_status.rb +103 -0
  64. data/lib/openai/models/realtime/realtime_response_usage.rb +61 -0
  65. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +36 -0
  66. data/lib/openai/models/realtime/realtime_response_usage_output_token_details.rb +28 -0
  67. data/lib/openai/models/realtime/realtime_server_event.rb +369 -0
  68. data/lib/openai/models/realtime/realtime_session.rb +696 -0
  69. data/lib/openai/models/realtime/realtime_session_create_request.rb +234 -0
  70. data/lib/openai/models/realtime/realtime_session_create_response.rb +579 -0
  71. data/lib/openai/models/realtime/realtime_tool_choice_config.rb +32 -0
  72. data/lib/openai/models/realtime/realtime_tools_config.rb +11 -0
  73. data/lib/openai/models/realtime/realtime_tools_config_union.rb +379 -0
  74. data/lib/openai/models/realtime/realtime_tracing_config.rb +61 -0
  75. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +312 -0
  76. data/lib/openai/models/realtime/realtime_truncation.rb +67 -0
  77. data/lib/openai/models/realtime/response_audio_delta_event.rb +68 -0
  78. data/lib/openai/models/realtime/response_audio_done_event.rb +61 -0
  79. data/lib/openai/models/realtime/response_audio_transcript_delta_event.rb +68 -0
  80. data/lib/openai/models/realtime/response_audio_transcript_done_event.rb +70 -0
  81. data/lib/openai/models/realtime/response_cancel_event.rb +42 -0
  82. data/lib/openai/models/realtime/response_content_part_added_event.rb +120 -0
  83. data/lib/openai/models/realtime/response_content_part_done_event.rb +120 -0
  84. data/lib/openai/models/realtime/response_create_event.rb +391 -0
  85. data/lib/openai/models/realtime/response_created_event.rb +37 -0
  86. data/lib/openai/models/realtime/response_done_event.rb +38 -0
  87. data/lib/openai/models/realtime/response_function_call_arguments_delta_event.rb +72 -0
  88. data/lib/openai/models/realtime/response_function_call_arguments_done_event.rb +73 -0
  89. data/lib/openai/models/realtime/response_mcp_call_arguments_delta.rb +68 -0
  90. data/lib/openai/models/realtime/response_mcp_call_arguments_done.rb +60 -0
  91. data/lib/openai/models/realtime/response_mcp_call_completed.rb +44 -0
  92. data/lib/openai/models/realtime/response_mcp_call_failed.rb +44 -0
  93. data/lib/openai/models/realtime/response_mcp_call_in_progress.rb +44 -0
  94. data/lib/openai/models/realtime/response_output_item_added_event.rb +52 -0
  95. data/lib/openai/models/realtime/response_output_item_done_event.rb +53 -0
  96. data/lib/openai/models/realtime/response_text_delta_event.rb +68 -0
  97. data/lib/openai/models/realtime/response_text_done_event.rb +69 -0
  98. data/lib/openai/models/realtime/session_created_event.rb +38 -0
  99. data/lib/openai/models/realtime/session_update_event.rb +44 -0
  100. data/lib/openai/models/realtime/session_updated_event.rb +37 -0
  101. data/lib/openai/models/realtime/transcription_session_created.rb +278 -0
  102. data/lib/openai/models/realtime/transcription_session_update.rb +36 -0
  103. data/lib/openai/models/realtime/transcription_session_updated_event.rb +279 -0
  104. data/lib/openai/models/responses/response.rb +6 -3
  105. data/lib/openai/models/responses/response_create_params.rb +6 -3
  106. data/lib/openai/models/responses/tool.rb +3 -156
  107. data/lib/openai/models/responses/web_search_preview_tool.rb +124 -0
  108. data/lib/openai/models/responses/web_search_tool.rb +58 -21
  109. data/lib/openai/models/webhooks/realtime_call_incoming_webhook_event.rb +119 -0
  110. data/lib/openai/models/webhooks/unwrap_webhook_event.rb +4 -1
  111. data/lib/openai/models.rb +2 -0
  112. data/lib/openai/resources/realtime/client_secrets.rb +44 -0
  113. data/lib/openai/resources/realtime.rb +18 -0
  114. data/lib/openai/resources/responses.rb +2 -2
  115. data/lib/openai/resources/webhooks.rb +1 -1
  116. data/lib/openai/version.rb +1 -1
  117. data/lib/openai.rb +95 -0
  118. data/rbi/openai/client.rbi +3 -0
  119. data/rbi/openai/models/audio/speech_create_params.rbi +10 -0
  120. data/rbi/openai/models/chat/chat_completion_audio_param.rbi +10 -0
  121. data/rbi/openai/models/evals/run_cancel_response.rbi +4 -4
  122. data/rbi/openai/models/evals/run_create_params.rbi +8 -8
  123. data/rbi/openai/models/evals/run_create_response.rbi +4 -4
  124. data/rbi/openai/models/evals/run_list_response.rbi +4 -4
  125. data/rbi/openai/models/evals/run_retrieve_response.rbi +4 -4
  126. data/rbi/openai/models/realtime/client_secret_create_params.rbi +222 -0
  127. data/rbi/openai/models/realtime/client_secret_create_response.rbi +676 -0
  128. data/rbi/openai/models/realtime/conversation_created_event.rbi +164 -0
  129. data/rbi/openai/models/realtime/conversation_item.rbi +35 -0
  130. data/rbi/openai/models/realtime/conversation_item_added.rbi +105 -0
  131. data/rbi/openai/models/realtime/conversation_item_create_event.rbi +123 -0
  132. data/rbi/openai/models/realtime/conversation_item_created_event.rbi +117 -0
  133. data/rbi/openai/models/realtime/conversation_item_delete_event.rbi +57 -0
  134. data/rbi/openai/models/realtime/conversation_item_deleted_event.rbi +53 -0
  135. data/rbi/openai/models/realtime/conversation_item_done.rbi +105 -0
  136. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +305 -0
  137. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +93 -0
  138. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rbi +158 -0
  139. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_segment.rbi +107 -0
  140. data/rbi/openai/models/realtime/conversation_item_retrieve_event.rbi +58 -0
  141. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +94 -0
  142. data/rbi/openai/models/realtime/conversation_item_truncated_event.rbi +80 -0
  143. data/rbi/openai/models/realtime/conversation_item_with_reference.rbi +549 -0
  144. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +65 -0
  145. data/rbi/openai/models/realtime/input_audio_buffer_clear_event.rbi +43 -0
  146. data/rbi/openai/models/realtime/input_audio_buffer_cleared_event.rbi +40 -0
  147. data/rbi/openai/models/realtime/input_audio_buffer_commit_event.rbi +49 -0
  148. data/rbi/openai/models/realtime/input_audio_buffer_committed_event.rbi +72 -0
  149. data/rbi/openai/models/realtime/input_audio_buffer_speech_started_event.rbi +82 -0
  150. data/rbi/openai/models/realtime/input_audio_buffer_speech_stopped_event.rbi +73 -0
  151. data/rbi/openai/models/realtime/input_audio_buffer_timeout_triggered.rbi +75 -0
  152. data/rbi/openai/models/realtime/log_prob_properties.rbi +55 -0
  153. data/rbi/openai/models/realtime/mcp_list_tools_completed.rbi +51 -0
  154. data/rbi/openai/models/realtime/mcp_list_tools_failed.rbi +51 -0
  155. data/rbi/openai/models/realtime/mcp_list_tools_in_progress.rbi +51 -0
  156. data/rbi/openai/models/realtime/output_audio_buffer_clear_event.rbi +46 -0
  157. data/rbi/openai/models/realtime/rate_limits_updated_event.rbi +187 -0
  158. data/rbi/openai/models/realtime/realtime_audio_config.rbi +1004 -0
  159. data/rbi/openai/models/realtime/realtime_client_event.rbi +38 -0
  160. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +147 -0
  161. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +292 -0
  162. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +199 -0
  163. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +188 -0
  164. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +292 -0
  165. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +319 -0
  166. data/rbi/openai/models/realtime/realtime_error.rbi +72 -0
  167. data/rbi/openai/models/realtime/realtime_error_event.rbi +64 -0
  168. data/rbi/openai/models/realtime/realtime_mcp_approval_request.rbi +75 -0
  169. data/rbi/openai/models/realtime/realtime_mcp_approval_response.rbi +75 -0
  170. data/rbi/openai/models/realtime/realtime_mcp_list_tools.rbi +131 -0
  171. data/rbi/openai/models/realtime/realtime_mcp_protocol_error.rbi +40 -0
  172. data/rbi/openai/models/realtime/realtime_mcp_tool_call.rbi +145 -0
  173. data/rbi/openai/models/realtime/realtime_mcp_tool_execution_error.rbi +31 -0
  174. data/rbi/openai/models/realtime/realtime_mcphttp_error.rbi +40 -0
  175. data/rbi/openai/models/realtime/realtime_response.rbi +573 -0
  176. data/rbi/openai/models/realtime/realtime_response_status.rbi +233 -0
  177. data/rbi/openai/models/realtime/realtime_response_usage.rbi +121 -0
  178. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +68 -0
  179. data/rbi/openai/models/realtime/realtime_response_usage_output_token_details.rbi +51 -0
  180. data/rbi/openai/models/realtime/realtime_server_event.rbi +311 -0
  181. data/rbi/openai/models/realtime/realtime_session.rbi +1426 -0
  182. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +560 -0
  183. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1249 -0
  184. data/rbi/openai/models/realtime/realtime_tool_choice_config.rbi +30 -0
  185. data/rbi/openai/models/realtime/realtime_tools_config.rbi +15 -0
  186. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +755 -0
  187. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +95 -0
  188. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +703 -0
  189. data/rbi/openai/models/realtime/realtime_truncation.rbi +117 -0
  190. data/rbi/openai/models/realtime/response_audio_delta_event.rbi +91 -0
  191. data/rbi/openai/models/realtime/response_audio_done_event.rbi +84 -0
  192. data/rbi/openai/models/realtime/response_audio_transcript_delta_event.rbi +91 -0
  193. data/rbi/openai/models/realtime/response_audio_transcript_done_event.rbi +93 -0
  194. data/rbi/openai/models/realtime/response_cancel_event.rbi +63 -0
  195. data/rbi/openai/models/realtime/response_content_part_added_event.rbi +219 -0
  196. data/rbi/openai/models/realtime/response_content_part_done_event.rbi +219 -0
  197. data/rbi/openai/models/realtime/response_create_event.rbi +863 -0
  198. data/rbi/openai/models/realtime/response_created_event.rbi +65 -0
  199. data/rbi/openai/models/realtime/response_done_event.rbi +66 -0
  200. data/rbi/openai/models/realtime/response_function_call_arguments_delta_event.rbi +91 -0
  201. data/rbi/openai/models/realtime/response_function_call_arguments_done_event.rbi +92 -0
  202. data/rbi/openai/models/realtime/response_mcp_call_arguments_delta.rbi +91 -0
  203. data/rbi/openai/models/realtime/response_mcp_call_arguments_done.rbi +83 -0
  204. data/rbi/openai/models/realtime/response_mcp_call_completed.rbi +67 -0
  205. data/rbi/openai/models/realtime/response_mcp_call_failed.rbi +67 -0
  206. data/rbi/openai/models/realtime/response_mcp_call_in_progress.rbi +67 -0
  207. data/rbi/openai/models/realtime/response_output_item_added_event.rbi +111 -0
  208. data/rbi/openai/models/realtime/response_output_item_done_event.rbi +112 -0
  209. data/rbi/openai/models/realtime/response_text_delta_event.rbi +91 -0
  210. data/rbi/openai/models/realtime/response_text_done_event.rbi +92 -0
  211. data/rbi/openai/models/realtime/session_created_event.rbi +64 -0
  212. data/rbi/openai/models/realtime/session_update_event.rbi +77 -0
  213. data/rbi/openai/models/realtime/session_updated_event.rbi +63 -0
  214. data/rbi/openai/models/realtime/transcription_session_created.rbi +653 -0
  215. data/rbi/openai/models/realtime/transcription_session_update.rbi +74 -0
  216. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +657 -0
  217. data/rbi/openai/models/responses/response.rbi +10 -4
  218. data/rbi/openai/models/responses/response_create_params.rbi +16 -10
  219. data/rbi/openai/models/responses/tool.rbi +2 -348
  220. data/rbi/openai/models/responses/web_search_preview_tool.rbi +245 -0
  221. data/rbi/openai/models/responses/web_search_tool.rbi +120 -23
  222. data/rbi/openai/models/webhooks/realtime_call_incoming_webhook_event.rbi +222 -0
  223. data/rbi/openai/models/webhooks/unwrap_webhook_event.rbi +1 -0
  224. data/rbi/openai/models.rbi +2 -0
  225. data/rbi/openai/resources/realtime/client_secrets.rbi +38 -0
  226. data/rbi/openai/resources/realtime.rbi +15 -0
  227. data/rbi/openai/resources/responses.rbi +12 -6
  228. data/rbi/openai/resources/webhooks.rbi +1 -0
  229. data/sig/openai/client.rbs +2 -0
  230. data/sig/openai/models/audio/speech_create_params.rbs +4 -0
  231. data/sig/openai/models/chat/chat_completion_audio_param.rbs +4 -0
  232. data/sig/openai/models/realtime/client_secret_create_params.rbs +89 -0
  233. data/sig/openai/models/realtime/client_secret_create_response.rbs +292 -0
  234. data/sig/openai/models/realtime/conversation_created_event.rbs +70 -0
  235. data/sig/openai/models/realtime/conversation_item.rbs +22 -0
  236. data/sig/openai/models/realtime/conversation_item_added.rbs +37 -0
  237. data/sig/openai/models/realtime/conversation_item_create_event.rbs +41 -0
  238. data/sig/openai/models/realtime/conversation_item_created_event.rbs +37 -0
  239. data/sig/openai/models/realtime/conversation_item_delete_event.rbs +30 -0
  240. data/sig/openai/models/realtime/conversation_item_deleted_event.rbs +32 -0
  241. data/sig/openai/models/realtime/conversation_item_done.rbs +37 -0
  242. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbs +136 -0
  243. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbs +51 -0
  244. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rbs +77 -0
  245. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_segment.rbs +62 -0
  246. data/sig/openai/models/realtime/conversation_item_retrieve_event.rbs +34 -0
  247. data/sig/openai/models/realtime/conversation_item_truncate_event.rbs +44 -0
  248. data/sig/openai/models/realtime/conversation_item_truncated_event.rbs +42 -0
  249. data/sig/openai/models/realtime/conversation_item_with_reference.rbs +207 -0
  250. data/sig/openai/models/realtime/input_audio_buffer_append_event.rbs +30 -0
  251. data/sig/openai/models/realtime/input_audio_buffer_clear_event.rbs +23 -0
  252. data/sig/openai/models/realtime/input_audio_buffer_cleared_event.rbs +24 -0
  253. data/sig/openai/models/realtime/input_audio_buffer_commit_event.rbs +23 -0
  254. data/sig/openai/models/realtime/input_audio_buffer_committed_event.rbs +37 -0
  255. data/sig/openai/models/realtime/input_audio_buffer_speech_started_event.rbs +37 -0
  256. data/sig/openai/models/realtime/input_audio_buffer_speech_stopped_event.rbs +37 -0
  257. data/sig/openai/models/realtime/input_audio_buffer_timeout_triggered.rbs +42 -0
  258. data/sig/openai/models/realtime/log_prob_properties.rbs +28 -0
  259. data/sig/openai/models/realtime/mcp_list_tools_completed.rbs +28 -0
  260. data/sig/openai/models/realtime/mcp_list_tools_failed.rbs +28 -0
  261. data/sig/openai/models/realtime/mcp_list_tools_in_progress.rbs +32 -0
  262. data/sig/openai/models/realtime/output_audio_buffer_clear_event.rbs +23 -0
  263. data/sig/openai/models/realtime/rate_limits_updated_event.rbs +85 -0
  264. data/sig/openai/models/realtime/realtime_audio_config.rbs +354 -0
  265. data/sig/openai/models/realtime/realtime_client_event.rbs +25 -0
  266. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +60 -0
  267. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +117 -0
  268. data/sig/openai/models/realtime/realtime_conversation_item_function_call.rbs +86 -0
  269. data/sig/openai/models/realtime/realtime_conversation_item_function_call_output.rbs +79 -0
  270. data/sig/openai/models/realtime/realtime_conversation_item_system_message.rbs +117 -0
  271. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +132 -0
  272. data/sig/openai/models/realtime/realtime_error.rbs +42 -0
  273. data/sig/openai/models/realtime/realtime_error_event.rbs +32 -0
  274. data/sig/openai/models/realtime/realtime_mcp_approval_request.rbs +42 -0
  275. data/sig/openai/models/realtime/realtime_mcp_approval_response.rbs +42 -0
  276. data/sig/openai/models/realtime/realtime_mcp_list_tools.rbs +71 -0
  277. data/sig/openai/models/realtime/realtime_mcp_protocol_error.rbs +28 -0
  278. data/sig/openai/models/realtime/realtime_mcp_tool_call.rbs +68 -0
  279. data/sig/openai/models/realtime/realtime_mcp_tool_execution_error.rbs +18 -0
  280. data/sig/openai/models/realtime/realtime_mcphttp_error.rbs +24 -0
  281. data/sig/openai/models/realtime/realtime_response.rbs +210 -0
  282. data/sig/openai/models/realtime/realtime_response_status.rbs +90 -0
  283. data/sig/openai/models/realtime/realtime_response_usage.rbs +56 -0
  284. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +34 -0
  285. data/sig/openai/models/realtime/realtime_response_usage_output_token_details.rbs +22 -0
  286. data/sig/openai/models/realtime/realtime_server_event.rbs +168 -0
  287. data/sig/openai/models/realtime/realtime_session.rbs +521 -0
  288. data/sig/openai/models/realtime/realtime_session_create_request.rbs +178 -0
  289. data/sig/openai/models/realtime/realtime_session_create_response.rbs +526 -0
  290. data/sig/openai/models/realtime/realtime_tool_choice_config.rbs +16 -0
  291. data/sig/openai/models/realtime/realtime_tools_config.rbs +10 -0
  292. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +280 -0
  293. data/sig/openai/models/realtime/realtime_tracing_config.rbs +43 -0
  294. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +242 -0
  295. data/sig/openai/models/realtime/realtime_truncation.rbs +53 -0
  296. data/sig/openai/models/realtime/response_audio_delta_event.rbs +52 -0
  297. data/sig/openai/models/realtime/response_audio_done_event.rbs +47 -0
  298. data/sig/openai/models/realtime/response_audio_transcript_delta_event.rbs +52 -0
  299. data/sig/openai/models/realtime/response_audio_transcript_done_event.rbs +52 -0
  300. data/sig/openai/models/realtime/response_cancel_event.rbs +32 -0
  301. data/sig/openai/models/realtime/response_content_part_added_event.rbs +105 -0
  302. data/sig/openai/models/realtime/response_content_part_done_event.rbs +105 -0
  303. data/sig/openai/models/realtime/response_create_event.rbs +281 -0
  304. data/sig/openai/models/realtime/response_created_event.rbs +32 -0
  305. data/sig/openai/models/realtime/response_done_event.rbs +32 -0
  306. data/sig/openai/models/realtime/response_function_call_arguments_delta_event.rbs +52 -0
  307. data/sig/openai/models/realtime/response_function_call_arguments_done_event.rbs +52 -0
  308. data/sig/openai/models/realtime/response_mcp_call_arguments_delta.rbs +52 -0
  309. data/sig/openai/models/realtime/response_mcp_call_arguments_done.rbs +47 -0
  310. data/sig/openai/models/realtime/response_mcp_call_completed.rbs +37 -0
  311. data/sig/openai/models/realtime/response_mcp_call_failed.rbs +37 -0
  312. data/sig/openai/models/realtime/response_mcp_call_in_progress.rbs +37 -0
  313. data/sig/openai/models/realtime/response_output_item_added_event.rbs +42 -0
  314. data/sig/openai/models/realtime/response_output_item_done_event.rbs +42 -0
  315. data/sig/openai/models/realtime/response_text_delta_event.rbs +52 -0
  316. data/sig/openai/models/realtime/response_text_done_event.rbs +52 -0
  317. data/sig/openai/models/realtime/session_created_event.rbs +32 -0
  318. data/sig/openai/models/realtime/session_update_event.rbs +34 -0
  319. data/sig/openai/models/realtime/session_updated_event.rbs +32 -0
  320. data/sig/openai/models/realtime/transcription_session_created.rbs +282 -0
  321. data/sig/openai/models/realtime/transcription_session_update.rbs +34 -0
  322. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +282 -0
  323. data/sig/openai/models/responses/tool.rbs +1 -121
  324. data/sig/openai/models/responses/web_search_preview_tool.rbs +96 -0
  325. data/sig/openai/models/responses/web_search_tool.rbs +39 -10
  326. data/sig/openai/models/webhooks/realtime_call_incoming_webhook_event.rbs +90 -0
  327. data/sig/openai/models/webhooks/unwrap_webhook_event.rbs +1 -0
  328. data/sig/openai/models.rbs +2 -0
  329. data/sig/openai/resources/realtime/client_secrets.rbs +15 -0
  330. data/sig/openai/resources/realtime.rbs +9 -0
  331. data/sig/openai/resources/webhooks.rbs +1 -0
  332. metadata +287 -2
@@ -0,0 +1,703 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranscriptionSessionCreateRequest < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(
10
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest,
11
+ OpenAI::Internal::AnyHash
12
+ )
13
+ end
14
+
15
+ # ID of the model to use. The options are `gpt-4o-transcribe`,
16
+ # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
17
+ # Whisper V2 model).
18
+ sig do
19
+ returns(
20
+ T.any(
21
+ String,
22
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol
23
+ )
24
+ )
25
+ end
26
+ attr_accessor :model
27
+
28
+ # The type of session to create. Always `transcription` for transcription
29
+ # sessions.
30
+ sig { returns(Symbol) }
31
+ attr_accessor :type
32
+
33
+ # The set of items to include in the transcription. Current available items are:
34
+ #
35
+ # - `item.input_audio_transcription.logprobs`
36
+ sig do
37
+ returns(
38
+ T.nilable(
39
+ T::Array[
40
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol
41
+ ]
42
+ )
43
+ )
44
+ end
45
+ attr_reader :include
46
+
47
+ sig do
48
+ params(
49
+ include:
50
+ T::Array[
51
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol
52
+ ]
53
+ ).void
54
+ end
55
+ attr_writer :include
56
+
57
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
58
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
59
+ # (mono), and little-endian byte order.
60
+ sig do
61
+ returns(
62
+ T.nilable(
63
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol
64
+ )
65
+ )
66
+ end
67
+ attr_reader :input_audio_format
68
+
69
+ sig do
70
+ params(
71
+ input_audio_format:
72
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol
73
+ ).void
74
+ end
75
+ attr_writer :input_audio_format
76
+
77
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
78
+ # off. Noise reduction filters audio added to the input audio buffer before it is
79
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
80
+ # detection accuracy (reducing false positives) and model performance by improving
81
+ # perception of the input audio.
82
+ sig do
83
+ returns(
84
+ T.nilable(
85
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction
86
+ )
87
+ )
88
+ end
89
+ attr_reader :input_audio_noise_reduction
90
+
91
+ sig do
92
+ params(
93
+ input_audio_noise_reduction:
94
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::OrHash
95
+ ).void
96
+ end
97
+ attr_writer :input_audio_noise_reduction
98
+
99
+ # Configuration for input audio transcription. The client can optionally set the
100
+ # language and prompt for transcription, these offer additional guidance to the
101
+ # transcription service.
102
+ sig do
103
+ returns(
104
+ T.nilable(
105
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription
106
+ )
107
+ )
108
+ end
109
+ attr_reader :input_audio_transcription
110
+
111
+ sig do
112
+ params(
113
+ input_audio_transcription:
114
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::OrHash
115
+ ).void
116
+ end
117
+ attr_writer :input_audio_transcription
118
+
119
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
120
+ # means that the model will detect the start and end of speech based on audio
121
+ # volume and respond at the end of user speech.
122
+ sig do
123
+ returns(
124
+ T.nilable(
125
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection
126
+ )
127
+ )
128
+ end
129
+ attr_reader :turn_detection
130
+
131
+ sig do
132
+ params(
133
+ turn_detection:
134
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::OrHash
135
+ ).void
136
+ end
137
+ attr_writer :turn_detection
138
+
139
+ # Realtime transcription session object configuration.
+ # NOTE(review): generated Sorbet RBI stub — the `def` body below is
+ # intentionally empty; this file declares types only. `OrSymbol` params
+ # accept either a Symbol or a String (see the aliases declared below).
140
+ sig do
141
+ params(
142
+ model:
143
+ T.any(
144
+ String,
145
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol
146
+ ),
147
+ include:
148
+ T::Array[
149
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol
150
+ ],
151
+ input_audio_format:
152
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol,
153
+ input_audio_noise_reduction:
154
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::OrHash,
155
+ input_audio_transcription:
156
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::OrHash,
157
+ turn_detection:
158
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::OrHash,
159
+ type: Symbol
160
+ ).returns(T.attached_class)
161
+ end
162
+ def self.new(
163
+ # ID of the model to use. The options are `gpt-4o-transcribe`,
164
+ # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
165
+ # Whisper V2 model).
166
+ model:,
167
+ # The set of items to include in the transcription. Current available items are:
168
+ #
169
+ # - `item.input_audio_transcription.logprobs`
170
+ include: nil,
171
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
172
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
173
+ # (mono), and little-endian byte order.
174
+ input_audio_format: nil,
175
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
176
+ # off. Noise reduction filters audio added to the input audio buffer before it is
177
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
178
+ # detection accuracy (reducing false positives) and model performance by improving
179
+ # perception of the input audio.
180
+ input_audio_noise_reduction: nil,
181
+ # Configuration for input audio transcription. The client can optionally set the
182
+ # language and prompt for transcription, these offer additional guidance to the
183
+ # transcription service.
184
+ input_audio_transcription: nil,
185
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
186
+ # means that the model will detect the start and end of speech based on audio
187
+ # volume and respond at the end of user speech.
188
+ turn_detection: nil,
189
+ # The type of session to create. Always `transcription` for transcription
190
+ # sessions.
191
+ type: :transcription
192
+ )
193
+ end
194
+
195
+ # NOTE(review): typed as returning the full attribute hash whose shape is
+ # declared in the `override.returns` below; the body is an empty RBI stub.
+ sig do
196
+ override.returns(
197
+ {
198
+ model:
199
+ T.any(
200
+ String,
201
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::OrSymbol
202
+ ),
203
+ type: Symbol,
204
+ include:
205
+ T::Array[
206
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::OrSymbol
207
+ ],
208
+ input_audio_format:
209
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::OrSymbol,
210
+ input_audio_noise_reduction:
211
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction,
212
+ input_audio_transcription:
213
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription,
214
+ turn_detection:
215
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection
216
+ }
217
+ )
218
+ end
219
+ def to_hash
220
+ end
221
+
222
+ # ID of the model to use. The options are `gpt-4o-transcribe`,
223
+ # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
224
+ # Whisper V2 model).
225
+ module Model
+ # NOTE(review): Sorbet union — a model is either a free-form String or one
+ # of the tagged symbols declared below; `OrSymbol` additionally admits
+ # plain Strings at call sites.
226
+ extend OpenAI::Internal::Type::Union
227
+
228
+ Variants =
229
+ T.type_alias do
230
+ T.any(
231
+ String,
232
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
233
+ )
234
+ end
235
+
236
+ sig do
237
+ override.returns(
238
+ T::Array[
239
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::Variants
240
+ ]
241
+ )
242
+ end
243
+ def self.variants
244
+ end
245
+
246
+ TaggedSymbol =
247
+ T.type_alias do
248
+ T.all(
249
+ Symbol,
250
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model
251
+ )
252
+ end
253
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
254
+
255
+ WHISPER_1 =
256
+ T.let(
257
+ :"whisper-1",
258
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
259
+ )
260
+ GPT_4O_TRANSCRIBE =
261
+ T.let(
262
+ :"gpt-4o-transcribe",
263
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
264
+ )
265
+ GPT_4O_MINI_TRANSCRIBE =
266
+ T.let(
267
+ :"gpt-4o-mini-transcribe",
268
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Model::TaggedSymbol
269
+ )
270
+ end
271
+
272
+ module Include
+ # NOTE(review): Sorbet enum with a single member today
+ # (`item.input_audio_transcription.logprobs`); `OrSymbol` accepts either a
+ # Symbol or a String.
273
+ extend OpenAI::Internal::Type::Enum
274
+
275
+ TaggedSymbol =
276
+ T.type_alias do
277
+ T.all(
278
+ Symbol,
279
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include
280
+ )
281
+ end
282
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
283
+
284
+ ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS =
285
+ T.let(
286
+ :"item.input_audio_transcription.logprobs",
287
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::TaggedSymbol
288
+ )
289
+
290
+ sig do
291
+ override.returns(
292
+ T::Array[
293
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::Include::TaggedSymbol
294
+ ]
295
+ )
296
+ end
297
+ def self.values
298
+ end
299
+ end
300
+
301
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
302
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
303
+ # (mono), and little-endian byte order.
304
+ module InputAudioFormat
+ # NOTE(review): Sorbet enum of the three supported wire formats declared
+ # below; `OrSymbol` accepts either a Symbol or a String.
305
+ extend OpenAI::Internal::Type::Enum
306
+
307
+ TaggedSymbol =
308
+ T.type_alias do
309
+ T.all(
310
+ Symbol,
311
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat
312
+ )
313
+ end
314
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
315
+
316
+ PCM16 =
317
+ T.let(
318
+ :pcm16,
319
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
320
+ )
321
+ G711_ULAW =
322
+ T.let(
323
+ :g711_ulaw,
324
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
325
+ )
326
+ G711_ALAW =
327
+ T.let(
328
+ :g711_alaw,
329
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
330
+ )
331
+
332
+ sig do
333
+ override.returns(
334
+ T::Array[
335
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioFormat::TaggedSymbol
336
+ ]
337
+ )
338
+ end
339
+ def self.values
340
+ end
341
+ end
342
+
343
+ class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel
+ # NOTE(review): `OrHash` lets callers pass a plain Hash anywhere an
+ # instance of this model is expected (see the T.any alias below).
344
+ OrHash =
345
+ T.type_alias do
346
+ T.any(
347
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction,
348
+ OpenAI::Internal::AnyHash
349
+ )
350
+ end
351
+
352
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
353
+ # headphones, `far_field` is for far-field microphones such as laptop or
354
+ # conference room microphones.
355
+ sig do
356
+ returns(
357
+ T.nilable(
358
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
359
+ )
360
+ )
361
+ end
362
+ attr_reader :type
363
+
364
+ sig do
365
+ params(
366
+ type:
367
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
368
+ ).void
369
+ end
370
+ attr_writer :type
371
+
372
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
373
+ # off. Noise reduction filters audio added to the input audio buffer before it is
374
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
375
+ # detection accuracy (reducing false positives) and model performance by improving
376
+ # perception of the input audio.
377
+ sig do
378
+ params(
379
+ type:
380
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
381
+ ).returns(T.attached_class)
382
+ end
383
+ def self.new(
384
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
385
+ # headphones, `far_field` is for far-field microphones such as laptop or
386
+ # conference room microphones.
387
+ type: nil
388
+ )
389
+ end
390
+
391
+ sig do
392
+ override.returns(
393
+ {
394
+ type:
395
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::OrSymbol
396
+ }
397
+ )
398
+ end
399
+ def to_hash
400
+ end
401
+
402
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
403
+ # headphones, `far_field` is for far-field microphones such as laptop or
404
+ # conference room microphones.
405
+ module Type
406
+ extend OpenAI::Internal::Type::Enum
407
+
408
+ TaggedSymbol =
409
+ T.type_alias do
410
+ T.all(
411
+ Symbol,
412
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type
413
+ )
414
+ end
415
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
416
+
417
+ NEAR_FIELD =
418
+ T.let(
419
+ :near_field,
420
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol
421
+ )
422
+ FAR_FIELD =
423
+ T.let(
424
+ :far_field,
425
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol
426
+ )
427
+
428
+ sig do
429
+ override.returns(
430
+ T::Array[
431
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioNoiseReduction::Type::TaggedSymbol
432
+ ]
433
+ )
434
+ end
435
+ def self.values
436
+ end
437
+ end
438
+ end
439
+
440
+ class InputAudioTranscription < OpenAI::Internal::Type::BaseModel
+ # NOTE(review): `OrHash` lets callers pass a plain Hash anywhere an
+ # instance of this model is expected (see the T.any alias below).
441
+ OrHash =
442
+ T.type_alias do
443
+ T.any(
444
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription,
445
+ OpenAI::Internal::AnyHash
446
+ )
447
+ end
448
+
449
+ # The language of the input audio. Supplying the input language in
450
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
451
+ # format will improve accuracy and latency.
452
+ sig { returns(T.nilable(String)) }
453
+ attr_reader :language
454
+
455
+ sig { params(language: String).void }
456
+ attr_writer :language
457
+
458
+ # The model to use for transcription, current options are `gpt-4o-transcribe`,
459
+ # `gpt-4o-mini-transcribe`, and `whisper-1`.
460
+ sig do
461
+ returns(
462
+ T.nilable(
463
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol
464
+ )
465
+ )
466
+ end
467
+ attr_reader :model
468
+
469
+ sig do
470
+ params(
471
+ model:
472
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol
473
+ ).void
474
+ end
475
+ attr_writer :model
476
+
477
+ # An optional text to guide the model's style or continue a previous audio
478
+ # segment. For `whisper-1`, the
479
+ # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
480
+ # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
481
+ # "expect words related to technology".
482
+ sig { returns(T.nilable(String)) }
483
+ attr_reader :prompt
484
+
485
+ sig { params(prompt: String).void }
486
+ attr_writer :prompt
487
+
488
+ # Configuration for input audio transcription. The client can optionally set the
489
+ # language and prompt for transcription, these offer additional guidance to the
490
+ # transcription service.
491
+ sig do
492
+ params(
493
+ language: String,
494
+ model:
495
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol,
496
+ prompt: String
497
+ ).returns(T.attached_class)
498
+ end
499
+ def self.new(
500
+ # The language of the input audio. Supplying the input language in
501
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
502
+ # format will improve accuracy and latency.
503
+ language: nil,
504
+ # The model to use for transcription, current options are `gpt-4o-transcribe`,
505
+ # `gpt-4o-mini-transcribe`, and `whisper-1`.
506
+ model: nil,
507
+ # An optional text to guide the model's style or continue a previous audio
508
+ # segment. For `whisper-1`, the
509
+ # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
510
+ # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
511
+ # "expect words related to technology".
512
+ prompt: nil
513
+ )
514
+ end
515
+
516
+ sig do
517
+ override.returns(
518
+ {
519
+ language: String,
520
+ model:
521
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::OrSymbol,
522
+ prompt: String
523
+ }
524
+ )
525
+ end
526
+ def to_hash
527
+ end
528
+
529
+ # The model to use for transcription, current options are `gpt-4o-transcribe`,
530
+ # `gpt-4o-mini-transcribe`, and `whisper-1`.
531
+ module Model
532
+ extend OpenAI::Internal::Type::Enum
533
+
534
+ TaggedSymbol =
535
+ T.type_alias do
536
+ T.all(
537
+ Symbol,
538
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model
539
+ )
540
+ end
541
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
542
+
543
+ GPT_4O_TRANSCRIBE =
544
+ T.let(
545
+ :"gpt-4o-transcribe",
546
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
547
+ )
548
+ GPT_4O_MINI_TRANSCRIBE =
549
+ T.let(
550
+ :"gpt-4o-mini-transcribe",
551
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
552
+ )
553
+ WHISPER_1 =
554
+ T.let(
555
+ :"whisper-1",
556
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
557
+ )
558
+
559
+ sig do
560
+ override.returns(
561
+ T::Array[
562
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::InputAudioTranscription::Model::TaggedSymbol
563
+ ]
564
+ )
565
+ end
566
+ def self.values
567
+ end
568
+ end
569
+ end
570
+
571
+ class TurnDetection < OpenAI::Internal::Type::BaseModel
+ # NOTE(review): `OrHash` lets callers pass a plain Hash anywhere an
+ # instance of this model is expected (see the T.any alias below).
572
+ OrHash =
573
+ T.type_alias do
574
+ T.any(
575
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection,
576
+ OpenAI::Internal::AnyHash
577
+ )
578
+ end
579
+
580
+ # Amount of audio to include before the VAD detected speech (in milliseconds).
581
+ # Defaults to 300ms.
582
+ sig { returns(T.nilable(Integer)) }
583
+ attr_reader :prefix_padding_ms
584
+
585
+ sig { params(prefix_padding_ms: Integer).void }
586
+ attr_writer :prefix_padding_ms
587
+
588
+ # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
589
+ # With shorter values the model will respond more quickly, but may jump in on
590
+ # short pauses from the user.
591
+ sig { returns(T.nilable(Integer)) }
592
+ attr_reader :silence_duration_ms
593
+
594
+ sig { params(silence_duration_ms: Integer).void }
595
+ attr_writer :silence_duration_ms
596
+
597
+ # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
598
+ # threshold will require louder audio to activate the model, and thus might
599
+ # perform better in noisy environments.
600
+ sig { returns(T.nilable(Float)) }
601
+ attr_reader :threshold
602
+
603
+ sig { params(threshold: Float).void }
604
+ attr_writer :threshold
605
+
606
+ # Type of turn detection. Only `server_vad` is currently supported for
607
+ # transcription sessions.
608
+ sig do
609
+ returns(
610
+ T.nilable(
611
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
612
+ )
613
+ )
614
+ end
615
+ attr_reader :type
616
+
617
+ sig do
618
+ params(
619
+ type:
620
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
621
+ ).void
622
+ end
623
+ attr_writer :type
624
+
625
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
626
+ # means that the model will detect the start and end of speech based on audio
627
+ # volume and respond at the end of user speech.
628
+ sig do
629
+ params(
630
+ prefix_padding_ms: Integer,
631
+ silence_duration_ms: Integer,
632
+ threshold: Float,
633
+ type:
634
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
635
+ ).returns(T.attached_class)
636
+ end
637
+ def self.new(
638
+ # Amount of audio to include before the VAD detected speech (in milliseconds).
639
+ # Defaults to 300ms.
640
+ prefix_padding_ms: nil,
641
+ # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
642
+ # With shorter values the model will respond more quickly, but may jump in on
643
+ # short pauses from the user.
644
+ silence_duration_ms: nil,
645
+ # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
646
+ # threshold will require louder audio to activate the model, and thus might
647
+ # perform better in noisy environments.
648
+ threshold: nil,
649
+ # Type of turn detection. Only `server_vad` is currently supported for
650
+ # transcription sessions.
651
+ type: nil
652
+ )
653
+ end
654
+
655
+ sig do
656
+ override.returns(
657
+ {
658
+ prefix_padding_ms: Integer,
659
+ silence_duration_ms: Integer,
660
+ threshold: Float,
661
+ type:
662
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::OrSymbol
663
+ }
664
+ )
665
+ end
666
+ def to_hash
667
+ end
668
+
669
+ # Type of turn detection. Only `server_vad` is currently supported for
670
+ # transcription sessions.
671
+ module Type
672
+ extend OpenAI::Internal::Type::Enum
673
+
674
+ TaggedSymbol =
675
+ T.type_alias do
676
+ T.all(
677
+ Symbol,
678
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type
679
+ )
680
+ end
681
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
682
+
683
+ SERVER_VAD =
684
+ T.let(
685
+ :server_vad,
686
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::TaggedSymbol
687
+ )
688
+
689
+ sig do
690
+ override.returns(
691
+ T::Array[
692
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest::TurnDetection::Type::TaggedSymbol
693
+ ]
694
+ )
695
+ end
696
+ def self.values
697
+ end
698
+ end
699
+ end
700
+ end
701
+ end
702
+ end
703
+ end