openai 0.20.0 → 0.21.0

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (332)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +1 -1
  4. data/lib/openai/client.rb +4 -0
  5. data/lib/openai/internal/stream.rb +3 -2
  6. data/lib/openai/models/audio/speech_create_params.rb +6 -0
  7. data/lib/openai/models/chat/chat_completion_audio_param.rb +6 -0
  8. data/lib/openai/models/evals/run_cancel_response.rb +2 -2
  9. data/lib/openai/models/evals/run_create_params.rb +2 -2
  10. data/lib/openai/models/evals/run_create_response.rb +2 -2
  11. data/lib/openai/models/evals/run_list_response.rb +2 -2
  12. data/lib/openai/models/evals/run_retrieve_response.rb +2 -2
  13. data/lib/openai/models/realtime/client_secret_create_params.rb +93 -0
  14. data/lib/openai/models/realtime/client_secret_create_response.rb +300 -0
  15. data/lib/openai/models/realtime/conversation_created_event.rb +70 -0
  16. data/lib/openai/models/realtime/conversation_item.rb +44 -0
  17. data/lib/openai/models/realtime/conversation_item_added.rb +48 -0
  18. data/lib/openai/models/realtime/conversation_item_create_event.rb +57 -0
  19. data/lib/openai/models/realtime/conversation_item_created_event.rb +59 -0
  20. data/lib/openai/models/realtime/conversation_item_delete_event.rb +39 -0
  21. data/lib/openai/models/realtime/conversation_item_deleted_event.rb +38 -0
  22. data/lib/openai/models/realtime/conversation_item_done.rb +48 -0
  23. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +189 -0
  24. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +63 -0
  25. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rb +96 -0
  26. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_segment.rb +84 -0
  27. data/lib/openai/models/realtime/conversation_item_retrieve_event.rb +40 -0
  28. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +68 -0
  29. data/lib/openai/models/realtime/conversation_item_truncated_event.rb +60 -0
  30. data/lib/openai/models/realtime/conversation_item_with_reference.rb +235 -0
  31. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +49 -0
  32. data/lib/openai/models/realtime/input_audio_buffer_clear_event.rb +29 -0
  33. data/lib/openai/models/realtime/input_audio_buffer_cleared_event.rb +29 -0
  34. data/lib/openai/models/realtime/input_audio_buffer_commit_event.rb +35 -0
  35. data/lib/openai/models/realtime/input_audio_buffer_committed_event.rb +51 -0
  36. data/lib/openai/models/realtime/input_audio_buffer_speech_started_event.rb +59 -0
  37. data/lib/openai/models/realtime/input_audio_buffer_speech_stopped_event.rb +51 -0
  38. data/lib/openai/models/realtime/input_audio_buffer_timeout_triggered.rb +52 -0
  39. data/lib/openai/models/realtime/log_prob_properties.rb +39 -0
  40. data/lib/openai/models/realtime/mcp_list_tools_completed.rb +36 -0
  41. data/lib/openai/models/realtime/mcp_list_tools_failed.rb +36 -0
  42. data/lib/openai/models/realtime/mcp_list_tools_in_progress.rb +36 -0
  43. data/lib/openai/models/realtime/output_audio_buffer_clear_event.rb +32 -0
  44. data/lib/openai/models/realtime/rate_limits_updated_event.rb +91 -0
  45. data/lib/openai/models/realtime/realtime_audio_config.rb +446 -0
  46. data/lib/openai/models/realtime/realtime_client_event.rb +123 -0
  47. data/lib/openai/models/realtime/realtime_client_secret_config.rb +64 -0
  48. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +118 -0
  49. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +94 -0
  50. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +86 -0
  51. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +118 -0
  52. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +135 -0
  53. data/lib/openai/models/realtime/realtime_error.rb +55 -0
  54. data/lib/openai/models/realtime/realtime_error_event.rb +38 -0
  55. data/lib/openai/models/realtime/realtime_mcp_approval_request.rb +52 -0
  56. data/lib/openai/models/realtime/realtime_mcp_approval_response.rb +52 -0
  57. data/lib/openai/models/realtime/realtime_mcp_list_tools.rb +84 -0
  58. data/lib/openai/models/realtime/realtime_mcp_protocol_error.rb +29 -0
  59. data/lib/openai/models/realtime/realtime_mcp_tool_call.rb +94 -0
  60. data/lib/openai/models/realtime/realtime_mcp_tool_execution_error.rb +23 -0
  61. data/lib/openai/models/realtime/realtime_mcphttp_error.rb +29 -0
  62. data/lib/openai/models/realtime/realtime_response.rb +259 -0
  63. data/lib/openai/models/realtime/realtime_response_status.rb +103 -0
  64. data/lib/openai/models/realtime/realtime_response_usage.rb +61 -0
  65. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +36 -0
  66. data/lib/openai/models/realtime/realtime_response_usage_output_token_details.rb +28 -0
  67. data/lib/openai/models/realtime/realtime_server_event.rb +369 -0
  68. data/lib/openai/models/realtime/realtime_session.rb +696 -0
  69. data/lib/openai/models/realtime/realtime_session_create_request.rb +234 -0
  70. data/lib/openai/models/realtime/realtime_session_create_response.rb +579 -0
  71. data/lib/openai/models/realtime/realtime_tool_choice_config.rb +32 -0
  72. data/lib/openai/models/realtime/realtime_tools_config.rb +11 -0
  73. data/lib/openai/models/realtime/realtime_tools_config_union.rb +379 -0
  74. data/lib/openai/models/realtime/realtime_tracing_config.rb +61 -0
  75. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +312 -0
  76. data/lib/openai/models/realtime/realtime_truncation.rb +67 -0
  77. data/lib/openai/models/realtime/response_audio_delta_event.rb +68 -0
  78. data/lib/openai/models/realtime/response_audio_done_event.rb +61 -0
  79. data/lib/openai/models/realtime/response_audio_transcript_delta_event.rb +68 -0
  80. data/lib/openai/models/realtime/response_audio_transcript_done_event.rb +70 -0
  81. data/lib/openai/models/realtime/response_cancel_event.rb +42 -0
  82. data/lib/openai/models/realtime/response_content_part_added_event.rb +120 -0
  83. data/lib/openai/models/realtime/response_content_part_done_event.rb +120 -0
  84. data/lib/openai/models/realtime/response_create_event.rb +391 -0
  85. data/lib/openai/models/realtime/response_created_event.rb +37 -0
  86. data/lib/openai/models/realtime/response_done_event.rb +38 -0
  87. data/lib/openai/models/realtime/response_function_call_arguments_delta_event.rb +72 -0
  88. data/lib/openai/models/realtime/response_function_call_arguments_done_event.rb +73 -0
  89. data/lib/openai/models/realtime/response_mcp_call_arguments_delta.rb +68 -0
  90. data/lib/openai/models/realtime/response_mcp_call_arguments_done.rb +60 -0
  91. data/lib/openai/models/realtime/response_mcp_call_completed.rb +44 -0
  92. data/lib/openai/models/realtime/response_mcp_call_failed.rb +44 -0
  93. data/lib/openai/models/realtime/response_mcp_call_in_progress.rb +44 -0
  94. data/lib/openai/models/realtime/response_output_item_added_event.rb +52 -0
  95. data/lib/openai/models/realtime/response_output_item_done_event.rb +53 -0
  96. data/lib/openai/models/realtime/response_text_delta_event.rb +68 -0
  97. data/lib/openai/models/realtime/response_text_done_event.rb +69 -0
  98. data/lib/openai/models/realtime/session_created_event.rb +38 -0
  99. data/lib/openai/models/realtime/session_update_event.rb +44 -0
  100. data/lib/openai/models/realtime/session_updated_event.rb +37 -0
  101. data/lib/openai/models/realtime/transcription_session_created.rb +278 -0
  102. data/lib/openai/models/realtime/transcription_session_update.rb +36 -0
  103. data/lib/openai/models/realtime/transcription_session_updated_event.rb +279 -0
  104. data/lib/openai/models/responses/response.rb +6 -3
  105. data/lib/openai/models/responses/response_create_params.rb +6 -3
  106. data/lib/openai/models/responses/tool.rb +3 -156
  107. data/lib/openai/models/responses/web_search_preview_tool.rb +124 -0
  108. data/lib/openai/models/responses/web_search_tool.rb +58 -21
  109. data/lib/openai/models/webhooks/realtime_call_incoming_webhook_event.rb +119 -0
  110. data/lib/openai/models/webhooks/unwrap_webhook_event.rb +4 -1
  111. data/lib/openai/models.rb +2 -0
  112. data/lib/openai/resources/realtime/client_secrets.rb +44 -0
  113. data/lib/openai/resources/realtime.rb +18 -0
  114. data/lib/openai/resources/responses.rb +2 -2
  115. data/lib/openai/resources/webhooks.rb +1 -1
  116. data/lib/openai/version.rb +1 -1
  117. data/lib/openai.rb +95 -0
  118. data/rbi/openai/client.rbi +3 -0
  119. data/rbi/openai/models/audio/speech_create_params.rbi +10 -0
  120. data/rbi/openai/models/chat/chat_completion_audio_param.rbi +10 -0
  121. data/rbi/openai/models/evals/run_cancel_response.rbi +4 -4
  122. data/rbi/openai/models/evals/run_create_params.rbi +8 -8
  123. data/rbi/openai/models/evals/run_create_response.rbi +4 -4
  124. data/rbi/openai/models/evals/run_list_response.rbi +4 -4
  125. data/rbi/openai/models/evals/run_retrieve_response.rbi +4 -4
  126. data/rbi/openai/models/realtime/client_secret_create_params.rbi +222 -0
  127. data/rbi/openai/models/realtime/client_secret_create_response.rbi +676 -0
  128. data/rbi/openai/models/realtime/conversation_created_event.rbi +164 -0
  129. data/rbi/openai/models/realtime/conversation_item.rbi +35 -0
  130. data/rbi/openai/models/realtime/conversation_item_added.rbi +105 -0
  131. data/rbi/openai/models/realtime/conversation_item_create_event.rbi +123 -0
  132. data/rbi/openai/models/realtime/conversation_item_created_event.rbi +117 -0
  133. data/rbi/openai/models/realtime/conversation_item_delete_event.rbi +57 -0
  134. data/rbi/openai/models/realtime/conversation_item_deleted_event.rbi +53 -0
  135. data/rbi/openai/models/realtime/conversation_item_done.rbi +105 -0
  136. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +305 -0
  137. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +93 -0
  138. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rbi +158 -0
  139. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_segment.rbi +107 -0
  140. data/rbi/openai/models/realtime/conversation_item_retrieve_event.rbi +58 -0
  141. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +94 -0
  142. data/rbi/openai/models/realtime/conversation_item_truncated_event.rbi +80 -0
  143. data/rbi/openai/models/realtime/conversation_item_with_reference.rbi +549 -0
  144. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +65 -0
  145. data/rbi/openai/models/realtime/input_audio_buffer_clear_event.rbi +43 -0
  146. data/rbi/openai/models/realtime/input_audio_buffer_cleared_event.rbi +40 -0
  147. data/rbi/openai/models/realtime/input_audio_buffer_commit_event.rbi +49 -0
  148. data/rbi/openai/models/realtime/input_audio_buffer_committed_event.rbi +72 -0
  149. data/rbi/openai/models/realtime/input_audio_buffer_speech_started_event.rbi +82 -0
  150. data/rbi/openai/models/realtime/input_audio_buffer_speech_stopped_event.rbi +73 -0
  151. data/rbi/openai/models/realtime/input_audio_buffer_timeout_triggered.rbi +75 -0
  152. data/rbi/openai/models/realtime/log_prob_properties.rbi +55 -0
  153. data/rbi/openai/models/realtime/mcp_list_tools_completed.rbi +51 -0
  154. data/rbi/openai/models/realtime/mcp_list_tools_failed.rbi +51 -0
  155. data/rbi/openai/models/realtime/mcp_list_tools_in_progress.rbi +51 -0
  156. data/rbi/openai/models/realtime/output_audio_buffer_clear_event.rbi +46 -0
  157. data/rbi/openai/models/realtime/rate_limits_updated_event.rbi +187 -0
  158. data/rbi/openai/models/realtime/realtime_audio_config.rbi +1004 -0
  159. data/rbi/openai/models/realtime/realtime_client_event.rbi +38 -0
  160. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +147 -0
  161. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +292 -0
  162. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +199 -0
  163. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +188 -0
  164. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +292 -0
  165. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +319 -0
  166. data/rbi/openai/models/realtime/realtime_error.rbi +72 -0
  167. data/rbi/openai/models/realtime/realtime_error_event.rbi +64 -0
  168. data/rbi/openai/models/realtime/realtime_mcp_approval_request.rbi +75 -0
  169. data/rbi/openai/models/realtime/realtime_mcp_approval_response.rbi +75 -0
  170. data/rbi/openai/models/realtime/realtime_mcp_list_tools.rbi +131 -0
  171. data/rbi/openai/models/realtime/realtime_mcp_protocol_error.rbi +40 -0
  172. data/rbi/openai/models/realtime/realtime_mcp_tool_call.rbi +145 -0
  173. data/rbi/openai/models/realtime/realtime_mcp_tool_execution_error.rbi +31 -0
  174. data/rbi/openai/models/realtime/realtime_mcphttp_error.rbi +40 -0
  175. data/rbi/openai/models/realtime/realtime_response.rbi +573 -0
  176. data/rbi/openai/models/realtime/realtime_response_status.rbi +233 -0
  177. data/rbi/openai/models/realtime/realtime_response_usage.rbi +121 -0
  178. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +68 -0
  179. data/rbi/openai/models/realtime/realtime_response_usage_output_token_details.rbi +51 -0
  180. data/rbi/openai/models/realtime/realtime_server_event.rbi +311 -0
  181. data/rbi/openai/models/realtime/realtime_session.rbi +1426 -0
  182. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +560 -0
  183. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1249 -0
  184. data/rbi/openai/models/realtime/realtime_tool_choice_config.rbi +30 -0
  185. data/rbi/openai/models/realtime/realtime_tools_config.rbi +15 -0
  186. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +755 -0
  187. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +95 -0
  188. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +703 -0
  189. data/rbi/openai/models/realtime/realtime_truncation.rbi +117 -0
  190. data/rbi/openai/models/realtime/response_audio_delta_event.rbi +91 -0
  191. data/rbi/openai/models/realtime/response_audio_done_event.rbi +84 -0
  192. data/rbi/openai/models/realtime/response_audio_transcript_delta_event.rbi +91 -0
  193. data/rbi/openai/models/realtime/response_audio_transcript_done_event.rbi +93 -0
  194. data/rbi/openai/models/realtime/response_cancel_event.rbi +63 -0
  195. data/rbi/openai/models/realtime/response_content_part_added_event.rbi +219 -0
  196. data/rbi/openai/models/realtime/response_content_part_done_event.rbi +219 -0
  197. data/rbi/openai/models/realtime/response_create_event.rbi +863 -0
  198. data/rbi/openai/models/realtime/response_created_event.rbi +65 -0
  199. data/rbi/openai/models/realtime/response_done_event.rbi +66 -0
  200. data/rbi/openai/models/realtime/response_function_call_arguments_delta_event.rbi +91 -0
  201. data/rbi/openai/models/realtime/response_function_call_arguments_done_event.rbi +92 -0
  202. data/rbi/openai/models/realtime/response_mcp_call_arguments_delta.rbi +91 -0
  203. data/rbi/openai/models/realtime/response_mcp_call_arguments_done.rbi +83 -0
  204. data/rbi/openai/models/realtime/response_mcp_call_completed.rbi +67 -0
  205. data/rbi/openai/models/realtime/response_mcp_call_failed.rbi +67 -0
  206. data/rbi/openai/models/realtime/response_mcp_call_in_progress.rbi +67 -0
  207. data/rbi/openai/models/realtime/response_output_item_added_event.rbi +111 -0
  208. data/rbi/openai/models/realtime/response_output_item_done_event.rbi +112 -0
  209. data/rbi/openai/models/realtime/response_text_delta_event.rbi +91 -0
  210. data/rbi/openai/models/realtime/response_text_done_event.rbi +92 -0
  211. data/rbi/openai/models/realtime/session_created_event.rbi +64 -0
  212. data/rbi/openai/models/realtime/session_update_event.rbi +77 -0
  213. data/rbi/openai/models/realtime/session_updated_event.rbi +63 -0
  214. data/rbi/openai/models/realtime/transcription_session_created.rbi +653 -0
  215. data/rbi/openai/models/realtime/transcription_session_update.rbi +74 -0
  216. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +657 -0
  217. data/rbi/openai/models/responses/response.rbi +10 -4
  218. data/rbi/openai/models/responses/response_create_params.rbi +16 -10
  219. data/rbi/openai/models/responses/tool.rbi +2 -348
  220. data/rbi/openai/models/responses/web_search_preview_tool.rbi +245 -0
  221. data/rbi/openai/models/responses/web_search_tool.rbi +120 -23
  222. data/rbi/openai/models/webhooks/realtime_call_incoming_webhook_event.rbi +222 -0
  223. data/rbi/openai/models/webhooks/unwrap_webhook_event.rbi +1 -0
  224. data/rbi/openai/models.rbi +2 -0
  225. data/rbi/openai/resources/realtime/client_secrets.rbi +38 -0
  226. data/rbi/openai/resources/realtime.rbi +15 -0
  227. data/rbi/openai/resources/responses.rbi +12 -6
  228. data/rbi/openai/resources/webhooks.rbi +1 -0
  229. data/sig/openai/client.rbs +2 -0
  230. data/sig/openai/models/audio/speech_create_params.rbs +4 -0
  231. data/sig/openai/models/chat/chat_completion_audio_param.rbs +4 -0
  232. data/sig/openai/models/realtime/client_secret_create_params.rbs +89 -0
  233. data/sig/openai/models/realtime/client_secret_create_response.rbs +292 -0
  234. data/sig/openai/models/realtime/conversation_created_event.rbs +70 -0
  235. data/sig/openai/models/realtime/conversation_item.rbs +22 -0
  236. data/sig/openai/models/realtime/conversation_item_added.rbs +37 -0
  237. data/sig/openai/models/realtime/conversation_item_create_event.rbs +41 -0
  238. data/sig/openai/models/realtime/conversation_item_created_event.rbs +37 -0
  239. data/sig/openai/models/realtime/conversation_item_delete_event.rbs +30 -0
  240. data/sig/openai/models/realtime/conversation_item_deleted_event.rbs +32 -0
  241. data/sig/openai/models/realtime/conversation_item_done.rbs +37 -0
  242. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbs +136 -0
  243. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbs +51 -0
  244. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rbs +77 -0
  245. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_segment.rbs +62 -0
  246. data/sig/openai/models/realtime/conversation_item_retrieve_event.rbs +34 -0
  247. data/sig/openai/models/realtime/conversation_item_truncate_event.rbs +44 -0
  248. data/sig/openai/models/realtime/conversation_item_truncated_event.rbs +42 -0
  249. data/sig/openai/models/realtime/conversation_item_with_reference.rbs +207 -0
  250. data/sig/openai/models/realtime/input_audio_buffer_append_event.rbs +30 -0
  251. data/sig/openai/models/realtime/input_audio_buffer_clear_event.rbs +23 -0
  252. data/sig/openai/models/realtime/input_audio_buffer_cleared_event.rbs +24 -0
  253. data/sig/openai/models/realtime/input_audio_buffer_commit_event.rbs +23 -0
  254. data/sig/openai/models/realtime/input_audio_buffer_committed_event.rbs +37 -0
  255. data/sig/openai/models/realtime/input_audio_buffer_speech_started_event.rbs +37 -0
  256. data/sig/openai/models/realtime/input_audio_buffer_speech_stopped_event.rbs +37 -0
  257. data/sig/openai/models/realtime/input_audio_buffer_timeout_triggered.rbs +42 -0
  258. data/sig/openai/models/realtime/log_prob_properties.rbs +28 -0
  259. data/sig/openai/models/realtime/mcp_list_tools_completed.rbs +28 -0
  260. data/sig/openai/models/realtime/mcp_list_tools_failed.rbs +28 -0
  261. data/sig/openai/models/realtime/mcp_list_tools_in_progress.rbs +32 -0
  262. data/sig/openai/models/realtime/output_audio_buffer_clear_event.rbs +23 -0
  263. data/sig/openai/models/realtime/rate_limits_updated_event.rbs +85 -0
  264. data/sig/openai/models/realtime/realtime_audio_config.rbs +354 -0
  265. data/sig/openai/models/realtime/realtime_client_event.rbs +25 -0
  266. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +60 -0
  267. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +117 -0
  268. data/sig/openai/models/realtime/realtime_conversation_item_function_call.rbs +86 -0
  269. data/sig/openai/models/realtime/realtime_conversation_item_function_call_output.rbs +79 -0
  270. data/sig/openai/models/realtime/realtime_conversation_item_system_message.rbs +117 -0
  271. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +132 -0
  272. data/sig/openai/models/realtime/realtime_error.rbs +42 -0
  273. data/sig/openai/models/realtime/realtime_error_event.rbs +32 -0
  274. data/sig/openai/models/realtime/realtime_mcp_approval_request.rbs +42 -0
  275. data/sig/openai/models/realtime/realtime_mcp_approval_response.rbs +42 -0
  276. data/sig/openai/models/realtime/realtime_mcp_list_tools.rbs +71 -0
  277. data/sig/openai/models/realtime/realtime_mcp_protocol_error.rbs +28 -0
  278. data/sig/openai/models/realtime/realtime_mcp_tool_call.rbs +68 -0
  279. data/sig/openai/models/realtime/realtime_mcp_tool_execution_error.rbs +18 -0
  280. data/sig/openai/models/realtime/realtime_mcphttp_error.rbs +24 -0
  281. data/sig/openai/models/realtime/realtime_response.rbs +210 -0
  282. data/sig/openai/models/realtime/realtime_response_status.rbs +90 -0
  283. data/sig/openai/models/realtime/realtime_response_usage.rbs +56 -0
  284. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +34 -0
  285. data/sig/openai/models/realtime/realtime_response_usage_output_token_details.rbs +22 -0
  286. data/sig/openai/models/realtime/realtime_server_event.rbs +168 -0
  287. data/sig/openai/models/realtime/realtime_session.rbs +521 -0
  288. data/sig/openai/models/realtime/realtime_session_create_request.rbs +178 -0
  289. data/sig/openai/models/realtime/realtime_session_create_response.rbs +526 -0
  290. data/sig/openai/models/realtime/realtime_tool_choice_config.rbs +16 -0
  291. data/sig/openai/models/realtime/realtime_tools_config.rbs +10 -0
  292. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +280 -0
  293. data/sig/openai/models/realtime/realtime_tracing_config.rbs +43 -0
  294. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +242 -0
  295. data/sig/openai/models/realtime/realtime_truncation.rbs +53 -0
  296. data/sig/openai/models/realtime/response_audio_delta_event.rbs +52 -0
  297. data/sig/openai/models/realtime/response_audio_done_event.rbs +47 -0
  298. data/sig/openai/models/realtime/response_audio_transcript_delta_event.rbs +52 -0
  299. data/sig/openai/models/realtime/response_audio_transcript_done_event.rbs +52 -0
  300. data/sig/openai/models/realtime/response_cancel_event.rbs +32 -0
  301. data/sig/openai/models/realtime/response_content_part_added_event.rbs +105 -0
  302. data/sig/openai/models/realtime/response_content_part_done_event.rbs +105 -0
  303. data/sig/openai/models/realtime/response_create_event.rbs +281 -0
  304. data/sig/openai/models/realtime/response_created_event.rbs +32 -0
  305. data/sig/openai/models/realtime/response_done_event.rbs +32 -0
  306. data/sig/openai/models/realtime/response_function_call_arguments_delta_event.rbs +52 -0
  307. data/sig/openai/models/realtime/response_function_call_arguments_done_event.rbs +52 -0
  308. data/sig/openai/models/realtime/response_mcp_call_arguments_delta.rbs +52 -0
  309. data/sig/openai/models/realtime/response_mcp_call_arguments_done.rbs +47 -0
  310. data/sig/openai/models/realtime/response_mcp_call_completed.rbs +37 -0
  311. data/sig/openai/models/realtime/response_mcp_call_failed.rbs +37 -0
  312. data/sig/openai/models/realtime/response_mcp_call_in_progress.rbs +37 -0
  313. data/sig/openai/models/realtime/response_output_item_added_event.rbs +42 -0
  314. data/sig/openai/models/realtime/response_output_item_done_event.rbs +42 -0
  315. data/sig/openai/models/realtime/response_text_delta_event.rbs +52 -0
  316. data/sig/openai/models/realtime/response_text_done_event.rbs +52 -0
  317. data/sig/openai/models/realtime/session_created_event.rbs +32 -0
  318. data/sig/openai/models/realtime/session_update_event.rbs +34 -0
  319. data/sig/openai/models/realtime/session_updated_event.rbs +32 -0
  320. data/sig/openai/models/realtime/transcription_session_created.rbs +282 -0
  321. data/sig/openai/models/realtime/transcription_session_update.rbs +34 -0
  322. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +282 -0
  323. data/sig/openai/models/responses/tool.rbs +1 -121
  324. data/sig/openai/models/responses/web_search_preview_tool.rbs +96 -0
  325. data/sig/openai/models/responses/web_search_tool.rbs +39 -10
  326. data/sig/openai/models/webhooks/realtime_call_incoming_webhook_event.rbs +90 -0
  327. data/sig/openai/models/webhooks/unwrap_webhook_event.rbs +1 -0
  328. data/sig/openai/models.rbs +2 -0
  329. data/sig/openai/resources/realtime/client_secrets.rbs +15 -0
  330. data/sig/openai/resources/realtime.rbs +9 -0
  331. data/sig/openai/resources/webhooks.rbs +1 -0
  332. metadata +287 -2
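
The headline change in 0.21.0 is the new OpenAI::Realtime surface: a client.realtime resource, typed models for the realtime client and server events, and a webhook event for incoming realtime calls. As a rough sketch of how the new client-secret resource might be called (the method chain follows the resource files listed above; the keyword arguments are assumptions, since client_secret_create_params.rb is not shown in this excerpt):

require "openai"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

# Hypothetical call shape inferred from data/lib/openai/resources/realtime/client_secrets.rb;
# the session keywords are assumed from realtime_session_create_request.rb, not confirmed here.
secret = client.realtime.client_secrets.create(
  session: { type: :realtime, model: "gpt-realtime" }
)

The hunk below is one representative addition, the Sorbet interface for the new realtime audio configuration (item 158 in the list above):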
data/rbi/openai/models/realtime/realtime_audio_config.rbi
@@ -0,0 +1,1004 @@
+# typed: strong
+
+module OpenAI
+  module Models
+    module Realtime
+      class RealtimeAudioConfig < OpenAI::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              OpenAI::Realtime::RealtimeAudioConfig,
+              OpenAI::Internal::AnyHash
+            )
+          end
+
+        sig { returns(T.nilable(OpenAI::Realtime::RealtimeAudioConfig::Input)) }
+        attr_reader :input
+
+        sig do
+          params(
+            input: OpenAI::Realtime::RealtimeAudioConfig::Input::OrHash
+          ).void
+        end
+        attr_writer :input
+
+        sig do
+          returns(T.nilable(OpenAI::Realtime::RealtimeAudioConfig::Output))
+        end
+        attr_reader :output
+
+        sig do
+          params(
+            output: OpenAI::Realtime::RealtimeAudioConfig::Output::OrHash
+          ).void
+        end
+        attr_writer :output
+
+        # Configuration for input and output audio.
+        sig do
+          params(
+            input: OpenAI::Realtime::RealtimeAudioConfig::Input::OrHash,
+            output: OpenAI::Realtime::RealtimeAudioConfig::Output::OrHash
+          ).returns(T.attached_class)
+        end
+        def self.new(input: nil, output: nil)
+        end
+
+        sig do
+          override.returns(
+            {
+              input: OpenAI::Realtime::RealtimeAudioConfig::Input,
+              output: OpenAI::Realtime::RealtimeAudioConfig::Output
+            }
+          )
+        end
+        def to_hash
+        end
+
+        class Input < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeAudioConfig::Input,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+          # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+          # (mono), and little-endian byte order.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol
+              )
+            )
+          end
+          attr_reader :format_
+
+          sig do
+            params(
+              format_:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol
+            ).void
+          end
+          attr_writer :format_
+
+          # Configuration for input audio noise reduction. This can be set to `null` to turn
+          # off. Noise reduction filters audio added to the input audio buffer before it is
+          # sent to VAD and the model. Filtering the audio can improve VAD and turn
+          # detection accuracy (reducing false positives) and model performance by improving
+          # perception of the input audio.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction
+              )
+            )
+          end
+          attr_reader :noise_reduction
+
+          sig do
+            params(
+              noise_reduction:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::OrHash
+            ).void
+          end
+          attr_writer :noise_reduction
+
+          # Configuration for input audio transcription, defaults to off and can be set to
+          # `null` to turn off once on. Input audio transcription is not native to the
+          # model, since the model consumes audio directly. Transcription runs
+          # asynchronously through
+          # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+          # and should be treated as guidance of input audio content rather than precisely
+          # what the model heard. The client can optionally set the language and prompt for
+          # transcription; these offer additional guidance to the transcription service.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription
+              )
+            )
+          end
+          attr_reader :transcription
+
+          sig do
+            params(
+              transcription:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::OrHash
+            ).void
+          end
+          attr_writer :transcription
+
+          # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+          # set to `null` to turn off, in which case the client must manually trigger model
+          # response. Server VAD means that the model will detect the start and end of
+          # speech based on audio volume and respond at the end of user speech. Semantic VAD
+          # is more advanced and uses a turn detection model (in conjunction with VAD) to
+          # semantically estimate whether the user has finished speaking, then dynamically
+          # sets a timeout based on this probability. For example, if user audio trails off
+          # with "uhhm", the model will score a low probability of turn end and wait longer
+          # for the user to continue speaking. This can be useful for more natural
+          # conversations, but may have a higher latency.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection
+              )
+            )
+          end
+          attr_reader :turn_detection
+
+          sig do
+            params(
+              turn_detection:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::OrHash
+            ).void
+          end
+          attr_writer :turn_detection
+
+          sig do
+            params(
+              format_:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol,
+              noise_reduction:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::OrHash,
+              transcription:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::OrHash,
+              turn_detection:
+                OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::OrHash
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+            # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+            # (mono), and little-endian byte order.
+            format_: nil,
+            # Configuration for input audio noise reduction. This can be set to `null` to turn
+            # off. Noise reduction filters audio added to the input audio buffer before it is
+            # sent to VAD and the model. Filtering the audio can improve VAD and turn
+            # detection accuracy (reducing false positives) and model performance by improving
+            # perception of the input audio.
+            noise_reduction: nil,
+            # Configuration for input audio transcription, defaults to off and can be set to
+            # `null` to turn off once on. Input audio transcription is not native to the
+            # model, since the model consumes audio directly. Transcription runs
+            # asynchronously through
+            # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+            # and should be treated as guidance of input audio content rather than precisely
+            # what the model heard. The client can optionally set the language and prompt for
+            # transcription; these offer additional guidance to the transcription service.
+            transcription: nil,
+            # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+            # set to `null` to turn off, in which case the client must manually trigger model
+            # response. Server VAD means that the model will detect the start and end of
+            # speech based on audio volume and respond at the end of user speech. Semantic VAD
+            # is more advanced and uses a turn detection model (in conjunction with VAD) to
+            # semantically estimate whether the user has finished speaking, then dynamically
+            # sets a timeout based on this probability. For example, if user audio trails off
+            # with "uhhm", the model will score a low probability of turn end and wait longer
+            # for the user to continue speaking. This can be useful for more natural
+            # conversations, but may have a higher latency.
+            turn_detection: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                format_:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Format::OrSymbol,
+                noise_reduction:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction,
+                transcription:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription,
+                turn_detection:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection
+              }
+            )
+          end
+          def to_hash
+          end
+
+          # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+          # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+          # (mono), and little-endian byte order.
+          module Format
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Format
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            PCM16 =
+              T.let(
+                :pcm16,
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol
+              )
+            G711_ULAW =
+              T.let(
+                :g711_ulaw,
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol
+              )
+            G711_ALAW =
+              T.let(
+                :g711_alaw,
+                OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Format::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+
+          class NoiseReduction < OpenAI::Internal::Type::BaseModel
+            OrHash =
+              T.type_alias do
+                T.any(
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction,
+                  OpenAI::Internal::AnyHash
+                )
+              end
+
+            # Type of noise reduction. `near_field` is for close-talking microphones such as
+            # headphones, `far_field` is for far-field microphones such as laptop or
+            # conference room microphones.
+            sig do
+              returns(
+                T.nilable(
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol
+                )
+              )
+            end
+            attr_reader :type
+
+            sig do
+              params(
+                type:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol
+              ).void
+            end
+            attr_writer :type
+
+            # Configuration for input audio noise reduction. This can be set to `null` to turn
+            # off. Noise reduction filters audio added to the input audio buffer before it is
+            # sent to VAD and the model. Filtering the audio can improve VAD and turn
+            # detection accuracy (reducing false positives) and model performance by improving
+            # perception of the input audio.
+            sig do
+              params(
+                type:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol
+              ).returns(T.attached_class)
+            end
+            def self.new(
+              # Type of noise reduction. `near_field` is for close-talking microphones such as
+              # headphones, `far_field` is for far-field microphones such as laptop or
+              # conference room microphones.
+              type: nil
+            )
+            end
+
+            sig do
+              override.returns(
+                {
+                  type:
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::OrSymbol
+                }
+              )
+            end
+            def to_hash
+            end
+
+            # Type of noise reduction. `near_field` is for close-talking microphones such as
+            # headphones, `far_field` is for far-field microphones such as laptop or
+            # conference room microphones.
+            module Type
+              extend OpenAI::Internal::Type::Enum
+
+              TaggedSymbol =
+                T.type_alias do
+                  T.all(
+                    Symbol,
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type
+                  )
+                end
+              OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+              NEAR_FIELD =
+                T.let(
+                  :near_field,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::TaggedSymbol
+                )
+              FAR_FIELD =
+                T.let(
+                  :far_field,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::TaggedSymbol
+                )
+
+              sig do
+                override.returns(
+                  T::Array[
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::NoiseReduction::Type::TaggedSymbol
+                  ]
+                )
+              end
+              def self.values
+              end
+            end
+          end
+
+          class Transcription < OpenAI::Internal::Type::BaseModel
+            OrHash =
+              T.type_alias do
+                T.any(
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription,
+                  OpenAI::Internal::AnyHash
+                )
+              end
+
+            # The language of the input audio. Supplying the input language in
+            # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+            # format will improve accuracy and latency.
+            sig { returns(T.nilable(String)) }
+            attr_reader :language
+
+            sig { params(language: String).void }
+            attr_writer :language
+
+            # The model to use for transcription. Current options are `whisper-1`,
+            # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and
+            # `gpt-4o-transcribe-diarize`.
+            sig do
+              returns(
+                T.nilable(
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol
+                )
+              )
+            end
+            attr_reader :model
+
+            sig do
+              params(
+                model:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol
+              ).void
+            end
+            attr_writer :model
+
+            # An optional text to guide the model's style or continue a previous audio
+            # segment. For `whisper-1`, the
+            # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+            # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+            # "expect words related to technology".
+            sig { returns(T.nilable(String)) }
+            attr_reader :prompt
+
+            sig { params(prompt: String).void }
+            attr_writer :prompt
+
+            # Configuration for input audio transcription, defaults to off and can be set to
+            # `null` to turn off once on. Input audio transcription is not native to the
+            # model, since the model consumes audio directly. Transcription runs
+            # asynchronously through
+            # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+            # and should be treated as guidance of input audio content rather than precisely
+            # what the model heard. The client can optionally set the language and prompt for
+            # transcription; these offer additional guidance to the transcription service.
+            sig do
+              params(
+                language: String,
+                model:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol,
+                prompt: String
+              ).returns(T.attached_class)
+            end
+            def self.new(
+              # The language of the input audio. Supplying the input language in
+              # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              # format will improve accuracy and latency.
+              language: nil,
+              # The model to use for transcription. Current options are `whisper-1`,
+              # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and
+              # `gpt-4o-transcribe-diarize`.
+              model: nil,
+              # An optional text to guide the model's style or continue a previous audio
+              # segment. For `whisper-1`, the
+              # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+              # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+              # "expect words related to technology".
+              prompt: nil
+            )
+            end
+
+            sig do
+              override.returns(
+                {
+                  language: String,
+                  model:
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::OrSymbol,
+                  prompt: String
+                }
+              )
+            end
+            def to_hash
+            end
+
+            # The model to use for transcription. Current options are `whisper-1`,
+            # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and
+            # `gpt-4o-transcribe-diarize`.
+            module Model
+              extend OpenAI::Internal::Type::Enum
+
+              TaggedSymbol =
+                T.type_alias do
+                  T.all(
+                    Symbol,
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model
+                  )
+                end
+              OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+              WHISPER_1 =
+                T.let(
+                  :"whisper-1",
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol
+                )
+              GPT_4O_TRANSCRIBE_LATEST =
+                T.let(
+                  :"gpt-4o-transcribe-latest",
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol
+                )
+              GPT_4O_MINI_TRANSCRIBE =
+                T.let(
+                  :"gpt-4o-mini-transcribe",
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol
+                )
+              GPT_4O_TRANSCRIBE =
+                T.let(
+                  :"gpt-4o-transcribe",
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol
+                )
+              GPT_4O_TRANSCRIBE_DIARIZE =
+                T.let(
+                  :"gpt-4o-transcribe-diarize",
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol
+                )
+
+              sig do
+                override.returns(
+                  T::Array[
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::Transcription::Model::TaggedSymbol
+                  ]
+                )
+              end
+              def self.values
+              end
+            end
+          end
+
+          class TurnDetection < OpenAI::Internal::Type::BaseModel
+            OrHash =
+              T.type_alias do
+                T.any(
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection,
+                  OpenAI::Internal::AnyHash
+                )
+              end
+
+            # Whether or not to automatically generate a response when a VAD stop event
+            # occurs.
+            sig { returns(T.nilable(T::Boolean)) }
+            attr_reader :create_response
+
+            sig { params(create_response: T::Boolean).void }
+            attr_writer :create_response
+
+            # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+            # will wait longer for the user to continue speaking, `high` will respond more
+            # quickly. `auto` is the default and is equivalent to `medium`.
+            sig do
+              returns(
+                T.nilable(
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol
+                )
+              )
+            end
+            attr_reader :eagerness
+
+            sig do
+              params(
+                eagerness:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol
+              ).void
+            end
+            attr_writer :eagerness
+
+            # Optional idle timeout after which turn detection will auto-timeout when no
+            # additional audio is received.
+            sig { returns(T.nilable(Integer)) }
+            attr_accessor :idle_timeout_ms
+
+            # Whether or not to automatically interrupt any ongoing response with output to
+            # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+            # occurs.
+            sig { returns(T.nilable(T::Boolean)) }
+            attr_reader :interrupt_response
+
+            sig { params(interrupt_response: T::Boolean).void }
+            attr_writer :interrupt_response
+
+            # Used only for `server_vad` mode. Amount of audio to include before the VAD
+            # detected speech (in milliseconds). Defaults to 300ms.
+            sig { returns(T.nilable(Integer)) }
+            attr_reader :prefix_padding_ms
+
+            sig { params(prefix_padding_ms: Integer).void }
+            attr_writer :prefix_padding_ms
+
+            # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+            # milliseconds). Defaults to 500ms. With shorter values the model will respond
+            # more quickly, but may jump in on short pauses from the user.
+            sig { returns(T.nilable(Integer)) }
+            attr_reader :silence_duration_ms
+
+            sig { params(silence_duration_ms: Integer).void }
+            attr_writer :silence_duration_ms
+
+            # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
+            # defaults to 0.5. A higher threshold will require louder audio to activate the
+            # model, and thus might perform better in noisy environments.
+            sig { returns(T.nilable(Float)) }
+            attr_reader :threshold
+
+            sig { params(threshold: Float).void }
+            attr_writer :threshold
+
+            # Type of turn detection.
+            sig do
+              returns(
+                T.nilable(
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol
+                )
+              )
+            end
+            attr_reader :type
+
+            sig do
+              params(
+                type:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol
+              ).void
+            end
+            attr_writer :type
+
+            # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
+            # set to `null` to turn off, in which case the client must manually trigger model
+            # response. Server VAD means that the model will detect the start and end of
+            # speech based on audio volume and respond at the end of user speech. Semantic VAD
+            # is more advanced and uses a turn detection model (in conjunction with VAD) to
+            # semantically estimate whether the user has finished speaking, then dynamically
+            # sets a timeout based on this probability. For example, if user audio trails off
+            # with "uhhm", the model will score a low probability of turn end and wait longer
+            # for the user to continue speaking. This can be useful for more natural
+            # conversations, but may have a higher latency.
+            sig do
+              params(
+                create_response: T::Boolean,
+                eagerness:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol,
+                idle_timeout_ms: T.nilable(Integer),
+                interrupt_response: T::Boolean,
+                prefix_padding_ms: Integer,
+                silence_duration_ms: Integer,
+                threshold: Float,
+                type:
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol
+              ).returns(T.attached_class)
+            end
+            def self.new(
+              # Whether or not to automatically generate a response when a VAD stop event
+              # occurs.
+              create_response: nil,
+              # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+              # will wait longer for the user to continue speaking, `high` will respond more
+              # quickly. `auto` is the default and is equivalent to `medium`.
+              eagerness: nil,
+              # Optional idle timeout after which turn detection will auto-timeout when no
+              # additional audio is received.
+              idle_timeout_ms: nil,
+              # Whether or not to automatically interrupt any ongoing response with output to
+              # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+              # occurs.
+              interrupt_response: nil,
+              # Used only for `server_vad` mode. Amount of audio to include before the VAD
+              # detected speech (in milliseconds). Defaults to 300ms.
+              prefix_padding_ms: nil,
+              # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+              # milliseconds). Defaults to 500ms. With shorter values the model will respond
+              # more quickly, but may jump in on short pauses from the user.
+              silence_duration_ms: nil,
+              # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
+              # defaults to 0.5. A higher threshold will require louder audio to activate the
+              # model, and thus might perform better in noisy environments.
+              threshold: nil,
+              # Type of turn detection.
+              type: nil
+            )
+            end
+
+            sig do
+              override.returns(
+                {
+                  create_response: T::Boolean,
+                  eagerness:
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::OrSymbol,
+                  idle_timeout_ms: T.nilable(Integer),
+                  interrupt_response: T::Boolean,
+                  prefix_padding_ms: Integer,
+                  silence_duration_ms: Integer,
+                  threshold: Float,
+                  type:
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::OrSymbol
+                }
+              )
+            end
+            def to_hash
+            end
+
+            # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+            # will wait longer for the user to continue speaking, `high` will respond more
+            # quickly. `auto` is the default and is equivalent to `medium`.
+            module Eagerness
+              extend OpenAI::Internal::Type::Enum
+
+              TaggedSymbol =
+                T.type_alias do
+                  T.all(
+                    Symbol,
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness
+                  )
+                end
+              OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+              LOW =
+                T.let(
+                  :low,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol
+                )
+              MEDIUM =
+                T.let(
+                  :medium,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol
+                )
+              HIGH =
+                T.let(
+                  :high,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol
+                )
+              AUTO =
+                T.let(
+                  :auto,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol
+                )
+
+              sig do
+                override.returns(
+                  T::Array[
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Eagerness::TaggedSymbol
+                  ]
+                )
+              end
+              def self.values
+              end
+            end
+
+            # Type of turn detection.
+            module Type
+              extend OpenAI::Internal::Type::Enum
+
+              TaggedSymbol =
+                T.type_alias do
+                  T.all(
+                    Symbol,
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type
+                  )
+                end
+              OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+              SERVER_VAD =
+                T.let(
+                  :server_vad,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::TaggedSymbol
+                )
+              SEMANTIC_VAD =
+                T.let(
+                  :semantic_vad,
+                  OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::TaggedSymbol
+                )
+
+              sig do
+                override.returns(
+                  T::Array[
+                    OpenAI::Realtime::RealtimeAudioConfig::Input::TurnDetection::Type::TaggedSymbol
+                  ]
+                )
+              end
+              def self.values
+              end
+            end
+          end
+        end
+
+        class Output < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeAudioConfig::Output,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+          # For `pcm16`, output audio is sampled at a rate of 24kHz.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol
+              )
+            )
+          end
+          attr_reader :format_
+
+          sig do
+            params(
+              format_:
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol
+            ).void
+          end
+          attr_writer :format_
+
+          # The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
+          # minimum speed. 1.5 is the maximum speed. This value can only be changed in
+          # between model turns, not while a response is in progress.
+          sig { returns(T.nilable(Float)) }
+          attr_reader :speed
+
+          sig { params(speed: Float).void }
+          attr_writer :speed
+
+          # The voice the model uses to respond. Voice cannot be changed during the session
+          # once the model has responded with audio at least once. Current voice options are
+          # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
+          # and `cedar`.
+          sig do
+            returns(
+              T.nilable(
+                T.any(
+                  String,
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol
+                )
+              )
+            )
+          end
+          attr_reader :voice
+
+          sig do
+            params(
+              voice:
+                T.any(
+                  String,
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol
+                )
+            ).void
+          end
+          attr_writer :voice
+
+          sig do
+            params(
+              format_:
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol,
+              speed: Float,
+              voice:
+                T.any(
+                  String,
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol
+                )
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+            # For `pcm16`, output audio is sampled at a rate of 24kHz.
+            format_: nil,
+            # The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
+            # minimum speed. 1.5 is the maximum speed. This value can only be changed in
+            # between model turns, not while a response is in progress.
+            speed: nil,
+            # The voice the model uses to respond. Voice cannot be changed during the session
+            # once the model has responded with audio at least once. Current voice options are
+            # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
+            # and `cedar`.
+            voice: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                format_:
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Format::OrSymbol,
+                speed: Float,
+                voice:
+                  T.any(
+                    String,
+                    OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::OrSymbol
+                  )
+              }
+            )
+          end
+          def to_hash
+          end
+
+          # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+          # For `pcm16`, output audio is sampled at a rate of 24kHz.
+          module Format
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Format
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            PCM16 =
+              T.let(
+                :pcm16,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol
+              )
+            G711_ULAW =
+              T.let(
+                :g711_ulaw,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol
+              )
+            G711_ALAW =
+              T.let(
+                :g711_alaw,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Format::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+
+          # The voice the model uses to respond. Voice cannot be changed during the session
+          # once the model has responded with audio at least once. Current voice options are
+          # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
+          # and `cedar`.
+          module Voice
+            extend OpenAI::Internal::Type::Union
+
+            Variants =
+              T.type_alias do
+                T.any(
+                  String,
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+                )
+              end
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::Variants
+                ]
+              )
+            end
+            def self.variants
+            end
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeAudioConfig::Output::Voice
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            ALLOY =
+              T.let(
+                :alloy,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            ASH =
+              T.let(
+                :ash,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            BALLAD =
+              T.let(
+                :ballad,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            CORAL =
+              T.let(
+                :coral,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            ECHO =
+              T.let(
+                :echo,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            SAGE =
+              T.let(
+                :sage,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            SHIMMER =
+              T.let(
+                :shimmer,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            VERSE =
+              T.let(
+                :verse,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            MARIN =
+              T.let(
+                :marin,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+            CEDAR =
+              T.let(
+                :cedar,
+                OpenAI::Realtime::RealtimeAudioConfig::Output::Voice::TaggedSymbol
+              )
+          end
+        end
+      end
+    end
+  end
+end
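
Taken together, the signatures above mean the whole audio config can be built from typed models or from plain hashes: any parameter typed `::OrHash` accepts both. A minimal sketch, using only classes and keywords declared in this file; the specific option values are illustrative:

require "openai"

# Build the new audio config; nested hashes are accepted wherever a sig takes an OrHash.
audio = OpenAI::Realtime::RealtimeAudioConfig.new(
  input: {
    format_: :pcm16,                        # 16-bit PCM, 24kHz, mono, little-endian
    noise_reduction: { type: :near_field }, # or :far_field
    transcription: { model: :"gpt-4o-transcribe", language: "en" },
    turn_detection: {
      type: :server_vad,        # or :semantic_vad with eagerness: :low/:medium/:high/:auto
      threshold: 0.5,           # VAD activation threshold (0.0 to 1.0); 0.5 is the default
      prefix_padding_ms: 300,   # audio kept before detected speech (default 300ms)
      silence_duration_ms: 500  # silence that ends a turn (default 500ms)
    }
  },
  output: {
    format_: :pcm16,
    speed: 1.0,    # 0.25 to 1.5, changeable only between turns
    voice: :marin  # one of the ten voices enumerated above
  }
)

audio.to_hash # => plain-Hash form, per the to_hash sigs above

Note the `format_` keyword: the trailing underscore is exactly as declared in the sigs, presumably to avoid colliding with Ruby's built-in `format`.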