openai 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/README.md +1 -1
  4. data/lib/openai/client.rb +4 -0
  5. data/lib/openai/internal/stream.rb +3 -2
  6. data/lib/openai/models/audio/speech_create_params.rb +6 -0
  7. data/lib/openai/models/chat/chat_completion_audio_param.rb +6 -0
  8. data/lib/openai/models/evals/run_cancel_response.rb +2 -2
  9. data/lib/openai/models/evals/run_create_params.rb +2 -2
  10. data/lib/openai/models/evals/run_create_response.rb +2 -2
  11. data/lib/openai/models/evals/run_list_response.rb +2 -2
  12. data/lib/openai/models/evals/run_retrieve_response.rb +2 -2
  13. data/lib/openai/models/realtime/client_secret_create_params.rb +93 -0
  14. data/lib/openai/models/realtime/client_secret_create_response.rb +300 -0
  15. data/lib/openai/models/realtime/conversation_created_event.rb +70 -0
  16. data/lib/openai/models/realtime/conversation_item.rb +44 -0
  17. data/lib/openai/models/realtime/conversation_item_added.rb +48 -0
  18. data/lib/openai/models/realtime/conversation_item_create_event.rb +57 -0
  19. data/lib/openai/models/realtime/conversation_item_created_event.rb +59 -0
  20. data/lib/openai/models/realtime/conversation_item_delete_event.rb +39 -0
  21. data/lib/openai/models/realtime/conversation_item_deleted_event.rb +38 -0
  22. data/lib/openai/models/realtime/conversation_item_done.rb +48 -0
  23. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +189 -0
  24. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +63 -0
  25. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rb +96 -0
  26. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_segment.rb +84 -0
  27. data/lib/openai/models/realtime/conversation_item_retrieve_event.rb +40 -0
  28. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +68 -0
  29. data/lib/openai/models/realtime/conversation_item_truncated_event.rb +60 -0
  30. data/lib/openai/models/realtime/conversation_item_with_reference.rb +235 -0
  31. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +49 -0
  32. data/lib/openai/models/realtime/input_audio_buffer_clear_event.rb +29 -0
  33. data/lib/openai/models/realtime/input_audio_buffer_cleared_event.rb +29 -0
  34. data/lib/openai/models/realtime/input_audio_buffer_commit_event.rb +35 -0
  35. data/lib/openai/models/realtime/input_audio_buffer_committed_event.rb +51 -0
  36. data/lib/openai/models/realtime/input_audio_buffer_speech_started_event.rb +59 -0
  37. data/lib/openai/models/realtime/input_audio_buffer_speech_stopped_event.rb +51 -0
  38. data/lib/openai/models/realtime/input_audio_buffer_timeout_triggered.rb +52 -0
  39. data/lib/openai/models/realtime/log_prob_properties.rb +39 -0
  40. data/lib/openai/models/realtime/mcp_list_tools_completed.rb +36 -0
  41. data/lib/openai/models/realtime/mcp_list_tools_failed.rb +36 -0
  42. data/lib/openai/models/realtime/mcp_list_tools_in_progress.rb +36 -0
  43. data/lib/openai/models/realtime/output_audio_buffer_clear_event.rb +32 -0
  44. data/lib/openai/models/realtime/rate_limits_updated_event.rb +91 -0
  45. data/lib/openai/models/realtime/realtime_audio_config.rb +446 -0
  46. data/lib/openai/models/realtime/realtime_client_event.rb +123 -0
  47. data/lib/openai/models/realtime/realtime_client_secret_config.rb +64 -0
  48. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +118 -0
  49. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +94 -0
  50. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +86 -0
  51. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +118 -0
  52. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +135 -0
  53. data/lib/openai/models/realtime/realtime_error.rb +55 -0
  54. data/lib/openai/models/realtime/realtime_error_event.rb +38 -0
  55. data/lib/openai/models/realtime/realtime_mcp_approval_request.rb +52 -0
  56. data/lib/openai/models/realtime/realtime_mcp_approval_response.rb +52 -0
  57. data/lib/openai/models/realtime/realtime_mcp_list_tools.rb +84 -0
  58. data/lib/openai/models/realtime/realtime_mcp_protocol_error.rb +29 -0
  59. data/lib/openai/models/realtime/realtime_mcp_tool_call.rb +94 -0
  60. data/lib/openai/models/realtime/realtime_mcp_tool_execution_error.rb +23 -0
  61. data/lib/openai/models/realtime/realtime_mcphttp_error.rb +29 -0
  62. data/lib/openai/models/realtime/realtime_response.rb +259 -0
  63. data/lib/openai/models/realtime/realtime_response_status.rb +103 -0
  64. data/lib/openai/models/realtime/realtime_response_usage.rb +61 -0
  65. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +36 -0
  66. data/lib/openai/models/realtime/realtime_response_usage_output_token_details.rb +28 -0
  67. data/lib/openai/models/realtime/realtime_server_event.rb +369 -0
  68. data/lib/openai/models/realtime/realtime_session.rb +696 -0
  69. data/lib/openai/models/realtime/realtime_session_create_request.rb +234 -0
  70. data/lib/openai/models/realtime/realtime_session_create_response.rb +579 -0
  71. data/lib/openai/models/realtime/realtime_tool_choice_config.rb +32 -0
  72. data/lib/openai/models/realtime/realtime_tools_config.rb +11 -0
  73. data/lib/openai/models/realtime/realtime_tools_config_union.rb +379 -0
  74. data/lib/openai/models/realtime/realtime_tracing_config.rb +61 -0
  75. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +312 -0
  76. data/lib/openai/models/realtime/realtime_truncation.rb +67 -0
  77. data/lib/openai/models/realtime/response_audio_delta_event.rb +68 -0
  78. data/lib/openai/models/realtime/response_audio_done_event.rb +61 -0
  79. data/lib/openai/models/realtime/response_audio_transcript_delta_event.rb +68 -0
  80. data/lib/openai/models/realtime/response_audio_transcript_done_event.rb +70 -0
  81. data/lib/openai/models/realtime/response_cancel_event.rb +42 -0
  82. data/lib/openai/models/realtime/response_content_part_added_event.rb +120 -0
  83. data/lib/openai/models/realtime/response_content_part_done_event.rb +120 -0
  84. data/lib/openai/models/realtime/response_create_event.rb +391 -0
  85. data/lib/openai/models/realtime/response_created_event.rb +37 -0
  86. data/lib/openai/models/realtime/response_done_event.rb +38 -0
  87. data/lib/openai/models/realtime/response_function_call_arguments_delta_event.rb +72 -0
  88. data/lib/openai/models/realtime/response_function_call_arguments_done_event.rb +73 -0
  89. data/lib/openai/models/realtime/response_mcp_call_arguments_delta.rb +68 -0
  90. data/lib/openai/models/realtime/response_mcp_call_arguments_done.rb +60 -0
  91. data/lib/openai/models/realtime/response_mcp_call_completed.rb +44 -0
  92. data/lib/openai/models/realtime/response_mcp_call_failed.rb +44 -0
  93. data/lib/openai/models/realtime/response_mcp_call_in_progress.rb +44 -0
  94. data/lib/openai/models/realtime/response_output_item_added_event.rb +52 -0
  95. data/lib/openai/models/realtime/response_output_item_done_event.rb +53 -0
  96. data/lib/openai/models/realtime/response_text_delta_event.rb +68 -0
  97. data/lib/openai/models/realtime/response_text_done_event.rb +69 -0
  98. data/lib/openai/models/realtime/session_created_event.rb +38 -0
  99. data/lib/openai/models/realtime/session_update_event.rb +44 -0
  100. data/lib/openai/models/realtime/session_updated_event.rb +37 -0
  101. data/lib/openai/models/realtime/transcription_session_created.rb +278 -0
  102. data/lib/openai/models/realtime/transcription_session_update.rb +36 -0
  103. data/lib/openai/models/realtime/transcription_session_updated_event.rb +279 -0
  104. data/lib/openai/models/responses/response.rb +6 -3
  105. data/lib/openai/models/responses/response_create_params.rb +6 -3
  106. data/lib/openai/models/responses/tool.rb +3 -156
  107. data/lib/openai/models/responses/web_search_preview_tool.rb +124 -0
  108. data/lib/openai/models/responses/web_search_tool.rb +58 -21
  109. data/lib/openai/models/webhooks/realtime_call_incoming_webhook_event.rb +119 -0
  110. data/lib/openai/models/webhooks/unwrap_webhook_event.rb +4 -1
  111. data/lib/openai/models.rb +2 -0
  112. data/lib/openai/resources/realtime/client_secrets.rb +44 -0
  113. data/lib/openai/resources/realtime.rb +18 -0
  114. data/lib/openai/resources/responses.rb +2 -2
  115. data/lib/openai/resources/webhooks.rb +1 -1
  116. data/lib/openai/version.rb +1 -1
  117. data/lib/openai.rb +95 -0
  118. data/rbi/openai/client.rbi +3 -0
  119. data/rbi/openai/models/audio/speech_create_params.rbi +10 -0
  120. data/rbi/openai/models/chat/chat_completion_audio_param.rbi +10 -0
  121. data/rbi/openai/models/evals/run_cancel_response.rbi +4 -4
  122. data/rbi/openai/models/evals/run_create_params.rbi +8 -8
  123. data/rbi/openai/models/evals/run_create_response.rbi +4 -4
  124. data/rbi/openai/models/evals/run_list_response.rbi +4 -4
  125. data/rbi/openai/models/evals/run_retrieve_response.rbi +4 -4
  126. data/rbi/openai/models/realtime/client_secret_create_params.rbi +222 -0
  127. data/rbi/openai/models/realtime/client_secret_create_response.rbi +676 -0
  128. data/rbi/openai/models/realtime/conversation_created_event.rbi +164 -0
  129. data/rbi/openai/models/realtime/conversation_item.rbi +35 -0
  130. data/rbi/openai/models/realtime/conversation_item_added.rbi +105 -0
  131. data/rbi/openai/models/realtime/conversation_item_create_event.rbi +123 -0
  132. data/rbi/openai/models/realtime/conversation_item_created_event.rbi +117 -0
  133. data/rbi/openai/models/realtime/conversation_item_delete_event.rbi +57 -0
  134. data/rbi/openai/models/realtime/conversation_item_deleted_event.rbi +53 -0
  135. data/rbi/openai/models/realtime/conversation_item_done.rbi +105 -0
  136. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +305 -0
  137. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +93 -0
  138. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rbi +158 -0
  139. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_segment.rbi +107 -0
  140. data/rbi/openai/models/realtime/conversation_item_retrieve_event.rbi +58 -0
  141. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +94 -0
  142. data/rbi/openai/models/realtime/conversation_item_truncated_event.rbi +80 -0
  143. data/rbi/openai/models/realtime/conversation_item_with_reference.rbi +549 -0
  144. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +65 -0
  145. data/rbi/openai/models/realtime/input_audio_buffer_clear_event.rbi +43 -0
  146. data/rbi/openai/models/realtime/input_audio_buffer_cleared_event.rbi +40 -0
  147. data/rbi/openai/models/realtime/input_audio_buffer_commit_event.rbi +49 -0
  148. data/rbi/openai/models/realtime/input_audio_buffer_committed_event.rbi +72 -0
  149. data/rbi/openai/models/realtime/input_audio_buffer_speech_started_event.rbi +82 -0
  150. data/rbi/openai/models/realtime/input_audio_buffer_speech_stopped_event.rbi +73 -0
  151. data/rbi/openai/models/realtime/input_audio_buffer_timeout_triggered.rbi +75 -0
  152. data/rbi/openai/models/realtime/log_prob_properties.rbi +55 -0
  153. data/rbi/openai/models/realtime/mcp_list_tools_completed.rbi +51 -0
  154. data/rbi/openai/models/realtime/mcp_list_tools_failed.rbi +51 -0
  155. data/rbi/openai/models/realtime/mcp_list_tools_in_progress.rbi +51 -0
  156. data/rbi/openai/models/realtime/output_audio_buffer_clear_event.rbi +46 -0
  157. data/rbi/openai/models/realtime/rate_limits_updated_event.rbi +187 -0
  158. data/rbi/openai/models/realtime/realtime_audio_config.rbi +1004 -0
  159. data/rbi/openai/models/realtime/realtime_client_event.rbi +38 -0
  160. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +147 -0
  161. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +292 -0
  162. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +199 -0
  163. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +188 -0
  164. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +292 -0
  165. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +319 -0
  166. data/rbi/openai/models/realtime/realtime_error.rbi +72 -0
  167. data/rbi/openai/models/realtime/realtime_error_event.rbi +64 -0
  168. data/rbi/openai/models/realtime/realtime_mcp_approval_request.rbi +75 -0
  169. data/rbi/openai/models/realtime/realtime_mcp_approval_response.rbi +75 -0
  170. data/rbi/openai/models/realtime/realtime_mcp_list_tools.rbi +131 -0
  171. data/rbi/openai/models/realtime/realtime_mcp_protocol_error.rbi +40 -0
  172. data/rbi/openai/models/realtime/realtime_mcp_tool_call.rbi +145 -0
  173. data/rbi/openai/models/realtime/realtime_mcp_tool_execution_error.rbi +31 -0
  174. data/rbi/openai/models/realtime/realtime_mcphttp_error.rbi +40 -0
  175. data/rbi/openai/models/realtime/realtime_response.rbi +573 -0
  176. data/rbi/openai/models/realtime/realtime_response_status.rbi +233 -0
  177. data/rbi/openai/models/realtime/realtime_response_usage.rbi +121 -0
  178. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +68 -0
  179. data/rbi/openai/models/realtime/realtime_response_usage_output_token_details.rbi +51 -0
  180. data/rbi/openai/models/realtime/realtime_server_event.rbi +311 -0
  181. data/rbi/openai/models/realtime/realtime_session.rbi +1426 -0
  182. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +560 -0
  183. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1249 -0
  184. data/rbi/openai/models/realtime/realtime_tool_choice_config.rbi +30 -0
  185. data/rbi/openai/models/realtime/realtime_tools_config.rbi +15 -0
  186. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +755 -0
  187. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +95 -0
  188. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +703 -0
  189. data/rbi/openai/models/realtime/realtime_truncation.rbi +117 -0
  190. data/rbi/openai/models/realtime/response_audio_delta_event.rbi +91 -0
  191. data/rbi/openai/models/realtime/response_audio_done_event.rbi +84 -0
  192. data/rbi/openai/models/realtime/response_audio_transcript_delta_event.rbi +91 -0
  193. data/rbi/openai/models/realtime/response_audio_transcript_done_event.rbi +93 -0
  194. data/rbi/openai/models/realtime/response_cancel_event.rbi +63 -0
  195. data/rbi/openai/models/realtime/response_content_part_added_event.rbi +219 -0
  196. data/rbi/openai/models/realtime/response_content_part_done_event.rbi +219 -0
  197. data/rbi/openai/models/realtime/response_create_event.rbi +863 -0
  198. data/rbi/openai/models/realtime/response_created_event.rbi +65 -0
  199. data/rbi/openai/models/realtime/response_done_event.rbi +66 -0
  200. data/rbi/openai/models/realtime/response_function_call_arguments_delta_event.rbi +91 -0
  201. data/rbi/openai/models/realtime/response_function_call_arguments_done_event.rbi +92 -0
  202. data/rbi/openai/models/realtime/response_mcp_call_arguments_delta.rbi +91 -0
  203. data/rbi/openai/models/realtime/response_mcp_call_arguments_done.rbi +83 -0
  204. data/rbi/openai/models/realtime/response_mcp_call_completed.rbi +67 -0
  205. data/rbi/openai/models/realtime/response_mcp_call_failed.rbi +67 -0
  206. data/rbi/openai/models/realtime/response_mcp_call_in_progress.rbi +67 -0
  207. data/rbi/openai/models/realtime/response_output_item_added_event.rbi +111 -0
  208. data/rbi/openai/models/realtime/response_output_item_done_event.rbi +112 -0
  209. data/rbi/openai/models/realtime/response_text_delta_event.rbi +91 -0
  210. data/rbi/openai/models/realtime/response_text_done_event.rbi +92 -0
  211. data/rbi/openai/models/realtime/session_created_event.rbi +64 -0
  212. data/rbi/openai/models/realtime/session_update_event.rbi +77 -0
  213. data/rbi/openai/models/realtime/session_updated_event.rbi +63 -0
  214. data/rbi/openai/models/realtime/transcription_session_created.rbi +653 -0
  215. data/rbi/openai/models/realtime/transcription_session_update.rbi +74 -0
  216. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +657 -0
  217. data/rbi/openai/models/responses/response.rbi +10 -4
  218. data/rbi/openai/models/responses/response_create_params.rbi +16 -10
  219. data/rbi/openai/models/responses/tool.rbi +2 -348
  220. data/rbi/openai/models/responses/web_search_preview_tool.rbi +245 -0
  221. data/rbi/openai/models/responses/web_search_tool.rbi +120 -23
  222. data/rbi/openai/models/webhooks/realtime_call_incoming_webhook_event.rbi +222 -0
  223. data/rbi/openai/models/webhooks/unwrap_webhook_event.rbi +1 -0
  224. data/rbi/openai/models.rbi +2 -0
  225. data/rbi/openai/resources/realtime/client_secrets.rbi +38 -0
  226. data/rbi/openai/resources/realtime.rbi +15 -0
  227. data/rbi/openai/resources/responses.rbi +12 -6
  228. data/rbi/openai/resources/webhooks.rbi +1 -0
  229. data/sig/openai/client.rbs +2 -0
  230. data/sig/openai/models/audio/speech_create_params.rbs +4 -0
  231. data/sig/openai/models/chat/chat_completion_audio_param.rbs +4 -0
  232. data/sig/openai/models/realtime/client_secret_create_params.rbs +89 -0
  233. data/sig/openai/models/realtime/client_secret_create_response.rbs +292 -0
  234. data/sig/openai/models/realtime/conversation_created_event.rbs +70 -0
  235. data/sig/openai/models/realtime/conversation_item.rbs +22 -0
  236. data/sig/openai/models/realtime/conversation_item_added.rbs +37 -0
  237. data/sig/openai/models/realtime/conversation_item_create_event.rbs +41 -0
  238. data/sig/openai/models/realtime/conversation_item_created_event.rbs +37 -0
  239. data/sig/openai/models/realtime/conversation_item_delete_event.rbs +30 -0
  240. data/sig/openai/models/realtime/conversation_item_deleted_event.rbs +32 -0
  241. data/sig/openai/models/realtime/conversation_item_done.rbs +37 -0
  242. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbs +136 -0
  243. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbs +51 -0
  244. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_failed_event.rbs +77 -0
  245. data/sig/openai/models/realtime/conversation_item_input_audio_transcription_segment.rbs +62 -0
  246. data/sig/openai/models/realtime/conversation_item_retrieve_event.rbs +34 -0
  247. data/sig/openai/models/realtime/conversation_item_truncate_event.rbs +44 -0
  248. data/sig/openai/models/realtime/conversation_item_truncated_event.rbs +42 -0
  249. data/sig/openai/models/realtime/conversation_item_with_reference.rbs +207 -0
  250. data/sig/openai/models/realtime/input_audio_buffer_append_event.rbs +30 -0
  251. data/sig/openai/models/realtime/input_audio_buffer_clear_event.rbs +23 -0
  252. data/sig/openai/models/realtime/input_audio_buffer_cleared_event.rbs +24 -0
  253. data/sig/openai/models/realtime/input_audio_buffer_commit_event.rbs +23 -0
  254. data/sig/openai/models/realtime/input_audio_buffer_committed_event.rbs +37 -0
  255. data/sig/openai/models/realtime/input_audio_buffer_speech_started_event.rbs +37 -0
  256. data/sig/openai/models/realtime/input_audio_buffer_speech_stopped_event.rbs +37 -0
  257. data/sig/openai/models/realtime/input_audio_buffer_timeout_triggered.rbs +42 -0
  258. data/sig/openai/models/realtime/log_prob_properties.rbs +28 -0
  259. data/sig/openai/models/realtime/mcp_list_tools_completed.rbs +28 -0
  260. data/sig/openai/models/realtime/mcp_list_tools_failed.rbs +28 -0
  261. data/sig/openai/models/realtime/mcp_list_tools_in_progress.rbs +32 -0
  262. data/sig/openai/models/realtime/output_audio_buffer_clear_event.rbs +23 -0
  263. data/sig/openai/models/realtime/rate_limits_updated_event.rbs +85 -0
  264. data/sig/openai/models/realtime/realtime_audio_config.rbs +354 -0
  265. data/sig/openai/models/realtime/realtime_client_event.rbs +25 -0
  266. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +60 -0
  267. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +117 -0
  268. data/sig/openai/models/realtime/realtime_conversation_item_function_call.rbs +86 -0
  269. data/sig/openai/models/realtime/realtime_conversation_item_function_call_output.rbs +79 -0
  270. data/sig/openai/models/realtime/realtime_conversation_item_system_message.rbs +117 -0
  271. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +132 -0
  272. data/sig/openai/models/realtime/realtime_error.rbs +42 -0
  273. data/sig/openai/models/realtime/realtime_error_event.rbs +32 -0
  274. data/sig/openai/models/realtime/realtime_mcp_approval_request.rbs +42 -0
  275. data/sig/openai/models/realtime/realtime_mcp_approval_response.rbs +42 -0
  276. data/sig/openai/models/realtime/realtime_mcp_list_tools.rbs +71 -0
  277. data/sig/openai/models/realtime/realtime_mcp_protocol_error.rbs +28 -0
  278. data/sig/openai/models/realtime/realtime_mcp_tool_call.rbs +68 -0
  279. data/sig/openai/models/realtime/realtime_mcp_tool_execution_error.rbs +18 -0
  280. data/sig/openai/models/realtime/realtime_mcphttp_error.rbs +24 -0
  281. data/sig/openai/models/realtime/realtime_response.rbs +210 -0
  282. data/sig/openai/models/realtime/realtime_response_status.rbs +90 -0
  283. data/sig/openai/models/realtime/realtime_response_usage.rbs +56 -0
  284. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +34 -0
  285. data/sig/openai/models/realtime/realtime_response_usage_output_token_details.rbs +22 -0
  286. data/sig/openai/models/realtime/realtime_server_event.rbs +168 -0
  287. data/sig/openai/models/realtime/realtime_session.rbs +521 -0
  288. data/sig/openai/models/realtime/realtime_session_create_request.rbs +178 -0
  289. data/sig/openai/models/realtime/realtime_session_create_response.rbs +526 -0
  290. data/sig/openai/models/realtime/realtime_tool_choice_config.rbs +16 -0
  291. data/sig/openai/models/realtime/realtime_tools_config.rbs +10 -0
  292. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +280 -0
  293. data/sig/openai/models/realtime/realtime_tracing_config.rbs +43 -0
  294. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +242 -0
  295. data/sig/openai/models/realtime/realtime_truncation.rbs +53 -0
  296. data/sig/openai/models/realtime/response_audio_delta_event.rbs +52 -0
  297. data/sig/openai/models/realtime/response_audio_done_event.rbs +47 -0
  298. data/sig/openai/models/realtime/response_audio_transcript_delta_event.rbs +52 -0
  299. data/sig/openai/models/realtime/response_audio_transcript_done_event.rbs +52 -0
  300. data/sig/openai/models/realtime/response_cancel_event.rbs +32 -0
  301. data/sig/openai/models/realtime/response_content_part_added_event.rbs +105 -0
  302. data/sig/openai/models/realtime/response_content_part_done_event.rbs +105 -0
  303. data/sig/openai/models/realtime/response_create_event.rbs +281 -0
  304. data/sig/openai/models/realtime/response_created_event.rbs +32 -0
  305. data/sig/openai/models/realtime/response_done_event.rbs +32 -0
  306. data/sig/openai/models/realtime/response_function_call_arguments_delta_event.rbs +52 -0
  307. data/sig/openai/models/realtime/response_function_call_arguments_done_event.rbs +52 -0
  308. data/sig/openai/models/realtime/response_mcp_call_arguments_delta.rbs +52 -0
  309. data/sig/openai/models/realtime/response_mcp_call_arguments_done.rbs +47 -0
  310. data/sig/openai/models/realtime/response_mcp_call_completed.rbs +37 -0
  311. data/sig/openai/models/realtime/response_mcp_call_failed.rbs +37 -0
  312. data/sig/openai/models/realtime/response_mcp_call_in_progress.rbs +37 -0
  313. data/sig/openai/models/realtime/response_output_item_added_event.rbs +42 -0
  314. data/sig/openai/models/realtime/response_output_item_done_event.rbs +42 -0
  315. data/sig/openai/models/realtime/response_text_delta_event.rbs +52 -0
  316. data/sig/openai/models/realtime/response_text_done_event.rbs +52 -0
  317. data/sig/openai/models/realtime/session_created_event.rbs +32 -0
  318. data/sig/openai/models/realtime/session_update_event.rbs +34 -0
  319. data/sig/openai/models/realtime/session_updated_event.rbs +32 -0
  320. data/sig/openai/models/realtime/transcription_session_created.rbs +282 -0
  321. data/sig/openai/models/realtime/transcription_session_update.rbs +34 -0
  322. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +282 -0
  323. data/sig/openai/models/responses/tool.rbs +1 -121
  324. data/sig/openai/models/responses/web_search_preview_tool.rbs +96 -0
  325. data/sig/openai/models/responses/web_search_tool.rbs +39 -10
  326. data/sig/openai/models/webhooks/realtime_call_incoming_webhook_event.rbs +90 -0
  327. data/sig/openai/models/webhooks/unwrap_webhook_event.rbs +1 -0
  328. data/sig/openai/models.rbs +2 -0
  329. data/sig/openai/resources/realtime/client_secrets.rbs +15 -0
  330. data/sig/openai/resources/realtime.rbs +9 -0
  331. data/sig/openai/resources/webhooks.rbs +1 -0
  332. metadata +287 -2
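
The headline change in this release is the new `OpenAI::Realtime` surface: a `realtime` resource on the client (`data/lib/openai/client.rb`), a `client_secrets` sub-resource for minting ephemeral keys, the full set of realtime client/server event models, and a `realtime.call.incoming` webhook event. As orientation before the hunk below, here is a minimal sketch of how the new resource might be called; the `create` method and its `session:` parameter are inferred from the file names above (`client_secrets.rb`, `client_secret_create_params.rb`) and are not confirmed by this diff:

```ruby
require "openai"

# Hypothetical usage of the realtime client-secrets resource added in 0.21.0.
# The method and parameter names below are assumptions based on the file list.
client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])

secret = client.realtime.client_secrets.create(
  session: {
    type: :realtime,
    model: :"gpt-4o-realtime-preview"
  }
)

# The response model (client_secret_create_response.rb) presumably carries an
# ephemeral key that a browser or mobile client can use to open a realtime
# session without exposing the long-lived API key.
pp secret.to_hash
```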
@@ -0,0 +1,1426 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeSession < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(OpenAI::Realtime::RealtimeSession, OpenAI::Internal::AnyHash)
10
+ end
11
+
12
+ # Unique identifier for the session that looks like `sess_1234567890abcdef`.
13
+ sig { returns(T.nilable(String)) }
14
+ attr_reader :id
15
+
16
+ sig { params(id: String).void }
17
+ attr_writer :id
18
+
19
+ # Expiration timestamp for the session, in seconds since epoch.
20
+ sig { returns(T.nilable(Integer)) }
21
+ attr_reader :expires_at
22
+
23
+ sig { params(expires_at: Integer).void }
24
+ attr_writer :expires_at
25
+
26
+ # Additional fields to include in server outputs.
27
+ #
28
+ # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
29
+ # transcription.
30
+ sig do
31
+ returns(
32
+ T.nilable(
33
+ T::Array[OpenAI::Realtime::RealtimeSession::Include::OrSymbol]
34
+ )
35
+ )
36
+ end
37
+ attr_accessor :include
38
+
39
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
40
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
41
+ # (mono), and little-endian byte order.
42
+ sig do
43
+ returns(
44
+ T.nilable(
45
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::OrSymbol
46
+ )
47
+ )
48
+ end
49
+ attr_reader :input_audio_format
50
+
51
+ sig do
52
+ params(
53
+ input_audio_format:
54
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::OrSymbol
55
+ ).void
56
+ end
57
+ attr_writer :input_audio_format
58
+
59
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
60
+ # off. Noise reduction filters audio added to the input audio buffer before it is
61
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
62
+ # detection accuracy (reducing false positives) and model performance by improving
63
+ # perception of the input audio.
64
+ sig do
65
+ returns(
66
+ T.nilable(
67
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction
68
+ )
69
+ )
70
+ end
71
+ attr_reader :input_audio_noise_reduction
72
+
73
+ sig do
74
+ params(
75
+ input_audio_noise_reduction:
76
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::OrHash
77
+ ).void
78
+ end
79
+ attr_writer :input_audio_noise_reduction
80
+
81
+ # Configuration for input audio transcription, defaults to off and can be set to
82
+ # `null` to turn off once on. Input audio transcription is not native to the
83
+ # model, since the model consumes audio directly. Transcription runs
84
+ # asynchronously through
85
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
86
+ # and should be treated as guidance of input audio content rather than precisely
87
+ # what the model heard. The client can optionally set the language and prompt for
88
+ # transcription, these offer additional guidance to the transcription service.
89
+ sig do
90
+ returns(
91
+ T.nilable(
92
+ OpenAI::Realtime::RealtimeSession::InputAudioTranscription
93
+ )
94
+ )
95
+ end
96
+ attr_reader :input_audio_transcription
97
+
98
+ sig do
99
+ params(
100
+ input_audio_transcription:
101
+ T.nilable(
102
+ OpenAI::Realtime::RealtimeSession::InputAudioTranscription::OrHash
103
+ )
104
+ ).void
105
+ end
106
+ attr_writer :input_audio_transcription
107
+
108
+ # The default system instructions (i.e. system message) prepended to model calls.
109
+ # This field allows the client to guide the model on desired responses. The model
110
+ # can be instructed on response content and format, (e.g. "be extremely succinct",
111
+ # "act friendly", "here are examples of good responses") and on audio behavior
112
+ # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
113
+ # instructions are not guaranteed to be followed by the model, but they provide
114
+ # guidance to the model on the desired behavior.
115
+ #
116
+ # Note that the server sets default instructions which will be used if this field
117
+ # is not set and are visible in the `session.created` event at the start of the
118
+ # session.
119
+ sig { returns(T.nilable(String)) }
120
+ attr_reader :instructions
121
+
122
+ sig { params(instructions: String).void }
123
+ attr_writer :instructions
124
+
125
+ # Maximum number of output tokens for a single assistant response, inclusive of
126
+ # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
127
+ # `inf` for the maximum available tokens for a given model. Defaults to `inf`.
128
+ sig { returns(T.nilable(T.any(Integer, Symbol))) }
129
+ attr_reader :max_response_output_tokens
130
+
131
+ sig { params(max_response_output_tokens: T.any(Integer, Symbol)).void }
132
+ attr_writer :max_response_output_tokens
133
+
134
+ # The set of modalities the model can respond with. To disable audio, set this to
135
+ # ["text"].
136
+ sig do
137
+ returns(
138
+ T.nilable(
139
+ T::Array[OpenAI::Realtime::RealtimeSession::Modality::OrSymbol]
140
+ )
141
+ )
142
+ end
143
+ attr_reader :modalities
144
+
145
+ sig do
146
+ params(
147
+ modalities:
148
+ T::Array[OpenAI::Realtime::RealtimeSession::Modality::OrSymbol]
149
+ ).void
150
+ end
151
+ attr_writer :modalities
152
+
153
+ # The Realtime model used for this session.
154
+ sig do
155
+ returns(T.nilable(OpenAI::Realtime::RealtimeSession::Model::OrSymbol))
156
+ end
157
+ attr_reader :model
158
+
159
+ sig do
160
+ params(model: OpenAI::Realtime::RealtimeSession::Model::OrSymbol).void
161
+ end
162
+ attr_writer :model
163
+
164
+ # The object type. Always `realtime.session`.
165
+ sig do
166
+ returns(
167
+ T.nilable(OpenAI::Realtime::RealtimeSession::Object::OrSymbol)
168
+ )
169
+ end
170
+ attr_reader :object
171
+
172
+ sig do
173
+ params(
174
+ object: OpenAI::Realtime::RealtimeSession::Object::OrSymbol
175
+ ).void
176
+ end
177
+ attr_writer :object
178
+
179
+ # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
180
+ # For `pcm16`, output audio is sampled at a rate of 24kHz.
181
+ sig do
182
+ returns(
183
+ T.nilable(
184
+ OpenAI::Realtime::RealtimeSession::OutputAudioFormat::OrSymbol
185
+ )
186
+ )
187
+ end
188
+ attr_reader :output_audio_format
189
+
190
+ sig do
191
+ params(
192
+ output_audio_format:
193
+ OpenAI::Realtime::RealtimeSession::OutputAudioFormat::OrSymbol
194
+ ).void
195
+ end
196
+ attr_writer :output_audio_format
197
+
198
+ # Reference to a prompt template and its variables.
199
+ # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
200
+ sig { returns(T.nilable(OpenAI::Responses::ResponsePrompt)) }
201
+ attr_reader :prompt
202
+
203
+ sig do
204
+ params(
205
+ prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash)
206
+ ).void
207
+ end
208
+ attr_writer :prompt
209
+
210
+ # The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
211
+ # minimum speed. 1.5 is the maximum speed. This value can only be changed in
212
+ # between model turns, not while a response is in progress.
213
+ sig { returns(T.nilable(Float)) }
214
+ attr_reader :speed
215
+
216
+ sig { params(speed: Float).void }
217
+ attr_writer :speed
218
+
219
+ # Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
220
+ # temperature of 0.8 is highly recommended for best performance.
221
+ sig { returns(T.nilable(Float)) }
222
+ attr_reader :temperature
223
+
224
+ sig { params(temperature: Float).void }
225
+ attr_writer :temperature
226
+
227
+ # How the model chooses tools. Options are `auto`, `none`, `required`, or specify
228
+ # a function.
229
+ sig { returns(T.nilable(String)) }
230
+ attr_reader :tool_choice
231
+
232
+ sig { params(tool_choice: String).void }
233
+ attr_writer :tool_choice
234
+
235
+ # Tools (functions) available to the model.
236
+ sig do
237
+ returns(T.nilable(T::Array[OpenAI::Realtime::RealtimeSession::Tool]))
238
+ end
239
+ attr_reader :tools
240
+
241
+ sig do
242
+ params(
243
+ tools: T::Array[OpenAI::Realtime::RealtimeSession::Tool::OrHash]
244
+ ).void
245
+ end
246
+ attr_writer :tools
247
+
248
+ # Configuration options for tracing. Set to null to disable tracing. Once tracing
249
+ # is enabled for a session, the configuration cannot be modified.
250
+ #
251
+ # `auto` will create a trace for the session with default values for the workflow
252
+ # name, group id, and metadata.
253
+ sig do
254
+ returns(
255
+ T.nilable(
256
+ T.any(
257
+ Symbol,
258
+ OpenAI::Realtime::RealtimeSession::Tracing::TracingConfiguration
259
+ )
260
+ )
261
+ )
262
+ end
263
+ attr_accessor :tracing
264
+
265
+ # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
266
+ # set to `null` to turn off, in which case the client must manually trigger model
267
+ # response. Server VAD means that the model will detect the start and end of
268
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
269
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
270
+ # semantically estimate whether the user has finished speaking, then dynamically
271
+ # sets a timeout based on this probability. For example, if user audio trails off
272
+ # with "uhhm", the model will score a low probability of turn end and wait longer
273
+ # for the user to continue speaking. This can be useful for more natural
274
+ # conversations, but may have a higher latency.
275
+ sig do
276
+ returns(T.nilable(OpenAI::Realtime::RealtimeSession::TurnDetection))
277
+ end
278
+ attr_reader :turn_detection
279
+
280
+ sig do
281
+ params(
282
+ turn_detection:
283
+ T.nilable(
284
+ OpenAI::Realtime::RealtimeSession::TurnDetection::OrHash
285
+ )
286
+ ).void
287
+ end
288
+ attr_writer :turn_detection
289
+
290
+ # The voice the model uses to respond. Voice cannot be changed during the session
291
+ # once the model has responded with audio at least once. Current voice options are
292
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
293
+ sig do
294
+ returns(
295
+ T.nilable(
296
+ T.any(String, OpenAI::Realtime::RealtimeSession::Voice::OrSymbol)
297
+ )
298
+ )
299
+ end
300
+ attr_reader :voice
301
+
302
+ sig do
303
+ params(
304
+ voice:
305
+ T.any(String, OpenAI::Realtime::RealtimeSession::Voice::OrSymbol)
306
+ ).void
307
+ end
308
+ attr_writer :voice
309
+
310
+ # Realtime session object.
311
+ sig do
312
+ params(
313
+ id: String,
314
+ expires_at: Integer,
315
+ include:
316
+ T.nilable(
317
+ T::Array[OpenAI::Realtime::RealtimeSession::Include::OrSymbol]
318
+ ),
319
+ input_audio_format:
320
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::OrSymbol,
321
+ input_audio_noise_reduction:
322
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::OrHash,
323
+ input_audio_transcription:
324
+ T.nilable(
325
+ OpenAI::Realtime::RealtimeSession::InputAudioTranscription::OrHash
326
+ ),
327
+ instructions: String,
328
+ max_response_output_tokens: T.any(Integer, Symbol),
329
+ modalities:
330
+ T::Array[OpenAI::Realtime::RealtimeSession::Modality::OrSymbol],
331
+ model: OpenAI::Realtime::RealtimeSession::Model::OrSymbol,
332
+ object: OpenAI::Realtime::RealtimeSession::Object::OrSymbol,
333
+ output_audio_format:
334
+ OpenAI::Realtime::RealtimeSession::OutputAudioFormat::OrSymbol,
335
+ prompt: T.nilable(OpenAI::Responses::ResponsePrompt::OrHash),
336
+ speed: Float,
337
+ temperature: Float,
338
+ tool_choice: String,
339
+ tools: T::Array[OpenAI::Realtime::RealtimeSession::Tool::OrHash],
340
+ tracing:
341
+ T.nilable(
342
+ T.any(
343
+ Symbol,
344
+ OpenAI::Realtime::RealtimeSession::Tracing::TracingConfiguration::OrHash
345
+ )
346
+ ),
347
+ turn_detection:
348
+ T.nilable(
349
+ OpenAI::Realtime::RealtimeSession::TurnDetection::OrHash
350
+ ),
351
+ voice:
352
+ T.any(String, OpenAI::Realtime::RealtimeSession::Voice::OrSymbol)
353
+ ).returns(T.attached_class)
354
+ end
355
+ def self.new(
356
+ # Unique identifier for the session that looks like `sess_1234567890abcdef`.
357
+ id: nil,
358
+ # Expiration timestamp for the session, in seconds since epoch.
359
+ expires_at: nil,
360
+ # Additional fields to include in server outputs.
361
+ #
362
+ # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
363
+ # transcription.
364
+ include: nil,
365
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
366
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
367
+ # (mono), and little-endian byte order.
368
+ input_audio_format: nil,
369
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
370
+ # off. Noise reduction filters audio added to the input audio buffer before it is
371
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
372
+ # detection accuracy (reducing false positives) and model performance by improving
373
+ # perception of the input audio.
374
+ input_audio_noise_reduction: nil,
375
+ # Configuration for input audio transcription, defaults to off and can be set to
376
+ # `null` to turn off once on. Input audio transcription is not native to the
377
+ # model, since the model consumes audio directly. Transcription runs
378
+ # asynchronously through
379
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
380
+ # and should be treated as guidance of input audio content rather than precisely
381
+ # what the model heard. The client can optionally set the language and prompt for
382
+ # transcription, these offer additional guidance to the transcription service.
383
+ input_audio_transcription: nil,
384
+ # The default system instructions (i.e. system message) prepended to model calls.
385
+ # This field allows the client to guide the model on desired responses. The model
386
+ # can be instructed on response content and format, (e.g. "be extremely succinct",
387
+ # "act friendly", "here are examples of good responses") and on audio behavior
388
+ # (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
389
+ # instructions are not guaranteed to be followed by the model, but they provide
390
+ # guidance to the model on the desired behavior.
391
+ #
392
+ # Note that the server sets default instructions which will be used if this field
393
+ # is not set and are visible in the `session.created` event at the start of the
394
+ # session.
395
+ instructions: nil,
396
+ # Maximum number of output tokens for a single assistant response, inclusive of
397
+ # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
398
+ # `inf` for the maximum available tokens for a given model. Defaults to `inf`.
399
+ max_response_output_tokens: nil,
400
+ # The set of modalities the model can respond with. To disable audio, set this to
401
+ # ["text"].
402
+ modalities: nil,
403
+ # The Realtime model used for this session.
404
+ model: nil,
405
+ # The object type. Always `realtime.session`.
406
+ object: nil,
407
+ # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
408
+ # For `pcm16`, output audio is sampled at a rate of 24kHz.
409
+ output_audio_format: nil,
410
+ # Reference to a prompt template and its variables.
411
+ # [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
412
+ prompt: nil,
413
+ # The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the
414
+ # minimum speed. 1.5 is the maximum speed. This value can only be changed in
415
+ # between model turns, not while a response is in progress.
416
+ speed: nil,
417
+ # Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
418
+ # temperature of 0.8 is highly recommended for best performance.
419
+ temperature: nil,
420
+ # How the model chooses tools. Options are `auto`, `none`, `required`, or specify
421
+ # a function.
422
+ tool_choice: nil,
423
+ # Tools (functions) available to the model.
424
+ tools: nil,
425
+ # Configuration options for tracing. Set to null to disable tracing. Once tracing
426
+ # is enabled for a session, the configuration cannot be modified.
427
+ #
428
+ # `auto` will create a trace for the session with default values for the workflow
429
+ # name, group id, and metadata.
430
+ tracing: nil,
431
+ # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
432
+ # set to `null` to turn off, in which case the client must manually trigger model
433
+ # response. Server VAD means that the model will detect the start and end of
434
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
435
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
436
+ # semantically estimate whether the user has finished speaking, then dynamically
437
+ # sets a timeout based on this probability. For example, if user audio trails off
438
+ # with "uhhm", the model will score a low probability of turn end and wait longer
439
+ # for the user to continue speaking. This can be useful for more natural
440
+ # conversations, but may have a higher latency.
441
+ turn_detection: nil,
442
+ # The voice the model uses to respond. Voice cannot be changed during the session
443
+ # once the model has responded with audio at least once. Current voice options are
444
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
445
+ voice: nil
446
+ )
447
+ end
448
+
449
+ sig do
450
+ override.returns(
451
+ {
452
+ id: String,
453
+ expires_at: Integer,
454
+ include:
455
+ T.nilable(
456
+ T::Array[OpenAI::Realtime::RealtimeSession::Include::OrSymbol]
457
+ ),
458
+ input_audio_format:
459
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::OrSymbol,
460
+ input_audio_noise_reduction:
461
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction,
462
+ input_audio_transcription:
463
+ T.nilable(
464
+ OpenAI::Realtime::RealtimeSession::InputAudioTranscription
465
+ ),
466
+ instructions: String,
467
+ max_response_output_tokens: T.any(Integer, Symbol),
468
+ modalities:
469
+ T::Array[OpenAI::Realtime::RealtimeSession::Modality::OrSymbol],
470
+ model: OpenAI::Realtime::RealtimeSession::Model::OrSymbol,
471
+ object: OpenAI::Realtime::RealtimeSession::Object::OrSymbol,
472
+ output_audio_format:
473
+ OpenAI::Realtime::RealtimeSession::OutputAudioFormat::OrSymbol,
474
+ prompt: T.nilable(OpenAI::Responses::ResponsePrompt),
475
+ speed: Float,
476
+ temperature: Float,
477
+ tool_choice: String,
478
+ tools: T::Array[OpenAI::Realtime::RealtimeSession::Tool],
479
+ tracing:
480
+ T.nilable(
481
+ T.any(
482
+ Symbol,
483
+ OpenAI::Realtime::RealtimeSession::Tracing::TracingConfiguration
484
+ )
485
+ ),
486
+ turn_detection:
487
+ T.nilable(OpenAI::Realtime::RealtimeSession::TurnDetection),
488
+ voice:
489
+ T.any(
490
+ String,
491
+ OpenAI::Realtime::RealtimeSession::Voice::OrSymbol
492
+ )
493
+ }
494
+ )
495
+ end
496
+ def to_hash
497
+ end
498
+
499
+ module Include
500
+ extend OpenAI::Internal::Type::Enum
501
+
502
+ TaggedSymbol =
503
+ T.type_alias do
504
+ T.all(Symbol, OpenAI::Realtime::RealtimeSession::Include)
505
+ end
506
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
507
+
508
+ ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS =
509
+ T.let(
510
+ :"item.input_audio_transcription.logprobs",
511
+ OpenAI::Realtime::RealtimeSession::Include::TaggedSymbol
512
+ )
513
+
514
+ sig do
515
+ override.returns(
516
+ T::Array[OpenAI::Realtime::RealtimeSession::Include::TaggedSymbol]
517
+ )
518
+ end
519
+ def self.values
520
+ end
521
+ end
522
+
523
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
524
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
525
+ # (mono), and little-endian byte order.
526
+ module InputAudioFormat
527
+ extend OpenAI::Internal::Type::Enum
528
+
529
+ TaggedSymbol =
530
+ T.type_alias do
531
+ T.all(Symbol, OpenAI::Realtime::RealtimeSession::InputAudioFormat)
532
+ end
533
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
534
+
535
+ PCM16 =
536
+ T.let(
537
+ :pcm16,
538
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::TaggedSymbol
539
+ )
540
+ G711_ULAW =
541
+ T.let(
542
+ :g711_ulaw,
543
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::TaggedSymbol
544
+ )
545
+ G711_ALAW =
546
+ T.let(
547
+ :g711_alaw,
548
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::TaggedSymbol
549
+ )
550
+
551
+ sig do
552
+ override.returns(
553
+ T::Array[
554
+ OpenAI::Realtime::RealtimeSession::InputAudioFormat::TaggedSymbol
555
+ ]
556
+ )
557
+ end
558
+ def self.values
559
+ end
560
+ end
561
+
562
+ class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel
563
+ OrHash =
564
+ T.type_alias do
565
+ T.any(
566
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction,
567
+ OpenAI::Internal::AnyHash
568
+ )
569
+ end
570
+
571
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
572
+ # headphones, `far_field` is for far-field microphones such as laptop or
573
+ # conference room microphones.
574
+ sig do
575
+ returns(
576
+ T.nilable(
577
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol
578
+ )
579
+ )
580
+ end
581
+ attr_reader :type
582
+
583
+ sig do
584
+ params(
585
+ type:
586
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol
587
+ ).void
588
+ end
589
+ attr_writer :type
590
+
591
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
592
+ # off. Noise reduction filters audio added to the input audio buffer before it is
593
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
594
+ # detection accuracy (reducing false positives) and model performance by improving
595
+ # perception of the input audio.
596
+ sig do
597
+ params(
598
+ type:
599
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol
600
+ ).returns(T.attached_class)
601
+ end
602
+ def self.new(
603
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
604
+ # headphones, `far_field` is for far-field microphones such as laptop or
605
+ # conference room microphones.
606
+ type: nil
607
+ )
608
+ end
609
+
610
+ sig do
611
+ override.returns(
612
+ {
613
+ type:
614
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::OrSymbol
615
+ }
616
+ )
617
+ end
618
+ def to_hash
619
+ end
620
+
621
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
622
+ # headphones, `far_field` is for far-field microphones such as laptop or
623
+ # conference room microphones.
624
+ module Type
625
+ extend OpenAI::Internal::Type::Enum
626
+
627
+ TaggedSymbol =
628
+ T.type_alias do
629
+ T.all(
630
+ Symbol,
631
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type
632
+ )
633
+ end
634
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
635
+
636
+ NEAR_FIELD =
637
+ T.let(
638
+ :near_field,
639
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::TaggedSymbol
640
+ )
641
+ FAR_FIELD =
642
+ T.let(
643
+ :far_field,
644
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::TaggedSymbol
645
+ )
646
+
647
+ sig do
648
+ override.returns(
649
+ T::Array[
650
+ OpenAI::Realtime::RealtimeSession::InputAudioNoiseReduction::Type::TaggedSymbol
651
+ ]
652
+ )
653
+ end
654
+ def self.values
655
+ end
656
+ end
657
+ end
658
+
659
+ class InputAudioTranscription < OpenAI::Internal::Type::BaseModel
660
+ OrHash =
661
+ T.type_alias do
662
+ T.any(
663
+ OpenAI::Realtime::RealtimeSession::InputAudioTranscription,
664
+ OpenAI::Internal::AnyHash
665
+ )
666
+ end
667
+
668
+ # The language of the input audio. Supplying the input language in
669
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
670
+ # format will improve accuracy and latency.
671
+ sig { returns(T.nilable(String)) }
672
+ attr_reader :language
673
+
674
+ sig { params(language: String).void }
675
+ attr_writer :language
676
+
677
+ # The model to use for transcription, current options are `gpt-4o-transcribe`,
678
+ # `gpt-4o-mini-transcribe`, and `whisper-1`.
679
+ sig { returns(T.nilable(String)) }
680
+ attr_reader :model
681
+
682
+ sig { params(model: String).void }
683
+ attr_writer :model
684
+
685
+ # An optional text to guide the model's style or continue a previous audio
686
+ # segment. For `whisper-1`, the
687
+ # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
688
+ # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
689
+ # "expect words related to technology".
690
+ sig { returns(T.nilable(String)) }
691
+ attr_reader :prompt
692
+
693
+ sig { params(prompt: String).void }
694
+ attr_writer :prompt
695
+
696
+ # Configuration for input audio transcription, defaults to off and can be set to
697
+ # `null` to turn off once on. Input audio transcription is not native to the
698
+ # model, since the model consumes audio directly. Transcription runs
699
+ # asynchronously through
700
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
701
+ # and should be treated as guidance of input audio content rather than precisely
702
+ # what the model heard. The client can optionally set the language and prompt for
703
+ # transcription, these offer additional guidance to the transcription service.
704
+ sig do
705
+ params(language: String, model: String, prompt: String).returns(
706
+ T.attached_class
707
+ )
708
+ end
709
+ def self.new(
710
+ # The language of the input audio. Supplying the input language in
711
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
712
+ # format will improve accuracy and latency.
713
+ language: nil,
714
+ # The model to use for transcription, current options are `gpt-4o-transcribe`,
715
+ # `gpt-4o-mini-transcribe`, and `whisper-1`.
716
+ model: nil,
717
+ # An optional text to guide the model's style or continue a previous audio
718
+ # segment. For `whisper-1`, the
719
+ # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
720
+ # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
721
+ # "expect words related to technology".
722
+ prompt: nil
723
+ )
724
+ end
725
+
726
+ sig do
727
+ override.returns(
728
+ { language: String, model: String, prompt: String }
729
+ )
730
+ end
731
+ def to_hash
732
+ end
733
+ end
734
+
735
+ # Maximum number of output tokens for a single assistant response, inclusive of
736
+ # tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
737
+ # `inf` for the maximum available tokens for a given model. Defaults to `inf`.
738
+ module MaxResponseOutputTokens
739
+ extend OpenAI::Internal::Type::Union
740
+
741
+ Variants = T.type_alias { T.any(Integer, Symbol) }
742
+
743
+ sig do
744
+ override.returns(
745
+ T::Array[
746
+ OpenAI::Realtime::RealtimeSession::MaxResponseOutputTokens::Variants
747
+ ]
748
+ )
749
+ end
750
+ def self.variants
751
+ end
752
+ end
753
+
754
+ module Modality
755
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(Symbol, OpenAI::Realtime::RealtimeSession::Modality)
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          TEXT =
+            T.let(
+              :text,
+              OpenAI::Realtime::RealtimeSession::Modality::TaggedSymbol
+            )
+          AUDIO =
+            T.let(
+              :audio,
+              OpenAI::Realtime::RealtimeSession::Modality::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                OpenAI::Realtime::RealtimeSession::Modality::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+
+        # The Realtime model used for this session.
+        module Model
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(Symbol, OpenAI::Realtime::RealtimeSession::Model)
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          GPT_4O_REALTIME_PREVIEW =
+            T.let(
+              :"gpt-4o-realtime-preview",
+              OpenAI::Realtime::RealtimeSession::Model::TaggedSymbol
+            )
+          GPT_4O_REALTIME_PREVIEW_2024_10_01 =
+            T.let(
+              :"gpt-4o-realtime-preview-2024-10-01",
+              OpenAI::Realtime::RealtimeSession::Model::TaggedSymbol
+            )
+          GPT_4O_REALTIME_PREVIEW_2024_12_17 =
+            T.let(
+              :"gpt-4o-realtime-preview-2024-12-17",
+              OpenAI::Realtime::RealtimeSession::Model::TaggedSymbol
+            )
+          GPT_4O_REALTIME_PREVIEW_2025_06_03 =
+            T.let(
+              :"gpt-4o-realtime-preview-2025-06-03",
+              OpenAI::Realtime::RealtimeSession::Model::TaggedSymbol
+            )
+          GPT_4O_MINI_REALTIME_PREVIEW =
+            T.let(
+              :"gpt-4o-mini-realtime-preview",
+              OpenAI::Realtime::RealtimeSession::Model::TaggedSymbol
+            )
+          GPT_4O_MINI_REALTIME_PREVIEW_2024_12_17 =
+            T.let(
+              :"gpt-4o-mini-realtime-preview-2024-12-17",
+              OpenAI::Realtime::RealtimeSession::Model::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[OpenAI::Realtime::RealtimeSession::Model::TaggedSymbol]
+            )
+          end
+          def self.values
+          end
+        end
+
+        # The object type. Always `realtime.session`.
+        module Object
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(Symbol, OpenAI::Realtime::RealtimeSession::Object)
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          REALTIME_SESSION =
+            T.let(
+              :"realtime.session",
+              OpenAI::Realtime::RealtimeSession::Object::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[OpenAI::Realtime::RealtimeSession::Object::TaggedSymbol]
+            )
+          end
+          def self.values
+          end
+        end
+
+        # The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+        # For `pcm16`, output audio is sampled at a rate of 24kHz.
+        module OutputAudioFormat
+          extend OpenAI::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                OpenAI::Realtime::RealtimeSession::OutputAudioFormat
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          PCM16 =
+            T.let(
+              :pcm16,
+              OpenAI::Realtime::RealtimeSession::OutputAudioFormat::TaggedSymbol
+            )
+          G711_ULAW =
+            T.let(
+              :g711_ulaw,
+              OpenAI::Realtime::RealtimeSession::OutputAudioFormat::TaggedSymbol
+            )
+          G711_ALAW =
+            T.let(
+              :g711_alaw,
+              OpenAI::Realtime::RealtimeSession::OutputAudioFormat::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                OpenAI::Realtime::RealtimeSession::OutputAudioFormat::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+
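Taken together, these enums pin down a session's modalities, model, object tag, and audio output format. A minimal usage sketch, assuming only the generated constants and `values` helpers declared above (variable names are illustrative):

require "openai"

# The permitted output formats; per the declarations above this should
# yield :pcm16, :g711_ulaw, and :g711_alaw.
formats = OpenAI::Realtime::RealtimeSession::OutputAudioFormat.values

# Reference a model through its generated constant rather than a bare symbol.
model = OpenAI::Realtime::RealtimeSession::Model::GPT_4O_REALTIME_PREVIEW_2025_06_03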
+        class Tool < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeSession::Tool,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # The description of the function, including guidance on when and how to call it,
+          # and guidance about what to tell the user when calling (if anything).
+          sig { returns(T.nilable(String)) }
+          attr_reader :description
+
+          sig { params(description: String).void }
+          attr_writer :description
+
+          # The name of the function.
+          sig { returns(T.nilable(String)) }
+          attr_reader :name
+
+          sig { params(name: String).void }
+          attr_writer :name
+
+          # Parameters of the function in JSON Schema.
+          sig { returns(T.nilable(T.anything)) }
+          attr_reader :parameters
+
+          sig { params(parameters: T.anything).void }
+          attr_writer :parameters
+
+          # The type of the tool, i.e. `function`.
+          sig do
+            returns(
+              T.nilable(OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol)
+            )
+          end
+          attr_reader :type
+
+          sig do
+            params(
+              type: OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol
+            ).void
+          end
+          attr_writer :type
+
+          sig do
+            params(
+              description: String,
+              name: String,
+              parameters: T.anything,
+              type: OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # The description of the function, including guidance on when and how to call it,
+            # and guidance about what to tell the user when calling (if anything).
+            description: nil,
+            # The name of the function.
+            name: nil,
+            # Parameters of the function in JSON Schema.
+            parameters: nil,
+            # The type of the tool, i.e. `function`.
+            type: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                description: String,
+                name: String,
+                parameters: T.anything,
+                type: OpenAI::Realtime::RealtimeSession::Tool::Type::OrSymbol
+              }
+            )
+          end
+          def to_hash
+          end
+
+          # The type of the tool, i.e. `function`.
+          module Type
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(Symbol, OpenAI::Realtime::RealtimeSession::Tool::Type)
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            FUNCTION =
+              T.let(
+                :function,
+                OpenAI::Realtime::RealtimeSession::Tool::Type::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeSession::Tool::Type::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+        end
+
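Since `Tool.new` accepts `description`, `name`, `parameters`, and `type` (all optional, with `parameters` typed as raw JSON Schema via `T.anything`), a function tool can be assembled directly. A minimal sketch based on that signature; the function name and schema here are hypothetical:

tool = OpenAI::Realtime::RealtimeSession::Tool.new(
  type: :function,
  name: "get_weather", # hypothetical function name
  description: "Looks up current weather; call when the user asks about conditions.",
  parameters: {
    type: "object",
    properties: { city: { type: "string" } },
    required: ["city"]
  }
)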
+        # Configuration options for tracing. Set to null to disable tracing. Once tracing
+        # is enabled for a session, the configuration cannot be modified.
+        #
+        # `auto` will create a trace for the session with default values for the workflow
+        # name, group id, and metadata.
+        module Tracing
+          extend OpenAI::Internal::Type::Union
+
+          Variants =
+            T.type_alias do
+              T.any(
+                Symbol,
+                OpenAI::Realtime::RealtimeSession::Tracing::TracingConfiguration
+              )
+            end
+
+          class TracingConfiguration < OpenAI::Internal::Type::BaseModel
+            OrHash =
+              T.type_alias do
+                T.any(
+                  OpenAI::Realtime::RealtimeSession::Tracing::TracingConfiguration,
+                  OpenAI::Internal::AnyHash
+                )
+              end
+
+            # The group id to attach to this trace to enable filtering and grouping in the
+            # traces dashboard.
+            sig { returns(T.nilable(String)) }
+            attr_reader :group_id
+
+            sig { params(group_id: String).void }
+            attr_writer :group_id
+
+            # The arbitrary metadata to attach to this trace to enable filtering in the traces
+            # dashboard.
+            sig { returns(T.nilable(T.anything)) }
+            attr_reader :metadata
+
+            sig { params(metadata: T.anything).void }
+            attr_writer :metadata
+
+            # The name of the workflow to attach to this trace. This is used to name the trace
+            # in the traces dashboard.
+            sig { returns(T.nilable(String)) }
+            attr_reader :workflow_name
+
+            sig { params(workflow_name: String).void }
+            attr_writer :workflow_name
+
+            # Granular configuration for tracing.
+            sig do
+              params(
+                group_id: String,
+                metadata: T.anything,
+                workflow_name: String
+              ).returns(T.attached_class)
+            end
+            def self.new(
+              # The group id to attach to this trace to enable filtering and grouping in the
+              # traces dashboard.
+              group_id: nil,
+              # The arbitrary metadata to attach to this trace to enable filtering in the traces
+              # dashboard.
+              metadata: nil,
+              # The name of the workflow to attach to this trace. This is used to name the trace
+              # in the traces dashboard.
+              workflow_name: nil
+            )
+            end
+
+            sig do
+              override.returns(
+                {
+                  group_id: String,
+                  metadata: T.anything,
+                  workflow_name: String
+                }
+              )
+            end
+            def to_hash
+            end
+          end
+
+          sig do
+            override.returns(
+              T::Array[OpenAI::Realtime::RealtimeSession::Tracing::Variants]
+            )
+          end
+          def self.variants
+          end
+        end
+
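`Tracing` is a union type: the bare symbol `:auto` requests a trace with default workflow name, group id, and metadata, while `TracingConfiguration` sets them explicitly. A brief sketch of both variants; the workflow, group, and metadata values are hypothetical:

# Default tracing with server-chosen workflow name, group id, and metadata.
tracing = :auto

# Granular configuration, mirroring the TracingConfiguration signature above.
tracing = OpenAI::Realtime::RealtimeSession::Tracing::TracingConfiguration.new(
  workflow_name: "support-bot",  # hypothetical workflow name
  group_id: "customer-1234",     # hypothetical group id
  metadata: { env: "staging" }   # arbitrary filterable metadata
)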
+        class TurnDetection < OpenAI::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                OpenAI::Realtime::RealtimeSession::TurnDetection,
+                OpenAI::Internal::AnyHash
+              )
+            end
+
+          # Whether or not to automatically generate a response when a VAD stop event
+          # occurs.
+          sig { returns(T.nilable(T::Boolean)) }
+          attr_reader :create_response
+
+          sig { params(create_response: T::Boolean).void }
+          attr_writer :create_response
+
+          # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+          # will wait longer for the user to continue speaking, `high` will respond more
+          # quickly. `auto` is the default and is equivalent to `medium`.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::OrSymbol
+              )
+            )
+          end
+          attr_reader :eagerness
+
+          sig do
+            params(
+              eagerness:
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::OrSymbol
+            ).void
+          end
+          attr_writer :eagerness
+
+          # Optional idle timeout after which turn detection will auto-timeout when no
+          # additional audio is received.
+          sig { returns(T.nilable(Integer)) }
+          attr_accessor :idle_timeout_ms
+
+          # Whether or not to automatically interrupt any ongoing response with output to
+          # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+          # occurs.
+          sig { returns(T.nilable(T::Boolean)) }
+          attr_reader :interrupt_response
+
+          sig { params(interrupt_response: T::Boolean).void }
+          attr_writer :interrupt_response
+
+          # Used only for `server_vad` mode. Amount of audio to include before the VAD
+          # detected speech (in milliseconds). Defaults to 300ms.
+          sig { returns(T.nilable(Integer)) }
+          attr_reader :prefix_padding_ms
+
+          sig { params(prefix_padding_ms: Integer).void }
+          attr_writer :prefix_padding_ms
+
+          # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+          # milliseconds). Defaults to 500ms. With shorter values the model will respond
+          # more quickly, but may jump in on short pauses from the user.
+          sig { returns(T.nilable(Integer)) }
+          attr_reader :silence_duration_ms
+
+          sig { params(silence_duration_ms: Integer).void }
+          attr_writer :silence_duration_ms
+
+          # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0),
+          # which defaults to 0.5. A higher threshold will require louder audio to
+          # activate the model, and thus might perform better in noisy environments.
+          sig { returns(T.nilable(Float)) }
+          attr_reader :threshold
+
+          sig { params(threshold: Float).void }
+          attr_writer :threshold
+
+          # Type of turn detection.
+          sig do
+            returns(
+              T.nilable(
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Type::OrSymbol
+              )
+            )
+          end
+          attr_reader :type
+
+          sig do
+            params(
+              type:
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Type::OrSymbol
+            ).void
+          end
+          attr_writer :type
+
+          # Configuration for turn detection, either Server VAD or Semantic VAD. This can
+          # be set to `null` to turn off, in which case the client must manually trigger a
+          # model response. Server VAD means that the model will detect the start and end of
+          # speech based on audio volume and respond at the end of user speech. Semantic VAD
+          # is more advanced and uses a turn detection model (in conjunction with VAD) to
+          # semantically estimate whether the user has finished speaking, then dynamically
+          # sets a timeout based on this probability. For example, if user audio trails off
+          # with "uhhm", the model will score a low probability of turn end and wait longer
+          # for the user to continue speaking. This can be useful for more natural
+          # conversations, but may have a higher latency.
+          sig do
+            params(
+              create_response: T::Boolean,
+              eagerness:
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::OrSymbol,
+              idle_timeout_ms: T.nilable(Integer),
+              interrupt_response: T::Boolean,
+              prefix_padding_ms: Integer,
+              silence_duration_ms: Integer,
+              threshold: Float,
+              type:
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Type::OrSymbol
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # Whether or not to automatically generate a response when a VAD stop event
+            # occurs.
+            create_response: nil,
+            # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+            # will wait longer for the user to continue speaking, `high` will respond more
+            # quickly. `auto` is the default and is equivalent to `medium`.
+            eagerness: nil,
+            # Optional idle timeout after which turn detection will auto-timeout when no
+            # additional audio is received.
+            idle_timeout_ms: nil,
+            # Whether or not to automatically interrupt any ongoing response with output to
+            # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+            # occurs.
+            interrupt_response: nil,
+            # Used only for `server_vad` mode. Amount of audio to include before the VAD
+            # detected speech (in milliseconds). Defaults to 300ms.
+            prefix_padding_ms: nil,
+            # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+            # milliseconds). Defaults to 500ms. With shorter values the model will respond
+            # more quickly, but may jump in on short pauses from the user.
+            silence_duration_ms: nil,
+            # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0),
+            # which defaults to 0.5. A higher threshold will require louder audio to
+            # activate the model, and thus might perform better in noisy environments.
+            threshold: nil,
+            # Type of turn detection.
+            type: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                create_response: T::Boolean,
+                eagerness:
+                  OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::OrSymbol,
+                idle_timeout_ms: T.nilable(Integer),
+                interrupt_response: T::Boolean,
+                prefix_padding_ms: Integer,
+                silence_duration_ms: Integer,
+                threshold: Float,
+                type:
+                  OpenAI::Realtime::RealtimeSession::TurnDetection::Type::OrSymbol
+              }
+            )
+          end
+          def to_hash
+          end
+
+          # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+          # will wait longer for the user to continue speaking, `high` will respond more
+          # quickly. `auto` is the default and is equivalent to `medium`.
+          module Eagerness
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            LOW =
+              T.let(
+                :low,
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::TaggedSymbol
+              )
+            MEDIUM =
+              T.let(
+                :medium,
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::TaggedSymbol
+              )
+            HIGH =
+              T.let(
+                :high,
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::TaggedSymbol
+              )
+            AUTO =
+              T.let(
+                :auto,
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeSession::TurnDetection::Eagerness::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+
+          # Type of turn detection.
+          module Type
+            extend OpenAI::Internal::Type::Enum
+
+            TaggedSymbol =
+              T.type_alias do
+                T.all(
+                  Symbol,
+                  OpenAI::Realtime::RealtimeSession::TurnDetection::Type
+                )
+              end
+            OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+            SERVER_VAD =
+              T.let(
+                :server_vad,
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Type::TaggedSymbol
+              )
+            SEMANTIC_VAD =
+              T.let(
+                :semantic_vad,
+                OpenAI::Realtime::RealtimeSession::TurnDetection::Type::TaggedSymbol
+              )
+
+            sig do
+              override.returns(
+                T::Array[
+                  OpenAI::Realtime::RealtimeSession::TurnDetection::Type::TaggedSymbol
+                ]
+              )
+            end
+            def self.values
+            end
+          end
+        end
+
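The two detection modes take different knobs: `server_vad` reacts to audio volume and silence (`threshold`, `prefix_padding_ms`, `silence_duration_ms`), while `semantic_vad` delegates end-of-turn estimation to a model tuned by `eagerness`. A sketch of both, spelling out the defaults documented above:

# Server VAD, with the documented defaults made explicit.
server_vad = OpenAI::Realtime::RealtimeSession::TurnDetection.new(
  type: :server_vad,
  threshold: 0.5,           # documented default
  prefix_padding_ms: 300,   # documented default
  silence_duration_ms: 500, # documented default
  create_response: true,
  interrupt_response: true
)

# Semantic VAD; low eagerness waits longer before treating the turn as over.
semantic_vad = OpenAI::Realtime::RealtimeSession::TurnDetection.new(
  type: :semantic_vad,
  eagerness: :low
)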
+        # The voice the model uses to respond. Voice cannot be changed during the
+        # session once the model has responded with audio at least once. Current
+        # voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`,
+        # `shimmer`, `verse`, `marin`, and `cedar`.
+        module Voice
+          extend OpenAI::Internal::Type::Union
+
+          Variants =
+            T.type_alias do
+              T.any(
+                String,
+                OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+              )
+            end
+
+          sig do
+            override.returns(
+              T::Array[OpenAI::Realtime::RealtimeSession::Voice::Variants]
+            )
+          end
+          def self.variants
+          end
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(Symbol, OpenAI::Realtime::RealtimeSession::Voice)
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          ALLOY =
+            T.let(
+              :alloy,
+              OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+            )
+          ASH =
+            T.let(:ash, OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol)
+          BALLAD =
+            T.let(
+              :ballad,
+              OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+            )
+          CORAL =
+            T.let(
+              :coral,
+              OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+            )
+          ECHO =
+            T.let(:echo, OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol)
+          SAGE =
+            T.let(:sage, OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol)
+          SHIMMER =
+            T.let(
+              :shimmer,
+              OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+            )
+          VERSE =
+            T.let(
+              :verse,
+              OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+            )
+          MARIN =
+            T.let(
+              :marin,
+              OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+            )
+          CEDAR =
+            T.let(
+              :cedar,
+              OpenAI::Realtime::RealtimeSession::Voice::TaggedSymbol
+            )
+        end
+      end
+    end
+  end
+end
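Because `Voice::Variants` is declared as `T.any(String, ...)`, a voice can be supplied either as one of the generated constants or as a plain string; both forms satisfy the signature:

voice = OpenAI::Realtime::RealtimeSession::Voice::MARIN
# ...or, equivalently for the type checker, an arbitrary string:
voice = "verse"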