openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
# typed: strong

module OpenAI
  module Models
    module Realtime
      class RealtimeAudioConfigInput < OpenAI::Internal::Type::BaseModel
        OrHash =
          T.type_alias do
            T.any(
              OpenAI::Realtime::RealtimeAudioConfigInput,
              OpenAI::Internal::AnyHash
            )
          end

        # The format of the input audio.
        sig do
          returns(
            T.nilable(
              T.any(
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
              )
            )
          )
        end
        attr_reader :format_

        sig do
          params(
            format_:
              T.any(
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
              )
          ).void
        end
        attr_writer :format_

        # Configuration for input audio noise reduction. This can be set to `null` to turn
        # off. Noise reduction filters audio added to the input audio buffer before it is
        # sent to VAD and the model. Filtering the audio can improve VAD and turn
        # detection accuracy (reducing false positives) and model performance by improving
        # perception of the input audio.
        sig do
          returns(
            T.nilable(
              OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction
            )
          )
        end
        attr_reader :noise_reduction

        sig do
          params(
            noise_reduction:
              OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction::OrHash
          ).void
        end
        attr_writer :noise_reduction

        # Configuration for input audio transcription, defaults to off and can be set to
        # `null` to turn off once on. Input audio transcription is not native to the
        # model, since the model consumes audio directly. Transcription runs
        # asynchronously through
        # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
        # and should be treated as guidance of input audio content rather than precisely
        # what the model heard. The client can optionally set the language and prompt for
        # transcription, these offer additional guidance to the transcription service.
        sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
        attr_reader :transcription

        sig do
          params(
            transcription: OpenAI::Realtime::AudioTranscription::OrHash
          ).void
        end
        attr_writer :transcription

        # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
        # set to `null` to turn off, in which case the client must manually trigger model
        # response. Server VAD means that the model will detect the start and end of
        # speech based on audio volume and respond at the end of user speech. Semantic VAD
        # is more advanced and uses a turn detection model (in conjunction with VAD) to
        # semantically estimate whether the user has finished speaking, then dynamically
        # sets a timeout based on this probability. For example, if user audio trails off
        # with "uhhm", the model will score a low probability of turn end and wait longer
        # for the user to continue speaking. This can be useful for more natural
        # conversations, but may have a higher latency.
        sig do
          returns(T.nilable(OpenAI::Realtime::RealtimeAudioInputTurnDetection))
        end
        attr_reader :turn_detection

        sig do
          params(
            turn_detection:
              OpenAI::Realtime::RealtimeAudioInputTurnDetection::OrHash
          ).void
        end
        attr_writer :turn_detection

        sig do
          params(
            format_:
              T.any(
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
              ),
            noise_reduction:
              OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction::OrHash,
            transcription: OpenAI::Realtime::AudioTranscription::OrHash,
            turn_detection:
              OpenAI::Realtime::RealtimeAudioInputTurnDetection::OrHash
          ).returns(T.attached_class)
        end
        def self.new(
          # The format of the input audio.
          format_: nil,
          # Configuration for input audio noise reduction. This can be set to `null` to turn
          # off. Noise reduction filters audio added to the input audio buffer before it is
          # sent to VAD and the model. Filtering the audio can improve VAD and turn
          # detection accuracy (reducing false positives) and model performance by improving
          # perception of the input audio.
          noise_reduction: nil,
          # Configuration for input audio transcription, defaults to off and can be set to
          # `null` to turn off once on. Input audio transcription is not native to the
          # model, since the model consumes audio directly. Transcription runs
          # asynchronously through
          # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
          # and should be treated as guidance of input audio content rather than precisely
          # what the model heard. The client can optionally set the language and prompt for
          # transcription, these offer additional guidance to the transcription service.
          transcription: nil,
          # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
          # set to `null` to turn off, in which case the client must manually trigger model
          # response. Server VAD means that the model will detect the start and end of
          # speech based on audio volume and respond at the end of user speech. Semantic VAD
          # is more advanced and uses a turn detection model (in conjunction with VAD) to
          # semantically estimate whether the user has finished speaking, then dynamically
          # sets a timeout based on this probability. For example, if user audio trails off
          # with "uhhm", the model will score a low probability of turn end and wait longer
          # for the user to continue speaking. This can be useful for more natural
          # conversations, but may have a higher latency.
          turn_detection: nil
        )
        end

        sig do
          override.returns(
            {
              format_:
                T.any(
                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
                ),
              noise_reduction:
                OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction,
              transcription: OpenAI::Realtime::AudioTranscription,
              turn_detection: OpenAI::Realtime::RealtimeAudioInputTurnDetection
            }
          )
        end
        def to_hash
        end

        class NoiseReduction < OpenAI::Internal::Type::BaseModel
          OrHash =
            T.type_alias do
              T.any(
                OpenAI::Realtime::RealtimeAudioConfigInput::NoiseReduction,
                OpenAI::Internal::AnyHash
              )
            end

          # Type of noise reduction. `near_field` is for close-talking microphones such as
          # headphones, `far_field` is for far-field microphones such as laptop or
          # conference room microphones.
          sig do
            returns(T.nilable(OpenAI::Realtime::NoiseReductionType::OrSymbol))
          end
          attr_reader :type

          sig do
            params(type: OpenAI::Realtime::NoiseReductionType::OrSymbol).void
          end
          attr_writer :type

          # Configuration for input audio noise reduction. This can be set to `null` to turn
          # off. Noise reduction filters audio added to the input audio buffer before it is
          # sent to VAD and the model. Filtering the audio can improve VAD and turn
          # detection accuracy (reducing false positives) and model performance by improving
          # perception of the input audio.
          sig do
            params(
              type: OpenAI::Realtime::NoiseReductionType::OrSymbol
            ).returns(T.attached_class)
          end
          def self.new(
            # Type of noise reduction. `near_field` is for close-talking microphones such as
            # headphones, `far_field` is for far-field microphones such as laptop or
            # conference room microphones.
            type: nil
          )
          end

          sig do
            override.returns(
              { type: OpenAI::Realtime::NoiseReductionType::OrSymbol }
            )
          end
          def to_hash
          end
        end
      end
    end
  end
end
# typed: strong

module OpenAI
  module Models
    module Realtime
      class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel
        OrHash =
          T.type_alias do
            T.any(
              OpenAI::Realtime::RealtimeAudioConfigOutput,
              OpenAI::Internal::AnyHash
            )
          end

        # The format of the output audio.
        sig do
          returns(
            T.nilable(
              T.any(
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
              )
            )
          )
        end
        attr_reader :format_

        sig do
          params(
            format_:
              T.any(
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
              )
          ).void
        end
        attr_writer :format_

        # The speed of the model's spoken response as a multiple of the original speed.
        # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
        # This value can only be changed in between model turns, not while a response is
        # in progress.
        #
        # This parameter is a post-processing adjustment to the audio after it is
        # generated, it's also possible to prompt the model to speak faster or slower.
        sig { returns(T.nilable(Float)) }
        attr_reader :speed

        sig { params(speed: Float).void }
        attr_writer :speed

        # The voice the model uses to respond. Voice cannot be changed during the session
        # once the model has responded with audio at least once. Current voice options are
        # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
        # and `cedar`. We recommend `marin` and `cedar` for best quality.
        sig do
          returns(
            T.nilable(
              T.any(
                String,
                OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol
              )
            )
          )
        end
        attr_reader :voice

        sig do
          params(
            voice:
              T.any(
                String,
                OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol
              )
          ).void
        end
        attr_writer :voice

        sig do
          params(
            format_:
              T.any(
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
                OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
              ),
            speed: Float,
            voice:
              T.any(
                String,
                OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol
              )
          ).returns(T.attached_class)
        end
        def self.new(
          # The format of the output audio.
          format_: nil,
          # The speed of the model's spoken response as a multiple of the original speed.
          # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
          # This value can only be changed in between model turns, not while a response is
          # in progress.
          #
          # This parameter is a post-processing adjustment to the audio after it is
          # generated, it's also possible to prompt the model to speak faster or slower.
          speed: nil,
          # The voice the model uses to respond. Voice cannot be changed during the session
          # once the model has responded with audio at least once. Current voice options are
          # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
          # and `cedar`. We recommend `marin` and `cedar` for best quality.
          voice: nil
        )
        end

        sig do
          override.returns(
            {
              format_:
                T.any(
                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
                  OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
                ),
              speed: Float,
              voice:
                T.any(
                  String,
                  OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::OrSymbol
                )
            }
          )
        end
        def to_hash
        end

        # The voice the model uses to respond. Voice cannot be changed during the session
        # once the model has responded with audio at least once. Current voice options are
        # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
        # and `cedar`. We recommend `marin` and `cedar` for best quality.
        module Voice
          extend OpenAI::Internal::Type::Union

          Variants =
            T.type_alias do
              T.any(
                String,
                OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
              )
            end

          sig do
            override.returns(
              T::Array[
                OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::Variants
              ]
            )
          end
          def self.variants
          end

          TaggedSymbol =
            T.type_alias do
              T.all(Symbol, OpenAI::Realtime::RealtimeAudioConfigOutput::Voice)
            end
          OrSymbol = T.type_alias { T.any(Symbol, String) }

          ALLOY =
            T.let(
              :alloy,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          ASH =
            T.let(
              :ash,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          BALLAD =
            T.let(
              :ballad,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          CORAL =
            T.let(
              :coral,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          ECHO =
            T.let(
              :echo,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          SAGE =
            T.let(
              :sage,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          SHIMMER =
            T.let(
              :shimmer,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          VERSE =
            T.let(
              :verse,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          MARIN =
            T.let(
              :marin,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
          CEDAR =
            T.let(
              :cedar,
              OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol
            )
        end
      end
    end
  end
end