openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -0,0 +1,195 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ RealtimeTranscriptionSessionCreateResponse =
6
+ Realtime::RealtimeTranscriptionSessionCreateResponse
7
+
8
+ module Realtime
9
+ class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel
10
+ OrHash =
11
+ T.type_alias do
12
+ T.any(
13
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse,
14
+ OpenAI::Internal::AnyHash
15
+ )
16
+ end
17
+
18
+ # Ephemeral key returned by the API. Only present when the session is created on
19
+ # the server via REST API.
20
+ sig do
21
+ returns(OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret)
22
+ end
23
+ attr_reader :client_secret
24
+
25
+ sig do
26
+ params(
27
+ client_secret:
28
+ OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash
29
+ ).void
30
+ end
31
+ attr_writer :client_secret
32
+
33
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
34
+ sig { returns(T.nilable(String)) }
35
+ attr_reader :input_audio_format
36
+
37
+ sig { params(input_audio_format: String).void }
38
+ attr_writer :input_audio_format
39
+
40
+ # Configuration of the transcription model.
41
+ sig do
42
+ returns(
43
+ T.nilable(
44
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription
45
+ )
46
+ )
47
+ end
48
+ attr_reader :input_audio_transcription
49
+
50
+ sig do
51
+ params(
52
+ input_audio_transcription:
53
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash
54
+ ).void
55
+ end
56
+ attr_writer :input_audio_transcription
57
+
58
+ # The set of modalities the model can respond with. To disable audio, set this to
59
+ # ["text"].
60
+ sig do
61
+ returns(
62
+ T.nilable(
63
+ T::Array[
64
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
65
+ ]
66
+ )
67
+ )
68
+ end
69
+ attr_reader :modalities
70
+
71
+ sig do
72
+ params(
73
+ modalities:
74
+ T::Array[
75
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol
76
+ ]
77
+ ).void
78
+ end
79
+ attr_writer :modalities
80
+
81
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
82
+ # means that the model will detect the start and end of speech based on audio
83
+ # volume and respond at the end of user speech.
84
+ sig do
85
+ returns(
86
+ T.nilable(
87
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
88
+ )
89
+ )
90
+ end
91
+ attr_reader :turn_detection
92
+
93
+ sig do
94
+ params(
95
+ turn_detection:
96
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
97
+ ).void
98
+ end
99
+ attr_writer :turn_detection
100
+
101
+ # A new Realtime transcription session configuration.
102
+ #
103
+ # When a session is created on the server via REST API, the session object also
104
+ # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
105
+ # not present when a session is updated via the WebSocket API.
106
+ sig do
107
+ params(
108
+ client_secret:
109
+ OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret::OrHash,
110
+ input_audio_format: String,
111
+ input_audio_transcription:
112
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::OrHash,
113
+ modalities:
114
+ T::Array[
115
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::OrSymbol
116
+ ],
117
+ turn_detection:
118
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection::OrHash
119
+ ).returns(T.attached_class)
120
+ end
121
+ def self.new(
122
+ # Ephemeral key returned by the API. Only present when the session is created on
123
+ # the server via REST API.
124
+ client_secret:,
125
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
126
+ input_audio_format: nil,
127
+ # Configuration of the transcription model.
128
+ input_audio_transcription: nil,
129
+ # The set of modalities the model can respond with. To disable audio, set this to
130
+ # ["text"].
131
+ modalities: nil,
132
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
133
+ # means that the model will detect the start and end of speech based on audio
134
+ # volume and respond at the end of user speech.
135
+ turn_detection: nil
136
+ )
137
+ end
138
+
139
+ sig do
140
+ override.returns(
141
+ {
142
+ client_secret:
143
+ OpenAI::Realtime::RealtimeTranscriptionSessionClientSecret,
144
+ input_audio_format: String,
145
+ input_audio_transcription:
146
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription,
147
+ modalities:
148
+ T::Array[
149
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
150
+ ],
151
+ turn_detection:
152
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection
153
+ }
154
+ )
155
+ end
156
+ def to_hash
157
+ end
158
+
159
+ module Modality
160
+ extend OpenAI::Internal::Type::Enum
161
+
162
+ TaggedSymbol =
163
+ T.type_alias do
164
+ T.all(
165
+ Symbol,
166
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality
167
+ )
168
+ end
169
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
170
+
171
+ TEXT =
172
+ T.let(
173
+ :text,
174
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
175
+ )
176
+ AUDIO =
177
+ T.let(
178
+ :audio,
179
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
180
+ )
181
+
182
+ sig do
183
+ override.returns(
184
+ T::Array[
185
+ OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse::Modality::TaggedSymbol
186
+ ]
187
+ )
188
+ end
189
+ def self.values
190
+ end
191
+ end
192
+ end
193
+ end
194
+ end
195
+ end
@@ -0,0 +1,144 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ RealtimeTranscriptionSessionInputAudioTranscription =
6
+ Realtime::RealtimeTranscriptionSessionInputAudioTranscription
7
+
8
+ module Realtime
9
+ class RealtimeTranscriptionSessionInputAudioTranscription < OpenAI::Internal::Type::BaseModel
10
+ OrHash =
11
+ T.type_alias do
12
+ T.any(
13
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription,
14
+ OpenAI::Internal::AnyHash
15
+ )
16
+ end
17
+
18
+ # The language of the input audio. Supplying the input language in
19
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
20
+ # format will improve accuracy and latency.
21
+ sig { returns(T.nilable(String)) }
22
+ attr_reader :language
23
+
24
+ sig { params(language: String).void }
25
+ attr_writer :language
26
+
27
+ # The model to use for transcription. Current options are `whisper-1`,
28
+ # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
29
+ sig do
30
+ returns(
31
+ T.nilable(
32
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
33
+ )
34
+ )
35
+ end
36
+ attr_reader :model
37
+
38
+ sig do
39
+ params(
40
+ model:
41
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::OrSymbol
42
+ ).void
43
+ end
44
+ attr_writer :model
45
+
46
+ # An optional text to guide the model's style or continue a previous audio
47
+ # segment. For `whisper-1`, the
48
+ # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
49
+ # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
50
+ # "expect words related to technology".
51
+ sig { returns(T.nilable(String)) }
52
+ attr_reader :prompt
53
+
54
+ sig { params(prompt: String).void }
55
+ attr_writer :prompt
56
+
57
+ # Configuration of the transcription model.
58
+ sig do
59
+ params(
60
+ language: String,
61
+ model:
62
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::OrSymbol,
63
+ prompt: String
64
+ ).returns(T.attached_class)
65
+ end
66
+ def self.new(
67
+ # The language of the input audio. Supplying the input language in
68
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
69
+ # format will improve accuracy and latency.
70
+ language: nil,
71
+ # The model to use for transcription. Current options are `whisper-1`,
72
+ # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
73
+ model: nil,
74
+ # An optional text to guide the model's style or continue a previous audio
75
+ # segment. For `whisper-1`, the
76
+ # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
77
+ # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
78
+ # "expect words related to technology".
79
+ prompt: nil
80
+ )
81
+ end
82
+
83
+ sig do
84
+ override.returns(
85
+ {
86
+ language: String,
87
+ model:
88
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol,
89
+ prompt: String
90
+ }
91
+ )
92
+ end
93
+ def to_hash
94
+ end
95
+
96
+ # The model to use for transcription. Current options are `whisper-1`,
97
+ # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
98
+ module Model
99
+ extend OpenAI::Internal::Type::Enum
100
+
101
+ TaggedSymbol =
102
+ T.type_alias do
103
+ T.all(
104
+ Symbol,
105
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model
106
+ )
107
+ end
108
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
109
+
110
+ WHISPER_1 =
111
+ T.let(
112
+ :"whisper-1",
113
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
114
+ )
115
+ GPT_4O_TRANSCRIBE_LATEST =
116
+ T.let(
117
+ :"gpt-4o-transcribe-latest",
118
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
119
+ )
120
+ GPT_4O_MINI_TRANSCRIBE =
121
+ T.let(
122
+ :"gpt-4o-mini-transcribe",
123
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
124
+ )
125
+ GPT_4O_TRANSCRIBE =
126
+ T.let(
127
+ :"gpt-4o-transcribe",
128
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
129
+ )
130
+
131
+ sig do
132
+ override.returns(
133
+ T::Array[
134
+ OpenAI::Realtime::RealtimeTranscriptionSessionInputAudioTranscription::Model::TaggedSymbol
135
+ ]
136
+ )
137
+ end
138
+ def self.values
139
+ end
140
+ end
141
+ end
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,94 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ RealtimeTranscriptionSessionTurnDetection =
6
+ Realtime::RealtimeTranscriptionSessionTurnDetection
7
+
8
+ module Realtime
9
+ class RealtimeTranscriptionSessionTurnDetection < OpenAI::Internal::Type::BaseModel
10
+ OrHash =
11
+ T.type_alias do
12
+ T.any(
13
+ OpenAI::Realtime::RealtimeTranscriptionSessionTurnDetection,
14
+ OpenAI::Internal::AnyHash
15
+ )
16
+ end
17
+
18
+ # Amount of audio to include before the VAD detected speech (in milliseconds).
19
+ # Defaults to 300ms.
20
+ sig { returns(T.nilable(Integer)) }
21
+ attr_reader :prefix_padding_ms
22
+
23
+ sig { params(prefix_padding_ms: Integer).void }
24
+ attr_writer :prefix_padding_ms
25
+
26
+ # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
27
+ # With shorter values the model will respond more quickly, but may jump in on
28
+ # short pauses from the user.
29
+ sig { returns(T.nilable(Integer)) }
30
+ attr_reader :silence_duration_ms
31
+
32
+ sig { params(silence_duration_ms: Integer).void }
33
+ attr_writer :silence_duration_ms
34
+
35
+ # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
36
+ # threshold will require louder audio to activate the model, and thus might
37
+ # perform better in noisy environments.
38
+ sig { returns(T.nilable(Float)) }
39
+ attr_reader :threshold
40
+
41
+ sig { params(threshold: Float).void }
42
+ attr_writer :threshold
43
+
44
+ # Type of turn detection, only `server_vad` is currently supported.
45
+ sig { returns(T.nilable(String)) }
46
+ attr_reader :type
47
+
48
+ sig { params(type: String).void }
49
+ attr_writer :type
50
+
51
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
52
+ # means that the model will detect the start and end of speech based on audio
53
+ # volume and respond at the end of user speech.
54
+ sig do
55
+ params(
56
+ prefix_padding_ms: Integer,
57
+ silence_duration_ms: Integer,
58
+ threshold: Float,
59
+ type: String
60
+ ).returns(T.attached_class)
61
+ end
62
+ def self.new(
63
+ # Amount of audio to include before the VAD detected speech (in milliseconds).
64
+ # Defaults to 300ms.
65
+ prefix_padding_ms: nil,
66
+ # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
67
+ # With shorter values the model will respond more quickly, but may jump in on
68
+ # short pauses from the user.
69
+ silence_duration_ms: nil,
70
+ # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
71
+ # threshold will require louder audio to activate the model, and thus might
72
+ # perform better in noisy environments.
73
+ threshold: nil,
74
+ # Type of turn detection, only `server_vad` is currently supported.
75
+ type: nil
76
+ )
77
+ end
78
+
79
+ sig do
80
+ override.returns(
81
+ {
82
+ prefix_padding_ms: Integer,
83
+ silence_duration_ms: Integer,
84
+ threshold: Float,
85
+ type: String
86
+ }
87
+ )
88
+ end
89
+ def to_hash
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end
@@ -4,8 +4,7 @@ module OpenAI
4
4
  module Models
5
5
  module Realtime
6
6
  # Controls how the realtime conversation is truncated prior to model inference.
7
- # The default is `auto`. When set to `retention_ratio`, the server retains a
8
- # fraction of the conversation tokens prior to the instructions.
7
+ # The default is `auto`.
9
8
  module RealtimeTruncation
10
9
  extend OpenAI::Internal::Type::Union
11
10
 
@@ -13,11 +12,13 @@ module OpenAI
13
12
  T.type_alias do
14
13
  T.any(
15
14
  OpenAI::Realtime::RealtimeTruncation::RealtimeTruncationStrategy::TaggedSymbol,
16
- OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation
15
+ OpenAI::Realtime::RealtimeTruncationRetentionRatio
17
16
  )
18
17
  end
19
18
 
20
- # The truncation strategy to use for the session.
19
+ # The truncation strategy to use for the session. `auto` is the default truncation
20
+ # strategy. `disabled` will disable truncation and emit errors when the
21
+ # conversation exceeds the input token limit.
21
22
  module RealtimeTruncationStrategy
22
23
  extend OpenAI::Internal::Type::Enum
23
24
 
@@ -52,58 +53,6 @@ module OpenAI
52
53
  end
53
54
  end
54
55
 
55
- class RetentionRatioTruncation < OpenAI::Internal::Type::BaseModel
56
- OrHash =
57
- T.type_alias do
58
- T.any(
59
- OpenAI::Realtime::RealtimeTruncation::RetentionRatioTruncation,
60
- OpenAI::Internal::AnyHash
61
- )
62
- end
63
-
64
- # Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0).
65
- sig { returns(Float) }
66
- attr_accessor :retention_ratio
67
-
68
- # Use retention ratio truncation.
69
- sig { returns(Symbol) }
70
- attr_accessor :type
71
-
72
- # Optional cap on tokens allowed after the instructions.
73
- sig { returns(T.nilable(Integer)) }
74
- attr_accessor :post_instructions_token_limit
75
-
76
- # Retain a fraction of the conversation tokens.
77
- sig do
78
- params(
79
- retention_ratio: Float,
80
- post_instructions_token_limit: T.nilable(Integer),
81
- type: Symbol
82
- ).returns(T.attached_class)
83
- end
84
- def self.new(
85
- # Fraction of pre-instruction conversation tokens to retain (0.0 - 1.0).
86
- retention_ratio:,
87
- # Optional cap on tokens allowed after the instructions.
88
- post_instructions_token_limit: nil,
89
- # Use retention ratio truncation.
90
- type: :retention_ratio
91
- )
92
- end
93
-
94
- sig do
95
- override.returns(
96
- {
97
- retention_ratio: Float,
98
- type: Symbol,
99
- post_instructions_token_limit: T.nilable(Integer)
100
- }
101
- )
102
- end
103
- def to_hash
104
- end
105
- end
106
-
107
56
  sig do
108
57
  override.returns(
109
58
  T::Array[OpenAI::Realtime::RealtimeTruncation::Variants]
@@ -0,0 +1,45 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTruncationRetentionRatio < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(
10
+ OpenAI::Realtime::RealtimeTruncationRetentionRatio,
11
+ OpenAI::Internal::AnyHash
12
+ )
13
+ end
14
+
15
+ # Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
16
+ # conversation exceeds the input token limit.
17
+ sig { returns(Float) }
18
+ attr_accessor :retention_ratio
19
+
20
+ # Use retention ratio truncation.
21
+ sig { returns(Symbol) }
22
+ attr_accessor :type
23
+
24
+ # Retain a fraction of the conversation tokens when the conversation exceeds the
25
+ # input token limit. This allows you to amortize truncations across multiple
26
+ # turns, which can help improve cached token usage.
27
+ sig do
28
+ params(retention_ratio: Float, type: Symbol).returns(T.attached_class)
29
+ end
30
+ def self.new(
31
+ # Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
32
+ # conversation exceeds the input token limit.
33
+ retention_ratio:,
34
+ # Use retention ratio truncation.
35
+ type: :retention_ratio
36
+ )
37
+ end
38
+
39
+ sig { override.returns({ retention_ratio: Float, type: Symbol }) }
40
+ def to_hash
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -33,7 +33,9 @@ module OpenAI
33
33
 
34
34
  # Send this event to cancel an in-progress response. The server will respond with
35
35
  # a `response.done` event with a status of `response.status=cancelled`. If there
36
- # is no response to cancel, the server will respond with an error.
36
+ # is no response to cancel, the server will respond with an error. It's safe to
37
+ # call `response.cancel` even if no response is in progress, an error will be
38
+ # returned and the session will remain unaffected.
37
39
  sig do
38
40
  params(event_id: String, response_id: String, type: Symbol).returns(
39
41
  T.attached_class