openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -11,10 +11,14 @@ module OpenAI
11
11
  required :event_id, String
12
12
 
13
13
  # @!attribute session
14
- # A Realtime transcription session configuration object.
14
+ # A new Realtime transcription session configuration.
15
15
  #
16
- # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session]
17
- required :session, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session }
16
+ # When a session is created on the server via REST API, the session object also
17
+ # contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
18
+ # not present when a session is updated via the WebSocket API.
19
+ #
20
+ # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse]
21
+ required :session, -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse }
18
22
 
19
23
  # @!attribute type
20
24
  # The event type, must be `transcription_session.created`.
@@ -30,248 +34,9 @@ module OpenAI
30
34
  #
31
35
  # @param event_id [String] The unique ID of the server event.
32
36
  #
33
- # @param session [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session] A Realtime transcription session configuration object.
37
+ # @param session [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] A new Realtime transcription session configuration.
34
38
  #
35
39
  # @param type [Symbol, :"transcription_session.created"] The event type, must be `transcription_session.created`.
36
-
37
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated#session
38
- class Session < OpenAI::Internal::Type::BaseModel
39
- # @!attribute id
40
- # Unique identifier for the session that looks like `sess_1234567890abcdef`.
41
- #
42
- # @return [String, nil]
43
- optional :id, String
44
-
45
- # @!attribute audio
46
- # Configuration for input audio for the session.
47
- #
48
- # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio, nil]
49
- optional :audio, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio }
50
-
51
- # @!attribute expires_at
52
- # Expiration timestamp for the session, in seconds since epoch.
53
- #
54
- # @return [Integer, nil]
55
- optional :expires_at, Integer
56
-
57
- # @!attribute include
58
- # Additional fields to include in server outputs.
59
- #
60
- # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
61
- # transcription.
62
- #
63
- # @return [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Include>, nil]
64
- optional :include,
65
- -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::TranscriptionSessionCreated::Session::Include] }
66
-
67
- # @!attribute object
68
- # The object type. Always `realtime.transcription_session`.
69
- #
70
- # @return [String, nil]
71
- optional :object, String
72
-
73
- # @!method initialize(id: nil, audio: nil, expires_at: nil, include: nil, object: nil)
74
- # Some parameter documentations has been truncated, see
75
- # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session} for more
76
- # details.
77
- #
78
- # A Realtime transcription session configuration object.
79
- #
80
- # @param id [String] Unique identifier for the session that looks like `sess_1234567890abcdef`.
81
- #
82
- # @param audio [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio] Configuration for input audio for the session.
83
- #
84
- # @param expires_at [Integer] Expiration timestamp for the session, in seconds since epoch.
85
- #
86
- # @param include [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Include>] Additional fields to include in server outputs.
87
- #
88
- # @param object [String] The object type. Always `realtime.transcription_session`.
89
-
90
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session#audio
91
- class Audio < OpenAI::Internal::Type::BaseModel
92
- # @!attribute input
93
- #
94
- # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input, nil]
95
- optional :input, -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input }
96
-
97
- # @!method initialize(input: nil)
98
- # Configuration for input audio for the session.
99
- #
100
- # @param input [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input]
101
-
102
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio#input
103
- class Input < OpenAI::Internal::Type::BaseModel
104
- # @!attribute format_
105
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
106
- #
107
- # @return [String, nil]
108
- optional :format_, String, api_name: :format
109
-
110
- # @!attribute noise_reduction
111
- # Configuration for input audio noise reduction.
112
- #
113
- # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction, nil]
114
- optional :noise_reduction,
115
- -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction }
116
-
117
- # @!attribute transcription
118
- # Configuration of the transcription model.
119
- #
120
- # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription, nil]
121
- optional :transcription,
122
- -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription }
123
-
124
- # @!attribute turn_detection
125
- # Configuration for turn detection.
126
- #
127
- # @return [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection, nil]
128
- optional :turn_detection,
129
- -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection }
130
-
131
- # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
132
- # Some parameter documentations has been truncated, see
133
- # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input}
134
- # for more details.
135
- #
136
- # @param format_ [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
137
- #
138
- # @param noise_reduction [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction] Configuration for input audio noise reduction.
139
- #
140
- # @param transcription [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription] Configuration of the transcription model.
141
- #
142
- # @param turn_detection [OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection] Configuration for turn detection.
143
-
144
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#noise_reduction
145
- class NoiseReduction < OpenAI::Internal::Type::BaseModel
146
- # @!attribute type
147
- #
148
- # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type, nil]
149
- optional :type,
150
- enum: -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type }
151
-
152
- # @!method initialize(type: nil)
153
- # Configuration for input audio noise reduction.
154
- #
155
- # @param type [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction::Type]
156
-
157
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::NoiseReduction#type
158
- module Type
159
- extend OpenAI::Internal::Type::Enum
160
-
161
- NEAR_FIELD = :near_field
162
- FAR_FIELD = :far_field
163
-
164
- # @!method self.values
165
- # @return [Array<Symbol>]
166
- end
167
- end
168
-
169
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#transcription
170
- class Transcription < OpenAI::Internal::Type::BaseModel
171
- # @!attribute language
172
- # The language of the input audio. Supplying the input language in
173
- # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
174
- # format will improve accuracy and latency.
175
- #
176
- # @return [String, nil]
177
- optional :language, String
178
-
179
- # @!attribute model
180
- # The model to use for transcription. Can be `gpt-4o-transcribe`,
181
- # `gpt-4o-mini-transcribe`, or `whisper-1`.
182
- #
183
- # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model, nil]
184
- optional :model,
185
- enum: -> { OpenAI::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model }
186
-
187
- # @!attribute prompt
188
- # An optional text to guide the model's style or continue a previous audio
189
- # segment. The
190
- # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
191
- # should match the audio language.
192
- #
193
- # @return [String, nil]
194
- optional :prompt, String
195
-
196
- # @!method initialize(language: nil, model: nil, prompt: nil)
197
- # Some parameter documentations has been truncated, see
198
- # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription}
199
- # for more details.
200
- #
201
- # Configuration of the transcription model.
202
- #
203
- # @param language [String] The language of the input audio. Supplying the input language in
204
- #
205
- # @param model [Symbol, OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription::Model] The model to use for transcription. Can be `gpt-4o-transcribe`, `gpt-4o-mini-tra
206
- #
207
- # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
208
-
209
- # The model to use for transcription. Can be `gpt-4o-transcribe`,
210
- # `gpt-4o-mini-transcribe`, or `whisper-1`.
211
- #
212
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::Transcription#model
213
- module Model
214
- extend OpenAI::Internal::Type::Enum
215
-
216
- GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
217
- GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
218
- WHISPER_1 = :"whisper-1"
219
-
220
- # @!method self.values
221
- # @return [Array<Symbol>]
222
- end
223
- end
224
-
225
- # @see OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input#turn_detection
226
- class TurnDetection < OpenAI::Internal::Type::BaseModel
227
- # @!attribute prefix_padding_ms
228
- #
229
- # @return [Integer, nil]
230
- optional :prefix_padding_ms, Integer
231
-
232
- # @!attribute silence_duration_ms
233
- #
234
- # @return [Integer, nil]
235
- optional :silence_duration_ms, Integer
236
-
237
- # @!attribute threshold
238
- #
239
- # @return [Float, nil]
240
- optional :threshold, Float
241
-
242
- # @!attribute type
243
- # Type of turn detection, only `server_vad` is currently supported.
244
- #
245
- # @return [String, nil]
246
- optional :type, String
247
-
248
- # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
249
- # Some parameter documentations has been truncated, see
250
- # {OpenAI::Models::Realtime::TranscriptionSessionCreated::Session::Audio::Input::TurnDetection}
251
- # for more details.
252
- #
253
- # Configuration for turn detection.
254
- #
255
- # @param prefix_padding_ms [Integer]
256
- #
257
- # @param silence_duration_ms [Integer]
258
- #
259
- # @param threshold [Float]
260
- #
261
- # @param type [String] Type of turn detection, only `server_vad` is currently supported.
262
- end
263
- end
264
- end
265
-
266
- module Include
267
- extend OpenAI::Internal::Type::Enum
268
-
269
- ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs"
270
-
271
- # @!method self.values
272
- # @return [Array<Symbol>]
273
- end
274
- end
275
40
  end
276
41
  end
277
42
  end
@@ -7,8 +7,8 @@ module OpenAI
7
7
  # @!attribute session
8
8
  # Realtime transcription session object configuration.
9
9
  #
10
- # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest]
11
- required :session, -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateRequest }
10
+ # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session]
11
+ required :session, -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session }
12
12
 
13
13
  # @!attribute type
14
14
  # The event type, must be `transcription_session.update`.
@@ -25,11 +25,187 @@ module OpenAI
25
25
  # @!method initialize(session:, event_id: nil, type: :"transcription_session.update")
26
26
  # Send this event to update a transcription session.
27
27
  #
28
- # @param session [OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] Realtime transcription session object configuration.
28
+ # @param session [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session] Realtime transcription session object configuration.
29
29
  #
30
30
  # @param event_id [String] Optional client-generated ID used to identify this event.
31
31
  #
32
32
  # @param type [Symbol, :"transcription_session.update"] The event type, must be `transcription_session.update`.
33
+
34
+ # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate#session
35
+ class Session < OpenAI::Internal::Type::BaseModel
36
+ # @!attribute include
37
+ # The set of items to include in the transcription. Current available items are:
38
+ # `item.input_audio_transcription.logprobs`
39
+ #
40
+ # @return [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::Include>, nil]
41
+ optional :include,
42
+ -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Realtime::TranscriptionSessionUpdate::Session::Include] }
43
+
44
+ # @!attribute input_audio_format
45
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
46
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
47
+ # (mono), and little-endian byte order.
48
+ #
49
+ # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat, nil]
50
+ optional :input_audio_format,
51
+ enum: -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat }
52
+
53
+ # @!attribute input_audio_noise_reduction
54
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
55
+ # off. Noise reduction filters audio added to the input audio buffer before it is
56
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
57
+ # detection accuracy (reducing false positives) and model performance by improving
58
+ # perception of the input audio.
59
+ #
60
+ # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction, nil]
61
+ optional :input_audio_noise_reduction,
62
+ -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction }
63
+
64
+ # @!attribute input_audio_transcription
65
+ # Configuration for input audio transcription. The client can optionally set the
66
+ # language and prompt for transcription, these offer additional guidance to the
67
+ # transcription service.
68
+ #
69
+ # @return [OpenAI::Models::Realtime::AudioTranscription, nil]
70
+ optional :input_audio_transcription, -> { OpenAI::Realtime::AudioTranscription }
71
+
72
+ # @!attribute turn_detection
73
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
74
+ # means that the model will detect the start and end of speech based on audio
75
+ # volume and respond at the end of user speech.
76
+ #
77
+ # @return [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection, nil]
78
+ optional :turn_detection, -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection }
79
+
80
+ # @!method initialize(include: nil, input_audio_format: nil, input_audio_noise_reduction: nil, input_audio_transcription: nil, turn_detection: nil)
81
+ # Some parameter documentations has been truncated, see
82
+ # {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session} for more
83
+ # details.
84
+ #
85
+ # Realtime transcription session object configuration.
86
+ #
87
+ # @param include [Array<Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::Include>] The set of items to include in the transcription. Current available items are:
88
+ #
89
+ # @param input_audio_format [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioFormat] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
90
+ #
91
+ # @param input_audio_noise_reduction [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn
92
+ #
93
+ # @param input_audio_transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription. The client can optionally set the l
94
+ #
95
+ # @param turn_detection [OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection] Configuration for turn detection. Can be set to `null` to turn off. Server VAD m
96
+
97
+ module Include
98
+ extend OpenAI::Internal::Type::Enum
99
+
100
+ ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs"
101
+
102
+ # @!method self.values
103
+ # @return [Array<Symbol>]
104
+ end
105
+
106
+ # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
107
+ # `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
108
+ # (mono), and little-endian byte order.
109
+ #
110
+ # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#input_audio_format
111
+ module InputAudioFormat
112
+ extend OpenAI::Internal::Type::Enum
113
+
114
+ PCM16 = :pcm16
115
+ G711_ULAW = :g711_ulaw
116
+ G711_ALAW = :g711_alaw
117
+
118
+ # @!method self.values
119
+ # @return [Array<Symbol>]
120
+ end
121
+
122
+ # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#input_audio_noise_reduction
123
+ class InputAudioNoiseReduction < OpenAI::Internal::Type::BaseModel
124
+ # @!attribute type
125
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
126
+ # headphones, `far_field` is for far-field microphones such as laptop or
127
+ # conference room microphones.
128
+ #
129
+ # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil]
130
+ optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType }
131
+
132
+ # @!method initialize(type: nil)
133
+ # Some parameter documentations has been truncated, see
134
+ # {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::InputAudioNoiseReduction}
135
+ # for more details.
136
+ #
137
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
138
+ # off. Noise reduction filters audio added to the input audio buffer before it is
139
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
140
+ # detection accuracy (reducing false positives) and model performance by improving
141
+ # perception of the input audio.
142
+ #
143
+ # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction. `near_field` is for close-talking microphones such as h
144
+ end
145
+
146
+ # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session#turn_detection
147
+ class TurnDetection < OpenAI::Internal::Type::BaseModel
148
+ # @!attribute prefix_padding_ms
149
+ # Amount of audio to include before the VAD detected speech (in milliseconds).
150
+ # Defaults to 300ms.
151
+ #
152
+ # @return [Integer, nil]
153
+ optional :prefix_padding_ms, Integer
154
+
155
+ # @!attribute silence_duration_ms
156
+ # Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
157
+ # With shorter values the model will respond more quickly, but may jump in on
158
+ # short pauses from the user.
159
+ #
160
+ # @return [Integer, nil]
161
+ optional :silence_duration_ms, Integer
162
+
163
+ # @!attribute threshold
164
+ # Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher
165
+ # threshold will require louder audio to activate the model, and thus might
166
+ # perform better in noisy environments.
167
+ #
168
+ # @return [Float, nil]
169
+ optional :threshold, Float
170
+
171
+ # @!attribute type
172
+ # Type of turn detection. Only `server_vad` is currently supported for
173
+ # transcription sessions.
174
+ #
175
+ # @return [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type, nil]
176
+ optional :type, enum: -> { OpenAI::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type }
177
+
178
+ # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
179
+ # Some parameter documentations has been truncated, see
180
+ # {OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection}
181
+ # for more details.
182
+ #
183
+ # Configuration for turn detection. Can be set to `null` to turn off. Server VAD
184
+ # means that the model will detect the start and end of speech based on audio
185
+ # volume and respond at the end of user speech.
186
+ #
187
+ # @param prefix_padding_ms [Integer] Amount of audio to include before the VAD detected speech (in
188
+ #
189
+ # @param silence_duration_ms [Integer] Duration of silence to detect speech stop (in milliseconds). Defaults
190
+ #
191
+ # @param threshold [Float] Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A
192
+ #
193
+ # @param type [Symbol, OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection::Type] Type of turn detection. Only `server_vad` is currently supported for transcripti
194
+
195
+ # Type of turn detection. Only `server_vad` is currently supported for
196
+ # transcription sessions.
197
+ #
198
+ # @see OpenAI::Models::Realtime::TranscriptionSessionUpdate::Session::TurnDetection#type
199
+ module Type
200
+ extend OpenAI::Internal::Type::Enum
201
+
202
+ SERVER_VAD = :server_vad
203
+
204
+ # @!method self.values
205
+ # @return [Array<Symbol>]
206
+ end
207
+ end
208
+ end
33
209
  end
34
210
  end
35
211
  end