openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -30,14 +30,16 @@ module OpenAI
30
30
  sig { returns(Symbol) }
31
31
  attr_accessor :type
32
32
 
33
- # The unique ID of the item.
33
+ # The unique ID of the item. This may be provided by the client or generated by
34
+ # the server.
34
35
  sig { returns(T.nilable(String)) }
35
36
  attr_reader :id
36
37
 
37
38
  sig { params(id: String).void }
38
39
  attr_writer :id
39
40
 
40
- # Identifier for the API object being returned - always `realtime.item`.
41
+ # Identifier for the API object being returned - always `realtime.item`. Optional
42
+ # when creating a new item.
41
43
  sig do
42
44
  returns(
43
45
  T.nilable(
@@ -92,9 +94,11 @@ module OpenAI
92
94
  def self.new(
93
95
  # The content of the message.
94
96
  content:,
95
- # The unique ID of the item.
97
+ # The unique ID of the item. This may be provided by the client or generated by
98
+ # the server.
96
99
  id: nil,
97
- # Identifier for the API object being returned - always `realtime.item`.
100
+ # Identifier for the API object being returned - always `realtime.item`. Optional
101
+ # when creating a new item.
98
102
  object: nil,
99
103
  # The status of the item. Has no effect on the conversation.
100
104
  status: nil,
@@ -134,6 +138,15 @@ module OpenAI
134
138
  )
135
139
  end
136
140
 
141
+ # Base64-encoded audio bytes; these will be parsed as the format specified in the
142
+ # session output audio type configuration. This defaults to PCM 16-bit 24kHz mono
143
+ # if not specified.
144
+ sig { returns(T.nilable(String)) }
145
+ attr_reader :audio
146
+
147
+ sig { params(audio: String).void }
148
+ attr_writer :audio
149
+
137
150
  # The text content.
138
151
  sig { returns(T.nilable(String)) }
139
152
  attr_reader :text
@@ -141,7 +154,16 @@ module OpenAI
141
154
  sig { params(text: String).void }
142
155
  attr_writer :text
143
156
 
144
- # The content type. Always `text` for assistant messages.
157
+ # The transcript of the audio content; this will always be present if the output
158
+ # type is `audio`.
159
+ sig { returns(T.nilable(String)) }
160
+ attr_reader :transcript
161
+
162
+ sig { params(transcript: String).void }
163
+ attr_writer :transcript
164
+
165
+ # The content type, `output_text` or `output_audio` depending on the session
166
+ # `output_modalities` configuration.
145
167
  sig do
146
168
  returns(
147
169
  T.nilable(
@@ -161,15 +183,25 @@ module OpenAI
161
183
 
162
184
  sig do
163
185
  params(
186
+ audio: String,
164
187
  text: String,
188
+ transcript: String,
165
189
  type:
166
190
  OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::OrSymbol
167
191
  ).returns(T.attached_class)
168
192
  end
169
193
  def self.new(
194
+ # Base64-encoded audio bytes; these will be parsed as the format specified in the
195
+ # session output audio type configuration. This defaults to PCM 16-bit 24kHz mono
196
+ # if not specified.
197
+ audio: nil,
170
198
  # The text content.
171
199
  text: nil,
172
- # The content type. Always `text` for assistant messages.
200
+ # The transcript of the audio content; this will always be present if the output
201
+ # type is `audio`.
202
+ transcript: nil,
203
+ # The content type, `output_text` or `output_audio` depending on the session
204
+ # `output_modalities` configuration.
173
205
  type: nil
174
206
  )
175
207
  end
@@ -177,7 +209,9 @@ module OpenAI
177
209
  sig do
178
210
  override.returns(
179
211
  {
212
+ audio: String,
180
213
  text: String,
214
+ transcript: String,
181
215
  type:
182
216
  OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::OrSymbol
183
217
  }
@@ -186,7 +220,8 @@ module OpenAI
186
220
  def to_hash
187
221
  end
188
222
 
189
- # The content type. Always `text` for assistant messages.
223
+ # The content type, `output_text` or `output_audio` depending on the session
224
+ # `output_modalities` configuration.
190
225
  module Type
191
226
  extend OpenAI::Internal::Type::Enum
192
227
 
@@ -199,9 +234,14 @@ module OpenAI
199
234
  end
200
235
  OrSymbol = T.type_alias { T.any(Symbol, String) }
201
236
 
202
- TEXT =
237
+ OUTPUT_TEXT =
238
+ T.let(
239
+ :output_text,
240
+ OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::TaggedSymbol
241
+ )
242
+ OUTPUT_AUDIO =
203
243
  T.let(
204
- :text,
244
+ :output_audio,
205
245
  OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type::TaggedSymbol
206
246
  )
207
247
 
@@ -217,7 +257,8 @@ module OpenAI
217
257
  end
218
258
  end
219
259
 
220
- # Identifier for the API object being returned - always `realtime.item`.
260
+ # Identifier for the API object being returned - always `realtime.item`. Optional
261
+ # when creating a new item.
221
262
  module Object
222
263
  extend OpenAI::Internal::Type::Enum
223
264
 
@@ -12,7 +12,9 @@ module OpenAI
12
12
  )
13
13
  end
14
14
 
15
- # The arguments of the function call.
15
+ # The arguments of the function call. This is a JSON-encoded string representing
16
+ # the arguments passed to the function, for example
17
+ # `{"arg1": "value1", "arg2": 42}`.
16
18
  sig { returns(String) }
17
19
  attr_accessor :arguments
18
20
 
@@ -24,7 +26,8 @@ module OpenAI
24
26
  sig { returns(Symbol) }
25
27
  attr_accessor :type
26
28
 
27
- # The unique ID of the item.
29
+ # The unique ID of the item. This may be provided by the client or generated by
30
+ # the server.
28
31
  sig { returns(T.nilable(String)) }
29
32
  attr_reader :id
30
33
 
@@ -38,7 +41,8 @@ module OpenAI
38
41
  sig { params(call_id: String).void }
39
42
  attr_writer :call_id
40
43
 
41
- # Identifier for the API object being returned - always `realtime.item`.
44
+ # Identifier for the API object being returned - always `realtime.item`. Optional
45
+ # when creating a new item.
42
46
  sig do
43
47
  returns(
44
48
  T.nilable(
@@ -89,15 +93,19 @@ module OpenAI
89
93
  ).returns(T.attached_class)
90
94
  end
91
95
  def self.new(
92
- # The arguments of the function call.
96
+ # The arguments of the function call. This is a JSON-encoded string representing
97
+ # the arguments passed to the function, for example
98
+ # `{"arg1": "value1", "arg2": 42}`.
93
99
  arguments:,
94
100
  # The name of the function being called.
95
101
  name:,
96
- # The unique ID of the item.
102
+ # The unique ID of the item. This may be provided by the client or generated by
103
+ # the server.
97
104
  id: nil,
98
105
  # The ID of the function call.
99
106
  call_id: nil,
100
- # Identifier for the API object being returned - always `realtime.item`.
107
+ # Identifier for the API object being returned - always `realtime.item`. Optional
108
+ # when creating a new item.
101
109
  object: nil,
102
110
  # The status of the item. Has no effect on the conversation.
103
111
  status: nil,
@@ -124,7 +132,8 @@ module OpenAI
124
132
  def to_hash
125
133
  end
126
134
 
127
- # Identifier for the API object being returned - always `realtime.item`.
135
+ # Identifier for the API object being returned - always `realtime.item`. Optional
136
+ # when creating a new item.
128
137
  module Object
129
138
  extend OpenAI::Internal::Type::Enum
130
139
 
@@ -16,7 +16,8 @@ module OpenAI
16
16
  sig { returns(String) }
17
17
  attr_accessor :call_id
18
18
 
19
- # The output of the function call.
19
+ # The output of the function call; this is free text and can contain any
20
+ # information or simply be empty.
20
21
  sig { returns(String) }
21
22
  attr_accessor :output
22
23
 
@@ -24,14 +25,16 @@ module OpenAI
24
25
  sig { returns(Symbol) }
25
26
  attr_accessor :type
26
27
 
27
- # The unique ID of the item.
28
+ # The unique ID of the item. This may be provided by the client or generated by
29
+ # the server.
28
30
  sig { returns(T.nilable(String)) }
29
31
  attr_reader :id
30
32
 
31
33
  sig { params(id: String).void }
32
34
  attr_writer :id
33
35
 
34
- # Identifier for the API object being returned - always `realtime.item`.
36
+ # Identifier for the API object being returned - always `realtime.item`. Optional
37
+ # when creating a new item.
35
38
  sig do
36
39
  returns(
37
40
  T.nilable(
@@ -83,11 +86,14 @@ module OpenAI
83
86
  def self.new(
84
87
  # The ID of the function call this output is for.
85
88
  call_id:,
86
- # The output of the function call.
89
+ # The output of the function call; this is free text and can contain any
90
+ # information or simply be empty.
87
91
  output:,
88
- # The unique ID of the item.
92
+ # The unique ID of the item. This may be provided by the client or generated by
93
+ # the server.
89
94
  id: nil,
90
- # Identifier for the API object being returned - always `realtime.item`.
95
+ # Identifier for the API object being returned - always `realtime.item`. Optional
96
+ # when creating a new item.
91
97
  object: nil,
92
98
  # The status of the item. Has no effect on the conversation.
93
99
  status: nil,
@@ -113,7 +119,8 @@ module OpenAI
113
119
  def to_hash
114
120
  end
115
121
 
116
- # Identifier for the API object being returned - always `realtime.item`.
122
+ # Identifier for the API object being returned - always `realtime.item`. Optional
123
+ # when creating a new item.
117
124
  module Object
118
125
  extend OpenAI::Internal::Type::Enum
119
126
 
@@ -30,14 +30,16 @@ module OpenAI
30
30
  sig { returns(Symbol) }
31
31
  attr_accessor :type
32
32
 
33
- # The unique ID of the item.
33
+ # The unique ID of the item. This may be provided by the client or generated by
34
+ # the server.
34
35
  sig { returns(T.nilable(String)) }
35
36
  attr_reader :id
36
37
 
37
38
  sig { params(id: String).void }
38
39
  attr_writer :id
39
40
 
40
- # Identifier for the API object being returned - always `realtime.item`.
41
+ # Identifier for the API object being returned - always `realtime.item`. Optional
42
+ # when creating a new item.
41
43
  sig do
42
44
  returns(
43
45
  T.nilable(
@@ -73,7 +75,12 @@ module OpenAI
73
75
  end
74
76
  attr_writer :status
75
77
 
76
- # A system message item in a Realtime conversation.
78
+ # A system message in a Realtime conversation can be used to provide additional
79
+ # context or instructions to the model. This is similar to but distinct from the
80
+ # instruction prompt provided at the start of a conversation, as system messages
81
+ # can be added at any point in the conversation. For major changes to the
82
+ # conversation's behavior, use instructions, but for smaller updates (e.g. "the
83
+ # user is now asking about a different topic"), use system messages.
77
84
  sig do
78
85
  params(
79
86
  content:
@@ -92,9 +99,11 @@ module OpenAI
92
99
  def self.new(
93
100
  # The content of the message.
94
101
  content:,
95
- # The unique ID of the item.
102
+ # The unique ID of the item. This may be provided by the client or generated by
103
+ # the server.
96
104
  id: nil,
97
- # Identifier for the API object being returned - always `realtime.item`.
105
+ # Identifier for the API object being returned - always `realtime.item`. Optional
106
+ # when creating a new item.
98
107
  object: nil,
99
108
  # The status of the item. Has no effect on the conversation.
100
109
  status: nil,
@@ -217,7 +226,8 @@ module OpenAI
217
226
  end
218
227
  end
219
228
 
220
- # Identifier for the API object being returned - always `realtime.item`.
229
+ # Identifier for the API object being returned - always `realtime.item`. Optional
230
+ # when creating a new item.
221
231
  module Object
222
232
  extend OpenAI::Internal::Type::Enum
223
233
 
@@ -30,14 +30,16 @@ module OpenAI
30
30
  sig { returns(Symbol) }
31
31
  attr_accessor :type
32
32
 
33
- # The unique ID of the item.
33
+ # The unique ID of the item. This may be provided by the client or generated by
34
+ # the server.
34
35
  sig { returns(T.nilable(String)) }
35
36
  attr_reader :id
36
37
 
37
38
  sig { params(id: String).void }
38
39
  attr_writer :id
39
40
 
40
- # Identifier for the API object being returned - always `realtime.item`.
41
+ # Identifier for the API object being returned - always `realtime.item`. Optional
42
+ # when creating a new item.
41
43
  sig do
42
44
  returns(
43
45
  T.nilable(
@@ -92,9 +94,11 @@ module OpenAI
92
94
  def self.new(
93
95
  # The content of the message.
94
96
  content:,
95
- # The unique ID of the item.
97
+ # The unique ID of the item. This may be provided by the client or generated by
98
+ # the server.
96
99
  id: nil,
97
- # Identifier for the API object being returned - always `realtime.item`.
100
+ # Identifier for the API object being returned - always `realtime.item`. Optional
101
+ # when creating a new item.
98
102
  object: nil,
99
103
  # The status of the item. Has no effect on the conversation.
100
104
  status: nil,
@@ -134,13 +138,43 @@ module OpenAI
134
138
  )
135
139
  end
136
140
 
137
- # Base64-encoded audio bytes (for `input_audio`).
141
+ # Base64-encoded audio bytes (for `input_audio`); these will be parsed as the
142
+ # format specified in the session input audio type configuration. This defaults to
143
+ # PCM 16-bit 24kHz mono if not specified.
138
144
  sig { returns(T.nilable(String)) }
139
145
  attr_reader :audio
140
146
 
141
147
  sig { params(audio: String).void }
142
148
  attr_writer :audio
143
149
 
150
+ # The detail level of the image (for `input_image`). `auto` will default to
151
+ # `high`.
152
+ sig do
153
+ returns(
154
+ T.nilable(
155
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol
156
+ )
157
+ )
158
+ end
159
+ attr_reader :detail
160
+
161
+ sig do
162
+ params(
163
+ detail:
164
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol
165
+ ).void
166
+ end
167
+ attr_writer :detail
168
+
169
+ # Base64-encoded image bytes (for `input_image`) as a data URI. For example
170
+ # `data:image/png;base64,<base64-encoded bytes>`. Supported formats are PNG
171
+ # and JPEG.
172
+ sig { returns(T.nilable(String)) }
173
+ attr_reader :image_url
174
+
175
+ sig { params(image_url: String).void }
176
+ attr_writer :image_url
177
+
144
178
  # The text content (for `input_text`).
145
179
  sig { returns(T.nilable(String)) }
146
180
  attr_reader :text
@@ -148,14 +182,15 @@ module OpenAI
148
182
  sig { params(text: String).void }
149
183
  attr_writer :text
150
184
 
151
- # Transcript of the audio (for `input_audio`).
185
+ # Transcript of the audio (for `input_audio`). This is not sent to the model, but
186
+ # will be attached to the message item for reference.
152
187
  sig { returns(T.nilable(String)) }
153
188
  attr_reader :transcript
154
189
 
155
190
  sig { params(transcript: String).void }
156
191
  attr_writer :transcript
157
192
 
158
- # The content type (`input_text` or `input_audio`).
193
+ # The content type (`input_text`, `input_audio`, or `input_image`).
159
194
  sig do
160
195
  returns(
161
196
  T.nilable(
@@ -176,6 +211,9 @@ module OpenAI
176
211
  sig do
177
212
  params(
178
213
  audio: String,
214
+ detail:
215
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol,
216
+ image_url: String,
179
217
  text: String,
180
218
  transcript: String,
181
219
  type:
@@ -183,13 +221,23 @@ module OpenAI
183
221
  ).returns(T.attached_class)
184
222
  end
185
223
  def self.new(
186
- # Base64-encoded audio bytes (for `input_audio`).
224
+ # Base64-encoded audio bytes (for `input_audio`); these will be parsed as the
225
+ # format specified in the session input audio type configuration. This defaults to
226
+ # PCM 16-bit 24kHz mono if not specified.
187
227
  audio: nil,
228
+ # The detail level of the image (for `input_image`). `auto` will default to
229
+ # `high`.
230
+ detail: nil,
231
+ # Base64-encoded image bytes (for `input_image`) as a data URI. For example
232
+ # `data:image/png;base64,<base64-encoded bytes>`. Supported formats are PNG
233
+ # and JPEG.
234
+ image_url: nil,
188
235
  # The text content (for `input_text`).
189
236
  text: nil,
190
- # Transcript of the audio (for `input_audio`).
237
+ # Transcript of the audio (for `input_audio`). This is not sent to the model, but
238
+ # will be attached to the message item for reference.
191
239
  transcript: nil,
192
- # The content type (`input_text` or `input_audio`).
240
+ # The content type (`input_text`, `input_audio`, or `input_image`).
193
241
  type: nil
194
242
  )
195
243
  end
@@ -198,6 +246,9 @@ module OpenAI
198
246
  override.returns(
199
247
  {
200
248
  audio: String,
249
+ detail:
250
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::OrSymbol,
251
+ image_url: String,
201
252
  text: String,
202
253
  transcript: String,
203
254
  type:
@@ -208,7 +259,48 @@ module OpenAI
208
259
  def to_hash
209
260
  end
210
261
 
211
- # The content type (`input_text` or `input_audio`).
262
+ # The detail level of the image (for `input_image`). `auto` will default to
263
+ # `high`.
264
+ module Detail
265
+ extend OpenAI::Internal::Type::Enum
266
+
267
+ TaggedSymbol =
268
+ T.type_alias do
269
+ T.all(
270
+ Symbol,
271
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail
272
+ )
273
+ end
274
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
275
+
276
+ AUTO =
277
+ T.let(
278
+ :auto,
279
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol
280
+ )
281
+ LOW =
282
+ T.let(
283
+ :low,
284
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol
285
+ )
286
+ HIGH =
287
+ T.let(
288
+ :high,
289
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol
290
+ )
291
+
292
+ sig do
293
+ override.returns(
294
+ T::Array[
295
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Detail::TaggedSymbol
296
+ ]
297
+ )
298
+ end
299
+ def self.values
300
+ end
301
+ end
302
+
303
+ # The content type (`input_text`, `input_audio`, or `input_image`).
212
304
  module Type
213
305
  extend OpenAI::Internal::Type::Enum
214
306
 
@@ -231,6 +323,11 @@ module OpenAI
231
323
  :input_audio,
232
324
  OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Type::TaggedSymbol
233
325
  )
326
+ INPUT_IMAGE =
327
+ T.let(
328
+ :input_image,
329
+ OpenAI::Realtime::RealtimeConversationItemUserMessage::Content::Type::TaggedSymbol
330
+ )
234
331
 
235
332
  sig do
236
333
  override.returns(
@@ -244,7 +341,8 @@ module OpenAI
244
341
  end
245
342
  end
246
343
 
247
- # Identifier for the API object being returned - always `realtime.item`.
344
+ # Identifier for the API object being returned - always `realtime.item`. Optional
345
+ # when creating a new item.
248
346
  module Object
249
347
  extend OpenAI::Internal::Type::Enum
250
348