openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 887b4188085cb58d7d08b043bb231aaf9c911e1fde2a5b6e0494c3478491e81f
- data.tar.gz: c4e908ee7d15fac9f59e99ab3f8adb3cb0b584990292a86fcdc2b36b660b9529
+ metadata.gz: 9e3a0c23bd15f70018f2c35d6f1de5c6f85dad6c66d1b10c8ec12f2070a7cccc
+ data.tar.gz: a91b9648024379a1fcb634cc3c41562805419945680e7ff8972dfc7233d92d09
  SHA512:
- metadata.gz: 56484dcf1283f408c0d2025ccbe87af7ecd74e4807888630759520a0676687d7c33311d3597b61807d23589b4a5343032bd87e3f5e6277e6727b9c7aa4192058
- data.tar.gz: 7160a979ee2c76c52762487d989f9973f5302f2551951a5403768e1d38bdd8617b4796ca2c4b5e3887f30c0a4fa5ba367670327a25d49d5a7f9403d1433cec62
+ metadata.gz: aa501862f1e017ae5cd912792154066ce4ed487e850dab2a80dc171e8fc743dce7b6371258806746d7f4c126ce2d55c573d4fe85f2214038adba919a7fc5e39a
+ data.tar.gz: bc3dcc99cc106579631b269155019b392292d7fa09ff8a419fd8d76b4bf7da04e4b1bfc1bc0b8df17063d0ce99149e282075a08a2102ac68a66a416efcfe4347
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
  # Changelog

+ ## 0.23.0 (2025-09-08)
+
+ Full Changelog: [v0.22.1...v0.23.0](https://github.com/openai/openai-ruby/compare/v0.22.1...v0.23.0)
+
+ ### Features
+
+ * **api:** ship the RealtimeGA API shape ([6c59e2c](https://github.com/openai/openai-ruby/commit/6c59e2c78ea130b626442e2230676afcca3a906f))
+
  ## 0.22.1 (2025-09-05)

  Full Changelog: [v0.22.0...v0.22.1](https://github.com/openai/openai-ruby/compare/v0.22.0...v0.22.1)
data/README.md CHANGED
@@ -15,7 +15,7 @@ To use this gem, install via Bundler by adding the following to your application
  <!-- x-release-please-start-version -->

  ```ruby
- gem "openai", "~> 0.22.1"
+ gem "openai", "~> 0.23.0"
  ```

  <!-- x-release-please-end -->
data/lib/openai/models/realtime/audio_transcription.rb ADDED
@@ -0,0 +1,60 @@
+ # frozen_string_literal: true
+
+ module OpenAI
+ module Models
+ module Realtime
+ class AudioTranscription < OpenAI::Internal::Type::BaseModel
+ # @!attribute language
+ # The language of the input audio. Supplying the input language in
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+ # format will improve accuracy and latency.
+ #
+ # @return [String, nil]
+ optional :language, String
+
+ # @!attribute model
+ # The model to use for transcription. Current options are `whisper-1`,
+ # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+ #
+ # @return [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model, nil]
+ optional :model, enum: -> { OpenAI::Realtime::AudioTranscription::Model }
+
+ # @!attribute prompt
+ # An optional text to guide the model's style or continue a previous audio
+ # segment. For `whisper-1`, the
+ # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
+ # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
+ # "expect words related to technology".
+ #
+ # @return [String, nil]
+ optional :prompt, String
+
+ # @!method initialize(language: nil, model: nil, prompt: nil)
+ # Some parameter documentations has been truncated, see
+ # {OpenAI::Models::Realtime::AudioTranscription} for more details.
+ #
+ # @param language [String] The language of the input audio. Supplying the input language in
+ #
+ # @param model [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-tra
+ #
+ # @param prompt [String] An optional text to guide the model's style or continue a previous audio
+
+ # The model to use for transcription. Current options are `whisper-1`,
+ # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
+ #
+ # @see OpenAI::Models::Realtime::AudioTranscription#model
+ module Model
+ extend OpenAI::Internal::Type::Enum
+
+ WHISPER_1 = :"whisper-1"
+ GPT_4O_TRANSCRIBE_LATEST = :"gpt-4o-transcribe-latest"
+ GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
+ GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
+
+ # @!method self.values
+ # @return [Array<Symbol>]
+ end
+ end
+ end
+ end
+ end
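For orientation, here is a minimal sketch of constructing the new shared `AudioTranscription` model. Only the class and the attributes shown in the hunk above are taken from the gem; how the object is attached to a session configuration is not shown here and depends on the surrounding session models.

```ruby
require "openai"

# Hedged sketch: building the shared transcription settings object added in 0.23.0.
transcription = OpenAI::Models::Realtime::AudioTranscription.new(
  language: "en",              # ISO-639-1 code; improves accuracy and latency
  model: :"gpt-4o-transcribe", # any value from the Model enum above
  prompt: "expect words related to technology"
)

puts transcription.model # => :"gpt-4o-transcribe"
```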
data/lib/openai/models/realtime/client_secret_create_params.rb CHANGED
@@ -9,7 +9,10 @@ module OpenAI
  include OpenAI::Internal::Type::RequestParameters

  # @!attribute expires_after
- # Configuration for the ephemeral token expiration.
+ # Configuration for the client secret expiration. Expiration refers to the time
+ # after which a client secret will no longer be valid for creating sessions. The
+ # session itself may continue after that time once started. A secret can be used
+ # to create multiple sessions until it expires.
  #
  # @return [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter, nil]
  optional :expires_after, -> { OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter }
@@ -25,7 +28,7 @@ module OpenAI
  # Some parameter documentations has been truncated, see
  # {OpenAI::Models::Realtime::ClientSecretCreateParams} for more details.
  #
- # @param expires_after [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter] Configuration for the ephemeral token expiration.
+ # @param expires_after [OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter] Configuration for the client secret expiration. Expiration refers to the time af
  #
  # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateRequest, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateRequest] Session configuration to use for the client secret. Choose either a realtime
  #
@@ -33,15 +36,17 @@ module OpenAI

  class ExpiresAfter < OpenAI::Internal::Type::BaseModel
  # @!attribute anchor
- # The anchor point for the ephemeral token expiration. Only `created_at` is
- # currently supported.
+ # The anchor point for the client secret expiration, meaning that `seconds` will
+ # be added to the `created_at` time of the client secret to produce an expiration
+ # timestamp. Only `created_at` is currently supported.
  #
  # @return [Symbol, OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor, nil]
  optional :anchor, enum: -> { OpenAI::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor }

  # @!attribute seconds
  # The number of seconds from the anchor point to the expiration. Select a value
- # between `10` and `7200`.
+ # between `10` and `7200` (2 hours). This default to 600 seconds (10 minutes) if
+ # not specified.
  #
  # @return [Integer, nil]
  optional :seconds, Integer
@@ -51,14 +56,18 @@ module OpenAI
  # {OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter} for more
  # details.
  #
- # Configuration for the ephemeral token expiration.
+ # Configuration for the client secret expiration. Expiration refers to the time
+ # after which a client secret will no longer be valid for creating sessions. The
+ # session itself may continue after that time once started. A secret can be used
+ # to create multiple sessions until it expires.
  #
- # @param anchor [Symbol, OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor] The anchor point for the ephemeral token expiration. Only `created_at` is curren
+ # @param anchor [Symbol, OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter::Anchor] The anchor point for the client secret expiration, meaning that `seconds` will b
  #
  # @param seconds [Integer] The number of seconds from the anchor point to the expiration. Select a value be

- # The anchor point for the ephemeral token expiration. Only `created_at` is
- # currently supported.
+ # The anchor point for the client secret expiration, meaning that `seconds` will
+ # be added to the `created_at` time of the client secret to produce an expiration
+ # timestamp. Only `created_at` is currently supported.
  #
  # @see OpenAI::Models::Realtime::ClientSecretCreateParams::ExpiresAfter#anchor
  module Anchor
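As a rough illustration of the reworded `expires_after` semantics, a hedged sketch of minting a client secret follows. The `OpenAI::Client` setup and the `client.realtime.client_secrets.create` call are assumed from the gem's usual resource layout (see `data/lib/openai/resources/realtime/client_secrets.rb` in the file list); a real call would normally also pass a `session` configuration.

```ruby
require "openai"

client = OpenAI::Client.new(api_key: ENV.fetch("OPENAI_API_KEY"))

# Hedged sketch: the secret stops minting new sessions 10 minutes after creation,
# but sessions already started may continue past that point, and the secret can
# be reused for multiple sessions until it expires.
secret = client.realtime.client_secrets.create(
  expires_after: {
    anchor: :created_at, # only `created_at` is currently supported
    seconds: 600         # 10..7200; defaults to 600 when omitted
  }
)
```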
data/lib/openai/models/realtime/client_secret_create_response.rb CHANGED
@@ -14,7 +14,7 @@ module OpenAI
  # @!attribute session
  # The session configuration for either a realtime or transcription session.
  #
- # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse]
+ # @return [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse]
  required :session, union: -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session }

  # @!attribute value
@@ -31,7 +31,7 @@ module OpenAI
  #
  # @param expires_at [Integer] Expiration timestamp for the client secret, in seconds since epoch.
  #
- # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse] The session configuration for either a realtime or transcription session.
+ # @param session [OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse] The session configuration for either a realtime or transcription session.
  #
  # @param value [String] The generated client secret value.

@@ -41,258 +41,19 @@ module OpenAI
  module Session
  extend OpenAI::Internal::Type::Union

- # A Realtime session configuration object.
+ # A new Realtime session configuration, with an ephemeral key. Default TTL
+ # for keys is one minute.
  variant -> { OpenAI::Realtime::RealtimeSessionCreateResponse }

- # A Realtime transcription session configuration object.
- variant -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse }
-
- class RealtimeTranscriptionSessionCreateResponse < OpenAI::Internal::Type::BaseModel
- # @!attribute id
- # Unique identifier for the session that looks like `sess_1234567890abcdef`.
- #
- # @return [String, nil]
- optional :id, String
-
- # @!attribute audio
- # Configuration for input audio for the session.
- #
- # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio, nil]
- optional :audio,
- -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio }
-
- # @!attribute expires_at
- # Expiration timestamp for the session, in seconds since epoch.
- #
- # @return [Integer, nil]
- optional :expires_at, Integer
-
- # @!attribute include
- # Additional fields to include in server outputs.
- #
- # - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
- # transcription.
- #
- # @return [Array<Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include>, nil]
- optional :include,
- -> do
- OpenAI::Internal::Type::ArrayOf[
- enum: OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include
- ]
- end
-
- # @!attribute object
- # The object type. Always `realtime.transcription_session`.
- #
- # @return [String, nil]
- optional :object, String
-
- # @!method initialize(id: nil, audio: nil, expires_at: nil, include: nil, object: nil)
- # Some parameter documentations has been truncated, see
- # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse}
- # for more details.
- #
- # A Realtime transcription session configuration object.
- #
- # @param id [String] Unique identifier for the session that looks like `sess_1234567890abcdef`.
- #
- # @param audio [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio] Configuration for input audio for the session.
- #
- # @param expires_at [Integer] Expiration timestamp for the session, in seconds since epoch.
- #
- # @param include [Array<Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Include>] Additional fields to include in server outputs.
- #
- # @param object [String] The object type. Always `realtime.transcription_session`.
-
- # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse#audio
- class Audio < OpenAI::Internal::Type::BaseModel
- # @!attribute input
- #
- # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input, nil]
- optional :input,
- -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input }
-
- # @!method initialize(input: nil)
- # Configuration for input audio for the session.
- #
- # @param input [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input]
-
- # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio#input
- class Input < OpenAI::Internal::Type::BaseModel
- # @!attribute format_
- # The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
- #
- # @return [String, nil]
- optional :format_, String, api_name: :format
-
- # @!attribute noise_reduction
- # Configuration for input audio noise reduction.
- #
- # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction, nil]
- optional :noise_reduction,
- -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction }
-
- # @!attribute transcription
- # Configuration of the transcription model.
- #
- # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription, nil]
- optional :transcription,
- -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription }
-
- # @!attribute turn_detection
- # Configuration for turn detection.
- #
- # @return [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection, nil]
- optional :turn_detection,
- -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection }
-
- # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
- # Some parameter documentations has been truncated, see
- # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input}
- # for more details.
- #
- # @param format_ [String] The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
- #
- # @param noise_reduction [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction] Configuration for input audio noise reduction.
- #
- # @param transcription [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription] Configuration of the transcription model.
- #
- # @param turn_detection [OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection] Configuration for turn detection.
-
- # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input#noise_reduction
- class NoiseReduction < OpenAI::Internal::Type::BaseModel
- # @!attribute type
- #
- # @return [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type, nil]
- optional :type,
- enum: -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type }
-
- # @!method initialize(type: nil)
- # Configuration for input audio noise reduction.
- #
- # @param type [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction::Type]
-
- # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::NoiseReduction#type
- module Type
- extend OpenAI::Internal::Type::Enum
-
- NEAR_FIELD = :near_field
- FAR_FIELD = :far_field
-
- # @!method self.values
- # @return [Array<Symbol>]
- end
- end
-
- # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input#transcription
- class Transcription < OpenAI::Internal::Type::BaseModel
- # @!attribute language
- # The language of the input audio. Supplying the input language in
- # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
- # format will improve accuracy and latency.
- #
- # @return [String, nil]
- optional :language, String
-
- # @!attribute model
- # The model to use for transcription. Can be `gpt-4o-transcribe`,
- # `gpt-4o-mini-transcribe`, or `whisper-1`.
- #
- # @return [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model, nil]
- optional :model,
- enum: -> { OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model }
-
- # @!attribute prompt
- # An optional text to guide the model's style or continue a previous audio
- # segment. The
- # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
- # should match the audio language.
- #
- # @return [String, nil]
- optional :prompt, String
-
- # @!method initialize(language: nil, model: nil, prompt: nil)
- # Some parameter documentations has been truncated, see
- # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription}
- # for more details.
- #
- # Configuration of the transcription model.
- #
- # @param language [String] The language of the input audio. Supplying the input language in
- #
- # @param model [Symbol, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription::Model] The model to use for transcription. Can be `gpt-4o-transcribe`, `gpt-4o-mini-tra
- #
- # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
-
- # The model to use for transcription. Can be `gpt-4o-transcribe`,
- # `gpt-4o-mini-transcribe`, or `whisper-1`.
- #
- # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::Transcription#model
- module Model
- extend OpenAI::Internal::Type::Enum
-
- GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
- GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
- WHISPER_1 = :"whisper-1"
-
- # @!method self.values
- # @return [Array<Symbol>]
- end
- end
-
- # @see OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input#turn_detection
- class TurnDetection < OpenAI::Internal::Type::BaseModel
- # @!attribute prefix_padding_ms
- #
- # @return [Integer, nil]
- optional :prefix_padding_ms, Integer
-
- # @!attribute silence_duration_ms
- #
- # @return [Integer, nil]
- optional :silence_duration_ms, Integer
-
- # @!attribute threshold
- #
- # @return [Float, nil]
- optional :threshold, Float
-
- # @!attribute type
- # Type of turn detection, only `server_vad` is currently supported.
- #
- # @return [String, nil]
- optional :type, String
-
- # @!method initialize(prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
- # Some parameter documentations has been truncated, see
- # {OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse::Audio::Input::TurnDetection}
- # for more details.
- #
- # Configuration for turn detection.
- #
- # @param prefix_padding_ms [Integer]
- #
- # @param silence_duration_ms [Integer]
- #
- # @param threshold [Float]
- #
- # @param type [String] Type of turn detection, only `server_vad` is currently supported.
- end
- end
- end
-
- module Include
- extend OpenAI::Internal::Type::Enum
-
- ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = :"item.input_audio_transcription.logprobs"
-
- # @!method self.values
- # @return [Array<Symbol>]
- end
- end
+ # A new Realtime transcription session configuration.
+ #
+ # When a session is created on the server via REST API, the session object
+ # also contains an ephemeral key. Default TTL for keys is 10 minutes. This
+ # property is not present when a session is updated via the WebSocket API.
+ variant -> { OpenAI::Realtime::RealtimeTranscriptionSessionCreateResponse }

  # @!method self.variants
- # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::ClientSecretCreateResponse::Session::RealtimeTranscriptionSessionCreateResponse)]
+ # @return [Array(OpenAI::Models::Realtime::RealtimeSessionCreateResponse, OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse)]
  end
  end
  end
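Because the transcription variant is now the top-level `RealtimeTranscriptionSessionCreateResponse` model rather than a class nested under `ClientSecretCreateResponse::Session`, code that branches on the union should reference the promoted constants. A small sketch, assuming `secret` is a `ClientSecretCreateResponse`:

```ruby
# Hedged sketch: branching on the promoted union variants.
case secret.session
when OpenAI::Models::Realtime::RealtimeSessionCreateResponse
  puts "speech-to-speech session configuration"
when OpenAI::Models::Realtime::RealtimeTranscriptionSessionCreateResponse
  puts "transcription-only session configuration"
end
```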
data/lib/openai/models/realtime/conversation_item.rb CHANGED
@@ -9,7 +9,7 @@ module OpenAI

  discriminator :type

- # A system message item in a Realtime conversation.
+ # A system message in a Realtime conversation can be used to provide additional context or instructions to the model. This is similar but distinct from the instruction prompt provided at the start of a conversation, as system messages can be added at any point in the conversation. For major changes to the conversation's behavior, use instructions, but for smaller updates (e.g. "the user is now asking about a different topic"), use system messages.
  variant :message, -> { OpenAI::Realtime::RealtimeConversationItemSystemMessage }

  # A user message item in a Realtime conversation.
data/lib/openai/models/realtime/conversation_item_added.rb CHANGED
@@ -33,7 +33,20 @@ module OpenAI
  # Some parameter documentations has been truncated, see
  # {OpenAI::Models::Realtime::ConversationItemAdded} for more details.
  #
- # Returned when a conversation item is added.
+ # Sent by the server when an Item is added to the default Conversation. This can
+ # happen in several cases:
+ #
+ # - When the client sends a `conversation.item.create` event.
+ # - When the input audio buffer is committed. In this case the item will be a user
+ # message containing the audio from the buffer.
+ # - When the model is generating a Response. In this case the
+ # `conversation.item.added` event will be sent when the model starts generating
+ # a specific Item, and thus it will not yet have any content (and `status` will
+ # be `in_progress`).
+ #
+ # The event will include the full content of the Item (except when model is
+ # generating a Response) except for audio data, which can be retrieved separately
+ # with a `conversation.item.retrieve` event if necessary.
  #
  # @param event_id [String] The unique ID of the server event.
  #
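A hedged sketch of handling the expanded `conversation.item.added` semantics on the client side; the surrounding event loop is left out, and only the `ConversationItemAdded` class with its `item` and `event_id` fields comes from this diff.

```ruby
# Hedged sketch: items added while the model is still generating arrive with
# empty content (status `in_progress`), so wait for `conversation.item.done`
# before treating the item as complete; audio must be fetched separately via
# `conversation.item.retrieve`.
def handle_server_event(event)
  case event
  when OpenAI::Models::Realtime::ConversationItemAdded
    puts "item added (event #{event.event_id}): #{event.item.inspect}"
  end
end
```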
data/lib/openai/models/realtime/conversation_item_done.rb CHANGED
@@ -35,6 +35,9 @@ module OpenAI
  #
  # Returned when a conversation item is finalized.
  #
+ # The event will include the full content of the Item except for audio data, which
+ # can be retrieved separately with a `conversation.item.retrieve` event if needed.
+ #
  # @param event_id [String] The unique ID of the server event.
  #
  # @param item [OpenAI::Models::Realtime::RealtimeConversationItemSystemMessage, OpenAI::Models::Realtime::RealtimeConversationItemUserMessage, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCallOutput, OpenAI::Models::Realtime::RealtimeMcpApprovalResponse, OpenAI::Models::Realtime::RealtimeMcpListTools, OpenAI::Models::Realtime::RealtimeMcpToolCall, OpenAI::Models::Realtime::RealtimeMcpApprovalRequest] A single item within a Realtime conversation.
data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb CHANGED
@@ -17,7 +17,7 @@ module OpenAI
  required :event_id, String

  # @!attribute item_id
- # The ID of the user message item containing the audio.
+ # The ID of the item containing the audio that is being transcribed.
  #
  # @return [String]
  required :item_id, String
@@ -35,7 +35,8 @@ module OpenAI
  required :type, const: :"conversation.item.input_audio_transcription.completed"

  # @!attribute usage
- # Usage statistics for the transcription.
+ # Usage statistics for the transcription, this is billed according to the ASR
+ # model's pricing rather than the realtime model's pricing.
  #
  # @return [OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageTokens, OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageDuration]
  required :usage,
@@ -56,9 +57,9 @@ module OpenAI
  #
  # This event is the output of audio transcription for user audio written to the
  # user audio buffer. Transcription begins when the input audio buffer is committed
- # by the client or server (in `server_vad` mode). Transcription runs
- # asynchronously with Response creation, so this event may come before or after
- # the Response events.
+ # by the client or server (when VAD is enabled). Transcription runs asynchronously
+ # with Response creation, so this event may come before or after the Response
+ # events.
  #
  # Realtime API models accept audio natively, and thus input transcription is a
  # separate process run on a separate ASR (Automatic Speech Recognition) model. The
@@ -69,17 +70,18 @@ module OpenAI
  #
  # @param event_id [String] The unique ID of the server event.
  #
- # @param item_id [String] The ID of the user message item containing the audio.
+ # @param item_id [String] The ID of the item containing the audio that is being transcribed.
  #
  # @param transcript [String] The transcribed text.
  #
- # @param usage [OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageTokens, OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageDuration] Usage statistics for the transcription.
+ # @param usage [OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageTokens, OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent::Usage::TranscriptTextUsageDuration] Usage statistics for the transcription, this is billed according to the ASR mode
  #
  # @param logprobs [Array<OpenAI::Models::Realtime::LogProbProperties>, nil] The log probabilities of the transcription.
  #
  # @param type [Symbol, :"conversation.item.input_audio_transcription.completed"] The event type, must be

- # Usage statistics for the transcription.
+ # Usage statistics for the transcription, this is billed according to the ASR
+ # model's pricing rather than the realtime model's pricing.
  #
  # @see OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionCompletedEvent#usage
  module Usage
data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb CHANGED
@@ -11,7 +11,7 @@ module OpenAI
  required :event_id, String

  # @!attribute item_id
- # The ID of the item.
+ # The ID of the item containing the audio that is being transcribed.
  #
  # @return [String]
  required :item_id, String
@@ -35,7 +35,12 @@ module OpenAI
  optional :delta, String

  # @!attribute logprobs
- # The log probabilities of the transcription.
+ # The log probabilities of the transcription. These can be enabled by
+ # configurating the session with
+ # `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the
+ # array corresponds a log probability of which token would be selected for this
+ # chunk of transcription. This can help to identify if it was possible there were
+ # multiple valid options for a given chunk of transcription.
  #
  # @return [Array<OpenAI::Models::Realtime::LogProbProperties>, nil]
  optional :logprobs,
@@ -43,18 +48,22 @@ module OpenAI
  nil?: true

  # @!method initialize(event_id:, item_id:, content_index: nil, delta: nil, logprobs: nil, type: :"conversation.item.input_audio_transcription.delta")
+ # Some parameter documentations has been truncated, see
+ # {OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionDeltaEvent}
+ # for more details.
+ #
  # Returned when the text value of an input audio transcription content part is
- # updated.
+ # updated with incremental transcription results.
  #
  # @param event_id [String] The unique ID of the server event.
  #
- # @param item_id [String] The ID of the item.
+ # @param item_id [String] The ID of the item containing the audio that is being transcribed.
  #
  # @param content_index [Integer] The index of the content part in the item's content array.
  #
  # @param delta [String] The text delta.
  #
- # @param logprobs [Array<OpenAI::Models::Realtime::LogProbProperties>, nil] The log probabilities of the transcription.
+ # @param logprobs [Array<OpenAI::Models::Realtime::LogProbProperties>, nil] The log probabilities of the transcription. These can be enabled by configuratin
  #
  # @param type [Symbol, :"conversation.item.input_audio_transcription.delta"] The event type, must be `conversation.item.input_audio_transcription.delta`.
  end
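To make the new logprobs documentation concrete, a hedged sketch of consuming the delta event follows. The `include` option quoted above is what enables the field; the shape of each `LogProbProperties` entry is not part of this diff, so entries are only inspected.

```ruby
# Hedged sketch: printing incremental transcript text and any per-chunk logprobs.
def handle_transcription_delta(event)
  return unless event.is_a?(OpenAI::Models::Realtime::ConversationItemInputAudioTranscriptionDeltaEvent)

  print event.delta
  Array(event.logprobs).each { |lp| warn lp.inspect }
end
```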
data/lib/openai/models/realtime/conversation_item_truncate_event.rb CHANGED
@@ -13,7 +13,7 @@ module OpenAI
  required :audio_end_ms, Integer

  # @!attribute content_index
- # The index of the content part to truncate. Set this to 0.
+ # The index of the content part to truncate. Set this to `0`.
  #
  # @return [Integer]
  required :content_index, Integer
@@ -55,7 +55,7 @@ module OpenAI
  #
  # @param audio_end_ms [Integer] Inclusive duration up to which audio is truncated, in milliseconds. If
  #
- # @param content_index [Integer] The index of the content part to truncate. Set this to 0.
+ # @param content_index [Integer] The index of the content part to truncate. Set this to `0`.
  #
  # @param item_id [String] The ID of the assistant message item to truncate. Only assistant message
  #
data/lib/openai/models/realtime/input_audio_buffer_append_event.rb CHANGED
@@ -28,14 +28,19 @@ module OpenAI
  # {OpenAI::Models::Realtime::InputAudioBufferAppendEvent} for more details.
  #
  # Send this event to append audio bytes to the input audio buffer. The audio
- # buffer is temporary storage you can write to and later commit. In Server VAD
- # mode, the audio buffer is used to detect speech and the server will decide when
- # to commit. When Server VAD is disabled, you must commit the audio buffer
- # manually.
+ # buffer is temporary storage you can write to and later commit. A "commit" will
+ # create a new user message item in the conversation history from the buffer
+ # content and clear the buffer. Input audio transcription (if enabled) will be
+ # generated when the buffer is committed.
+ #
+ # If VAD is enabled the audio buffer is used to detect speech and the server will
+ # decide when to commit. When Server VAD is disabled, you must commit the audio
+ # buffer manually. Input audio noise reduction operates on writes to the audio
+ # buffer.
  #
  # The client may choose how much audio to place in each event up to a maximum of
  # 15 MiB, for example streaming smaller chunks from the client may allow the VAD
- # to be more responsive. Unlike made other client events, the server will not send
+ # to be more responsive. Unlike most other client events, the server will not send
  # a confirmation response to this event.

  # @param audio [String] Base64-encoded audio bytes. This must be in the format specified by the
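Finally, a hedged sketch of the append/commit flow described above. The WebSocket transport (`socket`) is a placeholder, and serializing the event with `to_h`/`JSON.generate` is an assumption about the model's helpers rather than something this diff shows.

```ruby
require "base64"
require "json"

# Hedged sketch: stream audio in modest chunks (well under the 15 MiB cap) so
# server VAD can respond sooner; the server sends no confirmation for appends.
def append_audio(socket, pcm_bytes, chunk_size: 32_768)
  offset = 0
  while offset < pcm_bytes.bytesize
    chunk = pcm_bytes.byteslice(offset, chunk_size)
    event = OpenAI::Models::Realtime::InputAudioBufferAppendEvent.new(
      audio: Base64.strict_encode64(chunk)
    )
    socket.send(JSON.generate(event.to_h))
    offset += chunk_size
  end
end
```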