openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute format_
8
+ # The format of the output audio.
9
+ #
10
+ # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil]
11
+ optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format
12
+
13
+ # @!attribute speed
14
+ # The speed of the model's spoken response as a multiple of the original speed.
15
+ # 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
16
+ # This value can only be changed in between model turns, not while a response is
17
+ # in progress.
18
+ #
19
+ # This parameter is a post-processing adjustment to the audio after it is
20
+ # generated, it's also possible to prompt the model to speak faster or slower.
21
+ #
22
+ # @return [Float, nil]
23
+ optional :speed, Float
24
+
25
+ # @!attribute voice
26
+ # The voice the model uses to respond. Voice cannot be changed during the session
27
+ # once the model has responded with audio at least once. Current voice options are
28
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
29
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
30
+ #
31
+ # @return [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice, nil]
32
+ optional :voice, union: -> { OpenAI::Realtime::RealtimeAudioConfigOutput::Voice }
33
+
34
+ # @!method initialize(format_: nil, speed: nil, voice: nil)
35
+ # Some parameter documentation has been truncated; see
36
+ # {OpenAI::Models::Realtime::RealtimeAudioConfigOutput} for more details.
37
+ #
38
+ # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the output audio.
39
+ #
40
+ # @param speed [Float] The speed of the model's spoken response as a multiple of the original speed.
41
+ #
42
+ # @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice] The voice the model uses to respond. Voice cannot be changed during the
43
+
44
+ # The voice the model uses to respond. Voice cannot be changed during the session
45
+ # once the model has responded with audio at least once. Current voice options are
46
+ # `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`,
47
+ # and `cedar`. We recommend `marin` and `cedar` for best quality.
48
+ #
49
+ # @see OpenAI::Models::Realtime::RealtimeAudioConfigOutput#voice
50
+ module Voice
51
+ extend OpenAI::Internal::Type::Union
52
+
53
+ variant String
54
+
55
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ALLOY }
56
+
57
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ASH }
58
+
59
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::BALLAD }
60
+
61
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::CORAL }
62
+
63
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ECHO }
64
+
65
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::SAGE }
66
+
67
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::SHIMMER }
68
+
69
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::VERSE }
70
+
71
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::MARIN }
72
+
73
+ variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::CEDAR }
74
+
75
+ # @!method self.variants
76
+ # @return [Array(String, Symbol)]
77
+
78
+ define_sorbet_constant!(:Variants) do
79
+ T.type_alias { T.any(String, OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol) }
80
+ end
81
+
82
+ # @!group
83
+
84
+ ALLOY = :alloy
85
+ ASH = :ash
86
+ BALLAD = :ballad
87
+ CORAL = :coral
88
+ ECHO = :echo
89
+ SAGE = :sage
90
+ SHIMMER = :shimmer
91
+ VERSE = :verse
92
+ MARIN = :marin
93
+ CEDAR = :cedar
94
+
95
+ # @!endgroup
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ # The PCM audio format. Only a 24kHz sample rate is supported.
7
+ module RealtimeAudioFormats
8
+ extend OpenAI::Internal::Type::Union
9
+
10
+ discriminator :type
11
+
12
+ # The PCM audio format. Only a 24kHz sample rate is supported.
13
+ variant :"audio/pcm", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM }
14
+
15
+ # The G.711 μ-law format.
16
+ variant :"audio/pcmu", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU }
17
+
18
+ # The G.711 A-law format.
19
+ variant :"audio/pcma", -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA }
20
+
21
+ class AudioPCM < OpenAI::Internal::Type::BaseModel
22
+ # @!attribute rate
23
+ # The sample rate of the audio. Always `24000`.
24
+ #
25
+ # @return [Integer, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Rate, nil]
26
+ optional :rate, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Rate }
27
+
28
+ # @!attribute type
29
+ # The audio format. Always `audio/pcm`.
30
+ #
31
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Type, nil]
32
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::Type }
33
+
34
+ # @!method initialize(rate: nil, type: nil)
35
+ # The PCM audio format. Only a 24kHz sample rate is supported.
36
+ #
37
+ # @param rate [Integer, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Rate] The sample rate of the audio. Always `24000`.
38
+ #
39
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM::Type] The audio format. Always `audio/pcm`.
40
+
41
+ # The sample rate of the audio. Always `24000`.
42
+ #
43
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM#rate
44
+ module Rate
45
+ extend OpenAI::Internal::Type::Enum
46
+
47
+ RATE_24000 = 24_000
48
+
49
+ # @!method self.values
50
+ # @return [Array<Integer>]
51
+ end
52
+
53
+ # The audio format. Always `audio/pcm`.
54
+ #
55
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM#type
56
+ module Type
57
+ extend OpenAI::Internal::Type::Enum
58
+
59
+ AUDIO_PCM = :"audio/pcm"
60
+
61
+ # @!method self.values
62
+ # @return [Array<Symbol>]
63
+ end
64
+ end
65
+
66
+ class AudioPCMU < OpenAI::Internal::Type::BaseModel
67
+ # @!attribute type
68
+ # The audio format. Always `audio/pcmu`.
69
+ #
70
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::Type, nil]
71
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::Type }
72
+
73
+ # @!method initialize(type: nil)
74
+ # The G.711 μ-law format.
75
+ #
76
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU::Type] The audio format. Always `audio/pcmu`.
77
+
78
+ # The audio format. Always `audio/pcmu`.
79
+ #
80
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU#type
81
+ module Type
82
+ extend OpenAI::Internal::Type::Enum
83
+
84
+ AUDIO_PCMU = :"audio/pcmu"
85
+
86
+ # @!method self.values
87
+ # @return [Array<Symbol>]
88
+ end
89
+ end
90
+
91
+ class AudioPCMA < OpenAI::Internal::Type::BaseModel
92
+ # @!attribute type
93
+ # The audio format. Always `audio/pcma`.
94
+ #
95
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::Type, nil]
96
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::Type }
97
+
98
+ # @!method initialize(type: nil)
99
+ # The G.711 A-law format.
100
+ #
101
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA::Type] The audio format. Always `audio/pcma`.
102
+
103
+ # The audio format. Always `audio/pcma`.
104
+ #
105
+ # @see OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA#type
106
+ module Type
107
+ extend OpenAI::Internal::Type::Enum
108
+
109
+ AUDIO_PCMA = :"audio/pcma"
110
+
111
+ # @!method self.values
112
+ # @return [Array<Symbol>]
113
+ end
114
+ end
115
+
116
+ # @!method self.variants
117
+ # @return [Array(OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA)]
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute create_response
8
+ # Whether or not to automatically generate a response when a VAD stop event
9
+ # occurs.
10
+ #
11
+ # @return [Boolean, nil]
12
+ optional :create_response, OpenAI::Internal::Type::Boolean
13
+
14
+ # @!attribute eagerness
15
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
16
+ # will wait longer for the user to continue speaking, `high` will respond more
17
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
18
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
19
+ #
20
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness, nil]
21
+ optional :eagerness, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness }
22
+
23
+ # @!attribute idle_timeout_ms
24
+ # Optional idle timeout after which turn detection will auto-timeout when no
25
+ # additional audio is received.
26
+ #
27
+ # @return [Integer, nil]
28
+ optional :idle_timeout_ms, Integer, nil?: true
29
+
30
+ # @!attribute interrupt_response
31
+ # Whether or not to automatically interrupt any ongoing response with output to
32
+ # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
33
+ # occurs.
34
+ #
35
+ # @return [Boolean, nil]
36
+ optional :interrupt_response, OpenAI::Internal::Type::Boolean
37
+
38
+ # @!attribute prefix_padding_ms
39
+ # Used only for `server_vad` mode. Amount of audio to include before the VAD
40
+ # detected speech (in milliseconds). Defaults to 300ms.
41
+ #
42
+ # @return [Integer, nil]
43
+ optional :prefix_padding_ms, Integer
44
+
45
+ # @!attribute silence_duration_ms
46
+ # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
47
+ # milliseconds). Defaults to 500ms. With shorter values the model will respond
48
+ # more quickly, but may jump in on short pauses from the user.
49
+ #
50
+ # @return [Integer, nil]
51
+ optional :silence_duration_ms, Integer
52
+
53
+ # @!attribute threshold
54
+ # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
55
+ # defaults to 0.5. A higher threshold will require louder audio to activate the
56
+ # model, and thus might perform better in noisy environments.
57
+ #
58
+ # @return [Float, nil]
59
+ optional :threshold, Float
60
+
61
+ # @!attribute type
62
+ # Type of turn detection.
63
+ #
64
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type, nil]
65
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type }
66
+
67
+ # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
68
+ # Some parameter documentation has been truncated; see
69
+ # {OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection} for more details.
70
+ #
71
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
72
+ # set to `null` to turn off, in which case the client must manually trigger model
73
+ # response. Server VAD means that the model will detect the start and end of
74
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
75
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
76
+ # semantically estimate whether the user has finished speaking, then dynamically
77
+ # sets a timeout based on this probability. For example, if user audio trails off
78
+ # with "uhhm", the model will score a low probability of turn end and wait longer
79
+ # for the user to continue speaking. This can be useful for more natural
80
+ # conversations, but may have a higher latency.
81
+ #
82
+ # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs
83
+ #
84
+ # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
85
+ #
86
+ # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when
87
+ #
88
+ # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th
89
+ #
90
+ # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec
91
+ #
92
+ # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m
93
+ #
94
+ # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
95
+ #
96
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type] Type of turn detection.
97
+
98
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
99
+ # will wait longer for the user to continue speaking, `high` will respond more
100
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
101
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
102
+ #
103
+ # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#eagerness
104
+ module Eagerness
105
+ extend OpenAI::Internal::Type::Enum
106
+
107
+ LOW = :low
108
+ MEDIUM = :medium
109
+ HIGH = :high
110
+ AUTO = :auto
111
+
112
+ # @!method self.values
113
+ # @return [Array<Symbol>]
114
+ end
115
+
116
+ # Type of turn detection.
117
+ #
118
+ # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#type
119
+ module Type
120
+ extend OpenAI::Internal::Type::Enum
121
+
122
+ SERVER_VAD = :server_vad
123
+ SEMANTIC_VAD = :semantic_vad
124
+
125
+ # @!method self.values
126
+ # @return [Array<Symbol>]
127
+ end
128
+ end
129
+ end
130
+ end
131
+ end
@@ -44,14 +44,17 @@ module OpenAI
44
44
  variant :"conversation.item.truncate", -> { OpenAI::Realtime::ConversationItemTruncateEvent }
45
45
 
46
46
  # Send this event to append audio bytes to the input audio buffer. The audio
47
- # buffer is temporary storage you can write to and later commit. In Server VAD
48
- # mode, the audio buffer is used to detect speech and the server will decide
47
+ # buffer is temporary storage you can write to and later commit. A "commit" will create a new
48
+ # user message item in the conversation history from the buffer content and clear the buffer.
49
+ # Input audio transcription (if enabled) will be generated when the buffer is committed.
50
+ #
51
+ # If VAD is enabled the audio buffer is used to detect speech and the server will decide
49
52
  # when to commit. When Server VAD is disabled, you must commit the audio buffer
50
- # manually.
53
+ # manually. Input audio noise reduction operates on writes to the audio buffer.
51
54
  #
52
55
  # The client may choose how much audio to place in each event up to a maximum
53
56
  # of 15 MiB, for example streaming smaller chunks from the client may allow the
54
- # VAD to be more responsive. Unlike made other client events, the server will
57
+ # VAD to be more responsive. Unlike most other client events, the server will
55
58
  # not send a confirmation response to this event.
56
59
  variant :"input_audio_buffer.append", -> { OpenAI::Realtime::InputAudioBufferAppendEvent }
57
60
 
@@ -66,21 +69,16 @@ module OpenAI
66
69
  # [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
67
70
  variant :"output_audio_buffer.clear", -> { OpenAI::Realtime::OutputAudioBufferClearEvent }
68
71
 
69
- # Send this event to commit the user input audio buffer, which will create a
70
- # new user message item in the conversation. This event will produce an error
71
- # if the input audio buffer is empty. When in Server VAD mode, the client does
72
- # not need to send this event, the server will commit the audio buffer
73
- # automatically.
72
+ # Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.
74
73
  #
75
- # Committing the input audio buffer will trigger input audio transcription
76
- # (if enabled in session configuration), but it will not create a response
77
- # from the model. The server will respond with an `input_audio_buffer.committed`
78
- # event.
74
+ # Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
79
75
  variant :"input_audio_buffer.commit", -> { OpenAI::Realtime::InputAudioBufferCommitEvent }
80
76
 
81
77
  # Send this event to cancel an in-progress response. The server will respond
82
78
  # with a `response.done` event with a status of `response.status=cancelled`. If
83
- # there is no response to cancel, the server will respond with an error.
79
+ # there is no response to cancel, the server will respond with an error. It's safe
80
+ # to call `response.cancel` even if no response is in progress, an error will be
81
+ returned and the session will remain unaffected.
84
82
  variant :"response.cancel", -> { OpenAI::Realtime::ResponseCancelEvent }
85
83
 
86
84
  # This event instructs the server to create a Response, which means triggering
@@ -89,27 +87,37 @@ module OpenAI
89
87
  #
90
88
  # A Response will include at least one Item, and may have two, in which case
91
89
  # the second will be a function call. These Items will be appended to the
92
- # conversation history.
90
+ # conversation history by default.
93
91
  #
94
92
  # The server will respond with a `response.created` event, events for Items
95
93
  # and content created, and finally a `response.done` event to indicate the
96
94
  # Response is complete.
97
95
  #
98
96
  # The `response.create` event includes inference configuration like
99
- # `instructions`, and `temperature`. These fields will override the Session's
97
+ # `instructions` and `tools`. If these are set, they will override the Session's
100
98
  # configuration for this Response only.
99
+ #
100
+ # Responses can be created out-of-band of the default Conversation, meaning that they can
101
+ # have arbitrary input, and it's possible to disable writing the output to the Conversation.
102
+ # Only one Response can write to the default Conversation at a time, but otherwise multiple
103
+ # Responses can be created in parallel. The `metadata` field is a good way to disambiguate
104
+ # multiple simultaneous Responses.
105
+ #
106
+ # Clients can set `conversation` to `none` to create a Response that does not write to the default
107
+ # Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
108
+ # raw Items and references to existing Items.
101
109
  variant :"response.create", -> { OpenAI::Realtime::ResponseCreateEvent }
102
110
 
103
- # Send this event to update the session’s default configuration.
104
- # The client may send this event at any time to update any field,
105
- # except for `voice`. However, note that once a session has been
106
- # initialized with a particular `model`, it can’t be changed to
107
- # another model using `session.update`.
111
+ # Send this event to update the session’s configuration.
112
+ # The client may send this event at any time to update any field
113
+ # except for `voice` and `model`. `voice` can be updated only if there have been no other
114
+ # audio outputs yet.
108
115
  #
109
116
  # When the server receives a `session.update`, it will respond
110
117
  # with a `session.updated` event showing the full, effective configuration.
111
- # Only the fields that are present are updated. To clear a field like
112
- # `instructions`, pass an empty string.
118
+ # Only the fields that are present in the `session.update` are updated. To clear a field like
119
+ # `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
120
+ # To clear a field like `turn_detection`, pass `null`.
113
121
  variant :"session.update", -> { OpenAI::Realtime::SessionUpdateEvent }
114
122
 
115
123
  # Send this event to update a transcription session.
@@ -24,13 +24,15 @@ module OpenAI
24
24
  required :type, const: :message
25
25
 
26
26
  # @!attribute id
27
- # The unique ID of the item.
27
+ # The unique ID of the item. This may be provided by the client or generated by
28
+ # the server.
28
29
  #
29
30
  # @return [String, nil]
30
31
  optional :id, String
31
32
 
32
33
  # @!attribute object
33
- # Identifier for the API object being returned - always `realtime.item`.
34
+ # Identifier for the API object being returned - always `realtime.item`. Optional
35
+ # when creating a new item.
34
36
  #
35
37
  # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object, nil]
36
38
  optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Object }
@@ -42,13 +44,17 @@ module OpenAI
42
44
  optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Status }
43
45
 
44
46
  # @!method initialize(content:, id: nil, object: nil, status: nil, role: :assistant, type: :message)
47
+ # Some parameter documentations has been truncated, see
48
+ # {OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage} for more
49
+ # details.
50
+ #
45
51
  # An assistant message item in a Realtime conversation.
46
52
  #
47
53
  # @param content [Array<OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content>] The content of the message.
48
54
  #
49
- # @param id [String] The unique ID of the item.
55
+ # @param id [String] The unique ID of the item. This may be provided by the client or generated by th
50
56
  #
51
- # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object] Identifier for the API object being returned - always `realtime.item`.
57
+ # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Object] Identifier for the API object being returned - always `realtime.item`. Optional
52
58
  #
53
59
  # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Status] The status of the item. Has no effect on the conversation.
54
60
  #
@@ -57,37 +63,64 @@ module OpenAI
57
63
  # @param type [Symbol, :message] The type of the item. Always `message`.
58
64
 
59
65
  class Content < OpenAI::Internal::Type::BaseModel
66
+ # @!attribute audio
67
+ # Base64-encoded audio bytes, these will be parsed as the format specified in the
68
+ # session output audio type configuration. This defaults to PCM 16-bit 24kHz mono
69
+ # if not specified.
70
+ #
71
+ # @return [String, nil]
72
+ optional :audio, String
73
+
60
74
  # @!attribute text
61
75
  # The text content.
62
76
  #
63
77
  # @return [String, nil]
64
78
  optional :text, String
65
79
 
80
+ # @!attribute transcript
81
+ # The transcript of the audio content, this will always be present if the output
82
+ # type is `audio`.
83
+ #
84
+ # @return [String, nil]
85
+ optional :transcript, String
86
+
66
87
  # @!attribute type
67
- # The content type. Always `text` for assistant messages.
88
+ # The content type, `output_text` or `output_audio` depending on the session
89
+ # `output_modalities` configuration.
68
90
  #
69
91
  # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type, nil]
70
92
  optional :type, enum: -> { OpenAI::Realtime::RealtimeConversationItemAssistantMessage::Content::Type }
71
93
 
72
- # @!method initialize(text: nil, type: nil)
94
+ # @!method initialize(audio: nil, text: nil, transcript: nil, type: nil)
95
+ # Some parameter documentations has been truncated, see
96
+ # {OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content}
97
+ # for more details.
98
+ #
99
+ # @param audio [String] Base64-encoded audio bytes, these will be parsed as the format specified in the
100
+ #
73
101
  # @param text [String] The text content.
74
102
  #
75
- # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type] The content type. Always `text` for assistant messages.
103
+ # @param transcript [String] The transcript of the audio content, this will always be present if the output t
104
+ #
105
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content::Type] The content type, `output_text` or `output_audio` depending on the session `outp
76
106
 
77
- # The content type. Always `text` for assistant messages.
107
+ # The content type, `output_text` or `output_audio` depending on the session
108
+ # `output_modalities` configuration.
78
109
  #
79
110
  # @see OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage::Content#type
80
111
  module Type
81
112
  extend OpenAI::Internal::Type::Enum
82
113
 
83
- TEXT = :text
114
+ OUTPUT_TEXT = :output_text
115
+ OUTPUT_AUDIO = :output_audio
84
116
 
85
117
  # @!method self.values
86
118
  # @return [Array<Symbol>]
87
119
  end
88
120
  end
89
121
 
90
- # Identifier for the API object being returned - always `realtime.item`.
122
+ # Identifier for the API object being returned - always `realtime.item`. Optional
123
+ # when creating a new item.
91
124
  #
92
125
  # @see OpenAI::Models::Realtime::RealtimeConversationItemAssistantMessage#object
93
126
  module Object
@@ -5,7 +5,9 @@ module OpenAI
5
5
  module Realtime
6
6
  class RealtimeConversationItemFunctionCall < OpenAI::Internal::Type::BaseModel
7
7
  # @!attribute arguments
8
- # The arguments of the function call.
8
+ # The arguments of the function call. This is a JSON-encoded string representing
9
+ # the arguments passed to the function, for example
10
+ # `{"arg1": "value1", "arg2": 42}`.
9
11
  #
10
12
  # @return [String]
11
13
  required :arguments, String
@@ -23,7 +25,8 @@ module OpenAI
23
25
  required :type, const: :function_call
24
26
 
25
27
  # @!attribute id
26
- # The unique ID of the item.
28
+ # The unique ID of the item. This may be provided by the client or generated by
29
+ # the server.
27
30
  #
28
31
  # @return [String, nil]
29
32
  optional :id, String
@@ -35,7 +38,8 @@ module OpenAI
35
38
  optional :call_id, String
36
39
 
37
40
  # @!attribute object
38
- # Identifier for the API object being returned - always `realtime.item`.
41
+ # Identifier for the API object being returned - always `realtime.item`. Optional
42
+ # when creating a new item.
39
43
  #
40
44
  # @return [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object, nil]
41
45
  optional :object, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCall::Object }
@@ -47,23 +51,28 @@ module OpenAI
47
51
  optional :status, enum: -> { OpenAI::Realtime::RealtimeConversationItemFunctionCall::Status }
48
52
 
49
53
  # @!method initialize(arguments:, name:, id: nil, call_id: nil, object: nil, status: nil, type: :function_call)
54
+ # Some parameter documentations has been truncated, see
55
+ # {OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall} for more
56
+ # details.
57
+ #
50
58
  # A function call item in a Realtime conversation.
51
59
  #
52
- # @param arguments [String] The arguments of the function call.
60
+ # @param arguments [String] The arguments of the function call. This is a JSON-encoded string representing t
53
61
  #
54
62
  # @param name [String] The name of the function being called.
55
63
  #
56
- # @param id [String] The unique ID of the item.
64
+ # @param id [String] The unique ID of the item. This may be provided by the client or generated by th
57
65
  #
58
66
  # @param call_id [String] The ID of the function call.
59
67
  #
60
- # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object] Identifier for the API object being returned - always `realtime.item`.
68
+ # @param object [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Object] Identifier for the API object being returned - always `realtime.item`. Optional
61
69
  #
62
70
  # @param status [Symbol, OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall::Status] The status of the item. Has no effect on the conversation.
63
71
  #
64
72
  # @param type [Symbol, :function_call] The type of the item. Always `function_call`.
65
73
 
66
- # Identifier for the API object being returned - always `realtime.item`.
74
+ # Identifier for the API object being returned - always `realtime.item`. Optional
75
+ # when creating a new item.
67
76
  #
68
77
  # @see OpenAI::Models::Realtime::RealtimeConversationItemFunctionCall#object
69
78
  module Object