openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -11,63 +11,12 @@ module OpenAI
11
11
 
12
12
  discriminator :type
13
13
 
14
- variant :function, -> { OpenAI::Realtime::RealtimeToolsConfigUnion::Function }
14
+ variant :function, -> { OpenAI::Realtime::Models }
15
15
 
16
16
  # Give the model access to additional tools via remote Model Context Protocol
17
17
  # (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
18
18
  variant :mcp, -> { OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp }
19
19
 
20
- class Function < OpenAI::Internal::Type::BaseModel
21
- # @!attribute description
22
- # The description of the function, including guidance on when and how to call it,
23
- # and guidance about what to tell the user when calling (if anything).
24
- #
25
- # @return [String, nil]
26
- optional :description, String
27
-
28
- # @!attribute name
29
- # The name of the function.
30
- #
31
- # @return [String, nil]
32
- optional :name, String
33
-
34
- # @!attribute parameters
35
- # Parameters of the function in JSON Schema.
36
- #
37
- # @return [Object, nil]
38
- optional :parameters, OpenAI::Internal::Type::Unknown
39
-
40
- # @!attribute type
41
- # The type of the tool, i.e. `function`.
42
- #
43
- # @return [Symbol, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::Type, nil]
44
- optional :type, enum: -> { OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type }
45
-
46
- # @!method initialize(description: nil, name: nil, parameters: nil, type: nil)
47
- # Some parameter documentations has been truncated, see
48
- # {OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function} for more details.
49
- #
50
- # @param description [String] The description of the function, including guidance on when and how
51
- #
52
- # @param name [String] The name of the function.
53
- #
54
- # @param parameters [Object] Parameters of the function in JSON Schema.
55
- #
56
- # @param type [Symbol, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function::Type] The type of the tool, i.e. `function`.
57
-
58
- # The type of the tool, i.e. `function`.
59
- #
60
- # @see OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function#type
61
- module Type
62
- extend OpenAI::Internal::Type::Enum
63
-
64
- FUNCTION = :function
65
-
66
- # @!method self.values
67
- # @return [Array<Symbol>]
68
- end
69
- end
70
-
71
20
  class Mcp < OpenAI::Internal::Type::BaseModel
72
21
  # @!attribute server_label
73
22
  # A label for this MCP server, used to identify it in tool calls.
@@ -372,7 +321,7 @@ module OpenAI
372
321
  end
373
322
 
374
323
  # @!method self.variants
375
- # @return [Array(OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Function, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp)]
324
+ # @return [Array(OpenAI::Models::Realtime::Models, OpenAI::Models::Realtime::RealtimeToolsConfigUnion::Mcp)]
376
325
  end
377
326
  end
378
327
  end
@@ -3,8 +3,9 @@
3
3
  module OpenAI
4
4
  module Models
5
5
  module Realtime
6
- # Configuration options for tracing. Set to null to disable tracing. Once tracing
7
- # is enabled for a session, the configuration cannot be modified.
6
+ # Realtime API can write session traces to the
7
+ # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
8
+ # tracing is enabled for a session, the configuration cannot be modified.
8
9
  #
9
10
  # `auto` will create a trace for the session with default values for the workflow
10
11
  # name, group id, and metadata.
@@ -20,21 +21,21 @@ module OpenAI
20
21
  class TracingConfiguration < OpenAI::Internal::Type::BaseModel
21
22
  # @!attribute group_id
22
23
  # The group id to attach to this trace to enable filtering and grouping in the
23
- # traces dashboard.
24
+ # Traces Dashboard.
24
25
  #
25
26
  # @return [String, nil]
26
27
  optional :group_id, String
27
28
 
28
29
  # @!attribute metadata
29
- # The arbitrary metadata to attach to this trace to enable filtering in the traces
30
- # dashboard.
30
+ # The arbitrary metadata to attach to this trace to enable filtering in the Traces
31
+ # Dashboard.
31
32
  #
32
33
  # @return [Object, nil]
33
34
  optional :metadata, OpenAI::Internal::Type::Unknown
34
35
 
35
36
  # @!attribute workflow_name
36
37
  # The name of the workflow to attach to this trace. This is used to name the trace
37
- # in the traces dashboard.
38
+ # in the Traces Dashboard.
38
39
  #
39
40
  # @return [String, nil]
40
41
  optional :workflow_name, String
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranscriptionSessionAudio < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute input
8
+ #
9
+ # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput, nil]
10
+ optional :input, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput }
11
+
12
+ # @!method initialize(input: nil)
13
+ # Configuration for input and output audio.
14
+ #
15
+ # @param input [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput]
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranscriptionSessionAudioInput < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute format_
8
+ # The PCM audio format. Only a 24kHz sample rate is supported.
9
+ #
10
+ # @return [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA, nil]
11
+ optional :format_, union: -> { OpenAI::Realtime::RealtimeAudioFormats }, api_name: :format
12
+
13
+ # @!attribute noise_reduction
14
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
15
+ # off. Noise reduction filters audio added to the input audio buffer before it is
16
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
17
+ # detection accuracy (reducing false positives) and model performance by improving
18
+ # perception of the input audio.
19
+ #
20
+ # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction, nil]
21
+ optional :noise_reduction, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction }
22
+
23
+ # @!attribute transcription
24
+ # Configuration for input audio transcription, defaults to off and can be set to
25
+ # `null` to turn off once on. Input audio transcription is not native to the
26
+ # model, since the model consumes audio directly. Transcription runs
27
+ # asynchronously through
28
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
29
+ # and should be treated as guidance of input audio content rather than precisely
30
+ # what the model heard. The client can optionally set the language and prompt for
31
+ # transcription; these offer additional guidance to the transcription service.
32
+ #
33
+ # @return [OpenAI::Models::Realtime::AudioTranscription, nil]
34
+ optional :transcription, -> { OpenAI::Realtime::AudioTranscription }
35
+
36
+ # @!attribute turn_detection
37
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
38
+ # set to `null` to turn off, in which case the client must manually trigger model
39
+ # response. Server VAD means that the model will detect the start and end of
40
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
41
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
42
+ # semantically estimate whether the user has finished speaking, then dynamically
43
+ # sets a timeout based on this probability. For example, if user audio trails off
44
+ # with "uhhm", the model will score a low probability of turn end and wait longer
45
+ # for the user to continue speaking. This can be useful for more natural
46
+ # conversations, but may have a higher latency.
47
+ #
48
+ # @return [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection, nil]
49
+ optional :turn_detection, -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection }
50
+
51
+ # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
52
+ # Some parameter documentation has been truncated, see
53
+ # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput} for more
54
+ # details.
55
+ #
56
+ # @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The PCM audio format. Only a 24kHz sample rate is supported.
57
+ #
58
+ # @param noise_reduction [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction] Configuration for input audio noise reduction. This can be set to `null` to turn
59
+ #
60
+ # @param transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription, defaults to off and can be set to `
61
+ #
62
+ # @param turn_detection [OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection] Configuration for turn detection, either Server VAD or Semantic VAD. This can be
63
+
64
+ # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput#noise_reduction
65
+ class NoiseReduction < OpenAI::Internal::Type::BaseModel
66
+ # @!attribute type
67
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
68
+ # headphones, `far_field` is for far-field microphones such as laptop or
69
+ # conference room microphones.
70
+ #
71
+ # @return [Symbol, OpenAI::Models::Realtime::NoiseReductionType, nil]
72
+ optional :type, enum: -> { OpenAI::Realtime::NoiseReductionType }
73
+
74
+ # @!method initialize(type: nil)
75
+ # Some parameter documentation has been truncated, see
76
+ # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction}
77
+ # for more details.
78
+ #
79
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
80
+ # off. Noise reduction filters audio added to the input audio buffer before it is
81
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
82
+ # detection accuracy (reducing false positives) and model performance by improving
83
+ # perception of the input audio.
84
+ #
85
+ # @param type [Symbol, OpenAI::Models::Realtime::NoiseReductionType] Type of noise reduction. `near_field` is for close-talking microphones such as h
86
+ end
87
+ end
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranscriptionSessionAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute create_response
8
+ # Whether or not to automatically generate a response when a VAD stop event
9
+ # occurs.
10
+ #
11
+ # @return [Boolean, nil]
12
+ optional :create_response, OpenAI::Internal::Type::Boolean
13
+
14
+ # @!attribute eagerness
15
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
16
+ # will wait longer for the user to continue speaking, `high` will respond more
17
+ # quickly. `auto` is the default and is equivalent to `medium`.
18
+ #
19
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness, nil]
20
+ optional :eagerness,
21
+ enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness }
22
+
23
+ # @!attribute idle_timeout_ms
24
+ # Optional idle timeout after which turn detection will auto-timeout when no
25
+ # additional audio is received.
26
+ #
27
+ # @return [Integer, nil]
28
+ optional :idle_timeout_ms, Integer, nil?: true
29
+
30
+ # @!attribute interrupt_response
31
+ # Whether or not to automatically interrupt any ongoing response with output to
32
+ # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
33
+ # occurs.
34
+ #
35
+ # @return [Boolean, nil]
36
+ optional :interrupt_response, OpenAI::Internal::Type::Boolean
37
+
38
+ # @!attribute prefix_padding_ms
39
+ # Used only for `server_vad` mode. Amount of audio to include before the VAD
40
+ # detected speech (in milliseconds). Defaults to 300ms.
41
+ #
42
+ # @return [Integer, nil]
43
+ optional :prefix_padding_ms, Integer
44
+
45
+ # @!attribute silence_duration_ms
46
+ # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
47
+ # milliseconds). Defaults to 500ms. With shorter values the model will respond
48
+ # more quickly, but may jump in on short pauses from the user.
49
+ #
50
+ # @return [Integer, nil]
51
+ optional :silence_duration_ms, Integer
52
+
53
+ # @!attribute threshold
54
+ # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
55
+ # defaults to 0.5. A higher threshold will require louder audio to activate the
56
+ # model, and thus might perform better in noisy environments.
57
+ #
58
+ # @return [Float, nil]
59
+ optional :threshold, Float
60
+
61
+ # @!attribute type
62
+ # Type of turn detection.
63
+ #
64
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type, nil]
65
+ optional :type, enum: -> { OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type }
66
+
67
+ # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
68
+ # Some parameter documentation has been truncated, see
69
+ # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection}
70
+ # for more details.
71
+ #
72
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
73
+ # set to `null` to turn off, in which case the client must manually trigger model
74
+ # response. Server VAD means that the model will detect the start and end of
75
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
76
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
77
+ # semantically estimate whether the user has finished speaking, then dynamically
78
+ # sets a timeout based on this probability. For example, if user audio trails off
79
+ # with "uhhm", the model will score a low probability of turn end and wait longer
80
+ # for the user to continue speaking. This can be useful for more natural
81
+ # conversations, but may have a higher latency.
82
+ #
83
+ # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs
84
+ #
85
+ # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
86
+ #
87
+ # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when
88
+ #
89
+ # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th
90
+ #
91
+ # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec
92
+ #
93
+ # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m
94
+ #
95
+ # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
96
+ #
97
+ # @param type [Symbol, OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::Type] Type of turn detection.
98
+
99
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
100
+ # will wait longer for the user to continue speaking, `high` will respond more
101
+ # quickly. `auto` is the default and is equivalent to `medium`.
102
+ #
103
+ # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection#eagerness
104
+ module Eagerness
105
+ extend OpenAI::Internal::Type::Enum
106
+
107
+ LOW = :low
108
+ MEDIUM = :medium
109
+ HIGH = :high
110
+ AUTO = :auto
111
+
112
+ # @!method self.values
113
+ # @return [Array<Symbol>]
114
+ end
115
+
116
+ # Type of turn detection.
117
+ #
118
+ # @see OpenAI::Models::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection#type
119
+ module Type
120
+ extend OpenAI::Internal::Type::Enum
121
+
122
+ SERVER_VAD = :server_vad
123
+ SEMANTIC_VAD = :semantic_vad
124
+
125
+ # @!method self.values
126
+ # @return [Array<Symbol>]
127
+ end
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranscriptionSessionClientSecret < OpenAI::Internal::Type::BaseModel
7
+ # @!attribute expires_at
8
+ # Timestamp for when the token expires. Currently, all tokens expire after one
9
+ # minute.
10
+ #
11
+ # @return [Integer]
12
+ required :expires_at, Integer
13
+
14
+ # @!attribute value
15
+ # Ephemeral key usable in client environments to authenticate connections to the
16
+ # Realtime API. Use this in client-side environments rather than a standard API
17
+ # token, which should only be used server-side.
18
+ #
19
+ # @return [String]
20
+ required :value, String
21
+
22
+ # @!method initialize(expires_at:, value:)
23
+ # Some parameter documentation has been truncated, see
24
+ # {OpenAI::Models::Realtime::RealtimeTranscriptionSessionClientSecret} for more
25
+ # details.
26
+ #
27
+ # Ephemeral key returned by the API. Only present when the session is created on
28
+ # the server via REST API.
29
+ #
30
+ # @param expires_at [Integer] Timestamp for when the token expires. Currently, all tokens expire
31
+ #
32
+ # @param value [String] Ephemeral key usable in client environments to authenticate connections
33
+ end
34
+ end
35
+
36
+ RealtimeTranscriptionSessionClientSecret = Realtime::RealtimeTranscriptionSessionClientSecret
37
+ end
38
+ end