openai 0.23.1 → 0.23.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/chat/completion_list_params.rb +6 -3
  5. data/lib/openai/models/conversations/conversation_create_params.rb +7 -3
  6. data/lib/openai/models/evals/run_cancel_response.rb +6 -3
  7. data/lib/openai/models/evals/run_create_params.rb +6 -3
  8. data/lib/openai/models/evals/run_create_response.rb +6 -3
  9. data/lib/openai/models/evals/run_list_response.rb +6 -3
  10. data/lib/openai/models/evals/run_retrieve_response.rb +6 -3
  11. data/lib/openai/models/realtime/input_audio_buffer_timeout_triggered.rb +25 -5
  12. data/lib/openai/models/realtime/realtime_audio_config_input.rb +14 -11
  13. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +173 -117
  14. data/lib/openai/models/realtime/realtime_server_event.rb +13 -1
  15. data/lib/openai/models/realtime/realtime_session.rb +179 -118
  16. data/lib/openai/models/realtime/realtime_session_create_response.rb +184 -122
  17. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +16 -11
  18. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +175 -117
  19. data/lib/openai/models/responses/response.rb +8 -8
  20. data/lib/openai/models/responses/response_create_params.rb +8 -8
  21. data/lib/openai/resources/chat/completions.rb +1 -1
  22. data/lib/openai/resources/conversations.rb +1 -1
  23. data/lib/openai/version.rb +1 -1
  24. data/rbi/openai/models/chat/completion_list_params.rbi +10 -4
  25. data/rbi/openai/models/conversations/conversation_create_params.rbi +12 -4
  26. data/rbi/openai/models/evals/run_cancel_response.rbi +10 -4
  27. data/rbi/openai/models/evals/run_create_params.rbi +10 -4
  28. data/rbi/openai/models/evals/run_create_response.rbi +10 -4
  29. data/rbi/openai/models/evals/run_list_response.rbi +10 -4
  30. data/rbi/openai/models/evals/run_retrieve_response.rbi +10 -4
  31. data/rbi/openai/models/realtime/input_audio_buffer_timeout_triggered.rbi +24 -5
  32. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +44 -28
  33. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +264 -203
  34. data/rbi/openai/models/realtime/realtime_session.rbi +306 -231
  35. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +298 -232
  36. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +39 -28
  37. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +264 -200
  38. data/rbi/openai/models/responses/response.rbi +12 -12
  39. data/rbi/openai/models/responses/response_create_params.rbi +12 -12
  40. data/rbi/openai/resources/chat/completions.rbi +5 -2
  41. data/rbi/openai/resources/conversations.rbi +6 -2
  42. data/rbi/openai/resources/responses.rbi +8 -8
  43. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +4 -8
  44. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +91 -65
  45. data/sig/openai/models/realtime/realtime_session.rbs +95 -69
  46. data/sig/openai/models/realtime/realtime_session_create_response.rbs +95 -73
  47. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +4 -8
  48. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +91 -65
  49. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: c61159341c0fe919d9f1042f0c705f57d05fc656407e03498bdf1367f704b6ca
- data.tar.gz: 26c9718404d8d1519acb3d9364f9918c623952679a0b29d11c159ebb070a6227
+ metadata.gz: 78de67c6327d605033609ccd07e0d74abf3435c29fca8a16814e47bb9a3adccb
+ data.tar.gz: 5acaec7441b93b103201e5b5f9848537f6e61126baaaffaf468073a063970d48
  SHA512:
- metadata.gz: e5bebf2d7459cf64493d9ce74b839ad8c9326df50953ff064ecd9b5a912c077ebef0c0c38b6a95b40f01b7f37e161ed42be30d985e7b608ca76db18908da2d8d
- data.tar.gz: 0a1fb39beb43b71336e7d0a398bbb56705c850de30f6eb110821b974967e21848d6cc5b626e57434df8bc247b4cd23f71f3c0b41ff425fe39b606ea44454fe2d
+ metadata.gz: 0c3830e5b495692fa49be0fb6f23da597a75f08c376105e98fd7ede52ec06374dbb4e0234739edf2c355f7a76a8e8d77a858dbce211d512f74bddc818717e47e
+ data.tar.gz: 751b22f53a5176438c4a8ee65a67393860ebecd282b8847fc50b0a9ccfe1c39ecc9940c501cb51f2e67ff8746816ff385f3d2721a527533c02e95ab0cc1eaf2d
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
  # Changelog

+ ## 0.23.3 (2025-09-15)
+
+ Full Changelog: [v0.23.2...v0.23.3](https://github.com/openai/openai-ruby/compare/v0.23.2...v0.23.3)
+
+ ### Chores
+
+ * **api:** docs and spec refactoring ([81ccb86](https://github.com/openai/openai-ruby/commit/81ccb86c346e51a2b5d532a5997358aa86977572))
+
+ ## 0.23.2 (2025-09-11)
+
+ Full Changelog: [v0.23.1...v0.23.2](https://github.com/openai/openai-ruby/compare/v0.23.1...v0.23.2)
+
+ ### Chores
+
+ * **api:** Minor docs and type updates for realtime ([ccef982](https://github.com/openai/openai-ruby/commit/ccef9827b31206fc9ba40d2b6165eeefda7621f5))
+
  ## 0.23.1 (2025-09-10)

  Full Changelog: [v0.23.0...v0.23.1](https://github.com/openai/openai-ruby/compare/v0.23.0...v0.23.1)
data/README.md CHANGED
@@ -15,7 +15,7 @@ To use this gem, install via Bundler by adding the following to your application
  <!-- x-release-please-start-version -->

  ```ruby
- gem "openai", "~> 0.23.1"
+ gem "openai", "~> 0.23.3"
  ```

  <!-- x-release-please-end -->
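For orientation, a minimal usage sketch against the updated pin (not part of the diff); the API key handling and model name are assumptions:

```ruby
# Hypothetical quick check after bumping the gem and running `bundle install`.
# Assumes OPENAI_API_KEY is exported; the model name is illustrative only.
require "openai"

client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"])
response = client.responses.create(model: "gpt-4o-mini", input: "Say hello")
puts response.id
```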
data/lib/openai/models/chat/completion_list_params.rb CHANGED
@@ -21,9 +21,12 @@ module OpenAI
  optional :limit, Integer

  # @!attribute metadata
- # A list of metadata keys to filter the Chat Completions by. Example:
+ # Set of 16 key-value pairs that can be attached to an object. This can be useful
+ # for storing additional information about the object in a structured format, and
+ # querying for objects via API or the dashboard.
  #
- # `metadata[key1]=value1&metadata[key2]=value2`
+ # Keys are strings with a maximum length of 64 characters. Values are strings with
+ # a maximum length of 512 characters.
  #
  # @return [Hash{Symbol=>String}, nil]
  optional :metadata, OpenAI::Internal::Type::HashOf[String], nil?: true
@@ -49,7 +52,7 @@ module OpenAI
  #
  # @param limit [Integer] Number of Chat Completions to retrieve.
  #
- # @param metadata [Hash{Symbol=>String}, nil] A list of metadata keys to filter the Chat Completions by. Example:
+ # @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. This can be
  #
  # @param model [String] The model used to generate the Chat Completions.
  #
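A hedged sketch of how the re-documented `metadata` filter might be used when listing stored Chat Completions; the metadata key and the pagination helper are assumptions, and `client` is the instance from the sketch above:

```ruby
# Illustrative only: filter stored Chat Completions by attached metadata.
page = client.chat.completions.list(
  limit: 10,
  metadata: {environment: "production"} # up to 16 pairs; keys <= 64 chars, values <= 512 chars
)

page.auto_paging_each do |completion|
  puts completion.id
end
```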
data/lib/openai/models/conversations/conversation_create_params.rb CHANGED
@@ -18,8 +18,12 @@ module OpenAI
  nil?: true

  # @!attribute metadata
- # Set of 16 key-value pairs that can be attached to an object. Useful for storing
- # additional information about the object in a structured format.
+ # Set of 16 key-value pairs that can be attached to an object. This can be useful
+ # for storing additional information about the object in a structured format, and
+ # querying for objects via API or the dashboard.
+ #
+ # Keys are strings with a maximum length of 64 characters. Values are strings with
+ # a maximum length of 512 characters.
  #
  # @return [Hash{Symbol=>String}, nil]
  optional :metadata, OpenAI::Internal::Type::HashOf[String], nil?: true
@@ -30,7 +34,7 @@ module OpenAI
  #
  # @param items [Array<OpenAI::Models::Responses::EasyInputMessage, OpenAI::Models::Responses::ResponseInputItem::Message, OpenAI::Models::Responses::ResponseOutputMessage, OpenAI::Models::Responses::ResponseFileSearchToolCall, OpenAI::Models::Responses::ResponseComputerToolCall, OpenAI::Models::Responses::ResponseInputItem::ComputerCallOutput, OpenAI::Models::Responses::ResponseFunctionWebSearch, OpenAI::Models::Responses::ResponseFunctionToolCall, OpenAI::Models::Responses::ResponseInputItem::FunctionCallOutput, OpenAI::Models::Responses::ResponseReasoningItem, OpenAI::Models::Responses::ResponseInputItem::ImageGenerationCall, OpenAI::Models::Responses::ResponseCodeInterpreterToolCall, OpenAI::Models::Responses::ResponseInputItem::LocalShellCall, OpenAI::Models::Responses::ResponseInputItem::LocalShellCallOutput, OpenAI::Models::Responses::ResponseInputItem::McpListTools, OpenAI::Models::Responses::ResponseInputItem::McpApprovalRequest, OpenAI::Models::Responses::ResponseInputItem::McpApprovalResponse, OpenAI::Models::Responses::ResponseInputItem::McpCall, OpenAI::Models::Responses::ResponseCustomToolCallOutput, OpenAI::Models::Responses::ResponseCustomToolCall, OpenAI::Models::Responses::ResponseInputItem::ItemReference>, nil] Initial items to include in the conversation context.
  #
- # @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. Useful for
+ # @param metadata [Hash{Symbol=>String}, nil] Set of 16 key-value pairs that can be attached to an object. This can be
  #
  # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}]
  end
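Similarly, a sketch of attaching metadata when creating a conversation; the keys are hypothetical and `client` is reused from above:

```ruby
# Illustrative only: metadata travels with the conversation object.
conversation = client.conversations.create(
  metadata: {customer_id: "cus_123", channel: "support"}
)
puts conversation.id
```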
data/lib/openai/models/evals/run_cancel_response.rb CHANGED
@@ -314,8 +314,11 @@ module OpenAI
  optional :model, String, nil?: true

  # @!attribute reasoning_effort
- # Optional reasoning effort parameter. This is a query parameter used to select
- # responses.
+ # Constrains effort on reasoning for
+ # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
+ # effort can result in faster responses and fewer tokens used on reasoning in a
+ # response.
  #
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
@@ -361,7 +364,7 @@ module OpenAI
  #
  # @param model [String, nil] The name of the model to find responses for. This is a query parameter used to s
  #
- # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Optional reasoning effort parameter. This is a query parameter used to select re
+ # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Constrains effort on reasoning for
  #
  # @param temperature [Float, nil] Sampling temperature. This is a query parameter used to select responses.
  #
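The `reasoning_effort` fields above (and in the evals run files that follow) all reference the shared `OpenAI::ReasoningEffort` enum; a tiny sketch of the symbols involved, with constant names assumed to follow the SDK's enum convention:

```ruby
# Assumed constant name; each enum constant resolves to a plain Symbol.
effort = OpenAI::ReasoningEffort::MEDIUM
# => :medium (documented values: :minimal, :low, :medium, :high)
```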
data/lib/openai/models/evals/run_create_params.rb CHANGED
@@ -226,8 +226,11 @@ module OpenAI
  optional :model, String, nil?: true

  # @!attribute reasoning_effort
- # Optional reasoning effort parameter. This is a query parameter used to select
- # responses.
+ # Constrains effort on reasoning for
+ # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
+ # effort can result in faster responses and fewer tokens used on reasoning in a
+ # response.
  #
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
@@ -273,7 +276,7 @@ module OpenAI
  #
  # @param model [String, nil] The name of the model to find responses for. This is a query parameter used to s
  #
- # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Optional reasoning effort parameter. This is a query parameter used to select re
+ # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Constrains effort on reasoning for
  #
  # @param temperature [Float, nil] Sampling temperature. This is a query parameter used to select responses.
  #
data/lib/openai/models/evals/run_create_response.rb CHANGED
@@ -314,8 +314,11 @@ module OpenAI
  optional :model, String, nil?: true

  # @!attribute reasoning_effort
- # Optional reasoning effort parameter. This is a query parameter used to select
- # responses.
+ # Constrains effort on reasoning for
+ # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
+ # effort can result in faster responses and fewer tokens used on reasoning in a
+ # response.
  #
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
@@ -361,7 +364,7 @@ module OpenAI
  #
  # @param model [String, nil] The name of the model to find responses for. This is a query parameter used to s
  #
- # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Optional reasoning effort parameter. This is a query parameter used to select re
+ # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Constrains effort on reasoning for
  #
  # @param temperature [Float, nil] Sampling temperature. This is a query parameter used to select responses.
  #
data/lib/openai/models/evals/run_list_response.rb CHANGED
@@ -314,8 +314,11 @@ module OpenAI
  optional :model, String, nil?: true

  # @!attribute reasoning_effort
- # Optional reasoning effort parameter. This is a query parameter used to select
- # responses.
+ # Constrains effort on reasoning for
+ # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
+ # effort can result in faster responses and fewer tokens used on reasoning in a
+ # response.
  #
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
@@ -361,7 +364,7 @@ module OpenAI
  #
  # @param model [String, nil] The name of the model to find responses for. This is a query parameter used to s
  #
- # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Optional reasoning effort parameter. This is a query parameter used to select re
+ # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Constrains effort on reasoning for
  #
  # @param temperature [Float, nil] Sampling temperature. This is a query parameter used to select responses.
  #
data/lib/openai/models/evals/run_retrieve_response.rb CHANGED
@@ -314,8 +314,11 @@ module OpenAI
  optional :model, String, nil?: true

  # @!attribute reasoning_effort
- # Optional reasoning effort parameter. This is a query parameter used to select
- # responses.
+ # Constrains effort on reasoning for
+ # [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
+ # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
+ # effort can result in faster responses and fewer tokens used on reasoning in a
+ # response.
  #
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
@@ -361,7 +364,7 @@ module OpenAI
  #
  # @param model [String, nil] The name of the model to find responses for. This is a query parameter used to s
  #
- # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Optional reasoning effort parameter. This is a query parameter used to select re
+ # @param reasoning_effort [Symbol, OpenAI::Models::ReasoningEffort, nil] Constrains effort on reasoning for
  #
  # @param temperature [Float, nil] Sampling temperature. This is a query parameter used to select responses.
  #
data/lib/openai/models/realtime/input_audio_buffer_timeout_triggered.rb CHANGED
@@ -5,13 +5,15 @@ module OpenAI
  module Realtime
  class InputAudioBufferTimeoutTriggered < OpenAI::Internal::Type::BaseModel
  # @!attribute audio_end_ms
- # Millisecond offset where speech ended within the buffered audio.
+ # Millisecond offset of audio written to the input audio buffer at the time the
+ # timeout was triggered.
  #
  # @return [Integer]
  required :audio_end_ms, Integer

  # @!attribute audio_start_ms
- # Millisecond offset where speech started within the buffered audio.
+ # Millisecond offset of audio written to the input audio buffer that was after the
+ # playback time of the last model response.
  #
  # @return [Integer]
  required :audio_start_ms, Integer
@@ -35,11 +37,29 @@ module OpenAI
  required :type, const: :"input_audio_buffer.timeout_triggered"

  # @!method initialize(audio_end_ms:, audio_start_ms:, event_id:, item_id:, type: :"input_audio_buffer.timeout_triggered")
- # Returned when the server VAD timeout is triggered for the input audio buffer.
+ # Some parameter documentations has been truncated, see
+ # {OpenAI::Models::Realtime::InputAudioBufferTimeoutTriggered} for more details.
  #
- # @param audio_end_ms [Integer] Millisecond offset where speech ended within the buffered audio.
+ # Returned when the Server VAD timeout is triggered for the input audio buffer.
+ # This is configured with `idle_timeout_ms` in the `turn_detection` settings of
+ # the session, and it indicates that there hasn't been any speech detected for the
+ # configured duration.
  #
- # @param audio_start_ms [Integer] Millisecond offset where speech started within the buffered audio.
+ # The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio
+ # after the last model response up to the triggering time, as an offset from the
+ # beginning of audio written to the input audio buffer. This means it demarcates
+ # the segment of audio that was silent and the difference between the start and
+ # end values will roughly match the configured timeout.
+ #
+ # The empty audio will be committed to the conversation as an `input_audio` item
+ # (there will be a `input_audio_buffer.committed` event) and a model response will
+ # be generated. There may be speech that didn't trigger VAD but is still detected
+ # by the model, so the model may respond with something relevant to the
+ # conversation or a prompt to continue speaking.
+ #
+ # @param audio_end_ms [Integer] Millisecond offset of audio written to the input audio buffer at the time the ti
+ #
+ # @param audio_start_ms [Integer] Millisecond offset of audio written to the input audio buffer that was after the
  #
  # @param event_id [String] The unique ID of the server event.
  #
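As a sketch of consuming the expanded event documentation, the handler below inspects fields defined in this model; how realtime events reach your code (for example your own WebSocket wiring) is outside this diff:

```ruby
# Uses only fields documented above: audio_start_ms, audio_end_ms, item_id.
def handle_realtime_event(event)
  return unless event.is_a?(OpenAI::Models::Realtime::InputAudioBufferTimeoutTriggered)

  silent_ms = event.audio_end_ms - event.audio_start_ms
  puts "Idle timeout after ~#{silent_ms}ms of silence (item #{event.item_id}); " \
       "the empty audio is committed and a model response will follow."
end
```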
data/lib/openai/models/realtime/realtime_audio_config_input.rb CHANGED
@@ -36,17 +36,20 @@ module OpenAI
  # @!attribute turn_detection
  # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
  # set to `null` to turn off, in which case the client must manually trigger model
- # response. Server VAD means that the model will detect the start and end of
- # speech based on audio volume and respond at the end of user speech. Semantic VAD
- # is more advanced and uses a turn detection model (in conjunction with VAD) to
- # semantically estimate whether the user has finished speaking, then dynamically
- # sets a timeout based on this probability. For example, if user audio trails off
- # with "uhhm", the model will score a low probability of turn end and wait longer
- # for the user to continue speaking. This can be useful for more natural
- # conversations, but may have a higher latency.
+ # response.
  #
- # @return [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection, nil]
- optional :turn_detection, -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection }
+ # Server VAD means that the model will detect the start and end of speech based on
+ # audio volume and respond at the end of user speech.
+ #
+ # Semantic VAD is more advanced and uses a turn detection model (in conjunction
+ # with VAD) to semantically estimate whether the user has finished speaking, then
+ # dynamically sets a timeout based on this probability. For example, if user audio
+ # trails off with "uhhm", the model will score a low probability of turn end and
+ # wait longer for the user to continue speaking. This can be useful for more
+ # natural conversations, but may have a higher latency.
+ #
+ # @return [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad, nil]
+ optional :turn_detection, union: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection }, nil?: true

  # @!method initialize(format_: nil, noise_reduction: nil, transcription: nil, turn_detection: nil)
  # Some parameter documentations has been truncated, see
@@ -58,7 +61,7 @@ module OpenAI
  #
  # @param transcription [OpenAI::Models::Realtime::AudioTranscription] Configuration for input audio transcription, defaults to off and can be set to `
  #
- # @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection] Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
+ # @param turn_detection [OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad, nil] Configuration for turn detection, ether Server VAD or Semantic VAD. This can be

  # @see OpenAI::Models::Realtime::RealtimeAudioConfigInput#noise_reduction
  class NoiseReduction < OpenAI::Internal::Type::BaseModel
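A sketch of building the input audio config with the `turn_detection` union introduced here; the numeric values are illustrative, and passing `nil` disables turn detection as documented:

```ruby
# ServerVad variant with the new idle timeout; `type` defaults to :server_vad.
audio_input = OpenAI::Models::Realtime::RealtimeAudioConfigInput.new(
  turn_detection: OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad.new(
    idle_timeout_ms: 6_000,      # emits input_audio_buffer.timeout_triggered after ~6s of silence
    silence_duration_ms: 500,
    create_response: true
  )
)
```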
data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb CHANGED
@@ -3,128 +3,184 @@
  module OpenAI
  module Models
  module Realtime
- class RealtimeAudioInputTurnDetection < OpenAI::Internal::Type::BaseModel
- # @!attribute create_response
- # Whether or not to automatically generate a response when a VAD stop event
- # occurs.
- #
- # @return [Boolean, nil]
- optional :create_response, OpenAI::Internal::Type::Boolean
-
- # @!attribute eagerness
- # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
- # will wait longer for the user to continue speaking, `high` will respond more
- # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
- # and `high` have max timeouts of 8s, 4s, and 2s respectively.
- #
- # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness, nil]
- optional :eagerness, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Eagerness }
-
- # @!attribute idle_timeout_ms
- # Optional idle timeout after which turn detection will auto-timeout when no
- # additional audio is received and emits a `timeout_triggered` event.
- #
- # @return [Integer, nil]
- optional :idle_timeout_ms, Integer, nil?: true
-
- # @!attribute interrupt_response
- # Whether or not to automatically interrupt any ongoing response with output to
- # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- # occurs.
- #
- # @return [Boolean, nil]
- optional :interrupt_response, OpenAI::Internal::Type::Boolean
-
- # @!attribute prefix_padding_ms
- # Used only for `server_vad` mode. Amount of audio to include before the VAD
- # detected speech (in milliseconds). Defaults to 300ms.
- #
- # @return [Integer, nil]
- optional :prefix_padding_ms, Integer
-
- # @!attribute silence_duration_ms
- # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
- # milliseconds). Defaults to 500ms. With shorter values the model will respond
- # more quickly, but may jump in on short pauses from the user.
- #
- # @return [Integer, nil]
- optional :silence_duration_ms, Integer
-
- # @!attribute threshold
- # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
- # defaults to 0.5. A higher threshold will require louder audio to activate the
- # model, and thus might perform better in noisy environments.
- #
- # @return [Float, nil]
- optional :threshold, Float
-
- # @!attribute type
- # Type of turn detection.
- #
- # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type, nil]
- optional :type, enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::Type }
-
- # @!method initialize(create_response: nil, eagerness: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: nil)
- # Some parameter documentations has been truncated, see
- # {OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection} for more details.
- #
- # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
- # set to `null` to turn off, in which case the client must manually trigger model
- # response. Server VAD means that the model will detect the start and end of
- # speech based on audio volume and respond at the end of user speech. Semantic VAD
- # is more advanced and uses a turn detection model (in conjunction with VAD) to
- # semantically estimate whether the user has finished speaking, then dynamically
- # sets a timeout based on this probability. For example, if user audio trails off
- # with "uhhm", the model will score a low probability of turn end and wait longer
- # for the user to continue speaking. This can be useful for more natural
- # conversations, but may have a higher latency.
- #
- # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs
- #
- # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
- #
- # @param idle_timeout_ms [Integer, nil] Optional idle timeout after which turn detection will auto-timeout when
- #
- # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th
- #
- # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec
- #
- # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m
- #
- # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
- #
- # @param type [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::Type] Type of turn detection.
-
- # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
- # will wait longer for the user to continue speaking, `high` will respond more
- # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
- # and `high` have max timeouts of 8s, 4s, and 2s respectively.
- #
- # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#eagerness
- module Eagerness
- extend OpenAI::Internal::Type::Enum
-
- LOW = :low
- MEDIUM = :medium
- HIGH = :high
- AUTO = :auto
-
- # @!method self.values
- # @return [Array<Symbol>]
+ # Configuration for turn detection, ether Server VAD or Semantic VAD. This can be
+ # set to `null` to turn off, in which case the client must manually trigger model
+ # response.
+ #
+ # Server VAD means that the model will detect the start and end of speech based on
+ # audio volume and respond at the end of user speech.
+ #
+ # Semantic VAD is more advanced and uses a turn detection model (in conjunction
+ # with VAD) to semantically estimate whether the user has finished speaking, then
+ # dynamically sets a timeout based on this probability. For example, if user audio
+ # trails off with "uhhm", the model will score a low probability of turn end and
+ # wait longer for the user to continue speaking. This can be useful for more
+ # natural conversations, but may have a higher latency.
+ module RealtimeAudioInputTurnDetection
+ extend OpenAI::Internal::Type::Union
+
+ discriminator :type
+
+ # Server-side voice activity detection (VAD) which flips on when user speech is detected and off after a period of silence.
+ variant :server_vad, -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::ServerVad }
+
+ # Server-side semantic turn detection which uses a model to determine when the user has finished speaking.
+ variant :semantic_vad, -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad }
+
+ class ServerVad < OpenAI::Internal::Type::BaseModel
+ # @!attribute type
+ # Type of turn detection, `server_vad` to turn on simple Server VAD.
+ #
+ # @return [Symbol, :server_vad]
+ required :type, const: :server_vad
+
+ # @!attribute create_response
+ # Whether or not to automatically generate a response when a VAD stop event
+ # occurs.
+ #
+ # @return [Boolean, nil]
+ optional :create_response, OpenAI::Internal::Type::Boolean
+
+ # @!attribute idle_timeout_ms
+ # Optional timeout after which a model response will be triggered automatically.
+ # This is useful for situations in which a long pause from the user is unexpected,
+ # such as a phone call. The model will effectively prompt the user to continue the
+ # conversation based on the current context.
+ #
+ # The timeout value will be applied after the last model response's audio has
+ # finished playing, i.e. it's set to the `response.done` time plus audio playback
+ # duration.
+ #
+ # An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ # Response) will be emitted when the timeout is reached. Idle timeout is currently
+ # only supported for `server_vad` mode.
+ #
+ # @return [Integer, nil]
+ optional :idle_timeout_ms, Integer, nil?: true
+
+ # @!attribute interrupt_response
+ # Whether or not to automatically interrupt any ongoing response with output to
+ # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ # occurs.
+ #
+ # @return [Boolean, nil]
+ optional :interrupt_response, OpenAI::Internal::Type::Boolean
+
+ # @!attribute prefix_padding_ms
+ # Used only for `server_vad` mode. Amount of audio to include before the VAD
+ # detected speech (in milliseconds). Defaults to 300ms.
+ #
+ # @return [Integer, nil]
+ optional :prefix_padding_ms, Integer
+
+ # @!attribute silence_duration_ms
+ # Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+ # milliseconds). Defaults to 500ms. With shorter values the model will respond
+ # more quickly, but may jump in on short pauses from the user.
+ #
+ # @return [Integer, nil]
+ optional :silence_duration_ms, Integer
+
+ # @!attribute threshold
+ # Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
+ # defaults to 0.5. A higher threshold will require louder audio to activate the
+ # model, and thus might perform better in noisy environments.
+ #
+ # @return [Float, nil]
+ optional :threshold, Float
+
+ # @!method initialize(create_response: nil, idle_timeout_ms: nil, interrupt_response: nil, prefix_padding_ms: nil, silence_duration_ms: nil, threshold: nil, type: :server_vad)
+ # Some parameter documentations has been truncated, see
+ # {OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad} for more
+ # details.
+ #
+ # Server-side voice activity detection (VAD) which flips on when user speech is
+ # detected and off after a period of silence.
+ #
+ # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs
+ #
+ # @param idle_timeout_ms [Integer, nil] Optional timeout after which a model response will be triggered automatically. T
+ #
+ # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th
+ #
+ # @param prefix_padding_ms [Integer] Used only for `server_vad` mode. Amount of audio to include before the VAD detec
+ #
+ # @param silence_duration_ms [Integer] Used only for `server_vad` mode. Duration of silence to detect speech stop (in m
+ #
+ # @param threshold [Float] Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
+ #
+ # @param type [Symbol, :server_vad] Type of turn detection, `server_vad` to turn on simple Server VAD.
  end

- # Type of turn detection.
- #
- # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection#type
- module Type
- extend OpenAI::Internal::Type::Enum
+ class SemanticVad < OpenAI::Internal::Type::BaseModel
+ # @!attribute type
+ # Type of turn detection, `semantic_vad` to turn on Semantic VAD.
+ #
+ # @return [Symbol, :semantic_vad]
+ required :type, const: :semantic_vad
+
+ # @!attribute create_response
+ # Whether or not to automatically generate a response when a VAD stop event
+ # occurs.
+ #
+ # @return [Boolean, nil]
+ optional :create_response, OpenAI::Internal::Type::Boolean
+
+ # @!attribute eagerness
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+ # will wait longer for the user to continue speaking, `high` will respond more
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
+ #
+ # @return [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness, nil]
+ optional :eagerness,
+ enum: -> { OpenAI::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness }

- SERVER_VAD = :server_vad
- SEMANTIC_VAD = :semantic_vad
+ # @!attribute interrupt_response
+ # Whether or not to automatically interrupt any ongoing response with output to
+ # the default conversation (i.e. `conversation` of `auto`) when a VAD start event
+ # occurs.
+ #
+ # @return [Boolean, nil]
+ optional :interrupt_response, OpenAI::Internal::Type::Boolean

- # @!method self.values
- # @return [Array<Symbol>]
+ # @!method initialize(create_response: nil, eagerness: nil, interrupt_response: nil, type: :semantic_vad)
+ # Some parameter documentations has been truncated, see
+ # {OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad} for
+ # more details.
+ #
+ # Server-side semantic turn detection which uses a model to determine when the
+ # user has finished speaking.
+ #
+ # @param create_response [Boolean] Whether or not to automatically generate a response when a VAD stop event occurs
+ #
+ # @param eagerness [Symbol, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad::Eagerness] Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+ #
+ # @param interrupt_response [Boolean] Whether or not to automatically interrupt any ongoing response with output to th
+ #
+ # @param type [Symbol, :semantic_vad] Type of turn detection, `semantic_vad` to turn on Semantic VAD.
+
+ # Used only for `semantic_vad` mode. The eagerness of the model to respond. `low`
+ # will wait longer for the user to continue speaking, `high` will respond more
+ # quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`,
+ # and `high` have max timeouts of 8s, 4s, and 2s respectively.
+ #
+ # @see OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad#eagerness
+ module Eagerness
+ extend OpenAI::Internal::Type::Enum
+
+ LOW = :low
+ MEDIUM = :medium
+ HIGH = :high
+ AUTO = :auto
+
+ # @!method self.values
+ # @return [Array<Symbol>]
+ end
  end
+
+ # @!method self.variants
+ # @return [Array(OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::ServerVad, OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad)]
  end
  end
  end
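And the complementary `semantic_vad` variant of the same union, again with illustrative values:

```ruby
# SemanticVad variant; eagerness accepts the enum symbols documented above.
semantic = OpenAI::Models::Realtime::RealtimeAudioInputTurnDetection::SemanticVad.new(
  eagerness: :high,          # :low waits longer, :auto is equivalent to :medium
  create_response: true,
  interrupt_response: true
)
```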