openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -12,127 +12,11 @@ module OpenAI
12
12
  Variants =
13
13
  T.type_alias do
14
14
  T.any(
15
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function,
15
+ OpenAI::Realtime::Models,
16
16
  OpenAI::Realtime::RealtimeToolsConfigUnion::Mcp
17
17
  )
18
18
  end
19
19
 
20
- class Function < OpenAI::Internal::Type::BaseModel
21
- OrHash =
22
- T.type_alias do
23
- T.any(
24
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function,
25
- OpenAI::Internal::AnyHash
26
- )
27
- end
28
-
29
- # The description of the function, including guidance on when and how to call it,
30
- # and guidance about what to tell the user when calling (if anything).
31
- sig { returns(T.nilable(String)) }
32
- attr_reader :description
33
-
34
- sig { params(description: String).void }
35
- attr_writer :description
36
-
37
- # The name of the function.
38
- sig { returns(T.nilable(String)) }
39
- attr_reader :name
40
-
41
- sig { params(name: String).void }
42
- attr_writer :name
43
-
44
- # Parameters of the function in JSON Schema.
45
- sig { returns(T.nilable(T.anything)) }
46
- attr_reader :parameters
47
-
48
- sig { params(parameters: T.anything).void }
49
- attr_writer :parameters
50
-
51
- # The type of the tool, i.e. `function`.
52
- sig do
53
- returns(
54
- T.nilable(
55
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol
56
- )
57
- )
58
- end
59
- attr_reader :type
60
-
61
- sig do
62
- params(
63
- type:
64
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol
65
- ).void
66
- end
67
- attr_writer :type
68
-
69
- sig do
70
- params(
71
- description: String,
72
- name: String,
73
- parameters: T.anything,
74
- type:
75
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol
76
- ).returns(T.attached_class)
77
- end
78
- def self.new(
79
- # The description of the function, including guidance on when and how to call it,
80
- # and guidance about what to tell the user when calling (if anything).
81
- description: nil,
82
- # The name of the function.
83
- name: nil,
84
- # Parameters of the function in JSON Schema.
85
- parameters: nil,
86
- # The type of the tool, i.e. `function`.
87
- type: nil
88
- )
89
- end
90
-
91
- sig do
92
- override.returns(
93
- {
94
- description: String,
95
- name: String,
96
- parameters: T.anything,
97
- type:
98
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::OrSymbol
99
- }
100
- )
101
- end
102
- def to_hash
103
- end
104
-
105
- # The type of the tool, i.e. `function`.
106
- module Type
107
- extend OpenAI::Internal::Type::Enum
108
-
109
- TaggedSymbol =
110
- T.type_alias do
111
- T.all(
112
- Symbol,
113
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type
114
- )
115
- end
116
- OrSymbol = T.type_alias { T.any(Symbol, String) }
117
-
118
- FUNCTION =
119
- T.let(
120
- :function,
121
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::TaggedSymbol
122
- )
123
-
124
- sig do
125
- override.returns(
126
- T::Array[
127
- OpenAI::Realtime::RealtimeToolsConfigUnion::Function::Type::TaggedSymbol
128
- ]
129
- )
130
- end
131
- def self.values
132
- end
133
- end
134
- end
135
-
136
20
  class Mcp < OpenAI::Internal::Type::BaseModel
137
21
  OrHash =
138
22
  T.type_alias do
@@ -3,8 +3,9 @@
3
3
  module OpenAI
4
4
  module Models
5
5
  module Realtime
6
- # Configuration options for tracing. Set to null to disable tracing. Once tracing
7
- # is enabled for a session, the configuration cannot be modified.
6
+ # Realtime API can write session traces to the
7
+ # [Traces Dashboard](/logs?api=traces). Set to null to disable tracing. Once
8
+ # tracing is enabled for a session, the configuration cannot be modified.
8
9
  #
9
10
  # `auto` will create a trace for the session with default values for the workflow
10
11
  # name, group id, and metadata.
@@ -29,15 +30,15 @@ module OpenAI
29
30
  end
30
31
 
31
32
  # The group id to attach to this trace to enable filtering and grouping in the
32
- # traces dashboard.
33
+ # Traces Dashboard.
33
34
  sig { returns(T.nilable(String)) }
34
35
  attr_reader :group_id
35
36
 
36
37
  sig { params(group_id: String).void }
37
38
  attr_writer :group_id
38
39
 
39
- # The arbitrary metadata to attach to this trace to enable filtering in the traces
40
- # dashboard.
40
+ # The arbitrary metadata to attach to this trace to enable filtering in the Traces
41
+ # Dashboard.
41
42
  sig { returns(T.nilable(T.anything)) }
42
43
  attr_reader :metadata
43
44
 
@@ -45,7 +46,7 @@ module OpenAI
45
46
  attr_writer :metadata
46
47
 
47
48
  # The name of the workflow to attach to this trace. This is used to name the trace
48
- # in the traces dashboard.
49
+ # in the Traces Dashboard.
49
50
  sig { returns(T.nilable(String)) }
50
51
  attr_reader :workflow_name
51
52
 
@@ -62,13 +63,13 @@ module OpenAI
62
63
  end
63
64
  def self.new(
64
65
  # The group id to attach to this trace to enable filtering and grouping in the
65
- # traces dashboard.
66
+ # Traces Dashboard.
66
67
  group_id: nil,
67
- # The arbitrary metadata to attach to this trace to enable filtering in the traces
68
- # dashboard.
68
+ # The arbitrary metadata to attach to this trace to enable filtering in the Traces
69
+ # Dashboard.
69
70
  metadata: nil,
70
71
  # The name of the workflow to attach to this trace. This is used to name the trace
71
- # in the traces dashboard.
72
+ # in the Traces Dashboard.
72
73
  workflow_name: nil
73
74
  )
74
75
  end
@@ -0,0 +1,50 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranscriptionSessionAudio < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(
10
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudio,
11
+ OpenAI::Internal::AnyHash
12
+ )
13
+ end
14
+
15
+ sig do
16
+ returns(
17
+ T.nilable(OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput)
18
+ )
19
+ end
20
+ attr_reader :input
21
+
22
+ sig do
23
+ params(
24
+ input:
25
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::OrHash
26
+ ).void
27
+ end
28
+ attr_writer :input
29
+
30
+ # Configuration for input and output audio.
31
+ sig do
32
+ params(
33
+ input:
34
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::OrHash
35
+ ).returns(T.attached_class)
36
+ end
37
+ def self.new(input: nil)
38
+ end
39
+
40
+ sig do
41
+ override.returns(
42
+ { input: OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput }
43
+ )
44
+ end
45
+ def to_hash
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,226 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class RealtimeTranscriptionSessionAudioInput < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(
10
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput,
11
+ OpenAI::Internal::AnyHash
12
+ )
13
+ end
14
+
15
+ # The PCM audio format. Only a 24kHz sample rate is supported.
16
+ sig do
17
+ returns(
18
+ T.nilable(
19
+ T.any(
20
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
21
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
22
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
23
+ )
24
+ )
25
+ )
26
+ end
27
+ attr_reader :format_
28
+
29
+ sig do
30
+ params(
31
+ format_:
32
+ T.any(
33
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
34
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
35
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
36
+ )
37
+ ).void
38
+ end
39
+ attr_writer :format_
40
+
41
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
42
+ # off. Noise reduction filters audio added to the input audio buffer before it is
43
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
44
+ # detection accuracy (reducing false positives) and model performance by improving
45
+ # perception of the input audio.
46
+ sig do
47
+ returns(
48
+ T.nilable(
49
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction
50
+ )
51
+ )
52
+ end
53
+ attr_reader :noise_reduction
54
+
55
+ sig do
56
+ params(
57
+ noise_reduction:
58
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction::OrHash
59
+ ).void
60
+ end
61
+ attr_writer :noise_reduction
62
+
63
+ # Configuration for input audio transcription, defaults to off and can be set to
64
+ # `null` to turn off once on. Input audio transcription is not native to the
65
+ # model, since the model consumes audio directly. Transcription runs
66
+ # asynchronously through
67
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
68
+ # and should be treated as guidance of input audio content rather than precisely
69
+ # what the model heard. The client can optionally set the language and prompt for
70
+ # transcription, these offer additional guidance to the transcription service.
71
+ sig { returns(T.nilable(OpenAI::Realtime::AudioTranscription)) }
72
+ attr_reader :transcription
73
+
74
+ sig do
75
+ params(
76
+ transcription: OpenAI::Realtime::AudioTranscription::OrHash
77
+ ).void
78
+ end
79
+ attr_writer :transcription
80
+
81
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
82
+ # set to `null` to turn off, in which case the client must manually trigger model
83
+ # response. Server VAD means that the model will detect the start and end of
84
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
85
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
86
+ # semantically estimate whether the user has finished speaking, then dynamically
87
+ # sets a timeout based on this probability. For example, if user audio trails off
88
+ # with "uhhm", the model will score a low probability of turn end and wait longer
89
+ # for the user to continue speaking. This can be useful for more natural
90
+ # conversations, but may have a higher latency.
91
+ sig do
92
+ returns(
93
+ T.nilable(
94
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection
95
+ )
96
+ )
97
+ end
98
+ attr_reader :turn_detection
99
+
100
+ sig do
101
+ params(
102
+ turn_detection:
103
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::OrHash
104
+ ).void
105
+ end
106
+ attr_writer :turn_detection
107
+
108
+ sig do
109
+ params(
110
+ format_:
111
+ T.any(
112
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM::OrHash,
113
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU::OrHash,
114
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA::OrHash
115
+ ),
116
+ noise_reduction:
117
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction::OrHash,
118
+ transcription: OpenAI::Realtime::AudioTranscription::OrHash,
119
+ turn_detection:
120
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection::OrHash
121
+ ).returns(T.attached_class)
122
+ end
123
+ def self.new(
124
+ # The PCM audio format. Only a 24kHz sample rate is supported.
125
+ format_: nil,
126
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
127
+ # off. Noise reduction filters audio added to the input audio buffer before it is
128
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
129
+ # detection accuracy (reducing false positives) and model performance by improving
130
+ # perception of the input audio.
131
+ noise_reduction: nil,
132
+ # Configuration for input audio transcription, defaults to off and can be set to
133
+ # `null` to turn off once on. Input audio transcription is not native to the
134
+ # model, since the model consumes audio directly. Transcription runs
135
+ # asynchronously through
136
+ # [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
137
+ # and should be treated as guidance of input audio content rather than precisely
138
+ # what the model heard. The client can optionally set the language and prompt for
139
+ # transcription, these offer additional guidance to the transcription service.
140
+ transcription: nil,
141
+ # Configuration for turn detection, either Server VAD or Semantic VAD. This can be
142
+ # set to `null` to turn off, in which case the client must manually trigger model
143
+ # response. Server VAD means that the model will detect the start and end of
144
+ # speech based on audio volume and respond at the end of user speech. Semantic VAD
145
+ # is more advanced and uses a turn detection model (in conjunction with VAD) to
146
+ # semantically estimate whether the user has finished speaking, then dynamically
147
+ # sets a timeout based on this probability. For example, if user audio trails off
148
+ # with "uhhm", the model will score a low probability of turn end and wait longer
149
+ # for the user to continue speaking. This can be useful for more natural
150
+ # conversations, but may have a higher latency.
151
+ turn_detection: nil
152
+ )
153
+ end
154
+
155
+ sig do
156
+ override.returns(
157
+ {
158
+ format_:
159
+ T.any(
160
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCM,
161
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMU,
162
+ OpenAI::Realtime::RealtimeAudioFormats::AudioPCMA
163
+ ),
164
+ noise_reduction:
165
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction,
166
+ transcription: OpenAI::Realtime::AudioTranscription,
167
+ turn_detection:
168
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInputTurnDetection
169
+ }
170
+ )
171
+ end
172
+ def to_hash
173
+ end
174
+
175
+ class NoiseReduction < OpenAI::Internal::Type::BaseModel
176
+ OrHash =
177
+ T.type_alias do
178
+ T.any(
179
+ OpenAI::Realtime::RealtimeTranscriptionSessionAudioInput::NoiseReduction,
180
+ OpenAI::Internal::AnyHash
181
+ )
182
+ end
183
+
184
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
185
+ # headphones, `far_field` is for far-field microphones such as laptop or
186
+ # conference room microphones.
187
+ sig do
188
+ returns(T.nilable(OpenAI::Realtime::NoiseReductionType::OrSymbol))
189
+ end
190
+ attr_reader :type
191
+
192
+ sig do
193
+ params(type: OpenAI::Realtime::NoiseReductionType::OrSymbol).void
194
+ end
195
+ attr_writer :type
196
+
197
+ # Configuration for input audio noise reduction. This can be set to `null` to turn
198
+ # off. Noise reduction filters audio added to the input audio buffer before it is
199
+ # sent to VAD and the model. Filtering the audio can improve VAD and turn
200
+ # detection accuracy (reducing false positives) and model performance by improving
201
+ # perception of the input audio.
202
+ sig do
203
+ params(
204
+ type: OpenAI::Realtime::NoiseReductionType::OrSymbol
205
+ ).returns(T.attached_class)
206
+ end
207
+ def self.new(
208
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
209
+ # headphones, `far_field` is for far-field microphones such as laptop or
210
+ # conference room microphones.
211
+ type: nil
212
+ )
213
+ end
214
+
215
+ sig do
216
+ override.returns(
217
+ { type: OpenAI::Realtime::NoiseReductionType::OrSymbol }
218
+ )
219
+ end
220
+ def to_hash
221
+ end
222
+ end
223
+ end
224
+ end
225
+ end
226
+ end