openai 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/README.md +1 -1
  4. data/lib/openai/models/realtime/audio_transcription.rb +60 -0
  5. data/lib/openai/models/realtime/client_secret_create_params.rb +18 -9
  6. data/lib/openai/models/realtime/client_secret_create_response.rb +11 -250
  7. data/lib/openai/models/realtime/conversation_item.rb +1 -1
  8. data/lib/openai/models/realtime/conversation_item_added.rb +14 -1
  9. data/lib/openai/models/realtime/conversation_item_done.rb +3 -0
  10. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rb +10 -8
  11. data/lib/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rb +14 -5
  12. data/lib/openai/models/realtime/conversation_item_truncate_event.rb +2 -2
  13. data/lib/openai/models/realtime/input_audio_buffer_append_event.rb +10 -5
  14. data/lib/openai/models/realtime/models.rb +58 -0
  15. data/lib/openai/models/realtime/noise_reduction_type.rb +20 -0
  16. data/lib/openai/models/realtime/realtime_audio_config.rb +6 -427
  17. data/lib/openai/models/realtime/realtime_audio_config_input.rb +89 -0
  18. data/lib/openai/models/realtime/realtime_audio_config_output.rb +100 -0
  19. data/lib/openai/models/realtime/realtime_audio_formats.rb +121 -0
  20. data/lib/openai/models/realtime/realtime_audio_input_turn_detection.rb +131 -0
  21. data/lib/openai/models/realtime/realtime_client_event.rb +31 -23
  22. data/lib/openai/models/realtime/realtime_conversation_item_assistant_message.rb +43 -10
  23. data/lib/openai/models/realtime/realtime_conversation_item_function_call.rb +16 -7
  24. data/lib/openai/models/realtime/realtime_conversation_item_function_call_output.rb +15 -7
  25. data/lib/openai/models/realtime/realtime_conversation_item_system_message.rb +18 -6
  26. data/lib/openai/models/realtime/realtime_conversation_item_user_message.rb +62 -13
  27. data/lib/openai/models/realtime/realtime_response.rb +117 -107
  28. data/lib/openai/models/realtime/realtime_response_create_audio_output.rb +100 -0
  29. data/lib/openai/models/realtime/realtime_response_create_mcp_tool.rb +310 -0
  30. data/lib/openai/models/realtime/realtime_response_create_params.rb +225 -0
  31. data/lib/openai/models/realtime/realtime_response_status.rb +1 -1
  32. data/lib/openai/models/realtime/realtime_response_usage.rb +5 -2
  33. data/lib/openai/models/realtime/realtime_response_usage_input_token_details.rb +58 -8
  34. data/lib/openai/models/realtime/realtime_server_event.rb +21 -5
  35. data/lib/openai/models/realtime/realtime_session.rb +9 -125
  36. data/lib/openai/models/realtime/realtime_session_client_secret.rb +36 -0
  37. data/lib/openai/models/realtime/realtime_session_create_request.rb +50 -71
  38. data/lib/openai/models/realtime/realtime_session_create_response.rb +621 -219
  39. data/lib/openai/models/realtime/realtime_tools_config_union.rb +2 -53
  40. data/lib/openai/models/realtime/realtime_tracing_config.rb +7 -6
  41. data/lib/openai/models/realtime/realtime_transcription_session_audio.rb +19 -0
  42. data/lib/openai/models/realtime/realtime_transcription_session_audio_input.rb +90 -0
  43. data/lib/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rb +131 -0
  44. data/lib/openai/models/realtime/realtime_transcription_session_client_secret.rb +38 -0
  45. data/lib/openai/models/realtime/realtime_transcription_session_create_request.rb +12 -270
  46. data/lib/openai/models/realtime/realtime_transcription_session_create_response.rb +78 -0
  47. data/lib/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rb +66 -0
  48. data/lib/openai/models/realtime/realtime_transcription_session_turn_detection.rb +57 -0
  49. data/lib/openai/models/realtime/realtime_truncation.rb +8 -40
  50. data/lib/openai/models/realtime/realtime_truncation_retention_ratio.rb +34 -0
  51. data/lib/openai/models/realtime/response_cancel_event.rb +3 -1
  52. data/lib/openai/models/realtime/response_create_event.rb +18 -348
  53. data/lib/openai/models/realtime/response_done_event.rb +7 -0
  54. data/lib/openai/models/realtime/session_created_event.rb +20 -4
  55. data/lib/openai/models/realtime/session_update_event.rb +36 -12
  56. data/lib/openai/models/realtime/session_updated_event.rb +20 -4
  57. data/lib/openai/models/realtime/transcription_session_created.rb +8 -243
  58. data/lib/openai/models/realtime/transcription_session_update.rb +179 -3
  59. data/lib/openai/models/realtime/transcription_session_updated_event.rb +8 -243
  60. data/lib/openai/resources/realtime/client_secrets.rb +2 -3
  61. data/lib/openai/version.rb +1 -1
  62. data/lib/openai.rb +19 -1
  63. data/rbi/openai/models/realtime/audio_transcription.rbi +132 -0
  64. data/rbi/openai/models/realtime/client_secret_create_params.rbi +25 -11
  65. data/rbi/openai/models/realtime/client_secret_create_response.rbi +2 -587
  66. data/rbi/openai/models/realtime/conversation_item_added.rbi +14 -1
  67. data/rbi/openai/models/realtime/conversation_item_done.rbi +3 -0
  68. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_completed_event.rbi +11 -8
  69. data/rbi/openai/models/realtime/conversation_item_input_audio_transcription_delta_event.rbi +15 -5
  70. data/rbi/openai/models/realtime/conversation_item_truncate_event.rbi +2 -2
  71. data/rbi/openai/models/realtime/input_audio_buffer_append_event.rbi +10 -5
  72. data/rbi/openai/models/realtime/models.rbi +97 -0
  73. data/rbi/openai/models/realtime/noise_reduction_type.rbi +31 -0
  74. data/rbi/openai/models/realtime/realtime_audio_config.rbi +8 -956
  75. data/rbi/openai/models/realtime/realtime_audio_config_input.rbi +221 -0
  76. data/rbi/openai/models/realtime/realtime_audio_config_output.rbi +222 -0
  77. data/rbi/openai/models/realtime/realtime_audio_formats.rbi +329 -0
  78. data/rbi/openai/models/realtime/realtime_audio_input_turn_detection.rbi +262 -0
  79. data/rbi/openai/models/realtime/realtime_conversation_item_assistant_message.rbi +51 -10
  80. data/rbi/openai/models/realtime/realtime_conversation_item_function_call.rbi +16 -7
  81. data/rbi/openai/models/realtime/realtime_conversation_item_function_call_output.rbi +14 -7
  82. data/rbi/openai/models/realtime/realtime_conversation_item_system_message.rbi +16 -6
  83. data/rbi/openai/models/realtime/realtime_conversation_item_user_message.rbi +110 -12
  84. data/rbi/openai/models/realtime/realtime_response.rbi +287 -212
  85. data/rbi/openai/models/realtime/realtime_response_create_audio_output.rbi +250 -0
  86. data/rbi/openai/models/realtime/realtime_response_create_mcp_tool.rbi +616 -0
  87. data/rbi/openai/models/realtime/realtime_response_create_params.rbi +529 -0
  88. data/rbi/openai/models/realtime/realtime_response_usage.rbi +8 -2
  89. data/rbi/openai/models/realtime/realtime_response_usage_input_token_details.rbi +106 -7
  90. data/rbi/openai/models/realtime/realtime_server_event.rbi +4 -1
  91. data/rbi/openai/models/realtime/realtime_session.rbi +12 -262
  92. data/rbi/openai/models/realtime/realtime_session_client_secret.rbi +49 -0
  93. data/rbi/openai/models/realtime/realtime_session_create_request.rbi +112 -133
  94. data/rbi/openai/models/realtime/realtime_session_create_response.rbi +1229 -405
  95. data/rbi/openai/models/realtime/realtime_tools_config_union.rbi +1 -117
  96. data/rbi/openai/models/realtime/realtime_tracing_config.rbi +11 -10
  97. data/rbi/openai/models/realtime/realtime_transcription_session_audio.rbi +50 -0
  98. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input.rbi +226 -0
  99. data/rbi/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbi +259 -0
  100. data/rbi/openai/models/realtime/realtime_transcription_session_client_secret.rbi +51 -0
  101. data/rbi/openai/models/realtime/realtime_transcription_session_create_request.rbi +25 -597
  102. data/rbi/openai/models/realtime/realtime_transcription_session_create_response.rbi +195 -0
  103. data/rbi/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbi +144 -0
  104. data/rbi/openai/models/realtime/realtime_transcription_session_turn_detection.rbi +94 -0
  105. data/rbi/openai/models/realtime/realtime_truncation.rbi +5 -56
  106. data/rbi/openai/models/realtime/realtime_truncation_retention_ratio.rbi +45 -0
  107. data/rbi/openai/models/realtime/response_cancel_event.rbi +3 -1
  108. data/rbi/openai/models/realtime/response_create_event.rbi +19 -786
  109. data/rbi/openai/models/realtime/response_done_event.rbi +7 -0
  110. data/rbi/openai/models/realtime/session_created_event.rbi +42 -9
  111. data/rbi/openai/models/realtime/session_update_event.rbi +57 -19
  112. data/rbi/openai/models/realtime/session_updated_event.rbi +42 -9
  113. data/rbi/openai/models/realtime/transcription_session_created.rbi +17 -591
  114. data/rbi/openai/models/realtime/transcription_session_update.rbi +425 -7
  115. data/rbi/openai/models/realtime/transcription_session_updated_event.rbi +14 -591
  116. data/rbi/openai/resources/realtime/client_secrets.rbi +5 -3
  117. data/sig/openai/models/realtime/audio_transcription.rbs +57 -0
  118. data/sig/openai/models/realtime/client_secret_create_response.rbs +1 -251
  119. data/sig/openai/models/realtime/models.rbs +57 -0
  120. data/sig/openai/models/realtime/noise_reduction_type.rbs +16 -0
  121. data/sig/openai/models/realtime/realtime_audio_config.rbs +12 -331
  122. data/sig/openai/models/realtime/realtime_audio_config_input.rbs +72 -0
  123. data/sig/openai/models/realtime/realtime_audio_config_output.rbs +72 -0
  124. data/sig/openai/models/realtime/realtime_audio_formats.rbs +128 -0
  125. data/sig/openai/models/realtime/realtime_audio_input_turn_detection.rbs +99 -0
  126. data/sig/openai/models/realtime/realtime_conversation_item_assistant_message.rbs +17 -2
  127. data/sig/openai/models/realtime/realtime_conversation_item_user_message.rbs +30 -1
  128. data/sig/openai/models/realtime/realtime_response.rbs +103 -82
  129. data/sig/openai/models/realtime/realtime_response_create_audio_output.rbs +84 -0
  130. data/sig/openai/models/realtime/realtime_response_create_mcp_tool.rbs +218 -0
  131. data/sig/openai/models/realtime/realtime_response_create_params.rbs +148 -0
  132. data/sig/openai/models/realtime/realtime_response_usage_input_token_details.rbs +50 -1
  133. data/sig/openai/models/realtime/realtime_session.rbs +16 -106
  134. data/sig/openai/models/realtime/realtime_session_client_secret.rbs +20 -0
  135. data/sig/openai/models/realtime/realtime_session_create_request.rbs +27 -43
  136. data/sig/openai/models/realtime/realtime_session_create_response.rbs +389 -187
  137. data/sig/openai/models/realtime/realtime_tools_config_union.rbs +1 -53
  138. data/sig/openai/models/realtime/realtime_transcription_session_audio.rbs +24 -0
  139. data/sig/openai/models/realtime/realtime_transcription_session_audio_input.rbs +72 -0
  140. data/sig/openai/models/realtime/realtime_transcription_session_audio_input_turn_detection.rbs +99 -0
  141. data/sig/openai/models/realtime/realtime_transcription_session_client_secret.rbs +20 -0
  142. data/sig/openai/models/realtime/realtime_transcription_session_create_request.rbs +11 -203
  143. data/sig/openai/models/realtime/realtime_transcription_session_create_response.rbs +69 -0
  144. data/sig/openai/models/realtime/realtime_transcription_session_input_audio_transcription.rbs +59 -0
  145. data/sig/openai/models/realtime/realtime_transcription_session_turn_detection.rbs +47 -0
  146. data/sig/openai/models/realtime/realtime_truncation.rbs +1 -28
  147. data/sig/openai/models/realtime/realtime_truncation_retention_ratio.rbs +21 -0
  148. data/sig/openai/models/realtime/response_create_event.rbs +6 -249
  149. data/sig/openai/models/realtime/session_created_event.rbs +14 -4
  150. data/sig/openai/models/realtime/session_update_event.rbs +14 -4
  151. data/sig/openai/models/realtime/session_updated_event.rbs +14 -4
  152. data/sig/openai/models/realtime/transcription_session_created.rbs +4 -254
  153. data/sig/openai/models/realtime/transcription_session_update.rbs +154 -4
  154. data/sig/openai/models/realtime/transcription_session_updated_event.rbs +4 -254
  155. metadata +59 -5
  156. data/lib/openai/models/realtime/realtime_client_secret_config.rb +0 -64
  157. data/rbi/openai/models/realtime/realtime_client_secret_config.rbi +0 -147
  158. data/sig/openai/models/realtime/realtime_client_secret_config.rbs +0 -60
@@ -16,7 +16,7 @@ module OpenAI
16
16
  sig { returns(String) }
17
17
  attr_accessor :event_id
18
18
 
19
- # The ID of the item.
19
+ # The ID of the item containing the audio that is being transcribed.
20
20
  sig { returns(String) }
21
21
  attr_accessor :item_id
22
22
 
@@ -38,14 +38,19 @@ module OpenAI
38
38
  sig { params(delta: String).void }
39
39
  attr_writer :delta
40
40
 
41
- # The log probabilities of the transcription.
41
+ # The log probabilities of the transcription. These can be enabled by
42
+ # configuring the session with
43
+ # `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the
44
+ # array corresponds to a log probability of which token would be selected for this
45
+ # chunk of transcription. This can help identify whether there were
46
+ # multiple valid options for a given chunk of transcription.
42
47
  sig do
43
48
  returns(T.nilable(T::Array[OpenAI::Realtime::LogProbProperties]))
44
49
  end
45
50
  attr_accessor :logprobs
46
51
 
47
52
  # Returned when the text value of an input audio transcription content part is
48
- # updated.
53
+ # updated with incremental transcription results.
49
54
  sig do
50
55
  params(
51
56
  event_id: String,
@@ -60,13 +65,18 @@ module OpenAI
60
65
  def self.new(
61
66
  # The unique ID of the server event.
62
67
  event_id:,
63
- # The ID of the item.
68
+ # The ID of the item containing the audio that is being transcribed.
64
69
  item_id:,
65
70
  # The index of the content part in the item's content array.
66
71
  content_index: nil,
67
72
  # The text delta.
68
73
  delta: nil,
69
- # The log probabilities of the transcription.
74
+ # The log probabilities of the transcription. These can be enabled by
75
+ # configuring the session with
76
+ # `"include": ["item.input_audio_transcription.logprobs"]`. Each entry in the
77
+ # array corresponds to a log probability of which token would be selected for this
78
+ # chunk of transcription. This can help identify whether there were
79
+ # multiple valid options for a given chunk of transcription.
70
80
  logprobs: nil,
71
81
  # The event type, must be `conversation.item.input_audio_transcription.delta`.
72
82
  type: :"conversation.item.input_audio_transcription.delta"
@@ -18,7 +18,7 @@ module OpenAI
18
18
  sig { returns(Integer) }
19
19
  attr_accessor :audio_end_ms
20
20
 
21
- # The index of the content part to truncate. Set this to 0.
21
+ # The index of the content part to truncate. Set this to `0`.
22
22
  sig { returns(Integer) }
23
23
  attr_accessor :content_index
24
24
 
@@ -63,7 +63,7 @@ module OpenAI
63
63
  # audio_end_ms is greater than the actual audio duration, the server will respond
64
64
  # with an error.
65
65
  audio_end_ms:,
66
- # The index of the content part to truncate. Set this to 0.
66
+ # The index of the content part to truncate. Set this to `0`.
67
67
  content_index:,
68
68
  # The ID of the assistant message item to truncate. Only assistant message items
69
69
  # can be truncated.
@@ -29,14 +29,19 @@ module OpenAI
29
29
  attr_writer :event_id
30
30
 
31
31
  # Send this event to append audio bytes to the input audio buffer. The audio
32
- # buffer is temporary storage you can write to and later commit. In Server VAD
33
- # mode, the audio buffer is used to detect speech and the server will decide when
34
- # to commit. When Server VAD is disabled, you must commit the audio buffer
35
- # manually.
32
+ # buffer is temporary storage you can write to and later commit. A "commit" will
33
+ # create a new user message item in the conversation history from the buffer
34
+ # content and clear the buffer. Input audio transcription (if enabled) will be
35
+ # generated when the buffer is committed.
36
+ #
37
+ # If VAD is enabled, the audio buffer is used to detect speech and the server will
38
+ # decide when to commit. When Server VAD is disabled, you must commit the audio
39
+ # buffer manually. Input audio noise reduction operates on writes to the audio
40
+ # buffer.
36
41
  #
37
42
  # The client may choose how much audio to place in each event up to a maximum of
38
43
  # 15 MiB; for example, streaming smaller chunks from the client may allow the VAD
39
- # to be more responsive. Unlike made other client events, the server will not send
44
+ # to be more responsive. Unlike most other client events, the server will not send
40
45
  # a confirmation response to this event.
41
46
  sig do
42
47
  params(audio: String, event_id: String, type: Symbol).returns(
@@ -0,0 +1,97 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ class Models < OpenAI::Internal::Type::BaseModel
7
+ OrHash =
8
+ T.type_alias do
9
+ T.any(OpenAI::Realtime::Models, OpenAI::Internal::AnyHash)
10
+ end
11
+
12
+ # The description of the function, including guidance on when and how to call it,
13
+ # and guidance about what to tell the user when calling (if anything).
14
+ sig { returns(T.nilable(String)) }
15
+ attr_reader :description
16
+
17
+ sig { params(description: String).void }
18
+ attr_writer :description
19
+
20
+ # The name of the function.
21
+ sig { returns(T.nilable(String)) }
22
+ attr_reader :name
23
+
24
+ sig { params(name: String).void }
25
+ attr_writer :name
26
+
27
+ # Parameters of the function in JSON Schema.
28
+ sig { returns(T.nilable(T.anything)) }
29
+ attr_reader :parameters
30
+
31
+ sig { params(parameters: T.anything).void }
32
+ attr_writer :parameters
33
+
34
+ # The type of the tool, i.e. `function`.
35
+ sig { returns(T.nilable(OpenAI::Realtime::Models::Type::OrSymbol)) }
36
+ attr_reader :type
37
+
38
+ sig { params(type: OpenAI::Realtime::Models::Type::OrSymbol).void }
39
+ attr_writer :type
40
+
41
+ sig do
42
+ params(
43
+ description: String,
44
+ name: String,
45
+ parameters: T.anything,
46
+ type: OpenAI::Realtime::Models::Type::OrSymbol
47
+ ).returns(T.attached_class)
48
+ end
49
+ def self.new(
50
+ # The description of the function, including guidance on when and how to call it,
51
+ # and guidance about what to tell the user when calling (if anything).
52
+ description: nil,
53
+ # The name of the function.
54
+ name: nil,
55
+ # Parameters of the function in JSON Schema.
56
+ parameters: nil,
57
+ # The type of the tool, i.e. `function`.
58
+ type: nil
59
+ )
60
+ end
61
+
62
+ sig do
63
+ override.returns(
64
+ {
65
+ description: String,
66
+ name: String,
67
+ parameters: T.anything,
68
+ type: OpenAI::Realtime::Models::Type::OrSymbol
69
+ }
70
+ )
71
+ end
72
+ def to_hash
73
+ end
74
+
75
+ # The type of the tool, i.e. `function`.
76
+ module Type
77
+ extend OpenAI::Internal::Type::Enum
78
+
79
+ TaggedSymbol =
80
+ T.type_alias { T.all(Symbol, OpenAI::Realtime::Models::Type) }
81
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
82
+
83
+ FUNCTION =
84
+ T.let(:function, OpenAI::Realtime::Models::Type::TaggedSymbol)
85
+
86
+ sig do
87
+ override.returns(
88
+ T::Array[OpenAI::Realtime::Models::Type::TaggedSymbol]
89
+ )
90
+ end
91
+ def self.values
92
+ end
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,31 @@
1
+ # typed: strong
2
+
3
+ module OpenAI
4
+ module Models
5
+ module Realtime
6
+ # Type of noise reduction. `near_field` is for close-talking microphones such as
7
+ # headphones; `far_field` is for far-field microphones such as laptop or
8
+ # conference room microphones.
9
+ module NoiseReductionType
10
+ extend OpenAI::Internal::Type::Enum
11
+
12
+ TaggedSymbol =
13
+ T.type_alias { T.all(Symbol, OpenAI::Realtime::NoiseReductionType) }
14
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
15
+
16
+ NEAR_FIELD =
17
+ T.let(:near_field, OpenAI::Realtime::NoiseReductionType::TaggedSymbol)
18
+ FAR_FIELD =
19
+ T.let(:far_field, OpenAI::Realtime::NoiseReductionType::TaggedSymbol)
20
+
21
+ sig do
22
+ override.returns(
23
+ T::Array[OpenAI::Realtime::NoiseReductionType::TaggedSymbol]
24
+ )
25
+ end
26
+ def self.values
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end