openai 0.30.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +28 -0
  3. data/README.md +1 -1
  4. data/lib/openai/internal/util.rb +5 -5
  5. data/lib/openai/models/audio/transcription_create_params.rb +42 -11
  6. data/lib/openai/models/audio/transcription_create_response.rb +4 -1
  7. data/lib/openai/models/audio/transcription_diarized.rb +160 -0
  8. data/lib/openai/models/audio/transcription_diarized_segment.rb +65 -0
  9. data/lib/openai/models/audio/transcription_stream_event.rb +7 -4
  10. data/lib/openai/models/audio/transcription_text_delta_event.rb +10 -1
  11. data/lib/openai/models/audio/transcription_text_segment_event.rb +63 -0
  12. data/lib/openai/models/audio_model.rb +1 -0
  13. data/lib/openai/models/audio_response_format.rb +5 -2
  14. data/lib/openai/models/beta/assistant_create_params.rb +3 -0
  15. data/lib/openai/models/beta/assistant_update_params.rb +3 -0
  16. data/lib/openai/models/beta/threads/run_create_params.rb +3 -0
  17. data/lib/openai/models/chat/completion_create_params.rb +3 -0
  18. data/lib/openai/models/comparison_filter.rb +29 -6
  19. data/lib/openai/models/evals/create_eval_completions_run_data_source.rb +3 -0
  20. data/lib/openai/models/evals/run_cancel_response.rb +6 -0
  21. data/lib/openai/models/evals/run_create_params.rb +6 -0
  22. data/lib/openai/models/evals/run_create_response.rb +6 -0
  23. data/lib/openai/models/evals/run_list_response.rb +6 -0
  24. data/lib/openai/models/evals/run_retrieve_response.rb +6 -0
  25. data/lib/openai/models/graders/score_model_grader.rb +3 -0
  26. data/lib/openai/models/realtime/audio_transcription.rb +8 -6
  27. data/lib/openai/models/reasoning.rb +3 -0
  28. data/lib/openai/models/reasoning_effort.rb +3 -0
  29. data/lib/openai/models/vector_store_create_params.rb +10 -1
  30. data/lib/openai/models/vector_stores/vector_store_file.rb +3 -3
  31. data/lib/openai/resources/audio/transcriptions.rb +12 -4
  32. data/lib/openai/resources/files.rb +1 -1
  33. data/lib/openai/resources/vector_stores.rb +3 -1
  34. data/lib/openai/version.rb +1 -1
  35. data/lib/openai.rb +3 -0
  36. data/rbi/openai/models/audio/transcription_create_params.rbi +66 -16
  37. data/rbi/openai/models/audio/transcription_create_response.rbi +1 -0
  38. data/rbi/openai/models/audio/transcription_diarized.rbi +281 -0
  39. data/rbi/openai/models/audio/transcription_diarized_segment.rbi +87 -0
  40. data/rbi/openai/models/audio/transcription_stream_event.rbi +4 -3
  41. data/rbi/openai/models/audio/transcription_text_delta_event.rbi +14 -1
  42. data/rbi/openai/models/audio/transcription_text_segment_event.rbi +86 -0
  43. data/rbi/openai/models/audio_model.rbi +2 -0
  44. data/rbi/openai/models/audio_response_format.rbi +6 -2
  45. data/rbi/openai/models/beta/assistant_create_params.rbi +6 -0
  46. data/rbi/openai/models/beta/assistant_update_params.rbi +6 -0
  47. data/rbi/openai/models/beta/threads/run_create_params.rbi +6 -0
  48. data/rbi/openai/models/chat/completion_create_params.rbi +6 -0
  49. data/rbi/openai/models/comparison_filter.rbi +43 -4
  50. data/rbi/openai/models/eval_create_response.rbi +4 -4
  51. data/rbi/openai/models/eval_list_response.rbi +4 -4
  52. data/rbi/openai/models/eval_retrieve_response.rbi +4 -4
  53. data/rbi/openai/models/eval_update_response.rbi +4 -4
  54. data/rbi/openai/models/evals/create_eval_completions_run_data_source.rbi +6 -0
  55. data/rbi/openai/models/evals/run_cancel_response.rbi +12 -0
  56. data/rbi/openai/models/evals/run_create_params.rbi +12 -0
  57. data/rbi/openai/models/evals/run_create_response.rbi +12 -0
  58. data/rbi/openai/models/evals/run_list_response.rbi +12 -0
  59. data/rbi/openai/models/evals/run_retrieve_response.rbi +12 -0
  60. data/rbi/openai/models/graders/score_model_grader.rbi +6 -0
  61. data/rbi/openai/models/realtime/audio_transcription.rbi +15 -12
  62. data/rbi/openai/models/reasoning.rbi +6 -0
  63. data/rbi/openai/models/reasoning_effort.rbi +3 -0
  64. data/rbi/openai/models/vector_store_create_params.rbi +13 -0
  65. data/rbi/openai/models/vector_stores/vector_store_file.rbi +3 -3
  66. data/rbi/openai/resources/audio/transcriptions.rbi +52 -14
  67. data/rbi/openai/resources/beta/assistants.rbi +6 -0
  68. data/rbi/openai/resources/beta/threads/runs.rbi +6 -0
  69. data/rbi/openai/resources/chat/completions.rbi +6 -0
  70. data/rbi/openai/resources/files.rbi +1 -1
  71. data/rbi/openai/resources/vector_stores.rbi +4 -0
  72. data/sig/openai/models/audio/transcription_create_params.rbs +14 -0
  73. data/sig/openai/models/audio/transcription_create_response.rbs +3 -1
  74. data/sig/openai/models/audio/transcription_diarized.rbs +129 -0
  75. data/sig/openai/models/audio/transcription_diarized_segment.rbs +47 -0
  76. data/sig/openai/models/audio/transcription_stream_event.rbs +2 -1
  77. data/sig/openai/models/audio/transcription_text_delta_event.rbs +9 -2
  78. data/sig/openai/models/audio/transcription_text_segment_event.rbs +47 -0
  79. data/sig/openai/models/audio_model.rbs +5 -1
  80. data/sig/openai/models/audio_response_format.rbs +3 -1
  81. data/sig/openai/models/comparison_filter.rbs +15 -1
  82. data/sig/openai/models/eval_create_response.rbs +2 -2
  83. data/sig/openai/models/eval_list_response.rbs +2 -2
  84. data/sig/openai/models/eval_retrieve_response.rbs +2 -2
  85. data/sig/openai/models/eval_update_response.rbs +2 -2
  86. data/sig/openai/models/realtime/audio_transcription.rbs +2 -2
  87. data/sig/openai/models/vector_store_create_params.rbs +7 -0
  88. data/sig/openai/resources/audio/transcriptions.rbs +4 -0
  89. data/sig/openai/resources/vector_stores.rbs +1 -0
  90. metadata +11 -2
@@ -10,7 +10,8 @@ module OpenAI
10
10
  required :key, String
11
11
 
12
12
  # @!attribute type
13
- # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
13
+ # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`,
14
+ # `nin`.
14
15
  #
15
16
  # - `eq`: equals
16
17
  # - `ne`: not equal
@@ -18,6 +19,8 @@ module OpenAI
18
19
  # - `gte`: greater than or equal
19
20
  # - `lt`: less than
20
21
  # - `lte`: less than or equal
22
+ # - `in`: in
23
+ # - `nin`: not in
21
24
  #
22
25
  # @return [Symbol, OpenAI::Models::ComparisonFilter::Type]
23
26
  required :type, enum: -> { OpenAI::ComparisonFilter::Type }
@@ -26,7 +29,7 @@ module OpenAI
26
29
  # The value to compare against the attribute key; supports string, number, or
27
30
  # boolean types.
28
31
  #
29
- # @return [String, Float, Boolean]
32
+ # @return [String, Float, Boolean, Array<String, Float>]
30
33
  required :value, union: -> { OpenAI::ComparisonFilter::Value }
31
34
 
32
35
  # @!method initialize(key:, type:, value:)
@@ -38,11 +41,12 @@ module OpenAI
38
41
  #
39
42
  # @param key [String] The key to compare against the value.
40
43
  #
41
- # @param type [Symbol, OpenAI::Models::ComparisonFilter::Type] Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
44
+ # @param type [Symbol, OpenAI::Models::ComparisonFilter::Type] Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, `
42
45
  #
43
- # @param value [String, Float, Boolean] The value to compare against the attribute key; supports string, number, or bool
46
+ # @param value [String, Float, Boolean, Array<String, Float>] The value to compare against the attribute key; supports string, number, or bool
44
47
 
45
- # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
48
+ # Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`,
49
+ # `nin`.
46
50
  #
47
51
  # - `eq`: equals
48
52
  # - `ne`: not equal
@@ -50,6 +54,8 @@ module OpenAI
50
54
  # - `gte`: greater than or equal
51
55
  # - `lt`: less than
52
56
  # - `lte`: less than or equal
57
+ # - `in`: in
58
+ # - `nin`: not in
53
59
  #
54
60
  # @see OpenAI::Models::ComparisonFilter#type
55
61
  module Type
@@ -79,8 +85,25 @@ module OpenAI
79
85
 
80
86
  variant OpenAI::Internal::Type::Boolean
81
87
 
88
+ variant -> { OpenAI::Models::ComparisonFilter::Value::UnionMember3Array }
89
+
90
+ module UnionMember3
91
+ extend OpenAI::Internal::Type::Union
92
+
93
+ variant String
94
+
95
+ variant Float
96
+
97
+ # @!method self.variants
98
+ # @return [Array(String, Float)]
99
+ end
100
+
82
101
  # @!method self.variants
83
- # @return [Array(String, Float, Boolean)]
102
+ # @return [Array(String, Float, Boolean, Array<String, Float>)]
103
+
104
+ # @type [OpenAI::Internal::Type::Converter]
105
+ UnionMember3Array =
106
+ OpenAI::Internal::Type::ArrayOf[union: -> { OpenAI::ComparisonFilter::Value::UnionMember3 }]
84
107
  end
85
108
  end
86
109
  end
@@ -466,6 +466,9 @@ module OpenAI
466
466
  # effort can result in faster responses and fewer tokens used on reasoning in a
467
467
  # response.
468
468
  #
469
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
470
+ # effort.
471
+ #
469
472
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
470
473
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
471
474
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -661,6 +664,9 @@ module OpenAI
661
664
  # effort can result in faster responses and fewer tokens used on reasoning in a
662
665
  # response.
663
666
  #
667
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
668
+ # effort.
669
+ #
664
670
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
665
671
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
666
672
 
@@ -232,6 +232,9 @@ module OpenAI
232
232
  # effort can result in faster responses and fewer tokens used on reasoning in a
233
233
  # response.
234
234
  #
235
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
236
+ # effort.
237
+ #
235
238
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
236
239
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
237
240
 
@@ -589,6 +592,9 @@ module OpenAI
589
592
  # effort can result in faster responses and fewer tokens used on reasoning in a
590
593
  # response.
591
594
  #
595
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
596
+ # effort.
597
+ #
592
598
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
593
599
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
594
600
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -661,6 +664,9 @@ module OpenAI
661
664
  # effort can result in faster responses and fewer tokens used on reasoning in a
662
665
  # response.
663
666
  #
667
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
668
+ # effort.
669
+ #
664
670
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
665
671
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
666
672
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -661,6 +664,9 @@ module OpenAI
661
664
  # effort can result in faster responses and fewer tokens used on reasoning in a
662
665
  # response.
663
666
  #
667
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
668
+ # effort.
669
+ #
664
670
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
665
671
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
666
672
 
@@ -320,6 +320,9 @@ module OpenAI
320
320
  # effort can result in faster responses and fewer tokens used on reasoning in a
321
321
  # response.
322
322
  #
323
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
324
+ # effort.
325
+ #
323
326
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
324
327
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
325
328
 
@@ -665,6 +668,9 @@ module OpenAI
665
668
  # effort can result in faster responses and fewer tokens used on reasoning in a
666
669
  # response.
667
670
  #
671
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
672
+ # effort.
673
+ #
668
674
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
669
675
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
670
676
 
@@ -226,6 +226,9 @@ module OpenAI
226
226
  # effort can result in faster responses and fewer tokens used on reasoning in a
227
227
  # response.
228
228
  #
229
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
230
+ # effort.
231
+ #
229
232
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
230
233
  optional :reasoning_effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
231
234
 
@@ -14,7 +14,8 @@ module OpenAI
14
14
 
15
15
  # @!attribute model
16
16
  # The model to use for transcription. Current options are `whisper-1`,
17
- # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
17
+ # `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`.
18
+ # Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
18
19
  #
19
20
  # @return [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model, nil]
20
21
  optional :model, enum: -> { OpenAI::Realtime::AudioTranscription::Model }
@@ -23,8 +24,8 @@ module OpenAI
23
24
  # An optional text to guide the model's style or continue a previous audio
24
25
  # segment. For `whisper-1`, the
25
26
  # [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
26
- # For `gpt-4o-transcribe` models, the prompt is a free text string, for example
27
- # "expect words related to technology".
27
+ # For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the
28
+ # prompt is a free text string, for example "expect words related to technology".
28
29
  #
29
30
  # @return [String, nil]
30
31
  optional :prompt, String
@@ -35,21 +36,22 @@ module OpenAI
35
36
  #
36
37
  # @param language [String] The language of the input audio. Supplying the input language in
37
38
  #
38
- # @param model [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-tra
39
+ # @param model [Symbol, OpenAI::Models::Realtime::AudioTranscription::Model] The model to use for transcription. Current options are `whisper-1`, `gpt-4o-min
39
40
  #
40
41
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio
41
42
 
42
43
  # The model to use for transcription. Current options are `whisper-1`,
43
- # `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
44
+ # `gpt-4o-mini-transcribe`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`.
45
+ # Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
44
46
  #
45
47
  # @see OpenAI::Models::Realtime::AudioTranscription#model
46
48
  module Model
47
49
  extend OpenAI::Internal::Type::Enum
48
50
 
49
51
  WHISPER_1 = :"whisper-1"
50
- GPT_4O_TRANSCRIBE_LATEST = :"gpt-4o-transcribe-latest"
51
52
  GPT_4O_MINI_TRANSCRIBE = :"gpt-4o-mini-transcribe"
52
53
  GPT_4O_TRANSCRIBE = :"gpt-4o-transcribe"
54
+ GPT_4O_TRANSCRIBE_DIARIZE = :"gpt-4o-transcribe-diarize"
53
55
 
54
56
  # @!method self.values
55
57
  # @return [Array<Symbol>]
@@ -10,6 +10,9 @@ module OpenAI
10
10
  # effort can result in faster responses and fewer tokens used on reasoning in a
11
11
  # response.
12
12
  #
13
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
14
+ # effort.
15
+ #
13
16
  # @return [Symbol, OpenAI::Models::ReasoningEffort, nil]
14
17
  optional :effort, enum: -> { OpenAI::ReasoningEffort }, nil?: true
15
18
 
@@ -7,6 +7,9 @@ module OpenAI
7
7
  # supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
8
8
  # effort can result in faster responses and fewer tokens used on reasoning in a
9
9
  # response.
10
+ #
11
+ # Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
12
+ # effort.
10
13
  module ReasoningEffort
11
14
  extend OpenAI::Internal::Type::Enum
12
15
 
@@ -14,6 +14,13 @@ module OpenAI
14
14
  # @return [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam, nil]
15
15
  optional :chunking_strategy, union: -> { OpenAI::FileChunkingStrategyParam }
16
16
 
17
+ # @!attribute description
18
+ # A description for the vector store. Can be used to describe the vector store's
19
+ # purpose.
20
+ #
21
+ # @return [String, nil]
22
+ optional :description, String
23
+
17
24
  # @!attribute expires_after
18
25
  # The expiration policy for a vector store.
19
26
  #
@@ -45,12 +52,14 @@ module OpenAI
45
52
  # @return [String, nil]
46
53
  optional :name, String
47
54
 
48
- # @!method initialize(chunking_strategy: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
55
+ # @!method initialize(chunking_strategy: nil, description: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
49
56
  # Some parameter documentations has been truncated, see
50
57
  # {OpenAI::Models::VectorStoreCreateParams} for more details.
51
58
  #
52
59
  # @param chunking_strategy [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam] The chunking strategy used to chunk the file(s). If not set, will use the `auto`
53
60
  #
61
+ # @param description [String] A description for the vector store. Can be used to describe the vector store's p
62
+ #
54
63
  # @param expires_after [OpenAI::Models::VectorStoreCreateParams::ExpiresAfter] The expiration policy for a vector store.
55
64
  #
56
65
  # @param file_ids [Array<String>] A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
@@ -101,7 +101,7 @@ module OpenAI
101
101
  # @see OpenAI::Models::VectorStores::VectorStoreFile#last_error
102
102
  class LastError < OpenAI::Internal::Type::BaseModel
103
103
  # @!attribute code
104
- # One of `server_error` or `rate_limit_exceeded`.
104
+ # One of `server_error`, `unsupported_file`, or `invalid_file`.
105
105
  #
106
106
  # @return [Symbol, OpenAI::Models::VectorStores::VectorStoreFile::LastError::Code]
107
107
  required :code, enum: -> { OpenAI::VectorStores::VectorStoreFile::LastError::Code }
@@ -116,11 +116,11 @@ module OpenAI
116
116
  # The last error associated with this vector store file. Will be `null` if there
117
117
  # are no errors.
118
118
  #
119
- # @param code [Symbol, OpenAI::Models::VectorStores::VectorStoreFile::LastError::Code] One of `server_error` or `rate_limit_exceeded`.
119
+ # @param code [Symbol, OpenAI::Models::VectorStores::VectorStoreFile::LastError::Code] One of `server_error`, `unsupported_file`, or `invalid_file`.
120
120
  #
121
121
  # @param message [String] A human-readable description of the error.
122
122
 
123
- # One of `server_error` or `rate_limit_exceeded`.
123
+ # One of `server_error`, `unsupported_file`, or `invalid_file`.
124
124
  #
125
125
  # @see OpenAI::Models::VectorStores::VectorStoreFile::LastError#code
126
126
  module Code
@@ -12,7 +12,7 @@ module OpenAI
12
12
  #
13
13
  # Transcribes audio into the input language.
14
14
  #
15
- # @overload create(file:, model:, chunking_strategy: nil, include: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
15
+ # @overload create(file:, model:, chunking_strategy: nil, include: nil, known_speaker_names: nil, known_speaker_references: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
16
16
  #
17
17
  # @param file [Pathname, StringIO, IO, String, OpenAI::FilePart] The audio file object (not file name) to transcribe, in one of these formats: fl
18
18
  #
@@ -22,6 +22,10 @@ module OpenAI
22
22
  #
23
23
  # @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>] Additional information to include in the transcription response.
24
24
  #
25
+ # @param known_speaker_names [Array<String>] Optional list of speaker names that correspond to the audio samples provided in
26
+ #
27
+ # @param known_speaker_references [Array<String>] Optional list of audio samples (as [data URLs](https://developer.mozilla.org/en-
28
+ #
25
29
  # @param language [String] The language of the input audio. Supplying the input language in [ISO-639-1](htt
26
30
  #
27
31
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
@@ -34,7 +38,7 @@ module OpenAI
34
38
  #
35
39
  # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil]
36
40
  #
37
- # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose]
41
+ # @return [OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionDiarized, OpenAI::Models::Audio::TranscriptionVerbose]
38
42
  #
39
43
  # @see OpenAI::Models::Audio::TranscriptionCreateParams
40
44
  def create(params)
@@ -61,7 +65,7 @@ module OpenAI
61
65
  #
62
66
  # Transcribes audio into the input language.
63
67
  #
64
- # @overload create_streaming(file:, model:, chunking_strategy: nil, include: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
68
+ # @overload create_streaming(file:, model:, chunking_strategy: nil, include: nil, known_speaker_names: nil, known_speaker_references: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
65
69
  #
66
70
  # @param file [Pathname, StringIO, IO, String, OpenAI::FilePart] The audio file object (not file name) to transcribe, in one of these formats: fl
67
71
  #
@@ -71,6 +75,10 @@ module OpenAI
71
75
  #
72
76
  # @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>] Additional information to include in the transcription response.
73
77
  #
78
+ # @param known_speaker_names [Array<String>] Optional list of speaker names that correspond to the audio samples provided in
79
+ #
80
+ # @param known_speaker_references [Array<String>] Optional list of audio samples (as [data URLs](https://developer.mozilla.org/en-
81
+ #
74
82
  # @param language [String] The language of the input audio. Supplying the input language in [ISO-639-1](htt
75
83
  #
76
84
  # @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
@@ -83,7 +91,7 @@ module OpenAI
83
91
  #
84
92
  # @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}, nil]
85
93
  #
86
- # @return [OpenAI::Internal::Stream<OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent>]
94
+ # @return [OpenAI::Internal::Stream<OpenAI::Models::Audio::TranscriptionTextSegmentEvent, OpenAI::Models::Audio::TranscriptionTextDeltaEvent, OpenAI::Models::Audio::TranscriptionTextDoneEvent>]
87
95
  #
88
96
  # @see OpenAI::Models::Audio::TranscriptionCreateParams
89
97
  def create_streaming(params)
@@ -105,7 +105,7 @@ module OpenAI
105
105
  )
106
106
  end
107
107
 
108
- # Delete a file.
108
+ # Delete a file and remove it from all vector stores.
109
109
  #
110
110
  # @overload delete(file_id, request_options: {})
111
111
  #
@@ -14,10 +14,12 @@ module OpenAI
14
14
  #
15
15
  # Create a vector store.
16
16
  #
17
- # @overload create(chunking_strategy: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
17
+ # @overload create(chunking_strategy: nil, description: nil, expires_after: nil, file_ids: nil, metadata: nil, name: nil, request_options: {})
18
18
  #
19
19
  # @param chunking_strategy [OpenAI::Models::AutoFileChunkingStrategyParam, OpenAI::Models::StaticFileChunkingStrategyObjectParam] The chunking strategy used to chunk the file(s). If not set, will use the `auto`
20
20
  #
21
+ # @param description [String] A description for the vector store. Can be used to describe the vector store's p
22
+ #
21
23
  # @param expires_after [OpenAI::Models::VectorStoreCreateParams::ExpiresAfter] The expiration policy for a vector store.
22
24
  #
23
25
  # @param file_ids [Array<String>] A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OpenAI
4
- VERSION = "0.30.0"
4
+ VERSION = "0.32.0"
5
5
  end
data/lib/openai.rb CHANGED
@@ -79,11 +79,14 @@ require_relative "openai/models/audio/speech_model"
79
79
  require_relative "openai/models/audio/transcription"
80
80
  require_relative "openai/models/audio/transcription_create_params"
81
81
  require_relative "openai/models/audio/transcription_create_response"
82
+ require_relative "openai/models/audio/transcription_diarized"
83
+ require_relative "openai/models/audio/transcription_diarized_segment"
82
84
  require_relative "openai/models/audio/transcription_include"
83
85
  require_relative "openai/models/audio/transcription_segment"
84
86
  require_relative "openai/models/audio/transcription_stream_event"
85
87
  require_relative "openai/models/audio/transcription_text_delta_event"
86
88
  require_relative "openai/models/audio/transcription_text_done_event"
89
+ require_relative "openai/models/audio/transcription_text_segment_event"
87
90
  require_relative "openai/models/audio/transcription_verbose"
88
91
  require_relative "openai/models/audio/transcription_word"
89
92
  require_relative "openai/models/audio/translation"
@@ -21,8 +21,8 @@ module OpenAI
21
21
  attr_accessor :file
22
22
 
23
23
  # ID of the model to use. The options are `gpt-4o-transcribe`,
24
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
25
- # Whisper V2 model).
24
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
25
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
26
26
  sig { returns(T.any(String, OpenAI::AudioModel::OrSymbol)) }
27
27
  attr_accessor :model
28
28
 
@@ -30,6 +30,8 @@ module OpenAI
30
30
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
31
31
  # boundaries. `server_vad` object can be provided to tweak VAD detection
32
32
  # parameters manually. If unset, the audio is transcribed as a single block.
33
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
34
+ # seconds.
33
35
  sig do
34
36
  returns(
35
37
  T.nilable(
@@ -46,7 +48,8 @@ module OpenAI
46
48
  # return the log probabilities of the tokens in the response to understand the
47
49
  # model's confidence in the transcription. `logprobs` only works with
48
50
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
49
- # `gpt-4o-mini-transcribe`.
51
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
52
+ # `gpt-4o-transcribe-diarize`.
50
53
  sig do
51
54
  returns(
52
55
  T.nilable(T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol])
@@ -61,6 +64,26 @@ module OpenAI
61
64
  end
62
65
  attr_writer :include
63
66
 
67
+ # Optional list of speaker names that correspond to the audio samples provided in
68
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
69
+ # example `customer` or `agent`). Up to 4 speakers are supported.
70
+ sig { returns(T.nilable(T::Array[String])) }
71
+ attr_reader :known_speaker_names
72
+
73
+ sig { params(known_speaker_names: T::Array[String]).void }
74
+ attr_writer :known_speaker_names
75
+
76
+ # Optional list of audio samples (as
77
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
78
+ # that contain known speaker references matching `known_speaker_names[]`. Each
79
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
80
+ # formats supported by `file`.
81
+ sig { returns(T.nilable(T::Array[String])) }
82
+ attr_reader :known_speaker_references
83
+
84
+ sig { params(known_speaker_references: T::Array[String]).void }
85
+ attr_writer :known_speaker_references
86
+
64
87
  # The language of the input audio. Supplying the input language in
65
88
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
66
89
  # format will improve accuracy and latency.
@@ -73,7 +96,8 @@ module OpenAI
73
96
  # An optional text to guide the model's style or continue a previous audio
74
97
  # segment. The
75
98
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
76
- # should match the audio language.
99
+ # should match the audio language. This field is not supported when using
100
+ # `gpt-4o-transcribe-diarize`.
77
101
  sig { returns(T.nilable(String)) }
78
102
  attr_reader :prompt
79
103
 
@@ -81,8 +105,10 @@ module OpenAI
81
105
  attr_writer :prompt
82
106
 
83
107
  # The format of the output, in one of these options: `json`, `text`, `srt`,
84
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
85
- # the only supported format is `json`.
108
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
109
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
110
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
111
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
86
112
  sig { returns(T.nilable(OpenAI::AudioResponseFormat::OrSymbol)) }
87
113
  attr_reader :response_format
88
114
 
@@ -106,7 +132,8 @@ module OpenAI
106
132
  # `response_format` must be set `verbose_json` to use timestamp granularities.
107
133
  # Either or both of these options are supported: `word`, or `segment`. Note: There
108
134
  # is no additional latency for segment timestamps, but generating word timestamps
109
- # incurs additional latency.
135
+ # incurs additional latency. This option is not available for
136
+ # `gpt-4o-transcribe-diarize`.
110
137
  sig do
111
138
  returns(
112
139
  T.nilable(
@@ -140,6 +167,8 @@ module OpenAI
140
167
  )
141
168
  ),
142
169
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
170
+ known_speaker_names: T::Array[String],
171
+ known_speaker_references: T::Array[String],
143
172
  language: String,
144
173
  prompt: String,
145
174
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -156,20 +185,33 @@ module OpenAI
156
185
  # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
157
186
  file:,
158
187
  # ID of the model to use. The options are `gpt-4o-transcribe`,
159
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
160
- # Whisper V2 model).
188
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
189
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
161
190
  model:,
162
191
  # Controls how the audio is cut into chunks. When set to `"auto"`, the server
163
192
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
164
193
  # boundaries. `server_vad` object can be provided to tweak VAD detection
165
194
  # parameters manually. If unset, the audio is transcribed as a single block.
195
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
196
+ # seconds.
166
197
  chunking_strategy: nil,
167
198
  # Additional information to include in the transcription response. `logprobs` will
168
199
  # return the log probabilities of the tokens in the response to understand the
169
200
  # model's confidence in the transcription. `logprobs` only works with
170
201
  # response_format set to `json` and only with the models `gpt-4o-transcribe` and
171
- # `gpt-4o-mini-transcribe`.
202
+ # `gpt-4o-mini-transcribe`. This field is not supported when using
203
+ # `gpt-4o-transcribe-diarize`.
172
204
  include: nil,
205
+ # Optional list of speaker names that correspond to the audio samples provided in
206
+ # `known_speaker_references[]`. Each entry should be a short identifier (for
207
+ # example `customer` or `agent`). Up to 4 speakers are supported.
208
+ known_speaker_names: nil,
209
+ # Optional list of audio samples (as
210
+ # [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
211
+ # that contain known speaker references matching `known_speaker_names[]`. Each
212
+ # sample must be between 2 and 10 seconds, and can use any of the same input audio
213
+ # formats supported by `file`.
214
+ known_speaker_references: nil,
173
215
  # The language of the input audio. Supplying the input language in
174
216
  # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
175
217
  # format will improve accuracy and latency.
@@ -177,11 +219,14 @@ module OpenAI
177
219
  # An optional text to guide the model's style or continue a previous audio
178
220
  # segment. The
179
221
  # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
180
- # should match the audio language.
222
+ # should match the audio language. This field is not supported when using
223
+ # `gpt-4o-transcribe-diarize`.
181
224
  prompt: nil,
182
225
  # The format of the output, in one of these options: `json`, `text`, `srt`,
183
- # `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
184
- # the only supported format is `json`.
226
+ # `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
227
+ # `gpt-4o-mini-transcribe`, the only supported format is `json`. For
228
+ # `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
229
+ # `diarized_json`, with `diarized_json` required to receive speaker annotations.
185
230
  response_format: nil,
186
231
  # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
187
232
  # output more random, while lower values like 0.2 will make it more focused and
@@ -193,7 +238,8 @@ module OpenAI
193
238
  # `response_format` must be set `verbose_json` to use timestamp granularities.
194
239
  # Either or both of these options are supported: `word`, or `segment`. Note: There
195
240
  # is no additional latency for segment timestamps, but generating word timestamps
196
- # incurs additional latency.
241
+ # incurs additional latency. This option is not available for
242
+ # `gpt-4o-transcribe-diarize`.
197
243
  timestamp_granularities: nil,
198
244
  request_options: {}
199
245
  )
@@ -212,6 +258,8 @@ module OpenAI
212
258
  )
213
259
  ),
214
260
  include: T::Array[OpenAI::Audio::TranscriptionInclude::OrSymbol],
261
+ known_speaker_names: T::Array[String],
262
+ known_speaker_references: T::Array[String],
215
263
  language: String,
216
264
  prompt: String,
217
265
  response_format: OpenAI::AudioResponseFormat::OrSymbol,
@@ -228,8 +276,8 @@ module OpenAI
228
276
  end
229
277
 
230
278
  # ID of the model to use. The options are `gpt-4o-transcribe`,
231
- # `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
232
- # Whisper V2 model).
279
+ # `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
280
+ # Whisper V2 model), and `gpt-4o-transcribe-diarize`.
233
281
  module Model
234
282
  extend OpenAI::Internal::Type::Union
235
283
 
@@ -251,6 +299,8 @@ module OpenAI
251
299
  # first normalizes loudness and then uses voice activity detection (VAD) to choose
252
300
  # boundaries. `server_vad` object can be provided to tweak VAD detection
253
301
  # parameters manually. If unset, the audio is transcribed as a single block.
302
+ # Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
303
+ # seconds.
254
304
  module ChunkingStrategy
255
305
  extend OpenAI::Internal::Type::Union
256
306
 
@@ -12,6 +12,7 @@ module OpenAI
12
12
  T.type_alias do
13
13
  T.any(
14
14
  OpenAI::Audio::Transcription,
15
+ OpenAI::Audio::TranscriptionDiarized,
15
16
  OpenAI::Audio::TranscriptionVerbose
16
17
  )
17
18
  end