openai 4.65.0 → 4.67.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +31 -0
  2. package/README.md +1 -1
  3. package/core.d.ts +4 -1
  4. package/core.d.ts.map +1 -1
  5. package/core.js +4 -2
  6. package/core.js.map +1 -1
  7. package/core.mjs +4 -2
  8. package/core.mjs.map +1 -1
  9. package/package.json +1 -1
  10. package/resources/audio/audio.d.ts +8 -2
  11. package/resources/audio/audio.d.ts.map +1 -1
  12. package/resources/audio/audio.js.map +1 -1
  13. package/resources/audio/audio.mjs.map +1 -1
  14. package/resources/audio/index.d.ts +2 -2
  15. package/resources/audio/index.d.ts.map +1 -1
  16. package/resources/audio/index.js.map +1 -1
  17. package/resources/audio/index.mjs +2 -2
  18. package/resources/audio/index.mjs.map +1 -1
  19. package/resources/audio/transcriptions.d.ts +100 -3
  20. package/resources/audio/transcriptions.d.ts.map +1 -1
  21. package/resources/audio/transcriptions.js +0 -3
  22. package/resources/audio/transcriptions.js.map +1 -1
  23. package/resources/audio/transcriptions.mjs +0 -3
  24. package/resources/audio/transcriptions.mjs.map +1 -1
  25. package/resources/audio/translations.d.ts +28 -3
  26. package/resources/audio/translations.d.ts.map +1 -1
  27. package/resources/audio/translations.js +0 -3
  28. package/resources/audio/translations.js.map +1 -1
  29. package/resources/audio/translations.mjs +0 -3
  30. package/resources/audio/translations.mjs.map +1 -1
  31. package/resources/chat/chat.d.ts +1 -1
  32. package/resources/chat/chat.d.ts.map +1 -1
  33. package/resources/chat/chat.js.map +1 -1
  34. package/resources/chat/chat.mjs.map +1 -1
  35. package/resources/chat/completions.d.ts +16 -2
  36. package/resources/chat/completions.d.ts.map +1 -1
  37. package/resources/chat/completions.js.map +1 -1
  38. package/resources/chat/completions.mjs.map +1 -1
  39. package/resources/completions.d.ts +21 -0
  40. package/resources/completions.d.ts.map +1 -1
  41. package/resources/completions.js.map +1 -1
  42. package/resources/completions.mjs.map +1 -1
  43. package/src/core.ts +6 -3
  44. package/src/resources/audio/audio.ts +16 -2
  45. package/src/resources/audio/index.ts +16 -2
  46. package/src/resources/audio/transcriptions.ts +139 -4
  47. package/src/resources/audio/translations.ts +53 -4
  48. package/src/resources/chat/chat.ts +1 -0
  49. package/src/resources/chat/completions.ts +18 -2
  50. package/src/resources/completions.ts +25 -0
  51. package/src/version.ts +1 -1
  52. package/version.d.ts +1 -1
  53. package/version.js +1 -1
  54. package/version.mjs +1 -1
@@ -9,7 +9,23 @@ export class Transcriptions extends APIResource {
9
9
  /**
10
10
  * Transcribes audio into the input language.
11
11
  */
12
- create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription> {
12
+ create(
13
+ body: TranscriptionCreateParams<'json' | undefined>,
14
+ options?: Core.RequestOptions,
15
+ ): Core.APIPromise<Transcription>;
16
+ create(
17
+ body: TranscriptionCreateParams<'verbose_json'>,
18
+ options?: Core.RequestOptions,
19
+ ): Core.APIPromise<TranscriptionVerbose>;
20
+ create(
21
+ body: TranscriptionCreateParams<'srt' | 'vtt' | 'text'>,
22
+ options?: Core.RequestOptions,
23
+ ): Core.APIPromise<string>;
24
+ create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription>;
25
+ create(
26
+ body: TranscriptionCreateParams,
27
+ options?: Core.RequestOptions,
28
+ ): Core.APIPromise<TranscriptionCreateResponse | string> {
13
29
  return this._client.post('/audio/transcriptions', Core.multipartFormRequestOptions({ body, ...options }));
14
30
  }
15
31
  }
@@ -25,7 +41,118 @@ export interface Transcription {
25
41
  text: string;
26
42
  }
27
43
 
28
- export interface TranscriptionCreateParams {
44
+ export interface TranscriptionSegment {
45
+ /**
46
+ * Unique identifier of the segment.
47
+ */
48
+ id: number;
49
+
50
+ /**
51
+ * Average logprob of the segment. If the value is lower than -1, consider the
52
+ * logprobs failed.
53
+ */
54
+ avg_logprob: number;
55
+
56
+ /**
57
+ * Compression ratio of the segment. If the value is greater than 2.4, consider the
58
+ * compression failed.
59
+ */
60
+ compression_ratio: number;
61
+
62
+ /**
63
+ * End time of the segment in seconds.
64
+ */
65
+ end: number;
66
+
67
+ /**
68
+ * Probability of no speech in the segment. If the value is higher than 1.0 and the
69
+ * `avg_logprob` is below -1, consider this segment silent.
70
+ */
71
+ no_speech_prob: number;
72
+
73
+ /**
74
+ * Seek offset of the segment.
75
+ */
76
+ seek: number;
77
+
78
+ /**
79
+ * Start time of the segment in seconds.
80
+ */
81
+ start: number;
82
+
83
+ /**
84
+ * Temperature parameter used for generating the segment.
85
+ */
86
+ temperature: number;
87
+
88
+ /**
89
+ * Text content of the segment.
90
+ */
91
+ text: string;
92
+
93
+ /**
94
+ * Array of token IDs for the text content.
95
+ */
96
+ tokens: Array<number>;
97
+ }
98
+
99
+ /**
100
+ * Represents a verbose JSON transcription response returned by the model, based on the
101
+ * provided input.
102
+ */
103
+ export interface TranscriptionVerbose {
104
+ /**
105
+ * The duration of the input audio.
106
+ */
107
+ duration: string;
108
+
109
+ /**
110
+ * The language of the input audio.
111
+ */
112
+ language: string;
113
+
114
+ /**
115
+ * The transcribed text.
116
+ */
117
+ text: string;
118
+
119
+ /**
120
+ * Segments of the transcribed text and their corresponding details.
121
+ */
122
+ segments?: Array<TranscriptionSegment>;
123
+
124
+ /**
125
+ * Extracted words and their corresponding timestamps.
126
+ */
127
+ words?: Array<TranscriptionWord>;
128
+ }
129
+
130
+ export interface TranscriptionWord {
131
+ /**
132
+ * End time of the word in seconds.
133
+ */
134
+ end: number;
135
+
136
+ /**
137
+ * Start time of the word in seconds.
138
+ */
139
+ start: number;
140
+
141
+ /**
142
+ * The text content of the word.
143
+ */
144
+ word: string;
145
+ }
146
+
147
+ /**
148
+ * Represents a transcription response returned by the model, based on the provided
149
+ * input.
150
+ */
151
+ export type TranscriptionCreateResponse = Transcription | TranscriptionVerbose;
152
+
153
+ export interface TranscriptionCreateParams<
154
+ ResponseFormat extends AudioAPI.AudioResponseFormat | undefined = AudioAPI.AudioResponseFormat | undefined,
155
+ > {
29
156
  /**
30
157
  * The audio file object (not file name) to transcribe, in one of these formats:
31
158
  * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
@@ -57,7 +184,7 @@ export interface TranscriptionCreateParams {
57
184
  * The format of the output, in one of these options: `json`, `text`, `srt`,
58
185
  * `verbose_json`, or `vtt`.
59
186
  */
60
- response_format?: AudioAPI.AudioResponseFormat;
187
+ response_format?: ResponseFormat;
61
188
 
62
189
  /**
63
190
  * The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
@@ -80,5 +207,13 @@ export interface TranscriptionCreateParams {
80
207
 
81
208
  export namespace Transcriptions {
82
209
  export import Transcription = TranscriptionsAPI.Transcription;
83
- export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
210
+ export import TranscriptionSegment = TranscriptionsAPI.TranscriptionSegment;
211
+ export import TranscriptionVerbose = TranscriptionsAPI.TranscriptionVerbose;
212
+ export import TranscriptionWord = TranscriptionsAPI.TranscriptionWord;
213
+ export import TranscriptionCreateResponse = TranscriptionsAPI.TranscriptionCreateResponse;
214
+ export type TranscriptionCreateParams<
215
+ ResponseFormat extends AudioAPI.AudioResponseFormat | undefined =
216
+ | AudioAPI.AudioResponseFormat
217
+ | undefined,
218
+ > = TranscriptionsAPI.TranscriptionCreateParams<ResponseFormat>;
84
219
  }
@@ -4,12 +4,29 @@ import { APIResource } from '../../resource';
4
4
  import * as Core from '../../core';
5
5
  import * as TranslationsAPI from './translations';
6
6
  import * as AudioAPI from './audio';
7
+ import * as TranscriptionsAPI from './transcriptions';
7
8
 
8
9
  export class Translations extends APIResource {
9
10
  /**
10
11
  * Translates audio into English.
11
12
  */
12
- create(body: TranslationCreateParams, options?: Core.RequestOptions): Core.APIPromise<Translation> {
13
+ create(
14
+ body: TranslationCreateParams<'json' | undefined>,
15
+ options?: Core.RequestOptions,
16
+ ): Core.APIPromise<Translation>;
17
+ create(
18
+ body: TranslationCreateParams<'verbose_json'>,
19
+ options?: Core.RequestOptions,
20
+ ): Core.APIPromise<TranslationVerbose>;
21
+ create(
22
+ body: TranslationCreateParams<'text' | 'srt' | 'vtt'>,
23
+ options?: Core.RequestOptions,
24
+ ): Core.APIPromise<string>;
25
+ create(body: TranslationCreateParams, options?: Core.RequestOptions): Core.APIPromise<Translation>;
26
+ create(
27
+ body: TranslationCreateParams,
28
+ options?: Core.RequestOptions,
29
+ ): Core.APIPromise<TranslationCreateResponse | string> {
13
30
  return this._client.post('/audio/translations', Core.multipartFormRequestOptions({ body, ...options }));
14
31
  }
15
32
  }
@@ -18,7 +35,33 @@ export interface Translation {
18
35
  text: string;
19
36
  }
20
37
 
21
- export interface TranslationCreateParams {
38
+ export interface TranslationVerbose {
39
+ /**
40
+ * The duration of the input audio.
41
+ */
42
+ duration: string;
43
+
44
+ /**
45
+ * The language of the output translation (always `english`).
46
+ */
47
+ language: string;
48
+
49
+ /**
50
+ * The translated text.
51
+ */
52
+ text: string;
53
+
54
+ /**
55
+ * Segments of the translated text and their corresponding details.
56
+ */
57
+ segments?: Array<TranscriptionsAPI.TranscriptionSegment>;
58
+ }
59
+
60
+ export type TranslationCreateResponse = Translation | TranslationVerbose;
61
+
62
+ export interface TranslationCreateParams<
63
+ ResponseFormat extends AudioAPI.AudioResponseFormat | undefined = AudioAPI.AudioResponseFormat | undefined,
64
+ > {
22
65
  /**
23
66
  * The audio file object (not file name) to translate, in one of these formats: flac,
24
67
  * mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
@@ -43,7 +86,7 @@ export interface TranslationCreateParams {
43
86
  * The format of the output, in one of these options: `json`, `text`, `srt`,
44
87
  * `verbose_json`, or `vtt`.
45
88
  */
46
- response_format?: AudioAPI.AudioResponseFormat;
89
+ response_format?: ResponseFormat;
47
90
 
48
91
  /**
49
92
  * The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
@@ -57,5 +100,11 @@ export interface TranslationCreateParams {
57
100
 
58
101
  export namespace Translations {
59
102
  export import Translation = TranslationsAPI.Translation;
60
- export import TranslationCreateParams = TranslationsAPI.TranslationCreateParams;
103
+ export import TranslationVerbose = TranslationsAPI.TranslationVerbose;
104
+ export import TranslationCreateResponse = TranslationsAPI.TranslationCreateResponse;
105
+ export type TranslationCreateParams<
106
+ ResponseFormat extends AudioAPI.AudioResponseFormat | undefined =
107
+ | AudioAPI.AudioResponseFormat
108
+ | undefined,
109
+ > = TranslationsAPI.TranslationCreateParams<ResponseFormat>;
61
110
  }
@@ -16,6 +16,7 @@ export type ChatModel =
16
16
  | 'gpt-4o'
17
17
  | 'gpt-4o-2024-08-06'
18
18
  | 'gpt-4o-2024-05-13'
19
+ | 'gpt-4o-realtime-preview-2024-10-01'
19
20
  | 'chatgpt-4o-latest'
20
21
  | 'gpt-4o-mini'
21
22
  | 'gpt-4o-mini-2024-07-18'
@@ -727,8 +727,12 @@ export type ChatCompletionCreateParams =
727
727
 
728
728
  export interface ChatCompletionCreateParamsBase {
729
729
  /**
730
- * A list of messages comprising the conversation so far.
731
- * [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
730
+ * A list of messages comprising the conversation so far. Depending on the
731
+ * [model](https://platform.openai.com/docs/models) you use, different message
732
+ * types (modalities) are supported, like
733
+ * [text](https://platform.openai.com/docs/guides/text-generation),
734
+ * [images](https://platform.openai.com/docs/guides/vision), and
735
+ * [audio](https://platform.openai.com/docs/guides/audio).
732
736
  */
733
737
  messages: Array<ChatCompletionMessageParam>;
734
738
 
@@ -806,6 +810,12 @@ export interface ChatCompletionCreateParamsBase {
806
810
  */
807
811
  max_tokens?: number | null;
808
812
 
813
+ /**
814
+ * Developer-defined tags and values used for filtering completions in the
815
+ * [dashboard](https://platform.openai.com/completions).
816
+ */
817
+ metadata?: Record<string, string> | null;
818
+
809
819
  /**
810
820
  * How many chat completion choices to generate for each input message. Note that
811
821
  * you will be charged based on the number of generated tokens across all of the
@@ -889,6 +899,12 @@ export interface ChatCompletionCreateParamsBase {
889
899
  */
890
900
  stop?: string | null | Array<string>;
891
901
 
902
+ /**
903
+ * Whether or not to store the output of this completion request for traffic
904
+ * logging in the [dashboard](https://platform.openai.com/completions).
905
+ */
906
+ store?: boolean | null;
907
+
892
908
  /**
893
909
  * If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
894
910
  * sent as data-only
@@ -125,6 +125,11 @@ export interface CompletionUsage {
125
125
  * Breakdown of tokens used in a completion.
126
126
  */
127
127
  completion_tokens_details?: CompletionUsage.CompletionTokensDetails;
128
+
129
+ /**
130
+ * Breakdown of tokens used in the prompt.
131
+ */
132
+ prompt_tokens_details?: CompletionUsage.PromptTokensDetails;
128
133
  }
129
134
 
130
135
  export namespace CompletionUsage {
@@ -132,11 +137,31 @@ export namespace CompletionUsage {
132
137
  * Breakdown of tokens used in a completion.
133
138
  */
134
139
  export interface CompletionTokensDetails {
140
+ /**
141
+ * Audio tokens generated by the model.
142
+ */
143
+ audio_tokens?: number;
144
+
135
145
  /**
136
146
  * Tokens generated by the model for reasoning.
137
147
  */
138
148
  reasoning_tokens?: number;
139
149
  }
150
+
151
+ /**
152
+ * Breakdown of tokens used in the prompt.
153
+ */
154
+ export interface PromptTokensDetails {
155
+ /**
156
+ * Audio input tokens present in the prompt.
157
+ */
158
+ audio_tokens?: number;
159
+
160
+ /**
161
+ * Cached tokens present in the prompt.
162
+ */
163
+ cached_tokens?: number;
164
+ }
140
165
  }
141
166
 
142
167
  export type CompletionCreateParams = CompletionCreateParamsNonStreaming | CompletionCreateParamsStreaming;
package/src/version.ts CHANGED
@@ -1 +1 @@
1
- export const VERSION = '4.65.0'; // x-release-please-version
1
+ export const VERSION = '4.67.0'; // x-release-please-version
package/version.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "4.65.0";
1
+ export declare const VERSION = "4.67.0";
2
2
  //# sourceMappingURL=version.d.ts.map
package/version.js CHANGED
@@ -1,5 +1,5 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.VERSION = void 0;
4
- exports.VERSION = '4.65.0'; // x-release-please-version
4
+ exports.VERSION = '4.67.0'; // x-release-please-version
5
5
  //# sourceMappingURL=version.js.map
package/version.mjs CHANGED
@@ -1,2 +1,2 @@
1
- export const VERSION = '4.65.0'; // x-release-please-version
1
+ export const VERSION = '4.67.0'; // x-release-please-version
2
2
  //# sourceMappingURL=version.mjs.map