@ai-sdk/openai 4.0.0-canary.68 → 4.0.0-canary.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/index.d.ts +32 -2
- package/dist/index.js +415 -1
- package/dist/index.js.map +1 -1
- package/docs/03-openai.mdx +25 -0
- package/package.json +5 -5
- package/src/index.ts +2 -0
- package/src/openai-provider.ts +38 -0
- package/src/realtime/index.ts +2 -0
- package/src/realtime/openai-realtime-event-mapper.ts +436 -0
- package/src/realtime/openai-realtime-model-options.ts +3 -0
- package/src/realtime/openai-realtime-model.ts +111 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @ai-sdk/openai
|
|
2
2
|
|
|
3
|
+
## 4.0.0-canary.69
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- ce769dd: feat(provider): add experimental Realtime API support for voice conversations
|
|
8
|
+
|
|
9
|
+
Adds first-class support for realtime (speech-to-speech) APIs:
|
|
10
|
+
|
|
11
|
+
- `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
|
|
12
|
+
- OpenAI, Google, and xAI realtime provider implementations
|
|
13
|
+
- `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
|
|
14
|
+
- `.getToken()` static method on each provider for server-side ephemeral token creation
|
|
15
|
+
- `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
|
|
16
|
+
- `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
|
|
17
|
+
- `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
|
|
18
|
+
|
|
19
|
+
- Updated dependencies [ce769dd]
|
|
20
|
+
- @ai-sdk/provider@4.0.0-canary.18
|
|
21
|
+
- @ai-sdk/provider-utils@5.0.0-canary.46
|
|
22
|
+
|
|
3
23
|
## 4.0.0-canary.68
|
|
4
24
|
|
|
5
25
|
### Patch Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as _ai_sdk_provider from '@ai-sdk/provider';
|
|
2
|
-
import { JSONValue, ProviderV4, LanguageModelV4, EmbeddingModelV4, ImageModelV4, TranscriptionModelV4, SpeechModelV4, FilesV4, SkillsV4 } from '@ai-sdk/provider';
|
|
2
|
+
import { JSONValue, ProviderV4, LanguageModelV4, EmbeddingModelV4, ImageModelV4, TranscriptionModelV4, SpeechModelV4, Experimental_RealtimeFactoryV4, FilesV4, SkillsV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
|
|
3
3
|
import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
|
|
4
4
|
import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
|
|
5
5
|
|
|
@@ -1150,6 +1150,11 @@ interface OpenAIProvider extends ProviderV4 {
|
|
|
1150
1150
|
* Creates a model for speech generation.
|
|
1151
1151
|
*/
|
|
1152
1152
|
speech(modelId: OpenAISpeechModelId): SpeechModelV4;
|
|
1153
|
+
/**
|
|
1154
|
+
* Creates an experimental realtime model for bidirectional audio/text
|
|
1155
|
+
* communication over WebSocket.
|
|
1156
|
+
*/
|
|
1157
|
+
experimental_realtime: Experimental_RealtimeFactoryV4;
|
|
1153
1158
|
/**
|
|
1154
1159
|
* Returns a FilesV4 interface for uploading files to OpenAI.
|
|
1155
1160
|
*/
|
|
@@ -1203,6 +1208,31 @@ declare function createOpenAI(options?: OpenAIProviderSettings): OpenAIProvider;
|
|
|
1203
1208
|
*/
|
|
1204
1209
|
declare const openai: OpenAIProvider;
|
|
1205
1210
|
|
|
1211
|
+
type OpenAIRealtimeModelConfig = {
|
|
1212
|
+
provider: string;
|
|
1213
|
+
baseURL: string;
|
|
1214
|
+
headers: () => Record<string, string | undefined>;
|
|
1215
|
+
fetch?: FetchFunction;
|
|
1216
|
+
};
|
|
1217
|
+
declare class OpenAIRealtimeModel implements Experimental_RealtimeModelV4 {
|
|
1218
|
+
readonly specificationVersion: "v4";
|
|
1219
|
+
readonly provider: string;
|
|
1220
|
+
readonly modelId: string;
|
|
1221
|
+
private readonly config;
|
|
1222
|
+
constructor(modelId: string, config: OpenAIRealtimeModelConfig);
|
|
1223
|
+
doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
|
|
1224
|
+
getWebSocketConfig(options: {
|
|
1225
|
+
token: string;
|
|
1226
|
+
url: string;
|
|
1227
|
+
}): {
|
|
1228
|
+
url: string;
|
|
1229
|
+
protocols?: string[];
|
|
1230
|
+
};
|
|
1231
|
+
parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent;
|
|
1232
|
+
serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): unknown;
|
|
1233
|
+
buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1206
1236
|
declare const openaiFilesOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
1207
1237
|
purpose?: string | undefined;
|
|
1208
1238
|
expiresAfter?: number | undefined;
|
|
@@ -1263,4 +1293,4 @@ type OpenaiResponsesSourceDocumentProviderMetadata = {
|
|
|
1263
1293
|
|
|
1264
1294
|
declare const VERSION: string;
|
|
1265
1295
|
|
|
1266
|
-
export { type OpenAILanguageModelChatOptions as OpenAIChatLanguageModelOptions, type OpenAIEmbeddingModelOptions, type OpenAIFilesOptions, type OpenAIImageModelEditOptions, type OpenAIImageModelGenerationOptions, type OpenAIImageModelOptions, type OpenAILanguageModelChatOptions, type OpenAILanguageModelCompletionOptions, type OpenAILanguageModelResponsesOptions, type OpenAIProvider, type OpenAIProviderSettings, type OpenAILanguageModelResponsesOptions as OpenAIResponsesProviderOptions, type OpenAISpeechModelOptions, type OpenAITranscriptionModelOptions, type OpenaiResponsesCompactionProviderMetadata, type OpenaiResponsesProviderMetadata, type OpenaiResponsesReasoningProviderMetadata, type OpenaiResponsesSourceDocumentProviderMetadata, type OpenaiResponsesTextProviderMetadata, VERSION, createOpenAI, openai };
|
|
1296
|
+
export { OpenAIRealtimeModel as Experimental_OpenAIRealtimeModel, type OpenAIRealtimeModelConfig as Experimental_OpenAIRealtimeModelConfig, type OpenAILanguageModelChatOptions as OpenAIChatLanguageModelOptions, type OpenAIEmbeddingModelOptions, type OpenAIFilesOptions, type OpenAIImageModelEditOptions, type OpenAIImageModelGenerationOptions, type OpenAIImageModelOptions, type OpenAILanguageModelChatOptions, type OpenAILanguageModelCompletionOptions, type OpenAILanguageModelResponsesOptions, type OpenAIProvider, type OpenAIProviderSettings, type OpenAILanguageModelResponsesOptions as OpenAIResponsesProviderOptions, type OpenAISpeechModelOptions, type OpenAITranscriptionModelOptions, type OpenaiResponsesCompactionProviderMetadata, type OpenaiResponsesProviderMetadata, type OpenaiResponsesReasoningProviderMetadata, type OpenaiResponsesSourceDocumentProviderMetadata, type OpenaiResponsesTextProviderMetadata, VERSION, createOpenAI, openai };
|
package/dist/index.js
CHANGED
|
@@ -2869,6 +2869,395 @@ var openaiTools = {
|
|
|
2869
2869
|
toolSearch
|
|
2870
2870
|
};
|
|
2871
2871
|
|
|
2872
|
+
// src/realtime/openai-realtime-event-mapper.ts
|
|
2873
|
+
function parseOpenAIRealtimeServerEvent(raw) {
|
|
2874
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s;
|
|
2875
|
+
const event = raw;
|
|
2876
|
+
const type = event.type;
|
|
2877
|
+
switch (type) {
|
|
2878
|
+
// ── Session lifecycle ──────────────────────────────────────────
|
|
2879
|
+
case "session.created":
|
|
2880
|
+
return {
|
|
2881
|
+
type: "session-created",
|
|
2882
|
+
sessionId: (_a = event.session) == null ? void 0 : _a.id,
|
|
2883
|
+
raw
|
|
2884
|
+
};
|
|
2885
|
+
case "session.updated":
|
|
2886
|
+
return { type: "session-updated", raw };
|
|
2887
|
+
// ── Input audio buffer ─────────────────────────────────────────
|
|
2888
|
+
case "input_audio_buffer.speech_started":
|
|
2889
|
+
return {
|
|
2890
|
+
type: "speech-started",
|
|
2891
|
+
itemId: event.item_id,
|
|
2892
|
+
raw
|
|
2893
|
+
};
|
|
2894
|
+
case "input_audio_buffer.speech_stopped":
|
|
2895
|
+
return {
|
|
2896
|
+
type: "speech-stopped",
|
|
2897
|
+
itemId: event.item_id,
|
|
2898
|
+
raw
|
|
2899
|
+
};
|
|
2900
|
+
case "input_audio_buffer.committed":
|
|
2901
|
+
return {
|
|
2902
|
+
type: "audio-committed",
|
|
2903
|
+
itemId: event.item_id,
|
|
2904
|
+
previousItemId: event.previous_item_id,
|
|
2905
|
+
raw
|
|
2906
|
+
};
|
|
2907
|
+
// ── Conversation items ─────────────────────────────────────────
|
|
2908
|
+
case "conversation.item.added":
|
|
2909
|
+
return {
|
|
2910
|
+
type: "conversation-item-added",
|
|
2911
|
+
itemId: (_c = (_b = event.item) == null ? void 0 : _b.id) != null ? _c : event.item_id,
|
|
2912
|
+
item: event.item,
|
|
2913
|
+
raw
|
|
2914
|
+
};
|
|
2915
|
+
case "conversation.item.input_audio_transcription.completed":
|
|
2916
|
+
return {
|
|
2917
|
+
type: "input-transcription-completed",
|
|
2918
|
+
itemId: event.item_id,
|
|
2919
|
+
transcript: (_d = event.transcript) != null ? _d : "",
|
|
2920
|
+
raw
|
|
2921
|
+
};
|
|
2922
|
+
// ── Response lifecycle ──────────────────────────────────────────
|
|
2923
|
+
case "response.created":
|
|
2924
|
+
return {
|
|
2925
|
+
type: "response-created",
|
|
2926
|
+
responseId: (_f = (_e = event.response) == null ? void 0 : _e.id) != null ? _f : event.response_id,
|
|
2927
|
+
raw
|
|
2928
|
+
};
|
|
2929
|
+
case "response.done":
|
|
2930
|
+
return {
|
|
2931
|
+
type: "response-done",
|
|
2932
|
+
responseId: (_h = (_g = event.response) == null ? void 0 : _g.id) != null ? _h : event.response_id,
|
|
2933
|
+
status: (_j = (_i = event.response) == null ? void 0 : _i.status) != null ? _j : "completed",
|
|
2934
|
+
raw
|
|
2935
|
+
};
|
|
2936
|
+
// ── Output item lifecycle ───────────────────────────────────────
|
|
2937
|
+
case "response.output_item.added":
|
|
2938
|
+
return {
|
|
2939
|
+
type: "output-item-added",
|
|
2940
|
+
responseId: event.response_id,
|
|
2941
|
+
itemId: (_l = (_k = event.item) == null ? void 0 : _k.id) != null ? _l : event.item_id,
|
|
2942
|
+
raw
|
|
2943
|
+
};
|
|
2944
|
+
case "response.output_item.done":
|
|
2945
|
+
return {
|
|
2946
|
+
type: "output-item-done",
|
|
2947
|
+
responseId: event.response_id,
|
|
2948
|
+
itemId: (_n = (_m = event.item) == null ? void 0 : _m.id) != null ? _n : event.item_id,
|
|
2949
|
+
raw
|
|
2950
|
+
};
|
|
2951
|
+
case "response.content_part.added":
|
|
2952
|
+
return {
|
|
2953
|
+
type: "content-part-added",
|
|
2954
|
+
responseId: event.response_id,
|
|
2955
|
+
itemId: event.item_id,
|
|
2956
|
+
raw
|
|
2957
|
+
};
|
|
2958
|
+
case "response.content_part.done":
|
|
2959
|
+
return {
|
|
2960
|
+
type: "content-part-done",
|
|
2961
|
+
responseId: event.response_id,
|
|
2962
|
+
itemId: event.item_id,
|
|
2963
|
+
raw
|
|
2964
|
+
};
|
|
2965
|
+
// ── Audio output ────────────────────────────────────────────────
|
|
2966
|
+
case "response.output_audio.delta":
|
|
2967
|
+
return {
|
|
2968
|
+
type: "audio-delta",
|
|
2969
|
+
responseId: event.response_id,
|
|
2970
|
+
itemId: event.item_id,
|
|
2971
|
+
delta: event.delta,
|
|
2972
|
+
raw
|
|
2973
|
+
};
|
|
2974
|
+
case "response.output_audio.done":
|
|
2975
|
+
return {
|
|
2976
|
+
type: "audio-done",
|
|
2977
|
+
responseId: event.response_id,
|
|
2978
|
+
itemId: event.item_id,
|
|
2979
|
+
raw
|
|
2980
|
+
};
|
|
2981
|
+
// ── Audio transcript output ─────────────────────────────────────
|
|
2982
|
+
case "response.output_audio_transcript.delta":
|
|
2983
|
+
return {
|
|
2984
|
+
type: "audio-transcript-delta",
|
|
2985
|
+
responseId: event.response_id,
|
|
2986
|
+
itemId: event.item_id,
|
|
2987
|
+
delta: event.delta,
|
|
2988
|
+
raw
|
|
2989
|
+
};
|
|
2990
|
+
case "response.output_audio_transcript.done":
|
|
2991
|
+
return {
|
|
2992
|
+
type: "audio-transcript-done",
|
|
2993
|
+
responseId: event.response_id,
|
|
2994
|
+
itemId: event.item_id,
|
|
2995
|
+
transcript: event.transcript,
|
|
2996
|
+
raw
|
|
2997
|
+
};
|
|
2998
|
+
// ── Text output ─────────────────────────────────────────────────
|
|
2999
|
+
case "response.output_text.delta":
|
|
3000
|
+
return {
|
|
3001
|
+
type: "text-delta",
|
|
3002
|
+
responseId: event.response_id,
|
|
3003
|
+
itemId: event.item_id,
|
|
3004
|
+
delta: event.delta,
|
|
3005
|
+
raw
|
|
3006
|
+
};
|
|
3007
|
+
case "response.output_text.done":
|
|
3008
|
+
return {
|
|
3009
|
+
type: "text-done",
|
|
3010
|
+
responseId: event.response_id,
|
|
3011
|
+
itemId: event.item_id,
|
|
3012
|
+
text: event.text,
|
|
3013
|
+
raw
|
|
3014
|
+
};
|
|
3015
|
+
// ── Function calling ────────────────────────────────────────────
|
|
3016
|
+
case "response.function_call_arguments.delta":
|
|
3017
|
+
return {
|
|
3018
|
+
type: "function-call-arguments-delta",
|
|
3019
|
+
responseId: event.response_id,
|
|
3020
|
+
itemId: event.item_id,
|
|
3021
|
+
callId: event.call_id,
|
|
3022
|
+
delta: event.delta,
|
|
3023
|
+
raw
|
|
3024
|
+
};
|
|
3025
|
+
case "response.function_call_arguments.done":
|
|
3026
|
+
return {
|
|
3027
|
+
type: "function-call-arguments-done",
|
|
3028
|
+
responseId: event.response_id,
|
|
3029
|
+
itemId: event.item_id,
|
|
3030
|
+
callId: event.call_id,
|
|
3031
|
+
name: event.name,
|
|
3032
|
+
arguments: event.arguments,
|
|
3033
|
+
raw
|
|
3034
|
+
};
|
|
3035
|
+
// ── Error ───────────────────────────────────────────────────────
|
|
3036
|
+
case "error":
|
|
3037
|
+
return {
|
|
3038
|
+
type: "error",
|
|
3039
|
+
message: (_q = (_p = (_o = event.error) == null ? void 0 : _o.message) != null ? _p : event.message) != null ? _q : "Unknown error",
|
|
3040
|
+
code: (_s = (_r = event.error) == null ? void 0 : _r.code) != null ? _s : event.code,
|
|
3041
|
+
raw
|
|
3042
|
+
};
|
|
3043
|
+
// ── Pass-through ────────────────────────────────────────────────
|
|
3044
|
+
default:
|
|
3045
|
+
return { type: "custom", rawType: type, raw };
|
|
3046
|
+
}
|
|
3047
|
+
}
|
|
3048
|
+
function serializeOpenAIRealtimeClientEvent(event, modelId) {
|
|
3049
|
+
switch (event.type) {
|
|
3050
|
+
case "session-update":
|
|
3051
|
+
return {
|
|
3052
|
+
type: "session.update",
|
|
3053
|
+
session: buildOpenAISessionConfig(event.config, modelId)
|
|
3054
|
+
};
|
|
3055
|
+
case "input-audio-append":
|
|
3056
|
+
return {
|
|
3057
|
+
type: "input_audio_buffer.append",
|
|
3058
|
+
audio: event.audio
|
|
3059
|
+
};
|
|
3060
|
+
case "input-audio-commit":
|
|
3061
|
+
return { type: "input_audio_buffer.commit" };
|
|
3062
|
+
case "input-audio-clear":
|
|
3063
|
+
return { type: "input_audio_buffer.clear" };
|
|
3064
|
+
case "conversation-item-create": {
|
|
3065
|
+
const item = event.item;
|
|
3066
|
+
switch (item.type) {
|
|
3067
|
+
case "text-message":
|
|
3068
|
+
return {
|
|
3069
|
+
type: "conversation.item.create",
|
|
3070
|
+
item: {
|
|
3071
|
+
type: "message",
|
|
3072
|
+
role: item.role,
|
|
3073
|
+
content: [{ type: "input_text", text: item.text }]
|
|
3074
|
+
}
|
|
3075
|
+
};
|
|
3076
|
+
case "audio-message":
|
|
3077
|
+
return {
|
|
3078
|
+
type: "conversation.item.create",
|
|
3079
|
+
item: {
|
|
3080
|
+
type: "message",
|
|
3081
|
+
role: item.role,
|
|
3082
|
+
content: [{ type: "input_audio", audio: item.audio }]
|
|
3083
|
+
}
|
|
3084
|
+
};
|
|
3085
|
+
case "function-call-output":
|
|
3086
|
+
return {
|
|
3087
|
+
type: "conversation.item.create",
|
|
3088
|
+
item: {
|
|
3089
|
+
type: "function_call_output",
|
|
3090
|
+
call_id: item.callId,
|
|
3091
|
+
output: item.output
|
|
3092
|
+
}
|
|
3093
|
+
};
|
|
3094
|
+
}
|
|
3095
|
+
break;
|
|
3096
|
+
}
|
|
3097
|
+
case "conversation-item-truncate":
|
|
3098
|
+
return {
|
|
3099
|
+
type: "conversation.item.truncate",
|
|
3100
|
+
item_id: event.itemId,
|
|
3101
|
+
content_index: event.contentIndex,
|
|
3102
|
+
audio_end_ms: event.audioEndMs
|
|
3103
|
+
};
|
|
3104
|
+
case "response-create":
|
|
3105
|
+
return {
|
|
3106
|
+
type: "response.create",
|
|
3107
|
+
...event.options != null ? {
|
|
3108
|
+
response: {
|
|
3109
|
+
...event.options.modalities != null ? { output_modalities: event.options.modalities } : {},
|
|
3110
|
+
...event.options.instructions != null ? { instructions: event.options.instructions } : {},
|
|
3111
|
+
...event.options.metadata != null ? { metadata: event.options.metadata } : {}
|
|
3112
|
+
}
|
|
3113
|
+
} : {}
|
|
3114
|
+
};
|
|
3115
|
+
case "response-cancel":
|
|
3116
|
+
return { type: "response.cancel" };
|
|
3117
|
+
}
|
|
3118
|
+
}
|
|
3119
|
+
function buildOpenAISessionConfig(config, modelId) {
|
|
3120
|
+
var _a;
|
|
3121
|
+
const session = {
|
|
3122
|
+
type: "realtime",
|
|
3123
|
+
model: modelId
|
|
3124
|
+
};
|
|
3125
|
+
if (config.instructions != null) {
|
|
3126
|
+
session.instructions = config.instructions;
|
|
3127
|
+
}
|
|
3128
|
+
if (config.outputModalities != null) {
|
|
3129
|
+
session.output_modalities = config.outputModalities;
|
|
3130
|
+
}
|
|
3131
|
+
const audio = {};
|
|
3132
|
+
if (config.inputAudioFormat != null || config.inputAudioTranscription != null || config.turnDetection != null) {
|
|
3133
|
+
const input = {};
|
|
3134
|
+
if (config.inputAudioFormat != null) {
|
|
3135
|
+
input.format = {
|
|
3136
|
+
type: config.inputAudioFormat.type,
|
|
3137
|
+
...config.inputAudioFormat.rate != null ? { rate: config.inputAudioFormat.rate } : {}
|
|
3138
|
+
};
|
|
3139
|
+
}
|
|
3140
|
+
if (config.turnDetection != null) {
|
|
3141
|
+
if (config.turnDetection.type === "disabled") {
|
|
3142
|
+
input.turn_detection = null;
|
|
3143
|
+
} else {
|
|
3144
|
+
const td = {
|
|
3145
|
+
type: config.turnDetection.type === "server-vad" ? "server_vad" : "semantic_vad"
|
|
3146
|
+
};
|
|
3147
|
+
if (config.turnDetection.threshold != null) {
|
|
3148
|
+
td.threshold = config.turnDetection.threshold;
|
|
3149
|
+
}
|
|
3150
|
+
if (config.turnDetection.silenceDurationMs != null) {
|
|
3151
|
+
td.silence_duration_ms = config.turnDetection.silenceDurationMs;
|
|
3152
|
+
}
|
|
3153
|
+
if (config.turnDetection.prefixPaddingMs != null) {
|
|
3154
|
+
td.prefix_padding_ms = config.turnDetection.prefixPaddingMs;
|
|
3155
|
+
}
|
|
3156
|
+
input.turn_detection = td;
|
|
3157
|
+
}
|
|
3158
|
+
}
|
|
3159
|
+
if (config.inputAudioTranscription != null) {
|
|
3160
|
+
input.transcription = {
|
|
3161
|
+
model: (_a = config.inputAudioTranscription.model) != null ? _a : "gpt-realtime-whisper",
|
|
3162
|
+
...config.inputAudioTranscription.language != null ? { language: config.inputAudioTranscription.language } : {},
|
|
3163
|
+
...config.inputAudioTranscription.prompt != null ? { prompt: config.inputAudioTranscription.prompt } : {}
|
|
3164
|
+
};
|
|
3165
|
+
}
|
|
3166
|
+
audio.input = input;
|
|
3167
|
+
}
|
|
3168
|
+
if (config.outputAudioFormat != null || config.voice != null) {
|
|
3169
|
+
const output = {};
|
|
3170
|
+
if (config.outputAudioFormat != null) {
|
|
3171
|
+
output.format = {
|
|
3172
|
+
type: config.outputAudioFormat.type,
|
|
3173
|
+
...config.outputAudioFormat.rate != null ? { rate: config.outputAudioFormat.rate } : {}
|
|
3174
|
+
};
|
|
3175
|
+
}
|
|
3176
|
+
if (config.voice != null) {
|
|
3177
|
+
output.voice = config.voice;
|
|
3178
|
+
}
|
|
3179
|
+
audio.output = output;
|
|
3180
|
+
}
|
|
3181
|
+
if (Object.keys(audio).length > 0) {
|
|
3182
|
+
session.audio = audio;
|
|
3183
|
+
}
|
|
3184
|
+
if (config.tools != null && config.tools.length > 0) {
|
|
3185
|
+
session.tools = config.tools.map((tool) => ({
|
|
3186
|
+
type: tool.type,
|
|
3187
|
+
name: tool.name,
|
|
3188
|
+
description: tool.description,
|
|
3189
|
+
parameters: tool.parameters
|
|
3190
|
+
}));
|
|
3191
|
+
session.tool_choice = "auto";
|
|
3192
|
+
}
|
|
3193
|
+
if (config.providerOptions != null) {
|
|
3194
|
+
Object.assign(session, config.providerOptions);
|
|
3195
|
+
}
|
|
3196
|
+
return session;
|
|
3197
|
+
}
|
|
3198
|
+
|
|
3199
|
+
// src/realtime/openai-realtime-model.ts
|
|
3200
|
+
var OpenAIRealtimeModel = class {
|
|
3201
|
+
constructor(modelId, config) {
|
|
3202
|
+
this.specificationVersion = "v4";
|
|
3203
|
+
this.modelId = modelId;
|
|
3204
|
+
this.provider = config.provider;
|
|
3205
|
+
this.config = config;
|
|
3206
|
+
}
|
|
3207
|
+
async doCreateClientSecret(options) {
|
|
3208
|
+
var _a;
|
|
3209
|
+
const fetchFn = (_a = this.config.fetch) != null ? _a : fetch;
|
|
3210
|
+
const url = `${this.config.baseURL}/realtime/client_secrets`;
|
|
3211
|
+
const session = options.sessionConfig != null ? buildOpenAISessionConfig(options.sessionConfig, this.modelId) : { type: "realtime", model: this.modelId };
|
|
3212
|
+
const response = await fetchFn(url, {
|
|
3213
|
+
method: "POST",
|
|
3214
|
+
headers: {
|
|
3215
|
+
...this.config.headers(),
|
|
3216
|
+
"Content-Type": "application/json"
|
|
3217
|
+
},
|
|
3218
|
+
body: JSON.stringify({
|
|
3219
|
+
session,
|
|
3220
|
+
...options.expiresAfterSeconds != null ? {
|
|
3221
|
+
// `anchor` is required by the client secrets endpoint; without it
|
|
3222
|
+
// the request fails with "Missing required parameter:
|
|
3223
|
+
// 'expires_after.anchor'".
|
|
3224
|
+
expires_after: {
|
|
3225
|
+
anchor: "created_at",
|
|
3226
|
+
seconds: options.expiresAfterSeconds
|
|
3227
|
+
}
|
|
3228
|
+
} : {}
|
|
3229
|
+
})
|
|
3230
|
+
});
|
|
3231
|
+
if (!response.ok) {
|
|
3232
|
+
const text = await response.text();
|
|
3233
|
+
throw new Error(
|
|
3234
|
+
`OpenAI realtime client secret request failed: ${response.status} ${text}`
|
|
3235
|
+
);
|
|
3236
|
+
}
|
|
3237
|
+
const data = await response.json();
|
|
3238
|
+
return {
|
|
3239
|
+
token: data.value,
|
|
3240
|
+
url: `wss://${new URL(this.config.baseURL).host}/v1/realtime?model=${encodeURIComponent(this.modelId)}`,
|
|
3241
|
+
expiresAt: data.expires_at
|
|
3242
|
+
};
|
|
3243
|
+
}
|
|
3244
|
+
getWebSocketConfig(options) {
|
|
3245
|
+
return {
|
|
3246
|
+
url: options.url,
|
|
3247
|
+
protocols: ["realtime", `openai-insecure-api-key.${options.token}`]
|
|
3248
|
+
};
|
|
3249
|
+
}
|
|
3250
|
+
parseServerEvent(raw) {
|
|
3251
|
+
return parseOpenAIRealtimeServerEvent(raw);
|
|
3252
|
+
}
|
|
3253
|
+
serializeClientEvent(event) {
|
|
3254
|
+
return serializeOpenAIRealtimeClientEvent(event, this.modelId);
|
|
3255
|
+
}
|
|
3256
|
+
buildSessionConfig(config) {
|
|
3257
|
+
return buildOpenAISessionConfig(config, this.modelId);
|
|
3258
|
+
}
|
|
3259
|
+
};
|
|
3260
|
+
|
|
2872
3261
|
// src/responses/openai-responses-language-model.ts
|
|
2873
3262
|
import {
|
|
2874
3263
|
APICallError
|
|
@@ -7322,7 +7711,7 @@ var OpenAISkills = class {
|
|
|
7322
7711
|
};
|
|
7323
7712
|
|
|
7324
7713
|
// src/version.ts
|
|
7325
|
-
var VERSION = true ? "4.0.0-canary.68" : "0.0.0-test";
|
|
7714
|
+
var VERSION = true ? "4.0.0-canary.69" : "0.0.0-test";
|
|
7326
7715
|
|
|
7327
7716
|
// src/openai-provider.ts
|
|
7328
7717
|
function createOpenAI(options = {}) {
|
|
@@ -7413,6 +7802,29 @@ function createOpenAI(options = {}) {
|
|
|
7413
7802
|
fileIdPrefixes: ["file-"]
|
|
7414
7803
|
});
|
|
7415
7804
|
};
|
|
7805
|
+
const createRealtimeModel = (modelId) => new OpenAIRealtimeModel(modelId, {
|
|
7806
|
+
provider: `${providerName}.realtime`,
|
|
7807
|
+
baseURL,
|
|
7808
|
+
headers: getHeaders,
|
|
7809
|
+
fetch: options.fetch
|
|
7810
|
+
});
|
|
7811
|
+
const experimentalRealtimeFactory = Object.assign(
|
|
7812
|
+
(modelId) => createRealtimeModel(modelId),
|
|
7813
|
+
{
|
|
7814
|
+
getToken: async (tokenOptions) => {
|
|
7815
|
+
const model = createRealtimeModel(tokenOptions.model);
|
|
7816
|
+
const secret = await model.doCreateClientSecret({
|
|
7817
|
+
sessionConfig: tokenOptions.sessionConfig,
|
|
7818
|
+
expiresAfterSeconds: tokenOptions.expiresAfterSeconds
|
|
7819
|
+
});
|
|
7820
|
+
return {
|
|
7821
|
+
token: secret.token,
|
|
7822
|
+
url: secret.url,
|
|
7823
|
+
expiresAt: secret.expiresAt
|
|
7824
|
+
};
|
|
7825
|
+
}
|
|
7826
|
+
}
|
|
7827
|
+
);
|
|
7416
7828
|
const provider = function(modelId) {
|
|
7417
7829
|
return createLanguageModel(modelId);
|
|
7418
7830
|
};
|
|
@@ -7433,11 +7845,13 @@ function createOpenAI(options = {}) {
|
|
|
7433
7845
|
provider.speechModel = createSpeechModel;
|
|
7434
7846
|
provider.files = createFiles;
|
|
7435
7847
|
provider.skills = createSkills;
|
|
7848
|
+
provider.experimental_realtime = experimentalRealtimeFactory;
|
|
7436
7849
|
provider.tools = openaiTools;
|
|
7437
7850
|
return provider;
|
|
7438
7851
|
}
|
|
7439
7852
|
var openai = createOpenAI();
|
|
7440
7853
|
export {
|
|
7854
|
+
OpenAIRealtimeModel as Experimental_OpenAIRealtimeModel,
|
|
7441
7855
|
VERSION,
|
|
7442
7856
|
createOpenAI,
|
|
7443
7857
|
openai
|