@ai-sdk/openai 4.0.0-canary.68 → 4.0.0-canary.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/dist/index.d.ts +32 -2
- package/dist/index.js +454 -9
- package/dist/index.js.map +1 -1
- package/dist/internal/index.js +39 -8
- package/dist/internal/index.js.map +1 -1
- package/docs/03-openai.mdx +103 -0
- package/package.json +5 -5
- package/src/index.ts +2 -0
- package/src/openai-provider.ts +38 -0
- package/src/realtime/index.ts +2 -0
- package/src/realtime/openai-realtime-event-mapper.ts +436 -0
- package/src/realtime/openai-realtime-model-options.ts +3 -0
- package/src/realtime/openai-realtime-model.ts +111 -0
- package/src/responses/openai-responses-api.ts +13 -5
- package/src/responses/openai-responses-prepare-tools.ts +64 -11
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# @ai-sdk/openai
|
|
2
2
|
|
|
3
|
+
## 4.0.0-canary.70
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 6a5800e: feat(openai): add namespaces for tool definitions
|
|
8
|
+
|
|
9
|
+
## 4.0.0-canary.69
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- ce769dd: feat(provider): add experimental Realtime API support for voice conversations
|
|
14
|
+
|
|
15
|
+
Adds first-class support for realtime (speech-to-speech) APIs:
|
|
16
|
+
|
|
17
|
+
- `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
|
|
18
|
+
- OpenAI, Google, and xAI realtime provider implementations
|
|
19
|
+
- `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
|
|
20
|
+
- `.getToken()` static method on each provider for server-side ephemeral token creation
|
|
21
|
+
- `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
|
|
22
|
+
- `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
|
|
23
|
+
- `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
|
|
24
|
+
|
|
25
|
+
- Updated dependencies [ce769dd]
|
|
26
|
+
- @ai-sdk/provider@4.0.0-canary.18
|
|
27
|
+
- @ai-sdk/provider-utils@5.0.0-canary.46
|
|
28
|
+
|
|
3
29
|
## 4.0.0-canary.68
|
|
4
30
|
|
|
5
31
|
### Patch Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as _ai_sdk_provider from '@ai-sdk/provider';
|
|
2
|
-
import { JSONValue, ProviderV4, LanguageModelV4, EmbeddingModelV4, ImageModelV4, TranscriptionModelV4, SpeechModelV4, FilesV4, SkillsV4 } from '@ai-sdk/provider';
|
|
2
|
+
import { JSONValue, ProviderV4, LanguageModelV4, EmbeddingModelV4, ImageModelV4, TranscriptionModelV4, SpeechModelV4, Experimental_RealtimeFactoryV4, FilesV4, SkillsV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
|
|
3
3
|
import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
|
|
4
4
|
import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
|
|
5
5
|
|
|
@@ -1150,6 +1150,11 @@ interface OpenAIProvider extends ProviderV4 {
|
|
|
1150
1150
|
* Creates a model for speech generation.
|
|
1151
1151
|
*/
|
|
1152
1152
|
speech(modelId: OpenAISpeechModelId): SpeechModelV4;
|
|
1153
|
+
/**
|
|
1154
|
+
* Creates an experimental realtime model for bidirectional audio/text
|
|
1155
|
+
* communication over WebSocket.
|
|
1156
|
+
*/
|
|
1157
|
+
experimental_realtime: Experimental_RealtimeFactoryV4;
|
|
1153
1158
|
/**
|
|
1154
1159
|
* Returns a FilesV4 interface for uploading files to OpenAI.
|
|
1155
1160
|
*/
|
|
@@ -1203,6 +1208,31 @@ declare function createOpenAI(options?: OpenAIProviderSettings): OpenAIProvider;
|
|
|
1203
1208
|
*/
|
|
1204
1209
|
declare const openai: OpenAIProvider;
|
|
1205
1210
|
|
|
1211
|
+
/**
 * Settings handed to the `OpenAIRealtimeModel` constructor.
 */
type OpenAIRealtimeModelConfig = {
    /** Identifier the model reports through its `provider` property. */
    provider: string;
    /**
     * API base URL. The client-secret endpoint is addressed relative to it
     * and its host is reused for the WebSocket URL.
     */
    baseURL: string;
    /**
     * Produces the HTTP headers for the client-secret request. Values are
     * allowed to be `undefined`.
     */
    headers: () => Record<string, string | undefined>;
    /** Custom fetch implementation; the global `fetch` is used when omitted. */
    fetch?: FetchFunction;
};
|
|
1217
|
+
/**
 * Experimental OpenAI realtime model: creates client secrets and translates
 * between the normalized realtime event/session types and OpenAI's wire
 * format.
 */
declare class OpenAIRealtimeModel implements Experimental_RealtimeModelV4 {
    /** Specification version implemented by this model. */
    readonly specificationVersion: "v4";
    /** Provider identifier taken from the constructor config. */
    readonly provider: string;
    /** Model id passed to the constructor. */
    readonly modelId: string;
    private readonly config;
    /**
     * @param modelId Realtime model id.
     * @param config Provider name, base URL, header factory and optional fetch.
     */
    constructor(modelId: string, config: OpenAIRealtimeModelConfig);
    /**
     * Requests a client secret from the `/realtime/client_secrets` endpoint.
     *
     * @param options Optional session config and expiry (in seconds).
     * @returns The secret token, the WebSocket URL and the expiry timestamp.
     */
    doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
    /**
     * Builds the WebSocket connection parameters for a token/URL pair.
     *
     * @returns The given URL plus the subprotocols that carry the token.
     */
    getWebSocketConfig(options: {
        token: string;
        url: string;
    }): {
        url: string;
        protocols?: string[];
    };
    /** Converts a raw OpenAI server event into a normalized server event. */
    parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent;
    /** Converts a normalized client event into its OpenAI wire payload. */
    serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): unknown;
    /** Converts a normalized session config into an OpenAI session object. */
    buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
}
|
|
1235
|
+
|
|
1206
1236
|
declare const openaiFilesOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
1207
1237
|
purpose?: string | undefined;
|
|
1208
1238
|
expiresAfter?: number | undefined;
|
|
@@ -1263,4 +1293,4 @@ type OpenaiResponsesSourceDocumentProviderMetadata = {
|
|
|
1263
1293
|
|
|
1264
1294
|
declare const VERSION: string;
|
|
1265
1295
|
|
|
1266
|
-
export { type OpenAILanguageModelChatOptions as OpenAIChatLanguageModelOptions, type OpenAIEmbeddingModelOptions, type OpenAIFilesOptions, type OpenAIImageModelEditOptions, type OpenAIImageModelGenerationOptions, type OpenAIImageModelOptions, type OpenAILanguageModelChatOptions, type OpenAILanguageModelCompletionOptions, type OpenAILanguageModelResponsesOptions, type OpenAIProvider, type OpenAIProviderSettings, type OpenAILanguageModelResponsesOptions as OpenAIResponsesProviderOptions, type OpenAISpeechModelOptions, type OpenAITranscriptionModelOptions, type OpenaiResponsesCompactionProviderMetadata, type OpenaiResponsesProviderMetadata, type OpenaiResponsesReasoningProviderMetadata, type OpenaiResponsesSourceDocumentProviderMetadata, type OpenaiResponsesTextProviderMetadata, VERSION, createOpenAI, openai };
|
|
1296
|
+
export { OpenAIRealtimeModel as Experimental_OpenAIRealtimeModel, type OpenAIRealtimeModelConfig as Experimental_OpenAIRealtimeModelConfig, type OpenAILanguageModelChatOptions as OpenAIChatLanguageModelOptions, type OpenAIEmbeddingModelOptions, type OpenAIFilesOptions, type OpenAIImageModelEditOptions, type OpenAIImageModelGenerationOptions, type OpenAIImageModelOptions, type OpenAILanguageModelChatOptions, type OpenAILanguageModelCompletionOptions, type OpenAILanguageModelResponsesOptions, type OpenAIProvider, type OpenAIProviderSettings, type OpenAILanguageModelResponsesOptions as OpenAIResponsesProviderOptions, type OpenAISpeechModelOptions, type OpenAITranscriptionModelOptions, type OpenaiResponsesCompactionProviderMetadata, type OpenaiResponsesProviderMetadata, type OpenaiResponsesReasoningProviderMetadata, type OpenaiResponsesSourceDocumentProviderMetadata, type OpenaiResponsesTextProviderMetadata, VERSION, createOpenAI, openai };
|
package/dist/index.js
CHANGED
|
@@ -2869,6 +2869,395 @@ var openaiTools = {
|
|
|
2869
2869
|
toolSearch
|
|
2870
2870
|
};
|
|
2871
2871
|
|
|
2872
|
+
// src/realtime/openai-realtime-event-mapper.ts
|
|
2873
|
+
/**
 * Maps one raw OpenAI Realtime server event onto the normalized event shape
 * returned by `OpenAIRealtimeModel.parseServerEvent`.
 *
 * Every result carries the untouched input as `raw`. Event types without a
 * dedicated mapping are passed through as `{ type: "custom", rawType, raw }`.
 *
 * @param raw Decoded server event; normally an object with a `type` field.
 * @returns The normalized event. `null`/`undefined` input yields a `custom`
 *   event with an undefined `rawType` instead of throwing.
 */
function parseOpenAIRealtimeServerEvent(raw) {
  // A nullish frame has no fields to read. Treat it like any other input
  // without a recognised `type` so it reaches the pass-through branch
  // instead of raising a TypeError.
  const event = raw != null ? raw : {};
  // `id` of a nested object that may be missing.
  const idOf = (holder) => (holder == null ? void 0 : holder.id);
  // Fallback for nullish values (kept ES2018-compatible, no `??`).
  const orElse = (value, fallback) => (value != null ? value : fallback);

  switch (event.type) {
    // ── Session lifecycle ──────────────────────────────────────────
    case "session.created":
      return { type: "session-created", sessionId: idOf(event.session), raw };
    case "session.updated":
      return { type: "session-updated", raw };

    // ── Input audio buffer ─────────────────────────────────────────
    case "input_audio_buffer.speech_started":
      return { type: "speech-started", itemId: event.item_id, raw };
    case "input_audio_buffer.speech_stopped":
      return { type: "speech-stopped", itemId: event.item_id, raw };
    case "input_audio_buffer.committed":
      return {
        type: "audio-committed",
        itemId: event.item_id,
        previousItemId: event.previous_item_id,
        raw
      };

    // ── Conversation items ─────────────────────────────────────────
    case "conversation.item.added":
      return {
        type: "conversation-item-added",
        // Prefer the id on the embedded item, fall back to the top-level id.
        itemId: orElse(idOf(event.item), event.item_id),
        item: event.item,
        raw
      };
    case "conversation.item.input_audio_transcription.completed":
      return {
        type: "input-transcription-completed",
        itemId: event.item_id,
        transcript: orElse(event.transcript, ""),
        raw
      };

    // ── Response lifecycle ─────────────────────────────────────────
    case "response.created":
      return {
        type: "response-created",
        responseId: orElse(idOf(event.response), event.response_id),
        raw
      };
    case "response.done":
      return {
        type: "response-done",
        responseId: orElse(idOf(event.response), event.response_id),
        // A missing status is reported as "completed".
        status: orElse(
          event.response == null ? void 0 : event.response.status,
          "completed"
        ),
        raw
      };

    // ── Output item lifecycle ──────────────────────────────────────
    case "response.output_item.added":
      return {
        type: "output-item-added",
        responseId: event.response_id,
        itemId: orElse(idOf(event.item), event.item_id),
        raw
      };
    case "response.output_item.done":
      return {
        type: "output-item-done",
        responseId: event.response_id,
        itemId: orElse(idOf(event.item), event.item_id),
        raw
      };
    case "response.content_part.added":
      return {
        type: "content-part-added",
        responseId: event.response_id,
        itemId: event.item_id,
        raw
      };
    case "response.content_part.done":
      return {
        type: "content-part-done",
        responseId: event.response_id,
        itemId: event.item_id,
        raw
      };

    // ── Audio output ───────────────────────────────────────────────
    case "response.output_audio.delta":
      return {
        type: "audio-delta",
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw
      };
    case "response.output_audio.done":
      return {
        type: "audio-done",
        responseId: event.response_id,
        itemId: event.item_id,
        raw
      };

    // ── Audio transcript output ────────────────────────────────────
    case "response.output_audio_transcript.delta":
      return {
        type: "audio-transcript-delta",
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw
      };
    case "response.output_audio_transcript.done":
      return {
        type: "audio-transcript-done",
        responseId: event.response_id,
        itemId: event.item_id,
        transcript: event.transcript,
        raw
      };

    // ── Text output ────────────────────────────────────────────────
    case "response.output_text.delta":
      return {
        type: "text-delta",
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw
      };
    case "response.output_text.done":
      return {
        type: "text-done",
        responseId: event.response_id,
        itemId: event.item_id,
        text: event.text,
        raw
      };

    // ── Function calling ───────────────────────────────────────────
    case "response.function_call_arguments.delta":
      return {
        type: "function-call-arguments-delta",
        responseId: event.response_id,
        itemId: event.item_id,
        callId: event.call_id,
        delta: event.delta,
        raw
      };
    case "response.function_call_arguments.done":
      return {
        type: "function-call-arguments-done",
        responseId: event.response_id,
        itemId: event.item_id,
        callId: event.call_id,
        name: event.name,
        arguments: event.arguments,
        raw
      };

    // ── Error ──────────────────────────────────────────────────────
    case "error": {
      // Details are read from the nested `error` object first, then from
      // the top level of the event.
      const detail = event.error;
      return {
        type: "error",
        message: orElse(
          orElse(detail == null ? void 0 : detail.message, event.message),
          "Unknown error"
        ),
        code: orElse(detail == null ? void 0 : detail.code, event.code),
        raw
      };
    }

    // ── Pass-through ───────────────────────────────────────────────
    default:
      return { type: "custom", rawType: event.type, raw };
  }
}
|
|
3048
|
+
function serializeOpenAIRealtimeClientEvent(event, modelId) {
|
|
3049
|
+
switch (event.type) {
|
|
3050
|
+
case "session-update":
|
|
3051
|
+
return {
|
|
3052
|
+
type: "session.update",
|
|
3053
|
+
session: buildOpenAISessionConfig(event.config, modelId)
|
|
3054
|
+
};
|
|
3055
|
+
case "input-audio-append":
|
|
3056
|
+
return {
|
|
3057
|
+
type: "input_audio_buffer.append",
|
|
3058
|
+
audio: event.audio
|
|
3059
|
+
};
|
|
3060
|
+
case "input-audio-commit":
|
|
3061
|
+
return { type: "input_audio_buffer.commit" };
|
|
3062
|
+
case "input-audio-clear":
|
|
3063
|
+
return { type: "input_audio_buffer.clear" };
|
|
3064
|
+
case "conversation-item-create": {
|
|
3065
|
+
const item = event.item;
|
|
3066
|
+
switch (item.type) {
|
|
3067
|
+
case "text-message":
|
|
3068
|
+
return {
|
|
3069
|
+
type: "conversation.item.create",
|
|
3070
|
+
item: {
|
|
3071
|
+
type: "message",
|
|
3072
|
+
role: item.role,
|
|
3073
|
+
content: [{ type: "input_text", text: item.text }]
|
|
3074
|
+
}
|
|
3075
|
+
};
|
|
3076
|
+
case "audio-message":
|
|
3077
|
+
return {
|
|
3078
|
+
type: "conversation.item.create",
|
|
3079
|
+
item: {
|
|
3080
|
+
type: "message",
|
|
3081
|
+
role: item.role,
|
|
3082
|
+
content: [{ type: "input_audio", audio: item.audio }]
|
|
3083
|
+
}
|
|
3084
|
+
};
|
|
3085
|
+
case "function-call-output":
|
|
3086
|
+
return {
|
|
3087
|
+
type: "conversation.item.create",
|
|
3088
|
+
item: {
|
|
3089
|
+
type: "function_call_output",
|
|
3090
|
+
call_id: item.callId,
|
|
3091
|
+
output: item.output
|
|
3092
|
+
}
|
|
3093
|
+
};
|
|
3094
|
+
}
|
|
3095
|
+
break;
|
|
3096
|
+
}
|
|
3097
|
+
case "conversation-item-truncate":
|
|
3098
|
+
return {
|
|
3099
|
+
type: "conversation.item.truncate",
|
|
3100
|
+
item_id: event.itemId,
|
|
3101
|
+
content_index: event.contentIndex,
|
|
3102
|
+
audio_end_ms: event.audioEndMs
|
|
3103
|
+
};
|
|
3104
|
+
case "response-create":
|
|
3105
|
+
return {
|
|
3106
|
+
type: "response.create",
|
|
3107
|
+
...event.options != null ? {
|
|
3108
|
+
response: {
|
|
3109
|
+
...event.options.modalities != null ? { output_modalities: event.options.modalities } : {},
|
|
3110
|
+
...event.options.instructions != null ? { instructions: event.options.instructions } : {},
|
|
3111
|
+
...event.options.metadata != null ? { metadata: event.options.metadata } : {}
|
|
3112
|
+
}
|
|
3113
|
+
} : {}
|
|
3114
|
+
};
|
|
3115
|
+
case "response-cancel":
|
|
3116
|
+
return { type: "response.cancel" };
|
|
3117
|
+
}
|
|
3118
|
+
}
|
|
3119
|
+
/**
 * Converts a normalized realtime session config into an OpenAI `realtime`
 * session object.
 *
 * Only settings present on `config` are emitted. When at least one tool is
 * supplied, `tool_choice` is set to "auto". Entries of
 * `config.providerOptions` are copied onto the session last, so they can
 * override any generated field.
 *
 * @param config Normalized session config.
 * @param modelId Model id written to `session.model`.
 * @returns The session object in OpenAI wire format.
 */
function buildOpenAISessionConfig(config, modelId) {
  // { type, rate? } for input and output audio formats.
  const toWireFormat = (format) => ({
    type: format.type,
    ...(format.rate != null ? { rate: format.rate } : {})
  });

  // "disabled" is expressed as an explicit null; every type other than
  // "server-vad" is sent as "semantic_vad".
  const toWireTurnDetection = (detection) => {
    if (detection.type === "disabled") {
      return null;
    }
    return {
      type: detection.type === "server-vad" ? "server_vad" : "semantic_vad",
      ...(detection.threshold != null ? { threshold: detection.threshold } : {}),
      ...(detection.silenceDurationMs != null
        ? { silence_duration_ms: detection.silenceDurationMs }
        : {}),
      ...(detection.prefixPaddingMs != null
        ? { prefix_padding_ms: detection.prefixPaddingMs }
        : {})
    };
  };

  // The transcription model falls back to "gpt-realtime-whisper".
  const toWireTranscription = (transcription) => ({
    model: transcription.model != null ? transcription.model : "gpt-realtime-whisper",
    ...(transcription.language != null ? { language: transcription.language } : {}),
    ...(transcription.prompt != null ? { prompt: transcription.prompt } : {})
  });

  const {
    inputAudioFormat,
    inputAudioTranscription,
    turnDetection,
    outputAudioFormat,
    voice,
    tools
  } = config;

  const wantsInput =
    inputAudioFormat != null || inputAudioTranscription != null || turnDetection != null;
  const wantsOutput = outputAudioFormat != null || voice != null;

  const audio = {
    ...(wantsInput
      ? {
          input: {
            ...(inputAudioFormat != null ? { format: toWireFormat(inputAudioFormat) } : {}),
            ...(turnDetection != null
              ? { turn_detection: toWireTurnDetection(turnDetection) }
              : {}),
            ...(inputAudioTranscription != null
              ? { transcription: toWireTranscription(inputAudioTranscription) }
              : {})
          }
        }
      : {}),
    ...(wantsOutput
      ? {
          output: {
            ...(outputAudioFormat != null ? { format: toWireFormat(outputAudioFormat) } : {}),
            ...(voice != null ? { voice } : {})
          }
        }
      : {})
  };

  const hasTools = tools != null && tools.length > 0;

  const session = {
    type: "realtime",
    model: modelId,
    ...(config.instructions != null ? { instructions: config.instructions } : {}),
    ...(config.outputModalities != null
      ? { output_modalities: config.outputModalities }
      : {}),
    ...(Object.keys(audio).length > 0 ? { audio } : {}),
    ...(hasTools
      ? {
          tools: tools.map((tool) => ({
            type: tool.type,
            name: tool.name,
            description: tool.description,
            parameters: tool.parameters
          })),
          tool_choice: "auto"
        }
      : {})
  };

  // Provider options are merged in place so they win over generated fields.
  return config.providerOptions != null
    ? Object.assign(session, config.providerOptions)
    : session;
}
|
|
3198
|
+
|
|
3199
|
+
// src/realtime/openai-realtime-model.ts
|
|
3200
|
+
var OpenAIRealtimeModel = class {
|
|
3201
|
+
constructor(modelId, config) {
|
|
3202
|
+
this.specificationVersion = "v4";
|
|
3203
|
+
this.modelId = modelId;
|
|
3204
|
+
this.provider = config.provider;
|
|
3205
|
+
this.config = config;
|
|
3206
|
+
}
|
|
3207
|
+
async doCreateClientSecret(options) {
|
|
3208
|
+
var _a;
|
|
3209
|
+
const fetchFn = (_a = this.config.fetch) != null ? _a : fetch;
|
|
3210
|
+
const url = `${this.config.baseURL}/realtime/client_secrets`;
|
|
3211
|
+
const session = options.sessionConfig != null ? buildOpenAISessionConfig(options.sessionConfig, this.modelId) : { type: "realtime", model: this.modelId };
|
|
3212
|
+
const response = await fetchFn(url, {
|
|
3213
|
+
method: "POST",
|
|
3214
|
+
headers: {
|
|
3215
|
+
...this.config.headers(),
|
|
3216
|
+
"Content-Type": "application/json"
|
|
3217
|
+
},
|
|
3218
|
+
body: JSON.stringify({
|
|
3219
|
+
session,
|
|
3220
|
+
...options.expiresAfterSeconds != null ? {
|
|
3221
|
+
// `anchor` is required by the client secrets endpoint; without it
|
|
3222
|
+
// the request fails with "Missing required parameter:
|
|
3223
|
+
// 'expires_after.anchor'".
|
|
3224
|
+
expires_after: {
|
|
3225
|
+
anchor: "created_at",
|
|
3226
|
+
seconds: options.expiresAfterSeconds
|
|
3227
|
+
}
|
|
3228
|
+
} : {}
|
|
3229
|
+
})
|
|
3230
|
+
});
|
|
3231
|
+
if (!response.ok) {
|
|
3232
|
+
const text = await response.text();
|
|
3233
|
+
throw new Error(
|
|
3234
|
+
`OpenAI realtime client secret request failed: ${response.status} ${text}`
|
|
3235
|
+
);
|
|
3236
|
+
}
|
|
3237
|
+
const data = await response.json();
|
|
3238
|
+
return {
|
|
3239
|
+
token: data.value,
|
|
3240
|
+
url: `wss://${new URL(this.config.baseURL).host}/v1/realtime?model=${encodeURIComponent(this.modelId)}`,
|
|
3241
|
+
expiresAt: data.expires_at
|
|
3242
|
+
};
|
|
3243
|
+
}
|
|
3244
|
+
getWebSocketConfig(options) {
|
|
3245
|
+
return {
|
|
3246
|
+
url: options.url,
|
|
3247
|
+
protocols: ["realtime", `openai-insecure-api-key.${options.token}`]
|
|
3248
|
+
};
|
|
3249
|
+
}
|
|
3250
|
+
parseServerEvent(raw) {
|
|
3251
|
+
return parseOpenAIRealtimeServerEvent(raw);
|
|
3252
|
+
}
|
|
3253
|
+
serializeClientEvent(event) {
|
|
3254
|
+
return serializeOpenAIRealtimeClientEvent(event, this.modelId);
|
|
3255
|
+
}
|
|
3256
|
+
buildSessionConfig(config) {
|
|
3257
|
+
return buildOpenAISessionConfig(config, this.modelId);
|
|
3258
|
+
}
|
|
3259
|
+
};
|
|
3260
|
+
|
|
2872
3261
|
// src/responses/openai-responses-language-model.ts
|
|
2873
3262
|
import {
|
|
2874
3263
|
APICallError
|
|
@@ -4805,20 +5194,37 @@ async function prepareResponsesTools({
|
|
|
4805
5194
|
return { tools: void 0, toolChoice: void 0, toolWarnings };
|
|
4806
5195
|
}
|
|
4807
5196
|
const openaiTools2 = [];
|
|
5197
|
+
const namespaceTools = /* @__PURE__ */ new Map();
|
|
4808
5198
|
const resolvedCustomProviderToolNames = customProviderToolNames != null ? customProviderToolNames : /* @__PURE__ */ new Set();
|
|
4809
5199
|
for (const tool of tools) {
|
|
4810
5200
|
switch (tool.type) {
|
|
4811
5201
|
case "function": {
|
|
4812
5202
|
const openaiOptions = (_a = tool.providerOptions) == null ? void 0 : _a.openai;
|
|
4813
|
-
const deferLoading = openaiOptions == null ? void 0 : openaiOptions.deferLoading;
|
|
4814
|
-
|
|
4815
|
-
|
|
4816
|
-
name: tool.name,
|
|
4817
|
-
description: tool.description,
|
|
4818
|
-
parameters: tool.inputSchema,
|
|
4819
|
-
...tool.strict != null ? { strict: tool.strict } : {},
|
|
4820
|
-
...deferLoading != null ? { defer_loading: deferLoading } : {}
|
|
5203
|
+
const openaiFunctionTool = prepareFunctionTool({
|
|
5204
|
+
tool,
|
|
5205
|
+
options: openaiOptions
|
|
4821
5206
|
});
|
|
5207
|
+
const namespace = openaiOptions == null ? void 0 : openaiOptions.namespace;
|
|
5208
|
+
if (namespace == null) {
|
|
5209
|
+
openaiTools2.push(openaiFunctionTool);
|
|
5210
|
+
} else {
|
|
5211
|
+
let namespaceTool = namespaceTools.get(namespace.name);
|
|
5212
|
+
if (namespaceTool == null) {
|
|
5213
|
+
namespaceTool = {
|
|
5214
|
+
type: "namespace",
|
|
5215
|
+
name: namespace.name,
|
|
5216
|
+
description: namespace.description,
|
|
5217
|
+
tools: []
|
|
5218
|
+
};
|
|
5219
|
+
namespaceTools.set(namespace.name, namespaceTool);
|
|
5220
|
+
openaiTools2.push(namespaceTool);
|
|
5221
|
+
} else if (namespaceTool.description !== namespace.description) {
|
|
5222
|
+
throw new UnsupportedFunctionalityError5({
|
|
5223
|
+
functionality: `conflicting descriptions for OpenAI tool namespace "${namespace.name}"`
|
|
5224
|
+
});
|
|
5225
|
+
}
|
|
5226
|
+
namespaceTool.tools.push(openaiFunctionTool);
|
|
5227
|
+
}
|
|
4822
5228
|
break;
|
|
4823
5229
|
}
|
|
4824
5230
|
case "provider": {
|
|
@@ -5031,6 +5437,20 @@ async function prepareResponsesTools({
|
|
|
5031
5437
|
}
|
|
5032
5438
|
}
|
|
5033
5439
|
}
|
|
5440
|
+
/**
 * Builds the OpenAI `function` tool definition for a single function tool.
 *
 * @param tool Function tool; `name`, `description`, `inputSchema` and the
 *   optional `strict` flag are read from it.
 * @param options OpenAI provider options for the tool (may be nullish);
 *   only `deferLoading` is read here.
 * @returns The tool definition. `strict` and `defer_loading` are present
 *   only when the corresponding input value is non-nullish.
 */
function prepareFunctionTool({ tool, options }) {
  const strictPart = tool.strict == null ? {} : { strict: tool.strict };
  const deferLoading = options != null ? options.deferLoading : void 0;
  const deferPart = deferLoading == null ? {} : { defer_loading: deferLoading };

  return Object.assign(
    {
      type: "function",
      name: tool.name,
      description: tool.description,
      parameters: tool.inputSchema
    },
    strictPart,
    deferPart
  );
}
|
|
5034
5454
|
function mapShellEnvironment(environment) {
|
|
5035
5455
|
if (environment.type === "containerReference") {
|
|
5036
5456
|
const env2 = environment;
|
|
@@ -7322,7 +7742,7 @@ var OpenAISkills = class {
|
|
|
7322
7742
|
};
|
|
7323
7743
|
|
|
7324
7744
|
// src/version.ts
|
|
7325
|
-
var VERSION = true ? "4.0.0-canary.68" : "0.0.0-test";
|
|
7745
|
+
var VERSION = true ? "4.0.0-canary.70" : "0.0.0-test";
|
|
7326
7746
|
|
|
7327
7747
|
// src/openai-provider.ts
|
|
7328
7748
|
function createOpenAI(options = {}) {
|
|
@@ -7413,6 +7833,29 @@ function createOpenAI(options = {}) {
|
|
|
7413
7833
|
fileIdPrefixes: ["file-"]
|
|
7414
7834
|
});
|
|
7415
7835
|
};
|
|
7836
|
+
const createRealtimeModel = (modelId) => new OpenAIRealtimeModel(modelId, {
|
|
7837
|
+
provider: `${providerName}.realtime`,
|
|
7838
|
+
baseURL,
|
|
7839
|
+
headers: getHeaders,
|
|
7840
|
+
fetch: options.fetch
|
|
7841
|
+
});
|
|
7842
|
+
const experimentalRealtimeFactory = Object.assign(
|
|
7843
|
+
(modelId) => createRealtimeModel(modelId),
|
|
7844
|
+
{
|
|
7845
|
+
getToken: async (tokenOptions) => {
|
|
7846
|
+
const model = createRealtimeModel(tokenOptions.model);
|
|
7847
|
+
const secret = await model.doCreateClientSecret({
|
|
7848
|
+
sessionConfig: tokenOptions.sessionConfig,
|
|
7849
|
+
expiresAfterSeconds: tokenOptions.expiresAfterSeconds
|
|
7850
|
+
});
|
|
7851
|
+
return {
|
|
7852
|
+
token: secret.token,
|
|
7853
|
+
url: secret.url,
|
|
7854
|
+
expiresAt: secret.expiresAt
|
|
7855
|
+
};
|
|
7856
|
+
}
|
|
7857
|
+
}
|
|
7858
|
+
);
|
|
7416
7859
|
const provider = function(modelId) {
|
|
7417
7860
|
return createLanguageModel(modelId);
|
|
7418
7861
|
};
|
|
@@ -7433,11 +7876,13 @@ function createOpenAI(options = {}) {
|
|
|
7433
7876
|
provider.speechModel = createSpeechModel;
|
|
7434
7877
|
provider.files = createFiles;
|
|
7435
7878
|
provider.skills = createSkills;
|
|
7879
|
+
provider.experimental_realtime = experimentalRealtimeFactory;
|
|
7436
7880
|
provider.tools = openaiTools;
|
|
7437
7881
|
return provider;
|
|
7438
7882
|
}
|
|
7439
7883
|
var openai = createOpenAI();
|
|
7440
7884
|
export {
|
|
7885
|
+
OpenAIRealtimeModel as Experimental_OpenAIRealtimeModel,
|
|
7441
7886
|
VERSION,
|
|
7442
7887
|
createOpenAI,
|
|
7443
7888
|
openai
|