@ai-sdk/openai 4.0.0-canary.67 → 4.0.0-canary.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/dist/index.d.ts +32 -2
- package/dist/index.js +427 -11
- package/dist/index.js.map +1 -1
- package/dist/internal/index.js +12 -10
- package/dist/internal/index.js.map +1 -1
- package/docs/03-openai.mdx +25 -0
- package/package.json +3 -3
- package/src/index.ts +2 -0
- package/src/openai-provider.ts +38 -0
- package/src/realtime/index.ts +2 -0
- package/src/realtime/openai-realtime-event-mapper.ts +436 -0
- package/src/realtime/openai-realtime-model-options.ts +3 -0
- package/src/realtime/openai-realtime-model.ts +111 -0
- package/src/responses/convert-to-openai-responses-input.ts +13 -0
- package/src/responses/openai-responses-api.ts +1 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,35 @@
|
|
|
1
1
|
# @ai-sdk/openai
|
|
2
2
|
|
|
3
|
+
## 4.0.0-canary.69
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- ce769dd: feat(provider): add experimental Realtime API support for voice conversations
|
|
8
|
+
|
|
9
|
+
Adds first-class support for realtime (speech-to-speech) APIs:
|
|
10
|
+
|
|
11
|
+
- `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
|
|
12
|
+
- OpenAI, Google, and xAI realtime provider implementations
|
|
13
|
+
- `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
|
|
14
|
+
- `.getToken()` static method on each provider for server-side ephemeral token creation
|
|
15
|
+
- `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
|
|
16
|
+
- `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
|
|
17
|
+
- `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
|
|
18
|
+
|
|
19
|
+
- Updated dependencies [ce769dd]
|
|
20
|
+
- @ai-sdk/provider@4.0.0-canary.18
|
|
21
|
+
- @ai-sdk/provider-utils@5.0.0-canary.46
|
|
22
|
+
|
|
23
|
+
## 4.0.0-canary.68
|
|
24
|
+
|
|
25
|
+
### Patch Changes
|
|
26
|
+
|
|
27
|
+
- 94eba1b: fix(openai): round-trip `namespace` on function_call input items
|
|
28
|
+
|
|
29
|
+
When `tool_search` dispatches a deferred tool, the resulting `function_call` carries a `namespace` field identifying which deferred-tool group the model picked. `#14789` preserved this on the read side (`providerMetadata.openai.namespace`), but the write side still serialized `function_call` input items without `namespace`. Multi-step / multi-turn conversations then failed with `Missing namespace for function_call '<name>'. ... Round-trip the model's function_call item with its namespace field included.`
|
|
30
|
+
|
|
31
|
+
`convert-to-openai-responses-input.ts` now reads `namespace` from `providerOptions.openai.namespace` (or `providerMetadata.openai.namespace`) on `tool-call` parts and includes it on the serialized `function_call` item, mirroring how `itemId` is round-tripped.
|
|
32
|
+
|
|
3
33
|
## 4.0.0-canary.67
|
|
4
34
|
|
|
5
35
|
### Patch Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as _ai_sdk_provider from '@ai-sdk/provider';
|
|
2
|
-
import { JSONValue, ProviderV4, LanguageModelV4, EmbeddingModelV4, ImageModelV4, TranscriptionModelV4, SpeechModelV4, FilesV4, SkillsV4 } from '@ai-sdk/provider';
|
|
2
|
+
import { JSONValue, ProviderV4, LanguageModelV4, EmbeddingModelV4, ImageModelV4, TranscriptionModelV4, SpeechModelV4, Experimental_RealtimeFactoryV4, FilesV4, SkillsV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
|
|
3
3
|
import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
|
|
4
4
|
import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
|
|
5
5
|
|
|
@@ -1150,6 +1150,11 @@ interface OpenAIProvider extends ProviderV4 {
|
|
|
1150
1150
|
* Creates a model for speech generation.
|
|
1151
1151
|
*/
|
|
1152
1152
|
speech(modelId: OpenAISpeechModelId): SpeechModelV4;
|
|
1153
|
+
/**
|
|
1154
|
+
* Creates an experimental realtime model for bidirectional audio/text
|
|
1155
|
+
* communication over WebSocket.
|
|
1156
|
+
*/
|
|
1157
|
+
experimental_realtime: Experimental_RealtimeFactoryV4;
|
|
1153
1158
|
/**
|
|
1154
1159
|
* Returns a FilesV4 interface for uploading files to OpenAI.
|
|
1155
1160
|
*/
|
|
@@ -1203,6 +1208,31 @@ declare function createOpenAI(options?: OpenAIProviderSettings): OpenAIProvider;
|
|
|
1203
1208
|
*/
|
|
1204
1209
|
declare const openai: OpenAIProvider;
|
|
1205
1210
|
|
|
1211
|
+
/**
 * Construction settings for `OpenAIRealtimeModel`.
 */
type OpenAIRealtimeModelConfig = {
    /** Provider identifier; exposed unchanged as the model's `provider` property. */
    provider: string;
    /**
     * Base URL of the API. Client secrets are requested from
     * `${baseURL}/realtime/client_secrets`, and the host of this URL is reused
     * for the WebSocket URL returned with the secret.
     */
    baseURL: string;
    /** Called for each client-secret request to obtain the HTTP headers to send. */
    headers: () => Record<string, string | undefined>;
    /** Optional fetch implementation; the global `fetch` is used when omitted. */
    fetch?: FetchFunction;
};
|
|
1217
|
+
/**
 * OpenAI implementation of the experimental realtime model specification.
 *
 * Creates client secrets over HTTP and converts between the SDK's normalized
 * realtime events and OpenAI's wire-format events.
 */
declare class OpenAIRealtimeModel implements Experimental_RealtimeModelV4 {
    /** Always `"v4"`. */
    readonly specificationVersion: "v4";
    /** Provider identifier taken from the config passed to the constructor. */
    readonly provider: string;
    /** Realtime model id; sent as `model` in session payloads and in the WebSocket URL query. */
    readonly modelId: string;
    private readonly config;
    constructor(modelId: string, config: OpenAIRealtimeModelConfig);
    /**
     * POSTs to `${baseURL}/realtime/client_secrets` and resolves with the
     * token, the WebSocket URL and the expiry reported by the endpoint.
     * Rejects with an `Error` when the HTTP response is not ok.
     */
    doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
    /**
     * Returns the WebSocket connection parameters: the given `url` unchanged,
     * plus the sub-protocols `realtime` and `openai-insecure-api-key.<token>`.
     */
    getWebSocketConfig(options: {
        token: string;
        url: string;
    }): {
        url: string;
        protocols?: string[];
    };
    /** Maps a raw OpenAI server event onto a normalized server event. */
    parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent;
    /** Converts a normalized client event into the OpenAI wire-format event. */
    serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): unknown;
    /** Builds the OpenAI `session` payload for the given normalized session config. */
    buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
}
|
|
1235
|
+
|
|
1206
1236
|
declare const openaiFilesOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
1207
1237
|
purpose?: string | undefined;
|
|
1208
1238
|
expiresAfter?: number | undefined;
|
|
@@ -1263,4 +1293,4 @@ type OpenaiResponsesSourceDocumentProviderMetadata = {
|
|
|
1263
1293
|
|
|
1264
1294
|
declare const VERSION: string;
|
|
1265
1295
|
|
|
1266
|
-
export { type OpenAILanguageModelChatOptions as OpenAIChatLanguageModelOptions, type OpenAIEmbeddingModelOptions, type OpenAIFilesOptions, type OpenAIImageModelEditOptions, type OpenAIImageModelGenerationOptions, type OpenAIImageModelOptions, type OpenAILanguageModelChatOptions, type OpenAILanguageModelCompletionOptions, type OpenAILanguageModelResponsesOptions, type OpenAIProvider, type OpenAIProviderSettings, type OpenAILanguageModelResponsesOptions as OpenAIResponsesProviderOptions, type OpenAISpeechModelOptions, type OpenAITranscriptionModelOptions, type OpenaiResponsesCompactionProviderMetadata, type OpenaiResponsesProviderMetadata, type OpenaiResponsesReasoningProviderMetadata, type OpenaiResponsesSourceDocumentProviderMetadata, type OpenaiResponsesTextProviderMetadata, VERSION, createOpenAI, openai };
|
|
1296
|
+
export { OpenAIRealtimeModel as Experimental_OpenAIRealtimeModel, type OpenAIRealtimeModelConfig as Experimental_OpenAIRealtimeModelConfig, type OpenAILanguageModelChatOptions as OpenAIChatLanguageModelOptions, type OpenAIEmbeddingModelOptions, type OpenAIFilesOptions, type OpenAIImageModelEditOptions, type OpenAIImageModelGenerationOptions, type OpenAIImageModelOptions, type OpenAILanguageModelChatOptions, type OpenAILanguageModelCompletionOptions, type OpenAILanguageModelResponsesOptions, type OpenAIProvider, type OpenAIProviderSettings, type OpenAILanguageModelResponsesOptions as OpenAIResponsesProviderOptions, type OpenAISpeechModelOptions, type OpenAITranscriptionModelOptions, type OpenaiResponsesCompactionProviderMetadata, type OpenaiResponsesProviderMetadata, type OpenaiResponsesReasoningProviderMetadata, type OpenaiResponsesSourceDocumentProviderMetadata, type OpenaiResponsesTextProviderMetadata, VERSION, createOpenAI, openai };
|
package/dist/index.js
CHANGED
|
@@ -2869,6 +2869,395 @@ var openaiTools = {
|
|
|
2869
2869
|
toolSearch
|
|
2870
2870
|
};
|
|
2871
2871
|
|
|
2872
|
+
// src/realtime/openai-realtime-event-mapper.ts
|
|
2873
|
+
/**
 * Maps a raw OpenAI Realtime server event onto the SDK's normalized realtime
 * server-event shape.
 *
 * Every result carries the untouched payload in `raw`. Event types without a
 * dedicated mapping are passed through as `{ type: "custom", rawType, raw }`.
 *
 * @param raw Decoded server event, normally an object with a string `type`.
 *   `null` / `undefined` payloads are tolerated and reported as a "custom"
 *   event (with `rawType` undefined) instead of throwing.
 * @returns The normalized event object.
 */
function parseOpenAIRealtimeServerEvent(raw) {
  // A missing payload has no `type`; reading from an empty object routes it
  // to the pass-through branch rather than raising a TypeError.
  const event = raw != null ? raw : {};

  // `preferred` unless it is null/undefined, otherwise `fallback`.
  const firstDefined = (preferred, fallback) => preferred != null ? preferred : fallback;
  // Null-safe property read.
  const read = (holder, key) => holder != null ? holder[key] : void 0;

  // Events scoped to a single input/conversation item.
  const itemEvent = (type, extra) => ({
    type,
    itemId: event.item_id,
    ...extra,
    raw
  });
  // Events scoped to a response and one of its output items.
  const responseItemEvent = (type, extra) => ({
    type,
    responseId: event.response_id,
    itemId: event.item_id,
    ...extra,
    raw
  });
  // Item lifecycle events may carry the id on the nested item or at top level.
  const nestedItemId = () => firstDefined(read(event.item, "id"), event.item_id);
  // Response lifecycle events may carry the id on the nested response or at top level.
  const nestedResponseId = () => firstDefined(read(event.response, "id"), event.response_id);

  switch (event.type) {
    // Session lifecycle
    case "session.created":
      return { type: "session-created", sessionId: read(event.session, "id"), raw };
    case "session.updated":
      return { type: "session-updated", raw };

    // Input audio buffer
    case "input_audio_buffer.speech_started":
      return itemEvent("speech-started");
    case "input_audio_buffer.speech_stopped":
      return itemEvent("speech-stopped");
    case "input_audio_buffer.committed":
      return itemEvent("audio-committed", { previousItemId: event.previous_item_id });

    // Conversation items
    case "conversation.item.added":
      return {
        type: "conversation-item-added",
        itemId: nestedItemId(),
        item: event.item,
        raw
      };
    case "conversation.item.input_audio_transcription.completed":
      return itemEvent("input-transcription-completed", {
        transcript: firstDefined(event.transcript, "")
      });

    // Response lifecycle
    case "response.created":
      return { type: "response-created", responseId: nestedResponseId(), raw };
    case "response.done":
      return {
        type: "response-done",
        responseId: nestedResponseId(),
        // A response without an explicit status is reported as completed.
        status: firstDefined(read(event.response, "status"), "completed"),
        raw
      };

    // Output item lifecycle
    case "response.output_item.added":
      return {
        type: "output-item-added",
        responseId: event.response_id,
        itemId: nestedItemId(),
        raw
      };
    case "response.output_item.done":
      return {
        type: "output-item-done",
        responseId: event.response_id,
        itemId: nestedItemId(),
        raw
      };
    case "response.content_part.added":
      return responseItemEvent("content-part-added");
    case "response.content_part.done":
      return responseItemEvent("content-part-done");

    // Audio output
    case "response.output_audio.delta":
      return responseItemEvent("audio-delta", { delta: event.delta });
    case "response.output_audio.done":
      return responseItemEvent("audio-done");

    // Audio transcript output
    case "response.output_audio_transcript.delta":
      return responseItemEvent("audio-transcript-delta", { delta: event.delta });
    case "response.output_audio_transcript.done":
      return responseItemEvent("audio-transcript-done", { transcript: event.transcript });

    // Text output
    case "response.output_text.delta":
      return responseItemEvent("text-delta", { delta: event.delta });
    case "response.output_text.done":
      return responseItemEvent("text-done", { text: event.text });

    // Function calling
    case "response.function_call_arguments.delta":
      return responseItemEvent("function-call-arguments-delta", {
        callId: event.call_id,
        delta: event.delta
      });
    case "response.function_call_arguments.done":
      return responseItemEvent("function-call-arguments-done", {
        callId: event.call_id,
        name: event.name,
        arguments: event.arguments
      });

    // Error: details may sit on a nested `error` object or at the top level.
    case "error":
      return {
        type: "error",
        message: firstDefined(
          firstDefined(read(event.error, "message"), event.message),
          "Unknown error"
        ),
        code: firstDefined(read(event.error, "code"), event.code),
        raw
      };

    // Pass-through for everything without a dedicated mapping
    default:
      return { type: "custom", rawType: event.type, raw };
  }
}
|
|
3048
|
+
/**
 * Converts a normalized realtime client event into the OpenAI Realtime
 * wire-format event.
 *
 * @param event Normalized client event, discriminated by `event.type`.
 * @param modelId Model id, forwarded to the session builder for
 *   "session-update" events.
 * @returns The wire-format event object.
 * @throws {Error} When the event type, or the item type of a
 *   "conversation-item-create" event, is not supported. Previously such
 *   events silently produced `undefined`.
 */
function serializeOpenAIRealtimeClientEvent(event, modelId) {
  switch (event.type) {
    case "session-update":
      return {
        type: "session.update",
        session: buildOpenAISessionConfig(event.config, modelId)
      };

    case "input-audio-append":
      return { type: "input_audio_buffer.append", audio: event.audio };
    case "input-audio-commit":
      return { type: "input_audio_buffer.commit" };
    case "input-audio-clear":
      return { type: "input_audio_buffer.clear" };

    case "conversation-item-create": {
      const item = event.item;
      switch (item.type) {
        case "text-message":
          return {
            type: "conversation.item.create",
            item: {
              type: "message",
              role: item.role,
              content: [{ type: "input_text", text: item.text }]
            }
          };
        case "audio-message":
          return {
            type: "conversation.item.create",
            item: {
              type: "message",
              role: item.role,
              content: [{ type: "input_audio", audio: item.audio }]
            }
          };
        case "function-call-output":
          return {
            type: "conversation.item.create",
            item: {
              type: "function_call_output",
              call_id: item.callId,
              output: item.output
            }
          };
        default:
          // Fail loudly instead of handing `undefined` to the transport.
          throw new Error(
            `Unsupported realtime conversation item type: ${String(item.type)}`
          );
      }
    }

    case "conversation-item-truncate":
      return {
        type: "conversation.item.truncate",
        item_id: event.itemId,
        content_index: event.contentIndex,
        audio_end_ms: event.audioEndMs
      };

    case "response-create": {
      const options = event.options;
      if (options == null) {
        return { type: "response.create" };
      }
      // Only options that were actually provided are forwarded.
      return {
        type: "response.create",
        response: {
          ...(options.modalities != null ? { output_modalities: options.modalities } : {}),
          ...(options.instructions != null ? { instructions: options.instructions } : {}),
          ...(options.metadata != null ? { metadata: options.metadata } : {})
        }
      };
    }

    case "response-cancel":
      return { type: "response.cancel" };

    default:
      // Fail loudly instead of handing `undefined` to the transport.
      throw new Error(
        `Unsupported realtime client event type: ${String(event.type)}`
      );
  }
}
|
|
3119
|
+
/**
 * Builds the OpenAI Realtime `session` payload from a normalized session
 * config.
 *
 * Only settings present on `config` are emitted. Audio settings are grouped
 * under `audio.input` / `audio.output`, tools are sent with
 * `tool_choice: "auto"`, and `providerOptions` is shallow-merged last so its
 * keys override anything generated here.
 *
 * @param config Normalized session config.
 * @param modelId Model id written to `session.model`.
 * @returns The wire-format session object.
 */
function buildOpenAISessionConfig(config, modelId) {
  // `{ [key]: value }` when the value is set, otherwise nothing to spread.
  const optional = (key, value) => value != null ? { [key]: value } : {};

  const wireFormat = (format) => ({
    type: format.type,
    ...optional("rate", format.rate)
  });

  // "disabled" is expressed on the wire as an explicit null.
  const wireTurnDetection = (detection) => {
    if (detection.type === "disabled") {
      return null;
    }
    return {
      type: detection.type === "server-vad" ? "server_vad" : "semantic_vad",
      ...optional("threshold", detection.threshold),
      ...optional("silence_duration_ms", detection.silenceDurationMs),
      ...optional("prefix_padding_ms", detection.prefixPaddingMs)
    };
  };

  const wireTranscription = (transcription) => ({
    model: transcription.model != null ? transcription.model : "gpt-realtime-whisper",
    ...optional("language", transcription.language),
    ...optional("prompt", transcription.prompt)
  });

  const hasInputSettings = !(config.inputAudioFormat == null && config.inputAudioTranscription == null && config.turnDetection == null);
  const hasOutputSettings = !(config.outputAudioFormat == null && config.voice == null);

  const audio = {
    ...(hasInputSettings ? {
      input: {
        ...(config.inputAudioFormat != null ? { format: wireFormat(config.inputAudioFormat) } : {}),
        ...(config.turnDetection != null ? { turn_detection: wireTurnDetection(config.turnDetection) } : {}),
        ...(config.inputAudioTranscription != null ? { transcription: wireTranscription(config.inputAudioTranscription) } : {})
      }
    } : {}),
    ...(hasOutputSettings ? {
      output: {
        ...(config.outputAudioFormat != null ? { format: wireFormat(config.outputAudioFormat) } : {}),
        ...optional("voice", config.voice)
      }
    } : {})
  };

  const hasTools = config.tools != null && config.tools.length > 0;

  const session = {
    type: "realtime",
    model: modelId,
    ...optional("instructions", config.instructions),
    ...optional("output_modalities", config.outputModalities),
    ...(Object.keys(audio).length > 0 ? { audio } : {}),
    ...(hasTools ? {
      tools: config.tools.map(({ type, name, description, parameters }) => ({
        type,
        name,
        description,
        parameters
      })),
      tool_choice: "auto"
    } : {})
  };

  // Provider options are merged onto the finished session, overriding generated keys.
  return config.providerOptions != null ? Object.assign(session, config.providerOptions) : session;
}
|
|
3198
|
+
|
|
3199
|
+
// src/realtime/openai-realtime-model.ts
|
|
3200
|
+
/**
 * OpenAI implementation of the experimental realtime model.
 *
 * Requests client secrets over HTTP and delegates event translation and
 * session building to the realtime event-mapper functions.
 */
var OpenAIRealtimeModel = class {
  /**
   * @param modelId Realtime model id.
   * @param config Provider name, base URL, header factory and optional fetch.
   */
  constructor(modelId, config) {
    this.specificationVersion = "v4";
    this.modelId = modelId;
    this.provider = config.provider;
    this.config = config;
  }

  /**
   * Requests a client secret from `${baseURL}/realtime/client_secrets`.
   *
   * @param options `sessionConfig` (optional) is converted into the session
   *   payload; `expiresAfterSeconds` (optional) sets the secret's lifetime.
   * @returns The secret as `token`, the WebSocket `url` for this model, and
   *   `expiresAt` as reported by the endpoint.
   * @throws {Error} When the endpoint answers with a non-ok status; the
   *   message includes the status code and the response body.
   */
  async doCreateClientSecret(options) {
    const fetchFn = this.config.fetch != null ? this.config.fetch : fetch;
    const url = `${this.config.baseURL}/realtime/client_secrets`;

    // The header factory may report unset headers as `undefined`. Passing
    // those to fetch would send the literal string "undefined", so drop them.
    const headers = {};
    const configuredHeaders = this.config.headers();
    if (configuredHeaders != null) {
      for (const name of Object.keys(configuredHeaders)) {
        if (configuredHeaders[name] != null) {
          headers[name] = configuredHeaders[name];
        }
      }
    }
    headers["Content-Type"] = "application/json";

    const session = options.sessionConfig != null
      ? buildOpenAISessionConfig(options.sessionConfig, this.modelId)
      : { type: "realtime", model: this.modelId };

    const payload = options.expiresAfterSeconds != null
      ? {
          session,
          // `anchor` is required by the client secrets endpoint; without it
          // the request fails with "Missing required parameter:
          // 'expires_after.anchor'".
          expires_after: {
            anchor: "created_at",
            seconds: options.expiresAfterSeconds
          }
        }
      : { session };

    const response = await fetchFn(url, {
      method: "POST",
      headers,
      body: JSON.stringify(payload)
    });

    if (!response.ok) {
      const text = await response.text();
      throw new Error(
        `OpenAI realtime client secret request failed: ${response.status} ${text}`
      );
    }

    const data = await response.json();
    return {
      token: data.value,
      // Only the host of `baseURL` is reused; scheme and path are fixed here.
      url: `wss://${new URL(this.config.baseURL).host}/v1/realtime?model=${encodeURIComponent(this.modelId)}`,
      expiresAt: data.expires_at
    };
  }

  /**
   * Returns the WebSocket connection parameters: the given URL unchanged,
   * with the token passed as a sub-protocol entry.
   */
  getWebSocketConfig(options) {
    return {
      url: options.url,
      protocols: ["realtime", `openai-insecure-api-key.${options.token}`]
    };
  }

  /** Maps a raw server event onto a normalized server event. */
  parseServerEvent(raw) {
    return parseOpenAIRealtimeServerEvent(raw);
  }

  /** Converts a normalized client event into the wire-format event. */
  serializeClientEvent(event) {
    return serializeOpenAIRealtimeClientEvent(event, this.modelId);
  }

  /** Builds the wire-format session payload for this model. */
  buildSessionConfig(config) {
    return buildOpenAISessionConfig(config, this.modelId);
  }
};
|
|
3260
|
+
|
|
2872
3261
|
// src/responses/openai-responses-language-model.ts
|
|
2873
3262
|
import {
|
|
2874
3263
|
APICallError
|
|
@@ -2963,7 +3352,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
2963
3352
|
hasApplyPatchTool = false,
|
|
2964
3353
|
customProviderToolNames
|
|
2965
3354
|
}) {
|
|
2966
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s;
|
|
3355
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x;
|
|
2967
3356
|
let input = [];
|
|
2968
3357
|
const warnings = [];
|
|
2969
3358
|
const processedApprovalIds = /* @__PURE__ */ new Set();
|
|
@@ -3093,6 +3482,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3093
3482
|
}
|
|
3094
3483
|
case "tool-call": {
|
|
3095
3484
|
const id = (_f = (_c = (_b = part.providerOptions) == null ? void 0 : _b[providerOptionsName]) == null ? void 0 : _c.itemId) != null ? _f : (_e = (_d = part.providerMetadata) == null ? void 0 : _d[providerOptionsName]) == null ? void 0 : _e.itemId;
|
|
3485
|
+
const namespace = (_k = (_h = (_g = part.providerOptions) == null ? void 0 : _g[providerOptionsName]) == null ? void 0 : _h.namespace) != null ? _k : (_j = (_i = part.providerMetadata) == null ? void 0 : _i[providerOptionsName]) == null ? void 0 : _j.namespace;
|
|
3096
3486
|
if (hasConversation && id != null) {
|
|
3097
3487
|
break;
|
|
3098
3488
|
}
|
|
@@ -3116,7 +3506,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3116
3506
|
type: "tool_search_call",
|
|
3117
3507
|
id: id != null ? id : part.toolCallId,
|
|
3118
3508
|
execution,
|
|
3119
|
-
call_id: (
|
|
3509
|
+
call_id: (_l = parsedInput.call_id) != null ? _l : null,
|
|
3120
3510
|
status: "completed",
|
|
3121
3511
|
arguments: parsedInput.arguments
|
|
3122
3512
|
});
|
|
@@ -3131,7 +3521,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3131
3521
|
if (hasPreviousResponseId && store && id != null) {
|
|
3132
3522
|
break;
|
|
3133
3523
|
}
|
|
3134
|
-
const isProviderDefinedToolCall = hasLocalShellTool && resolvedToolName === "local_shell" || hasShellTool && resolvedToolName === "shell" || hasApplyPatchTool && resolvedToolName === "apply_patch" || ((
|
|
3524
|
+
const isProviderDefinedToolCall = hasLocalShellTool && resolvedToolName === "local_shell" || hasShellTool && resolvedToolName === "shell" || hasApplyPatchTool && resolvedToolName === "apply_patch" || ((_m = customProviderToolNames == null ? void 0 : customProviderToolNames.has(resolvedToolName)) != null ? _m : false);
|
|
3135
3525
|
if (store && id != null && isProviderDefinedToolCall) {
|
|
3136
3526
|
input.push({ type: "item_reference", id });
|
|
3137
3527
|
break;
|
|
@@ -3202,7 +3592,8 @@ async function convertToOpenAIResponsesInput({
|
|
|
3202
3592
|
type: "function_call",
|
|
3203
3593
|
call_id: part.toolCallId,
|
|
3204
3594
|
name: resolvedToolName,
|
|
3205
|
-
arguments: serializeToolCallArguments2(part.input)
|
|
3595
|
+
arguments: serializeToolCallArguments2(part.input),
|
|
3596
|
+
...namespace != null && { namespace }
|
|
3206
3597
|
});
|
|
3207
3598
|
break;
|
|
3208
3599
|
}
|
|
@@ -3218,7 +3609,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3218
3609
|
part.toolName
|
|
3219
3610
|
);
|
|
3220
3611
|
if (resolvedResultToolName === "tool_search") {
|
|
3221
|
-
const itemId = (
|
|
3612
|
+
const itemId = (_p = (_o = (_n = part.providerOptions) == null ? void 0 : _n[providerOptionsName]) == null ? void 0 : _o.itemId) != null ? _p : part.toolCallId;
|
|
3222
3613
|
if (store) {
|
|
3223
3614
|
input.push({ type: "item_reference", id: itemId });
|
|
3224
3615
|
} else if (part.output.type === "json") {
|
|
@@ -3259,7 +3650,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3259
3650
|
break;
|
|
3260
3651
|
}
|
|
3261
3652
|
if (store) {
|
|
3262
|
-
const itemId = (
|
|
3653
|
+
const itemId = (_s = (_r = (_q = part.providerOptions) == null ? void 0 : _q[providerOptionsName]) == null ? void 0 : _r.itemId) != null ? _s : part.toolCallId;
|
|
3263
3654
|
input.push({ type: "item_reference", id: itemId });
|
|
3264
3655
|
} else {
|
|
3265
3656
|
warnings.push({
|
|
@@ -3344,7 +3735,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3344
3735
|
}
|
|
3345
3736
|
case "custom": {
|
|
3346
3737
|
if (part.kind === "openai.compaction") {
|
|
3347
|
-
const providerOptions = (
|
|
3738
|
+
const providerOptions = (_t = part.providerOptions) == null ? void 0 : _t[providerOptionsName];
|
|
3348
3739
|
const id = providerOptions == null ? void 0 : providerOptions.itemId;
|
|
3349
3740
|
if (hasConversation && id != null) {
|
|
3350
3741
|
break;
|
|
@@ -3391,7 +3782,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3391
3782
|
}
|
|
3392
3783
|
const output = part.output;
|
|
3393
3784
|
if (output.type === "execution-denied") {
|
|
3394
|
-
const approvalId = (
|
|
3785
|
+
const approvalId = (_v = (_u = output.providerOptions) == null ? void 0 : _u.openai) == null ? void 0 : _v.approvalId;
|
|
3395
3786
|
if (approvalId) {
|
|
3396
3787
|
continue;
|
|
3397
3788
|
}
|
|
@@ -3465,7 +3856,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3465
3856
|
outputValue = output.value;
|
|
3466
3857
|
break;
|
|
3467
3858
|
case "execution-denied":
|
|
3468
|
-
outputValue = (
|
|
3859
|
+
outputValue = (_w = output.reason) != null ? _w : "Tool call execution denied.";
|
|
3469
3860
|
break;
|
|
3470
3861
|
case "json":
|
|
3471
3862
|
case "error-json":
|
|
@@ -3542,7 +3933,7 @@ async function convertToOpenAIResponsesInput({
|
|
|
3542
3933
|
contentValue = output.value;
|
|
3543
3934
|
break;
|
|
3544
3935
|
case "execution-denied":
|
|
3545
|
-
contentValue = (
|
|
3936
|
+
contentValue = (_x = output.reason) != null ? _x : "Tool call execution denied.";
|
|
3546
3937
|
break;
|
|
3547
3938
|
case "json":
|
|
3548
3939
|
case "error-json":
|
|
@@ -7320,7 +7711,7 @@ var OpenAISkills = class {
|
|
|
7320
7711
|
};
|
|
7321
7712
|
|
|
7322
7713
|
// src/version.ts
|
|
7323
|
-
var VERSION = true ? "4.0.0-canary.
|
|
7714
|
+
var VERSION = true ? "4.0.0-canary.69" : "0.0.0-test";
|
|
7324
7715
|
|
|
7325
7716
|
// src/openai-provider.ts
|
|
7326
7717
|
function createOpenAI(options = {}) {
|
|
@@ -7411,6 +7802,29 @@ function createOpenAI(options = {}) {
|
|
|
7411
7802
|
fileIdPrefixes: ["file-"]
|
|
7412
7803
|
});
|
|
7413
7804
|
};
|
|
7805
|
+
const createRealtimeModel = (modelId) => new OpenAIRealtimeModel(modelId, {
|
|
7806
|
+
provider: `${providerName}.realtime`,
|
|
7807
|
+
baseURL,
|
|
7808
|
+
headers: getHeaders,
|
|
7809
|
+
fetch: options.fetch
|
|
7810
|
+
});
|
|
7811
|
+
const experimentalRealtimeFactory = Object.assign(
|
|
7812
|
+
(modelId) => createRealtimeModel(modelId),
|
|
7813
|
+
{
|
|
7814
|
+
getToken: async (tokenOptions) => {
|
|
7815
|
+
const model = createRealtimeModel(tokenOptions.model);
|
|
7816
|
+
const secret = await model.doCreateClientSecret({
|
|
7817
|
+
sessionConfig: tokenOptions.sessionConfig,
|
|
7818
|
+
expiresAfterSeconds: tokenOptions.expiresAfterSeconds
|
|
7819
|
+
});
|
|
7820
|
+
return {
|
|
7821
|
+
token: secret.token,
|
|
7822
|
+
url: secret.url,
|
|
7823
|
+
expiresAt: secret.expiresAt
|
|
7824
|
+
};
|
|
7825
|
+
}
|
|
7826
|
+
}
|
|
7827
|
+
);
|
|
7414
7828
|
const provider = function(modelId) {
|
|
7415
7829
|
return createLanguageModel(modelId);
|
|
7416
7830
|
};
|
|
@@ -7431,11 +7845,13 @@ function createOpenAI(options = {}) {
|
|
|
7431
7845
|
provider.speechModel = createSpeechModel;
|
|
7432
7846
|
provider.files = createFiles;
|
|
7433
7847
|
provider.skills = createSkills;
|
|
7848
|
+
provider.experimental_realtime = experimentalRealtimeFactory;
|
|
7434
7849
|
provider.tools = openaiTools;
|
|
7435
7850
|
return provider;
|
|
7436
7851
|
}
|
|
7437
7852
|
var openai = createOpenAI();
|
|
7438
7853
|
export {
|
|
7854
|
+
OpenAIRealtimeModel as Experimental_OpenAIRealtimeModel,
|
|
7439
7855
|
VERSION,
|
|
7440
7856
|
createOpenAI,
|
|
7441
7857
|
openai
|