@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +943 -0
- package/index.ts +379 -149
- package/openclaw.plugin.json +384 -157
- package/package.json +35 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +273 -12
- package/src/config.ts +355 -72
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +243 -19
- package/src/manager/events.ts +61 -31
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +528 -0
- package/src/manager/outbound.ts +163 -57
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +321 -0
- package/src/response-generator.ts +213 -53
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +429 -0
- package/src/runtime.ts +270 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +28 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +523 -102
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
|
@@ -4,16 +4,24 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import crypto from "node:crypto";
|
|
7
|
-
import
|
|
8
|
-
import {
|
|
7
|
+
import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/model-session-runtime";
|
|
8
|
+
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
|
|
9
|
+
import type { SessionEntry } from "../api.js";
|
|
10
|
+
import { resolveVoiceCallSessionKey, type VoiceCallConfig } from "./config.js";
|
|
11
|
+
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
|
12
|
+
import { resolveVoiceResponseModel } from "./response-model.js";
|
|
9
13
|
|
|
10
14
|
export type VoiceResponseParams = {
|
|
11
15
|
/** Voice call config */
|
|
12
16
|
voiceConfig: VoiceCallConfig;
|
|
13
17
|
/** Core OpenClaw config */
|
|
14
18
|
coreConfig: CoreConfig;
|
|
19
|
+
/** Injected host agent runtime */
|
|
20
|
+
agentRuntime: CoreAgentDeps;
|
|
15
21
|
/** Call ID for session tracking */
|
|
16
22
|
callId: string;
|
|
23
|
+
/** Persisted call session key */
|
|
24
|
+
sessionKey?: string;
|
|
17
25
|
/** Caller's phone number */
|
|
18
26
|
from: string;
|
|
19
27
|
/** Conversation transcript */
|
|
@@ -27,11 +35,153 @@ export type VoiceResponseResult = {
|
|
|
27
35
|
error?: string;
|
|
28
36
|
};
|
|
29
37
|
|
|
30
|
-
type
|
|
31
|
-
|
|
32
|
-
|
|
38
|
+
/**
 * Minimal view of one agent result payload, limited to the fields this module
 * reads when assembling the spoken reply.
 */
type VoiceResponsePayload = {
  /** When true the payload is skipped and never spoken. */
  isError?: boolean;
  /** When true the payload is skipped and never spoken. */
  isReasoning?: boolean;
  /** Raw model output; may be the `{"spoken":"..."}` JSON contract or plain text. */
  text?: string;
};
|
|
34
43
|
|
|
44
|
+
/**
 * Instruction block describing the `{"spoken":"..."}` JSON reply contract.
 * The value is five newline-separated lines: a heading followed by four
 * bullet rules.
 */
const VOICE_SPOKEN_OUTPUT_CONTRACT: string =
  "Output format requirements:" +
  '\n- Return only valid JSON in this exact shape: {"spoken":"..."}' +
  "\n- Do not include markdown, code fences, planning text, or extra keys." +
  '\n- Put exactly what should be spoken to the caller into "spoken".' +
  '\n- If there is nothing to say, return {"spoken":""}.';
|
|
51
|
+
|
|
52
|
+
/**
 * Collapse every run of whitespace (including newlines) to a single space and
 * trim the ends.
 *
 * @param value - Text to normalize.
 * @returns The normalized text, or `null` when nothing but whitespace remains.
 */
function normalizeSpokenText(value: string): string | null {
  const normalized = value.replace(/\s+/g, " ").trim();
  return normalized.length > 0 ? normalized : null;
}

/**
 * Extract the `spoken` string from a model reply that is expected to follow
 * the `{"spoken":"..."}` contract.
 *
 * Parsing is attempted, in order, on:
 *   1. the whole trimmed reply,
 *   2. the body of a reply that is entirely one fenced code block,
 *   3. the span from the first `{` to the last `}`,
 * and finally falls back to a regex that pulls a `"spoken": "..."` pair out of
 * otherwise malformed output.
 *
 * @param text - Raw model output.
 * @returns The whitespace-normalized spoken text (`""` when the model
 *   explicitly returned an empty string), or `null` when no `spoken` string
 *   could be recovered.
 */
function tryParseSpokenJson(text: string): string | null {
  const trimmed = text.trim();
  if (!trimmed) {
    return null;
  }

  // A Set keeps insertion order while skipping duplicates: a bare JSON reply is
  // identical to its own first-brace/last-brace slice and need not be parsed twice.
  const candidates = new Set<string>([trimmed]);

  const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
  if (fenced?.[1]) {
    candidates.add(fenced[1]);
  }

  const firstBrace = trimmed.indexOf("{");
  const lastBrace = trimmed.lastIndexOf("}");
  if (firstBrace >= 0 && lastBrace > firstBrace) {
    candidates.add(trimmed.slice(firstBrace, lastBrace + 1));
  }

  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate) as { spoken?: unknown } | null;
      if (typeof parsed?.spoken !== "string") {
        continue;
      }
      return normalizeSpokenText(parsed.spoken) ?? "";
    } catch {
      // Continue trying other candidates.
    }
  }

  const inlineSpokenMatch = trimmed.match(/"spoken"\s*:\s*"((?:[^"\\]|\\.)*)"/i);
  if (!inlineSpokenMatch) {
    return null;
  }

  // Raw control characters (most often literal newlines) are illegal inside a
  // JSON string and are the usual reason every strict parse above failed.
  // normalizeSpokenText collapses whitespace anyway, so swapping them for a
  // space before decoding loses nothing and lets the reply be recovered.
  const inlineBody = (inlineSpokenMatch[1] ?? "").replace(/[\u0000-\u001f]/g, " ");

  try {
    const decoded = JSON.parse(`"${inlineBody}"`) as string;
    return normalizeSpokenText(decoded) ?? "";
  } catch {
    return null;
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Heuristic check for a paragraph that is the model narrating its own
 * reasoning rather than content meant for the caller.
 *
 * The paragraph is first passed through `normalizeLowercaseStringOrEmpty`
 * (presumably lowercases the text and yields "" for blank input; confirm
 * against the SDK). A paragraph is flagged when it:
 *   - starts with "thinking process", "reasoning:" or "analysis:";
 *   - starts with "the user " and also contains "i should", "i need to" or
 *     "i will"; or
 *   - contains one of two known filler phrases.
 *
 * @param paragraph - A single paragraph of model output.
 * @returns `true` when the paragraph looks like meta reasoning.
 */
function isLikelyMetaReasoningParagraph(paragraph: string): boolean {
  const text = normalizeLowercaseStringOrEmpty(paragraph);
  if (!text) {
    return false;
  }

  const reasoningHeaders = ["thinking process", "reasoning:", "analysis:"];
  if (reasoningHeaders.some((header) => text.startsWith(header))) {
    return true;
  }

  const planningMarkers = ["i should", "i need to", "i will"];
  const narratesUserThenPlans =
    text.startsWith("the user ") && planningMarkers.some((marker) => text.includes(marker));
  if (narratesUserThenPlans) {
    return true;
  }

  const fillerPhrases = [
    "this is a natural continuation of the conversation",
    "keep the conversation flowing",
  ];
  return fillerPhrases.some((phrase) => text.includes(phrase));
}
|
|
128
|
+
|
|
129
|
+
/**
 * Clean up a plain-text (non-JSON) model reply so it can be spoken.
 *
 * Fenced code blocks are removed, the remainder is split into paragraphs on
 * blank lines, and leading paragraphs that look like meta reasoning are
 * dropped. At least one paragraph is always kept. The surviving paragraphs
 * are joined with spaces and whitespace-normalized.
 *
 * @param text - Raw model output.
 * @returns The cleaned text, or `null` when nothing speakable remains.
 */
function sanitizePlainSpokenText(text: string): string | null {
  const stripped = text.replace(/```[\s\S]*?```/g, " ").trim();
  if (stripped.length === 0) {
    return null;
  }

  const blocks: string[] = [];
  for (const chunk of stripped.split(/\n\s*\n+/)) {
    const cleaned = chunk.trim();
    if (cleaned) {
      blocks.push(cleaned);
    }
  }

  // Advance past leading reasoning paragraphs, but never past the final one.
  let firstKept = 0;
  while (blocks.length - firstKept > 1 && isLikelyMetaReasoningParagraph(blocks[firstKept])) {
    firstKept += 1;
  }

  return normalizeSpokenText(blocks.slice(firstKept).join(" "));
}
|
|
146
|
+
|
|
147
|
+
/**
 * Build the text to speak from a list of agent payloads.
 *
 * Error and reasoning payloads, and payloads with blank text, are ignored.
 * Each remaining payload is first treated as the structured
 * `{"spoken":"..."}` contract. An explicitly empty `spoken` contributes
 * nothing and does not fall through to plain-text handling. Only when no
 * structured value is found is the text sanitized as plain prose.
 *
 * @param payloads - Payloads returned by the agent run.
 * @returns The space-joined spoken segments, or `null` when there are none.
 */
function extractSpokenTextFromPayloads(payloads: VoiceResponsePayload[]): string | null {
  const segments = payloads.flatMap((payload): string[] => {
    if (payload.isError || payload.isReasoning) {
      return [];
    }

    const raw = payload.text?.trim() ?? "";
    if (!raw) {
      return [];
    }

    const fromJson = tryParseSpokenJson(raw);
    if (fromJson !== null) {
      return fromJson.length > 0 ? [fromJson] : [];
    }

    const fromPlain = sanitizePlainSpokenText(raw);
    return fromPlain ? [fromPlain] : [];
  });

  if (segments.length === 0) {
    return null;
  }
  return segments.join(" ").trim();
}
|
|
176
|
+
|
|
177
|
+
/**
 * Produce the sandbox session key for a voice call.
 *
 * A key that already begins with `agent:` (case-insensitive) is returned
 * trimmed but otherwise untouched; any other key is prefixed with
 * `agent:<agentId>:`.
 *
 * @param agentId - Agent the call is routed to.
 * @param sessionKey - Resolved call session key.
 * @returns The agent-scoped session key.
 */
function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): string {
  const key = sessionKey.trim();
  const alreadyScoped = key.toLowerCase().startsWith("agent:");
  return alreadyScoped ? key : ["agent", agentId, key].join(":");
}
|
|
184
|
+
|
|
35
185
|
/**
|
|
36
186
|
* Generate a voice response using the embedded Pi agent with full tool support.
|
|
37
187
|
* Uses the same agent infrastructure as messaging for consistent behavior.
|
|
@@ -39,66 +189,79 @@ type SessionEntry = {
|
|
|
39
189
|
export async function generateVoiceResponse(
|
|
40
190
|
params: VoiceResponseParams,
|
|
41
191
|
): Promise<VoiceResponseResult> {
|
|
42
|
-
const {
|
|
192
|
+
const {
|
|
193
|
+
voiceConfig,
|
|
194
|
+
callId,
|
|
195
|
+
sessionKey,
|
|
196
|
+
from,
|
|
197
|
+
transcript,
|
|
198
|
+
userMessage,
|
|
199
|
+
coreConfig,
|
|
200
|
+
agentRuntime,
|
|
201
|
+
} = params;
|
|
43
202
|
|
|
44
203
|
if (!coreConfig) {
|
|
45
204
|
return { text: null, error: "Core config unavailable for voice response" };
|
|
46
205
|
}
|
|
47
|
-
|
|
48
|
-
let deps: Awaited<ReturnType<typeof loadCoreAgentDeps>>;
|
|
49
|
-
try {
|
|
50
|
-
deps = await loadCoreAgentDeps();
|
|
51
|
-
} catch (err) {
|
|
52
|
-
return {
|
|
53
|
-
text: null,
|
|
54
|
-
error: err instanceof Error ? err.message : "Unable to load core agent dependencies",
|
|
55
|
-
};
|
|
56
|
-
}
|
|
57
206
|
const cfg = coreConfig;
|
|
58
207
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
208
|
+
const resolvedSessionKey = resolveVoiceCallSessionKey({
|
|
209
|
+
config: voiceConfig,
|
|
210
|
+
callId,
|
|
211
|
+
phone: from,
|
|
212
|
+
explicitSessionKey: sessionKey,
|
|
213
|
+
});
|
|
214
|
+
const agentId = voiceConfig.agentId ?? "main";
|
|
63
215
|
|
|
64
216
|
// Resolve paths
|
|
65
|
-
const storePath =
|
|
66
|
-
const agentDir =
|
|
67
|
-
const workspaceDir =
|
|
217
|
+
const storePath = agentRuntime.session.resolveStorePath(cfg.session?.store, { agentId });
|
|
218
|
+
const agentDir = agentRuntime.resolveAgentDir(cfg, agentId);
|
|
219
|
+
const workspaceDir = agentRuntime.resolveAgentWorkspaceDir(cfg, agentId);
|
|
68
220
|
|
|
69
221
|
// Ensure workspace exists
|
|
70
|
-
await
|
|
222
|
+
await agentRuntime.ensureAgentWorkspace({ dir: workspaceDir });
|
|
71
223
|
|
|
72
224
|
// Load or create session entry
|
|
73
|
-
const sessionStore =
|
|
225
|
+
const sessionStore = agentRuntime.session.loadSessionStore(storePath);
|
|
74
226
|
const now = Date.now();
|
|
75
|
-
|
|
227
|
+
const existingSessionEntry = sessionStore[resolvedSessionKey] as SessionEntry | undefined;
|
|
76
228
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
sessionId: crypto.randomUUID(),
|
|
80
|
-
updatedAt: now,
|
|
81
|
-
};
|
|
82
|
-
sessionStore[sessionKey] = sessionEntry;
|
|
83
|
-
await deps.saveSessionStore(storePath, sessionStore);
|
|
84
|
-
}
|
|
229
|
+
// Resolve model from config
|
|
230
|
+
const { provider, model } = resolveVoiceResponseModel({ voiceConfig, agentRuntime });
|
|
85
231
|
|
|
232
|
+
let sessionEntry = existingSessionEntry;
|
|
233
|
+
if (!sessionEntry?.sessionId || voiceConfig.responseModel) {
|
|
234
|
+
sessionEntry = await agentRuntime.session.updateSessionStore(storePath, (store) => {
|
|
235
|
+
let entry = store[resolvedSessionKey] as SessionEntry | undefined;
|
|
236
|
+
if (!entry?.sessionId) {
|
|
237
|
+
entry = {
|
|
238
|
+
...entry,
|
|
239
|
+
sessionId: crypto.randomUUID(),
|
|
240
|
+
updatedAt: now,
|
|
241
|
+
};
|
|
242
|
+
store[resolvedSessionKey] = entry;
|
|
243
|
+
}
|
|
244
|
+
if (voiceConfig.responseModel) {
|
|
245
|
+
applyModelOverrideToSessionEntry({
|
|
246
|
+
entry,
|
|
247
|
+
selection: { provider, model },
|
|
248
|
+
selectionSource: "auto",
|
|
249
|
+
});
|
|
250
|
+
}
|
|
251
|
+
return entry;
|
|
252
|
+
});
|
|
253
|
+
}
|
|
86
254
|
const sessionId = sessionEntry.sessionId;
|
|
87
|
-
|
|
255
|
+
|
|
256
|
+
const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, {
|
|
88
257
|
agentId,
|
|
89
258
|
});
|
|
90
259
|
|
|
91
|
-
// Resolve model from config
|
|
92
|
-
const modelRef = voiceConfig.responseModel || `${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
|
|
93
|
-
const slashIndex = modelRef.indexOf("/");
|
|
94
|
-
const provider = slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
|
|
95
|
-
const model = slashIndex === -1 ? modelRef : modelRef.slice(slashIndex + 1);
|
|
96
|
-
|
|
97
260
|
// Resolve thinking level
|
|
98
|
-
const thinkLevel =
|
|
261
|
+
const thinkLevel = agentRuntime.resolveThinkingDefault({ cfg, provider, model });
|
|
99
262
|
|
|
100
263
|
// Resolve agent identity for personalized prompt
|
|
101
|
-
const identity =
|
|
264
|
+
const identity = agentRuntime.resolveAgentIdentity(cfg, agentId);
|
|
102
265
|
const agentName = identity?.name?.trim() || "assistant";
|
|
103
266
|
|
|
104
267
|
// Build system prompt with conversation history
|
|
@@ -113,15 +276,18 @@ export async function generateVoiceResponse(
|
|
|
113
276
|
.join("\n");
|
|
114
277
|
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
|
|
115
278
|
}
|
|
279
|
+
extraSystemPrompt = `${extraSystemPrompt}\n\n${VOICE_SPOKEN_OUTPUT_CONTRACT}`;
|
|
116
280
|
|
|
117
281
|
// Resolve timeout
|
|
118
|
-
const timeoutMs = voiceConfig.responseTimeoutMs ??
|
|
282
|
+
const timeoutMs = voiceConfig.responseTimeoutMs ?? agentRuntime.resolveAgentTimeoutMs({ cfg });
|
|
119
283
|
const runId = `voice:${callId}:${Date.now()}`;
|
|
120
284
|
|
|
121
285
|
try {
|
|
122
|
-
const result = await
|
|
286
|
+
const result = await agentRuntime.runEmbeddedPiAgent({
|
|
123
287
|
sessionId,
|
|
124
|
-
sessionKey,
|
|
288
|
+
sessionKey: resolvedSessionKey,
|
|
289
|
+
sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, resolvedSessionKey),
|
|
290
|
+
agentId,
|
|
125
291
|
messageProvider: "voice",
|
|
126
292
|
sessionFile,
|
|
127
293
|
workspaceDir,
|
|
@@ -138,13 +304,7 @@ export async function generateVoiceResponse(
|
|
|
138
304
|
agentDir,
|
|
139
305
|
});
|
|
140
306
|
|
|
141
|
-
|
|
142
|
-
const texts = (result.payloads ?? [])
|
|
143
|
-
.filter((p) => p.text && !p.isError)
|
|
144
|
-
.map((p) => p.text?.trim())
|
|
145
|
-
.filter(Boolean);
|
|
146
|
-
|
|
147
|
-
const text = texts.join(" ") || null;
|
|
307
|
+
const text = extractSpokenTextFromPayloads((result.payloads ?? []) as VoiceResponsePayload[]);
|
|
148
308
|
|
|
149
309
|
if (!text && result.meta?.aborted) {
|
|
150
310
|
return { text: null, error: "Response generation was aborted" };
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { VoiceCallConfigSchema } from "./config.js";
|
|
3
|
+
import type { CoreAgentDeps } from "./core-bridge.js";
|
|
4
|
+
import { resolveVoiceResponseModel } from "./response-model.js";
|
|
5
|
+
|
|
6
|
+
// Minimal runtime stub: resolveVoiceResponseModel only reads `defaults`.
const agentRuntime = {
  defaults: {
    provider: "together",
    model: "Qwen/Qwen2.5-7B-Instruct-Turbo",
  },
} as unknown as CoreAgentDeps;

describe("resolveVoiceResponseModel", () => {
  it("falls back to the runtime default model", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({}),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "together/Qwen/Qwen2.5-7B-Instruct-Turbo",
      provider: "together",
      model: "Qwen/Qwen2.5-7B-Instruct-Turbo",
    });
  });

  it("uses an explicit provider/model ref", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({
          responseModel: "openai/gpt-5.4-mini",
        }),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "openai/gpt-5.4-mini",
      provider: "openai",
      model: "gpt-5.4-mini",
    });
  });

  // NOTE(review): this case was previously titled "uses the runtime default
  // provider for bare model overrides", yet it asserts that the first path
  // segment ("meta-llama") becomes the provider. The title now matches the
  // asserted behaviour; confirm that the assertion, not the old title,
  // reflects the intended handling of slash-containing model ids.
  it("treats the first segment of a slash-containing override as the provider", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({
          responseModel: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        }),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
      provider: "meta-llama",
      model: "Llama-4-Scout-17B-16E-Instruct",
    });
  });

  it("keeps legacy single-segment overrides on the runtime default provider", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({
          responseModel: "gpt-5.4-mini",
        }),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "gpt-5.4-mini",
      provider: "together",
      model: "gpt-5.4-mini",
    });
  });
});
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { VoiceCallConfig } from "./config.js";
|
|
2
|
+
import type { CoreAgentDeps } from "./core-bridge.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Resolve which provider/model pair should generate voice responses.
 *
 * `voiceConfig.responseModel` wins when it is set to a non-blank value;
 * otherwise the runtime defaults are used. A blank or whitespace-only override
 * is treated as unset so it can never resolve to an empty model id.
 *
 * The reference is split on its first `/`: the part before is the provider and
 * everything after is the model id (which may itself contain slashes). A
 * reference without a slash is a bare model id on the runtime default provider.
 *
 * @param params.voiceConfig - Voice-call config carrying the optional override.
 * @param params.agentRuntime - Host runtime supplying `defaults.provider` and
 *   `defaults.model`.
 * @returns The effective `modelRef` plus its `provider` and `model` parts.
 */
export function resolveVoiceResponseModel(params: {
  voiceConfig: VoiceCallConfig;
  agentRuntime: CoreAgentDeps;
}): {
  modelRef: string;
  provider: string;
  model: string;
} {
  const { provider: defaultProvider, model: defaultModel } = params.agentRuntime.defaults;

  // `??` alone would accept "" as a valid override; trim and test truthiness so
  // empty and whitespace-only values fall back to the runtime default.
  const configured = params.voiceConfig.responseModel?.trim();
  const modelRef = configured ? configured : `${defaultProvider}/${defaultModel}`;

  const slashIndex = modelRef.indexOf("/");
  if (slashIndex === -1) {
    return { modelRef, provider: defaultProvider, model: modelRef };
  }

  return {
    modelRef,
    provider: modelRef.slice(0, slashIndex),
    model: modelRef.slice(slashIndex + 1),
  };
}
|