@vellumai/assistant 0.4.23 → 0.4.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bun.lock +3 -0
- package/package.json +2 -1
- package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +0 -15
- package/src/__tests__/assistant-events-sse-hardening.test.ts +9 -3
- package/src/__tests__/config-schema.test.ts +38 -178
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +4 -1
- package/src/__tests__/credential-security-invariants.test.ts +0 -2
- package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +2 -2
- package/src/__tests__/ipc-snapshot.test.ts +0 -9
- package/src/__tests__/onboarding-template-contract.test.ts +10 -20
- package/src/__tests__/relay-server.test.ts +3 -3
- package/src/__tests__/runtime-events-sse-parity.test.ts +10 -0
- package/src/__tests__/runtime-events-sse.test.ts +7 -0
- package/src/__tests__/session-runtime-assembly.test.ts +34 -8
- package/src/__tests__/system-prompt.test.ts +7 -1
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +12 -8
- package/src/__tests__/twilio-routes-twiml.test.ts +2 -2
- package/src/__tests__/twilio-routes.test.ts +2 -3
- package/src/__tests__/voice-quality.test.ts +21 -132
- package/src/calls/relay-server.ts +11 -5
- package/src/calls/twilio-routes.ts +4 -38
- package/src/calls/voice-quality.ts +7 -63
- package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +7 -10
- package/src/config/bundled-skills/messaging/SKILL.md +3 -5
- package/src/config/bundled-skills/phone-calls/SKILL.md +143 -82
- package/src/config/bundled-skills/sms-setup/SKILL.md +0 -20
- package/src/config/bundled-skills/twilio-setup/SKILL.md +9 -17
- package/src/config/bundled-skills/voice-setup/SKILL.md +36 -1
- package/src/config/bundled-skills/voice-setup/icon.svg +20 -0
- package/src/config/calls-schema.ts +3 -53
- package/src/config/elevenlabs-schema.ts +33 -0
- package/src/config/schema.ts +183 -137
- package/src/config/types.ts +0 -1
- package/src/daemon/handlers/browser.ts +1 -6
- package/src/daemon/ipc-contract/browser.ts +5 -14
- package/src/daemon/ipc-contract-inventory.json +0 -2
- package/src/daemon/session-agent-loop-handlers.ts +3 -0
- package/src/daemon/session-runtime-assembly.ts +9 -7
- package/src/mcp/client.ts +2 -1
- package/src/memory/conversation-crud.ts +339 -166
- package/src/runtime/routes/events-routes.ts +7 -0
- package/src/runtime/routes/inbound-message-handler.ts +3 -4
- package/src/schedule/scheduler.ts +159 -45
- package/src/security/secure-keys.ts +3 -3
- package/src/tools/browser/browser-manager.ts +72 -228
- package/src/tools/browser/browser-screencast.ts +0 -5
- package/src/tools/network/script-proxy/certs.ts +7 -237
- package/src/tools/network/script-proxy/connect-tunnel.ts +1 -82
- package/src/tools/network/script-proxy/http-forwarder.ts +2 -151
- package/src/tools/network/script-proxy/logging.ts +12 -196
- package/src/tools/network/script-proxy/mitm-handler.ts +2 -270
- package/src/tools/network/script-proxy/policy.ts +4 -152
- package/src/tools/network/script-proxy/router.ts +2 -60
- package/src/tools/network/script-proxy/server.ts +5 -137
- package/src/tools/network/script-proxy/types.ts +19 -125
- package/src/tools/system/voice-config.ts +23 -1
- package/src/util/logger.ts +4 -1
- package/src/__tests__/elevenlabs-config.test.ts +0 -95
- package/src/__tests__/twilio-routes-elevenlabs.test.ts +0 -407
- package/src/calls/elevenlabs-config.ts +0 -32
|
@@ -44,6 +44,14 @@ describe("resolveChannelCapabilities", () => {
|
|
|
44
44
|
expect(caps.supportsVoiceInput).toBe(true);
|
|
45
45
|
});
|
|
46
46
|
|
|
47
|
+
test("vellum channel with vellum interface supports dynamic UI", () => {
|
|
48
|
+
const caps = resolveChannelCapabilities("vellum", "vellum");
|
|
49
|
+
expect(caps.channel).toBe("vellum");
|
|
50
|
+
expect(caps.dashboardCapable).toBe(false);
|
|
51
|
+
expect(caps.supportsDynamicUi).toBe(true);
|
|
52
|
+
expect(caps.supportsVoiceInput).toBe(false);
|
|
53
|
+
});
|
|
54
|
+
|
|
47
55
|
test("defaults to vellum for null source channel", () => {
|
|
48
56
|
const caps = resolveChannelCapabilities(null);
|
|
49
57
|
expect(caps.channel).toBe("vellum");
|
|
@@ -407,6 +415,24 @@ describe("trust-gating via channel capabilities", () => {
|
|
|
407
415
|
expect(injected).toContain("Present information as well-formatted text");
|
|
408
416
|
expect(injected).toContain("desktop app");
|
|
409
417
|
});
|
|
418
|
+
|
|
419
|
+
test("vellum web interface allows dynamic UI but constrains dashboard references", () => {
|
|
420
|
+
const caps = resolveChannelCapabilities("vellum", "vellum");
|
|
421
|
+
const message: Message = {
|
|
422
|
+
role: "user",
|
|
423
|
+
content: [{ type: "text", text: "Show me a form" }],
|
|
424
|
+
};
|
|
425
|
+
|
|
426
|
+
const result = injectChannelCapabilityContext(message, caps);
|
|
427
|
+
const injected = (result.content[0] as { type: "text"; text: string }).text;
|
|
428
|
+
|
|
429
|
+
expect(injected).toContain("CHANNEL CONSTRAINTS");
|
|
430
|
+
expect(injected).toContain("Do NOT reference the dashboard UI");
|
|
431
|
+
expect(injected).not.toContain("Do NOT use ui_show");
|
|
432
|
+
expect(injected).not.toContain("Present information as well-formatted text");
|
|
433
|
+
expect(injected).toContain("supports_dynamic_ui: true");
|
|
434
|
+
expect(injected).toContain("dashboard_capable: false");
|
|
435
|
+
});
|
|
410
436
|
});
|
|
411
437
|
|
|
412
438
|
// ---------------------------------------------------------------------------
|
|
@@ -994,12 +1020,12 @@ describe("sanitizePttActivationKey", () => {
|
|
|
994
1020
|
expect(sanitizePttActivationKey("none")).toBe("none");
|
|
995
1021
|
});
|
|
996
1022
|
|
|
997
|
-
test(
|
|
998
|
-
expect(
|
|
999
|
-
"
|
|
1000
|
-
);
|
|
1001
|
-
expect(sanitizePttActivationKey("arbitrary_value")).
|
|
1002
|
-
expect(sanitizePttActivationKey("")).
|
|
1023
|
+
test("returns undefined for invalid keys", () => {
|
|
1024
|
+
expect(
|
|
1025
|
+
sanitizePttActivationKey("malicious\nprompt injection"),
|
|
1026
|
+
).toBeUndefined();
|
|
1027
|
+
expect(sanitizePttActivationKey("arbitrary_value")).toBeUndefined();
|
|
1028
|
+
expect(sanitizePttActivationKey("")).toBeUndefined();
|
|
1003
1029
|
});
|
|
1004
1030
|
});
|
|
1005
1031
|
|
|
@@ -1015,11 +1041,11 @@ describe("resolveChannelCapabilities with PTT metadata", () => {
|
|
|
1015
1041
|
expect(caps.pttActivationKey).toBe("fn");
|
|
1016
1042
|
});
|
|
1017
1043
|
|
|
1018
|
-
test("sanitizes invalid pttActivationKey to
|
|
1044
|
+
test("sanitizes invalid pttActivationKey to undefined", () => {
|
|
1019
1045
|
const caps = resolveChannelCapabilities("macos", "macos", {
|
|
1020
1046
|
pttActivationKey: "evil\nprompt",
|
|
1021
1047
|
});
|
|
1022
|
-
expect(caps.pttActivationKey).
|
|
1048
|
+
expect(caps.pttActivationKey).toBeUndefined();
|
|
1023
1049
|
});
|
|
1024
1050
|
|
|
1025
1051
|
test("passes through microphonePermissionGranted", () => {
|
|
@@ -75,9 +75,15 @@ const {
|
|
|
75
75
|
buildPhoneCallsRoutingSection,
|
|
76
76
|
} = await import("../config/system-prompt.js");
|
|
77
77
|
|
|
78
|
-
/** Strip the Configuration and
|
|
78
|
+
/** Strip the Configuration, Skills, and hardcoded preamble sections so base-prompt tests stay focused. */
|
|
79
79
|
function basePrompt(result: string): string {
|
|
80
80
|
let s = result;
|
|
81
|
+
// Strip the hardcoded em-dash instruction preamble
|
|
82
|
+
const emDashLine =
|
|
83
|
+
"IMPORTANT: Never use em dashes (\u2014) in your messages. Use commas, periods, or just start a new sentence instead.";
|
|
84
|
+
if (s.startsWith(emDashLine)) {
|
|
85
|
+
s = s.slice(emDashLine.length).replace(/^\n\n/, "");
|
|
86
|
+
}
|
|
81
87
|
for (const heading of [
|
|
82
88
|
"## Configuration",
|
|
83
89
|
"## Skills Catalog",
|
|
@@ -120,8 +120,14 @@ mock.module("../config/env.js", () => ({
|
|
|
120
120
|
getGatewayInternalBaseUrl: () => "http://localhost:3000",
|
|
121
121
|
}));
|
|
122
122
|
|
|
123
|
+
// ── User reference mock ──
|
|
124
|
+
mock.module("../config/user-reference.js", () => ({
|
|
125
|
+
resolveUserReference: () => "my human",
|
|
126
|
+
}));
|
|
127
|
+
|
|
123
128
|
// Import module under test AFTER mocks are set up
|
|
124
129
|
import type { ChannelId } from "../channels/types.js";
|
|
130
|
+
import { resolveUserReference } from "../config/user-reference.js";
|
|
125
131
|
import type { GuardianContext } from "../runtime/guardian-context-resolver.js";
|
|
126
132
|
|
|
127
133
|
// We need to test the private functions by importing the module.
|
|
@@ -220,9 +226,7 @@ async function simulateNotifierPoll(params: {
|
|
|
220
226
|
}
|
|
221
227
|
}
|
|
222
228
|
|
|
223
|
-
const waitingText = guardianName
|
|
224
|
-
? `Waiting for ${guardianName}'s approval...`
|
|
225
|
-
: "Waiting for your guardian's approval...";
|
|
229
|
+
const waitingText = `Waiting for ${guardianName ?? resolveUserReference()}'s approval...`;
|
|
226
230
|
|
|
227
231
|
try {
|
|
228
232
|
await deliverChannelReply(
|
|
@@ -330,7 +334,7 @@ describe("trusted-contact pending-approval notifier", () => {
|
|
|
330
334
|
);
|
|
331
335
|
});
|
|
332
336
|
|
|
333
|
-
test("
|
|
337
|
+
test("falls back to user reference when no guardian name is available", async () => {
|
|
334
338
|
mockPendingApprovals = [
|
|
335
339
|
{
|
|
336
340
|
requestId: "req-3",
|
|
@@ -359,11 +363,11 @@ describe("trusted-contact pending-approval notifier", () => {
|
|
|
359
363
|
|
|
360
364
|
expect(deliveredReplies).toHaveLength(1);
|
|
361
365
|
expect(deliveredReplies[0].payload.text).toBe(
|
|
362
|
-
"Waiting for
|
|
366
|
+
"Waiting for my human's approval...",
|
|
363
367
|
);
|
|
364
368
|
});
|
|
365
369
|
|
|
366
|
-
test("
|
|
370
|
+
test("falls back to user reference when no guardian binding exists", async () => {
|
|
367
371
|
mockPendingApprovals = [
|
|
368
372
|
{
|
|
369
373
|
requestId: "req-4",
|
|
@@ -388,7 +392,7 @@ describe("trusted-contact pending-approval notifier", () => {
|
|
|
388
392
|
|
|
389
393
|
expect(deliveredReplies).toHaveLength(1);
|
|
390
394
|
expect(deliveredReplies[0].payload.text).toBe(
|
|
391
|
-
"Waiting for
|
|
395
|
+
"Waiting for my human's approval...",
|
|
392
396
|
);
|
|
393
397
|
});
|
|
394
398
|
|
|
@@ -736,7 +740,7 @@ describe("trusted-contact pending-approval notifier", () => {
|
|
|
736
740
|
expect(deliveredReplies).toHaveLength(1);
|
|
737
741
|
// Falls back to generic phrasing
|
|
738
742
|
expect(deliveredReplies[0].payload.text).toBe(
|
|
739
|
-
"Waiting for
|
|
743
|
+
"Waiting for my human's approval...",
|
|
740
744
|
);
|
|
741
745
|
});
|
|
742
746
|
});
|
|
@@ -46,7 +46,7 @@ describe("generateTwiML with voice quality profile", () => {
|
|
|
46
46
|
expect(twiml).toContain('voice="voice123-turbo_v2_5-1_0.5_0.75"');
|
|
47
47
|
});
|
|
48
48
|
|
|
49
|
-
test("voice attribute reflects configured voice
|
|
49
|
+
test("voice attribute reflects configured Google voice", () => {
|
|
50
50
|
const twiml = generateTwiML(callSessionId, relayUrl, welcomeGreeting, {
|
|
51
51
|
language: "en-US",
|
|
52
52
|
transcriptionProvider: "Deepgram",
|
|
@@ -57,7 +57,7 @@ describe("generateTwiML with voice quality profile", () => {
|
|
|
57
57
|
expect(twiml).toContain('voice="Google.en-US-Journey-O"');
|
|
58
58
|
});
|
|
59
59
|
|
|
60
|
-
test("voice attribute reflects configured voice
|
|
60
|
+
test("voice attribute reflects configured ElevenLabs voice", () => {
|
|
61
61
|
const twiml = generateTwiML(callSessionId, relayUrl, welcomeGreeting, {
|
|
62
62
|
language: "en-US",
|
|
63
63
|
transcriptionProvider: "Deepgram",
|
|
@@ -60,13 +60,12 @@ const mockConfigObj = {
|
|
|
60
60
|
memory: { enabled: false },
|
|
61
61
|
rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
|
|
62
62
|
secretDetection: { enabled: false },
|
|
63
|
+
elevenlabs: { voiceId: "21m00Tcm4TlvDq8ikWAM" },
|
|
63
64
|
calls: {
|
|
64
65
|
voice: {
|
|
65
|
-
mode: "twilio_standard",
|
|
66
66
|
language: "en-US",
|
|
67
67
|
transcriptionProvider: "Deepgram",
|
|
68
|
-
|
|
69
|
-
elevenlabs: { voiceId: "" },
|
|
68
|
+
elevenlabs: {},
|
|
70
69
|
},
|
|
71
70
|
},
|
|
72
71
|
};
|
|
@@ -8,7 +8,6 @@ mock.module("../config/loader.js", () => ({
|
|
|
8
8
|
|
|
9
9
|
import {
|
|
10
10
|
buildElevenLabsVoiceSpec,
|
|
11
|
-
isVoiceProfileValid,
|
|
12
11
|
resolveVoiceQualityProfile,
|
|
13
12
|
} from "../calls/voice-quality.js";
|
|
14
13
|
|
|
@@ -62,176 +61,66 @@ describe("buildElevenLabsVoiceSpec", () => {
|
|
|
62
61
|
});
|
|
63
62
|
|
|
64
63
|
describe("resolveVoiceQualityProfile", () => {
|
|
65
|
-
test("returns
|
|
64
|
+
test("always returns ElevenLabs ttsProvider", () => {
|
|
66
65
|
mockConfig = {
|
|
66
|
+
elevenlabs: { voiceId: "21m00Tcm4TlvDq8ikWAM" },
|
|
67
67
|
calls: {
|
|
68
68
|
voice: {
|
|
69
|
-
mode: "twilio_standard",
|
|
70
69
|
language: "en-US",
|
|
71
|
-
transcriptionProvider: "
|
|
72
|
-
fallbackToStandardOnError: false,
|
|
73
|
-
elevenlabs: {},
|
|
74
|
-
},
|
|
75
|
-
},
|
|
76
|
-
};
|
|
77
|
-
const profile = resolveVoiceQualityProfile();
|
|
78
|
-
expect(profile.mode).toBe("twilio_standard");
|
|
79
|
-
expect(profile.ttsProvider).toBe("Google");
|
|
80
|
-
expect(profile.voice).toBe("Google.en-US-Journey-O");
|
|
81
|
-
expect(profile.validationErrors).toHaveLength(0);
|
|
82
|
-
});
|
|
83
|
-
|
|
84
|
-
test("returns elevenlabs profile for twilio_elevenlabs_tts mode", () => {
|
|
85
|
-
mockConfig = {
|
|
86
|
-
calls: {
|
|
87
|
-
voice: {
|
|
88
|
-
mode: "twilio_elevenlabs_tts",
|
|
89
|
-
language: "en-US",
|
|
90
|
-
transcriptionProvider: "Google",
|
|
91
|
-
fallbackToStandardOnError: false,
|
|
92
|
-
elevenlabs: { voiceId: "elvoice1" },
|
|
70
|
+
transcriptionProvider: "Deepgram",
|
|
93
71
|
},
|
|
94
72
|
},
|
|
95
73
|
};
|
|
96
74
|
const profile = resolveVoiceQualityProfile();
|
|
97
|
-
expect(profile.mode).toBe("twilio_elevenlabs_tts");
|
|
98
75
|
expect(profile.ttsProvider).toBe("ElevenLabs");
|
|
99
|
-
expect(profile.voice).toBe("elvoice1");
|
|
100
|
-
expect(profile.validationErrors).toHaveLength(0);
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
test("falls back to standard when voiceId missing and fallback enabled", () => {
|
|
104
|
-
mockConfig = {
|
|
105
|
-
calls: {
|
|
106
|
-
voice: {
|
|
107
|
-
mode: "twilio_elevenlabs_tts",
|
|
108
|
-
language: "en-US",
|
|
109
|
-
transcriptionProvider: "Google",
|
|
110
|
-
fallbackToStandardOnError: true,
|
|
111
|
-
elevenlabs: { voiceId: "" },
|
|
112
|
-
},
|
|
113
|
-
},
|
|
114
|
-
};
|
|
115
|
-
const profile = resolveVoiceQualityProfile();
|
|
116
|
-
expect(profile.mode).toBe("twilio_standard");
|
|
117
|
-
expect(profile.validationErrors.length).toBeGreaterThan(0);
|
|
118
|
-
expect(profile.validationErrors[0]).toContain("falling back");
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
test("returns validation error when voiceId missing and fallback disabled", () => {
|
|
122
|
-
mockConfig = {
|
|
123
|
-
calls: {
|
|
124
|
-
voice: {
|
|
125
|
-
mode: "twilio_elevenlabs_tts",
|
|
126
|
-
language: "en-US",
|
|
127
|
-
transcriptionProvider: "Google",
|
|
128
|
-
fallbackToStandardOnError: false,
|
|
129
|
-
elevenlabs: { voiceId: "" },
|
|
130
|
-
},
|
|
131
|
-
},
|
|
132
|
-
};
|
|
133
|
-
const profile = resolveVoiceQualityProfile();
|
|
134
|
-
expect(profile.mode).toBe("twilio_elevenlabs_tts");
|
|
135
|
-
expect(profile.validationErrors.length).toBeGreaterThan(0);
|
|
136
|
-
expect(profile.validationErrors[0]).toContain("voiceId is required");
|
|
137
76
|
});
|
|
138
77
|
|
|
139
|
-
test("
|
|
78
|
+
test("voice ID comes from elevenlabs.voiceId", () => {
|
|
140
79
|
mockConfig = {
|
|
80
|
+
elevenlabs: { voiceId: "custom-voice-123" },
|
|
141
81
|
calls: {
|
|
142
82
|
voice: {
|
|
143
|
-
mode: "elevenlabs_agent",
|
|
144
83
|
language: "en-US",
|
|
145
|
-
transcriptionProvider: "
|
|
146
|
-
fallbackToStandardOnError: false,
|
|
147
|
-
elevenlabs: { voiceId: "voice1", agentId: "agent123" },
|
|
84
|
+
transcriptionProvider: "Deepgram",
|
|
148
85
|
},
|
|
149
86
|
},
|
|
150
87
|
};
|
|
151
88
|
const profile = resolveVoiceQualityProfile();
|
|
152
|
-
expect(profile.
|
|
153
|
-
expect(profile.agentId).toBe("agent123");
|
|
154
|
-
expect(profile.validationErrors).toHaveLength(0);
|
|
89
|
+
expect(profile.voice).toBe("custom-voice-123");
|
|
155
90
|
});
|
|
156
91
|
|
|
157
|
-
test("
|
|
92
|
+
test("uses language from calls.voice config", () => {
|
|
158
93
|
mockConfig = {
|
|
94
|
+
elevenlabs: { voiceId: "abc" },
|
|
159
95
|
calls: {
|
|
160
96
|
voice: {
|
|
161
|
-
|
|
162
|
-
language: "en-US",
|
|
97
|
+
language: "es-MX",
|
|
163
98
|
transcriptionProvider: "Google",
|
|
164
|
-
fallbackToStandardOnError: true,
|
|
165
|
-
elevenlabs: { voiceId: "voice1", agentId: "" },
|
|
166
99
|
},
|
|
167
100
|
},
|
|
168
101
|
};
|
|
169
102
|
const profile = resolveVoiceQualityProfile();
|
|
170
|
-
expect(profile.
|
|
171
|
-
expect(profile.
|
|
103
|
+
expect(profile.language).toBe("es-MX");
|
|
104
|
+
expect(profile.transcriptionProvider).toBe("Google");
|
|
172
105
|
});
|
|
173
106
|
|
|
174
|
-
test("
|
|
107
|
+
test("builds voice spec with model and tuning params", () => {
|
|
175
108
|
mockConfig = {
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
elevenlabs: { voiceId: "voice1", agentId: "" },
|
|
183
|
-
},
|
|
109
|
+
elevenlabs: {
|
|
110
|
+
voiceId: "voice1",
|
|
111
|
+
voiceModelId: "turbo_v2_5",
|
|
112
|
+
speed: 0.9,
|
|
113
|
+
stability: 0.8,
|
|
114
|
+
similarityBoost: 0.9,
|
|
184
115
|
},
|
|
185
|
-
};
|
|
186
|
-
const profile = resolveVoiceQualityProfile();
|
|
187
|
-
expect(profile.mode).toBe("elevenlabs_agent");
|
|
188
|
-
expect(profile.validationErrors.length).toBeGreaterThan(0);
|
|
189
|
-
expect(profile.validationErrors[0]).toContain("agentId is required");
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
test("returns standard profile for unknown mode", () => {
|
|
193
|
-
mockConfig = {
|
|
194
116
|
calls: {
|
|
195
117
|
voice: {
|
|
196
|
-
mode: "unknown_mode",
|
|
197
118
|
language: "en-US",
|
|
198
|
-
transcriptionProvider: "
|
|
199
|
-
fallbackToStandardOnError: false,
|
|
200
|
-
elevenlabs: {},
|
|
119
|
+
transcriptionProvider: "Deepgram",
|
|
201
120
|
},
|
|
202
121
|
},
|
|
203
122
|
};
|
|
204
123
|
const profile = resolveVoiceQualityProfile();
|
|
205
|
-
expect(profile.
|
|
206
|
-
});
|
|
207
|
-
});
|
|
208
|
-
|
|
209
|
-
describe("isVoiceProfileValid", () => {
|
|
210
|
-
test("returns true for profile with no errors", () => {
|
|
211
|
-
expect(
|
|
212
|
-
isVoiceProfileValid({
|
|
213
|
-
mode: "twilio_standard",
|
|
214
|
-
language: "en-US",
|
|
215
|
-
transcriptionProvider: "Google",
|
|
216
|
-
ttsProvider: "Google",
|
|
217
|
-
voice: "Google.en-US-Journey-O",
|
|
218
|
-
fallbackToStandardOnError: false,
|
|
219
|
-
validationErrors: [],
|
|
220
|
-
}),
|
|
221
|
-
).toBe(true);
|
|
222
|
-
});
|
|
223
|
-
|
|
224
|
-
test("returns false for profile with errors", () => {
|
|
225
|
-
expect(
|
|
226
|
-
isVoiceProfileValid({
|
|
227
|
-
mode: "twilio_elevenlabs_tts",
|
|
228
|
-
language: "en-US",
|
|
229
|
-
transcriptionProvider: "Google",
|
|
230
|
-
ttsProvider: "ElevenLabs",
|
|
231
|
-
voice: "",
|
|
232
|
-
fallbackToStandardOnError: false,
|
|
233
|
-
validationErrors: ["voiceId is required"],
|
|
234
|
-
}),
|
|
235
|
-
).toBe(false);
|
|
124
|
+
expect(profile.voice).toBe("voice1-turbo_v2_5-0.9_0.8_0.9");
|
|
236
125
|
});
|
|
237
126
|
});
|
|
@@ -11,6 +11,7 @@ import { randomInt } from 'node:crypto';
|
|
|
11
11
|
import type { ServerWebSocket } from 'bun';
|
|
12
12
|
|
|
13
13
|
import { getConfig } from '../config/loader.js';
|
|
14
|
+
import { resolveUserReference } from '../config/user-reference.js';
|
|
14
15
|
import { getAssistantName } from '../daemon/identity-helpers.js';
|
|
15
16
|
import { getCanonicalGuardianRequest } from '../memory/canonical-guardian-store.js';
|
|
16
17
|
import { listActiveBindingsByAssistant } from '../memory/channel-guardian-store.js';
|
|
@@ -1214,10 +1215,14 @@ export class RelayConnection {
|
|
|
1214
1215
|
|
|
1215
1216
|
updateCallSession(this.callSessionId, { status: 'waiting_on_user' });
|
|
1216
1217
|
|
|
1217
|
-
// Start the heartbeat timer for periodic progress updates
|
|
1218
|
-
|
|
1218
|
+
// Start the heartbeat timer for periodic progress updates.
|
|
1219
|
+
// Delay the first heartbeat by the estimated TTS playback duration so
|
|
1220
|
+
// the initial hold message finishes before any heartbeat fires.
|
|
1219
1221
|
this.heartbeatSequence = 0;
|
|
1220
|
-
this.
|
|
1222
|
+
this.accessRequestHeartbeatTimer = setTimeout(() => {
|
|
1223
|
+
this.accessRequestWaitStartedAt = Date.now();
|
|
1224
|
+
this.scheduleNextHeartbeat();
|
|
1225
|
+
}, getTtsPlaybackDelayMs());
|
|
1221
1226
|
|
|
1222
1227
|
// Poll the canonical request status
|
|
1223
1228
|
this.accessRequestPollTimer = setInterval(() => {
|
|
@@ -1658,7 +1663,7 @@ export class RelayConnection {
|
|
|
1658
1663
|
/**
|
|
1659
1664
|
* Resolve a human-readable guardian label for voice wait copy.
|
|
1660
1665
|
* Prefers displayName from the guardian binding metadata, falls back
|
|
1661
|
-
* to @username, then
|
|
1666
|
+
* to @username, then the user's preferred name from USER.md.
|
|
1662
1667
|
*/
|
|
1663
1668
|
private resolveGuardianLabel(): string {
|
|
1664
1669
|
const assistantId = this.accessRequestAssistantId ?? DAEMON_INTERNAL_ASSISTANT_ID;
|
|
@@ -1690,7 +1695,8 @@ export class RelayConnection {
|
|
|
1690
1695
|
// ignore malformed metadata
|
|
1691
1696
|
}
|
|
1692
1697
|
}
|
|
1693
|
-
|
|
1698
|
+
|
|
1699
|
+
return resolveUserReference();
|
|
1694
1700
|
}
|
|
1695
1701
|
|
|
1696
1702
|
/**
|
|
@@ -29,7 +29,7 @@ import {
|
|
|
29
29
|
} from './call-store.js';
|
|
30
30
|
import { getTwilioConfig } from './twilio-config.js';
|
|
31
31
|
import type { CallStatus } from './types.js';
|
|
32
|
-
import {
|
|
32
|
+
import { resolveVoiceQualityProfile } from './voice-quality.js';
|
|
33
33
|
|
|
34
34
|
const log = getLogger('twilio-routes');
|
|
35
35
|
|
|
@@ -144,7 +144,7 @@ function mapTwilioStatus(twilioStatus: string): CallStatus | null {
|
|
|
144
144
|
* Receives the initial voice webhook when Twilio connects the call.
|
|
145
145
|
* Returns TwiML XML that tells Twilio to open a ConversationRelay WebSocket.
|
|
146
146
|
*
|
|
147
|
-
* Supports two
|
|
147
|
+
* Supports two flows:
|
|
148
148
|
* - **Outbound** (callSessionId present in query): uses the existing session
|
|
149
149
|
* - **Inbound** (callSessionId absent): creates or reuses a session keyed
|
|
150
150
|
* by the Twilio CallSid. Uses daemon internal scope for assistant identity.
|
|
@@ -214,43 +214,9 @@ function buildVoiceWebhookTwiml(
|
|
|
214
214
|
task: string | null,
|
|
215
215
|
guardianVerificationSessionId?: string | null,
|
|
216
216
|
): Response {
|
|
217
|
-
|
|
217
|
+
const profile = resolveVoiceQualityProfile(loadConfig());
|
|
218
218
|
|
|
219
|
-
log.info({ callSessionId,
|
|
220
|
-
|
|
221
|
-
if (profile.validationErrors.length > 0) {
|
|
222
|
-
log.warn({ callSessionId, errors: profile.validationErrors }, 'Voice quality profile has validation warnings');
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
// WS-A: Enforce strict fallback semantics — reject invalid profiles when fallback is disabled
|
|
226
|
-
if (!isVoiceProfileValid(profile)) {
|
|
227
|
-
if (!profile.fallbackToStandardOnError) {
|
|
228
|
-
const errorMsg = `Voice quality configuration error: ${profile.validationErrors.join('; ')}`;
|
|
229
|
-
log.error({ callSessionId, errors: profile.validationErrors }, errorMsg);
|
|
230
|
-
return new Response(errorMsg, { status: 500 });
|
|
231
|
-
}
|
|
232
|
-
// Fallback is enabled — profile already resolved to standard; log explicitly
|
|
233
|
-
log.info({ callSessionId }, 'Profile invalid with fallback enabled; proceeding with standard mode');
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
// WS-B: Guard elevenlabs_agent until consultation bridge exists.
|
|
237
|
-
// This fires BEFORE any ElevenLabs API calls, blocking the entire mode.
|
|
238
|
-
if (profile.mode === 'elevenlabs_agent') {
|
|
239
|
-
if (!profile.fallbackToStandardOnError) {
|
|
240
|
-
const msg = 'elevenlabs_agent mode is restricted: consultation bridging (waiting_on_user) is not yet supported. Set calls.voice.fallbackToStandardOnError=true to fall back to standard mode.';
|
|
241
|
-
log.error({ callSessionId }, msg);
|
|
242
|
-
return new Response(msg, { status: 501 });
|
|
243
|
-
}
|
|
244
|
-
log.warn({ callSessionId }, 'elevenlabs_agent mode is restricted/experimental — consultation bridging is not yet supported; falling back to standard ConversationRelay TwiML');
|
|
245
|
-
const standardConfig = loadConfig();
|
|
246
|
-
profile = resolveVoiceQualityProfile({
|
|
247
|
-
...standardConfig,
|
|
248
|
-
calls: {
|
|
249
|
-
...standardConfig.calls,
|
|
250
|
-
voice: { ...standardConfig.calls.voice, mode: 'twilio_standard' },
|
|
251
|
-
},
|
|
252
|
-
});
|
|
253
|
-
}
|
|
219
|
+
log.info({ callSessionId, ttsProvider: profile.ttsProvider, voice: profile.voice }, 'Voice quality profile resolved');
|
|
254
220
|
|
|
255
221
|
const twilioConfig = getTwilioConfig();
|
|
256
222
|
let relayUrl: string;
|
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
import { loadConfig } from '../config/loader.js';
|
|
2
2
|
|
|
3
3
|
export interface VoiceQualityProfile {
|
|
4
|
-
mode: 'twilio_standard' | 'twilio_elevenlabs_tts' | 'elevenlabs_agent';
|
|
5
4
|
language: string;
|
|
6
5
|
transcriptionProvider: string;
|
|
7
6
|
ttsProvider: string;
|
|
8
7
|
voice: string;
|
|
9
|
-
agentId?: string;
|
|
10
|
-
fallbackToStandardOnError: boolean;
|
|
11
|
-
validationErrors: string[];
|
|
12
8
|
}
|
|
13
9
|
|
|
14
10
|
/**
|
|
@@ -45,70 +41,18 @@ export function buildElevenLabsVoiceSpec(config: {
|
|
|
45
41
|
|
|
46
42
|
/**
|
|
47
43
|
* Resolve the effective voice quality profile from config.
|
|
48
|
-
*
|
|
49
|
-
*
|
|
44
|
+
*
|
|
45
|
+
* Always uses ElevenLabs TTS via Twilio ConversationRelay.
|
|
46
|
+
* The voice ID comes from the shared `elevenlabs.voiceId` config
|
|
47
|
+
* (defaults to Rachel — 21m00Tcm4TlvDq8ikWAM).
|
|
50
48
|
*/
|
|
51
49
|
export function resolveVoiceQualityProfile(config?: ReturnType<typeof loadConfig>): VoiceQualityProfile {
|
|
52
50
|
const cfg = config ?? loadConfig();
|
|
53
51
|
const voice = cfg.calls.voice;
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
// Default/standard profile
|
|
57
|
-
const standardProfile: VoiceQualityProfile = {
|
|
58
|
-
mode: 'twilio_standard',
|
|
52
|
+
return {
|
|
59
53
|
language: voice.language,
|
|
60
54
|
transcriptionProvider: voice.transcriptionProvider,
|
|
61
|
-
ttsProvider: '
|
|
62
|
-
voice:
|
|
63
|
-
fallbackToStandardOnError: voice.fallbackToStandardOnError,
|
|
64
|
-
validationErrors: [],
|
|
55
|
+
ttsProvider: 'ElevenLabs',
|
|
56
|
+
voice: buildElevenLabsVoiceSpec(cfg.elevenlabs),
|
|
65
57
|
};
|
|
66
|
-
|
|
67
|
-
if (voice.mode === 'twilio_standard') {
|
|
68
|
-
return standardProfile;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
if (voice.mode === 'twilio_elevenlabs_tts') {
|
|
72
|
-
if (!voice.elevenlabs.voiceId && !voice.fallbackToStandardOnError) {
|
|
73
|
-
errors.push('calls.voice.elevenlabs.voiceId is required for twilio_elevenlabs_tts mode when fallback is disabled');
|
|
74
|
-
}
|
|
75
|
-
if (!voice.elevenlabs.voiceId && voice.fallbackToStandardOnError) {
|
|
76
|
-
return { ...standardProfile, validationErrors: ['calls.voice.elevenlabs.voiceId is empty; falling back to twilio_standard'] };
|
|
77
|
-
}
|
|
78
|
-
return {
|
|
79
|
-
mode: 'twilio_elevenlabs_tts',
|
|
80
|
-
language: voice.language,
|
|
81
|
-
transcriptionProvider: voice.transcriptionProvider,
|
|
82
|
-
ttsProvider: 'ElevenLabs',
|
|
83
|
-
voice: buildElevenLabsVoiceSpec(voice.elevenlabs),
|
|
84
|
-
fallbackToStandardOnError: voice.fallbackToStandardOnError,
|
|
85
|
-
validationErrors: errors,
|
|
86
|
-
};
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
if (voice.mode === 'elevenlabs_agent') {
|
|
90
|
-
if (!voice.elevenlabs.agentId && !voice.fallbackToStandardOnError) {
|
|
91
|
-
errors.push('calls.voice.elevenlabs.agentId is required for elevenlabs_agent mode when fallback is disabled');
|
|
92
|
-
}
|
|
93
|
-
if (!voice.elevenlabs.agentId && voice.fallbackToStandardOnError) {
|
|
94
|
-
return { ...standardProfile, validationErrors: ['calls.voice.elevenlabs.agentId is empty; falling back to twilio_standard'] };
|
|
95
|
-
}
|
|
96
|
-
return {
|
|
97
|
-
mode: 'elevenlabs_agent',
|
|
98
|
-
language: voice.language,
|
|
99
|
-
transcriptionProvider: voice.transcriptionProvider,
|
|
100
|
-
ttsProvider: 'ElevenLabs',
|
|
101
|
-
voice: buildElevenLabsVoiceSpec(voice.elevenlabs),
|
|
102
|
-
agentId: voice.elevenlabs.agentId,
|
|
103
|
-
fallbackToStandardOnError: voice.fallbackToStandardOnError,
|
|
104
|
-
validationErrors: errors,
|
|
105
|
-
};
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
return standardProfile;
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
/** Returns false when the profile has any validation errors. */
|
|
112
|
-
export function isVoiceProfileValid(profile: VoiceQualityProfile): boolean {
|
|
113
|
-
return profile.validationErrors.length === 0;
|
|
114
58
|
}
|