@vellumai/assistant 0.4.23 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/bun.lock +3 -0
  2. package/package.json +2 -1
  3. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +0 -15
  4. package/src/__tests__/assistant-events-sse-hardening.test.ts +9 -3
  5. package/src/__tests__/call-controller.test.ts +80 -0
  6. package/src/__tests__/config-schema.test.ts +38 -178
  7. package/src/__tests__/conversation-routes-guardian-reply.test.ts +4 -1
  8. package/src/__tests__/credential-security-invariants.test.ts +0 -2
  9. package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +2 -2
  10. package/src/__tests__/ipc-snapshot.test.ts +0 -9
  11. package/src/__tests__/onboarding-template-contract.test.ts +10 -20
  12. package/src/__tests__/relay-server.test.ts +3 -3
  13. package/src/__tests__/runtime-events-sse-parity.test.ts +10 -0
  14. package/src/__tests__/runtime-events-sse.test.ts +7 -0
  15. package/src/__tests__/session-runtime-assembly.test.ts +34 -8
  16. package/src/__tests__/system-prompt.test.ts +7 -1
  17. package/src/__tests__/trusted-contact-approval-notifier.test.ts +12 -8
  18. package/src/__tests__/twilio-routes-twiml.test.ts +2 -2
  19. package/src/__tests__/twilio-routes.test.ts +2 -3
  20. package/src/__tests__/voice-quality.test.ts +21 -132
  21. package/src/calls/call-controller.ts +34 -29
  22. package/src/calls/relay-server.ts +11 -5
  23. package/src/calls/twilio-routes.ts +4 -38
  24. package/src/calls/voice-quality.ts +7 -63
  25. package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +7 -10
  26. package/src/config/bundled-skills/messaging/SKILL.md +3 -5
  27. package/src/config/bundled-skills/phone-calls/SKILL.md +144 -83
  28. package/src/config/bundled-skills/sms-setup/SKILL.md +0 -20
  29. package/src/config/bundled-skills/twilio-setup/SKILL.md +9 -17
  30. package/src/config/bundled-skills/voice-setup/SKILL.md +36 -1
  31. package/src/config/bundled-skills/voice-setup/icon.svg +20 -0
  32. package/src/config/calls-schema.ts +3 -53
  33. package/src/config/elevenlabs-schema.ts +33 -0
  34. package/src/config/schema.ts +183 -137
  35. package/src/config/types.ts +0 -1
  36. package/src/daemon/handlers/browser.ts +1 -6
  37. package/src/daemon/ipc-contract/browser.ts +5 -14
  38. package/src/daemon/ipc-contract-inventory.json +0 -2
  39. package/src/daemon/session-agent-loop-handlers.ts +3 -0
  40. package/src/daemon/session-runtime-assembly.ts +9 -7
  41. package/src/mcp/client.ts +2 -1
  42. package/src/memory/conversation-crud.ts +339 -166
  43. package/src/runtime/auth/middleware.ts +87 -26
  44. package/src/runtime/routes/events-routes.ts +7 -0
  45. package/src/runtime/routes/inbound-message-handler.ts +3 -4
  46. package/src/schedule/scheduler.ts +159 -45
  47. package/src/security/secure-keys.ts +3 -3
  48. package/src/tools/browser/browser-manager.ts +72 -228
  49. package/src/tools/browser/browser-screencast.ts +0 -5
  50. package/src/tools/network/script-proxy/certs.ts +7 -237
  51. package/src/tools/network/script-proxy/connect-tunnel.ts +1 -82
  52. package/src/tools/network/script-proxy/http-forwarder.ts +2 -151
  53. package/src/tools/network/script-proxy/logging.ts +12 -196
  54. package/src/tools/network/script-proxy/mitm-handler.ts +2 -270
  55. package/src/tools/network/script-proxy/policy.ts +4 -152
  56. package/src/tools/network/script-proxy/router.ts +2 -60
  57. package/src/tools/network/script-proxy/server.ts +5 -137
  58. package/src/tools/network/script-proxy/types.ts +19 -125
  59. package/src/tools/system/voice-config.ts +23 -1
  60. package/src/util/logger.ts +4 -1
  61. package/src/__tests__/elevenlabs-config.test.ts +0 -95
  62. package/src/__tests__/twilio-routes-elevenlabs.test.ts +0 -407
  63. package/src/calls/elevenlabs-config.ts +0 -32
@@ -29,7 +29,7 @@ import {
29
29
  } from './call-store.js';
30
30
  import { getTwilioConfig } from './twilio-config.js';
31
31
  import type { CallStatus } from './types.js';
32
- import { isVoiceProfileValid,resolveVoiceQualityProfile } from './voice-quality.js';
32
+ import { resolveVoiceQualityProfile } from './voice-quality.js';
33
33
 
34
34
  const log = getLogger('twilio-routes');
35
35
 
@@ -144,7 +144,7 @@ function mapTwilioStatus(twilioStatus: string): CallStatus | null {
144
144
  * Receives the initial voice webhook when Twilio connects the call.
145
145
  * Returns TwiML XML that tells Twilio to open a ConversationRelay WebSocket.
146
146
  *
147
- * Supports two modes:
147
+ * Supports two flows:
148
148
  * - **Outbound** (callSessionId present in query): uses the existing session
149
149
  * - **Inbound** (callSessionId absent): creates or reuses a session keyed
150
150
  * by the Twilio CallSid. Uses daemon internal scope for assistant identity.
@@ -214,43 +214,9 @@ function buildVoiceWebhookTwiml(
214
214
  task: string | null,
215
215
  guardianVerificationSessionId?: string | null,
216
216
  ): Response {
217
- let profile = resolveVoiceQualityProfile(loadConfig());
217
+ const profile = resolveVoiceQualityProfile(loadConfig());
218
218
 
219
- log.info({ callSessionId, mode: profile.mode, ttsProvider: profile.ttsProvider, voice: profile.voice }, 'Voice quality profile resolved');
220
-
221
- if (profile.validationErrors.length > 0) {
222
- log.warn({ callSessionId, errors: profile.validationErrors }, 'Voice quality profile has validation warnings');
223
- }
224
-
225
- // WS-A: Enforce strict fallback semantics — reject invalid profiles when fallback is disabled
226
- if (!isVoiceProfileValid(profile)) {
227
- if (!profile.fallbackToStandardOnError) {
228
- const errorMsg = `Voice quality configuration error: ${profile.validationErrors.join('; ')}`;
229
- log.error({ callSessionId, errors: profile.validationErrors }, errorMsg);
230
- return new Response(errorMsg, { status: 500 });
231
- }
232
- // Fallback is enabled — profile already resolved to standard; log explicitly
233
- log.info({ callSessionId }, 'Profile invalid with fallback enabled; proceeding with standard mode');
234
- }
235
-
236
- // WS-B: Guard elevenlabs_agent until consultation bridge exists.
237
- // This fires BEFORE any ElevenLabs API calls, blocking the entire mode.
238
- if (profile.mode === 'elevenlabs_agent') {
239
- if (!profile.fallbackToStandardOnError) {
240
- const msg = 'elevenlabs_agent mode is restricted: consultation bridging (waiting_on_user) is not yet supported. Set calls.voice.fallbackToStandardOnError=true to fall back to standard mode.';
241
- log.error({ callSessionId }, msg);
242
- return new Response(msg, { status: 501 });
243
- }
244
- log.warn({ callSessionId }, 'elevenlabs_agent mode is restricted/experimental — consultation bridging is not yet supported; falling back to standard ConversationRelay TwiML');
245
- const standardConfig = loadConfig();
246
- profile = resolveVoiceQualityProfile({
247
- ...standardConfig,
248
- calls: {
249
- ...standardConfig.calls,
250
- voice: { ...standardConfig.calls.voice, mode: 'twilio_standard' },
251
- },
252
- });
253
- }
219
+ log.info({ callSessionId, ttsProvider: profile.ttsProvider, voice: profile.voice }, 'Voice quality profile resolved');
254
220
 
255
221
  const twilioConfig = getTwilioConfig();
256
222
  let relayUrl: string;
@@ -1,14 +1,10 @@
1
1
  import { loadConfig } from '../config/loader.js';
2
2
 
3
3
  export interface VoiceQualityProfile {
4
- mode: 'twilio_standard' | 'twilio_elevenlabs_tts' | 'elevenlabs_agent';
5
4
  language: string;
6
5
  transcriptionProvider: string;
7
6
  ttsProvider: string;
8
7
  voice: string;
9
- agentId?: string;
10
- fallbackToStandardOnError: boolean;
11
- validationErrors: string[];
12
8
  }
13
9
 
14
10
  /**
@@ -45,70 +41,18 @@ export function buildElevenLabsVoiceSpec(config: {
45
41
 
46
42
  /**
47
43
  * Resolve the effective voice quality profile from config.
48
- * Returns a profile with all resolved values ready for use by TwiML generation
49
- * and call orchestration.
44
+ *
45
+ * Always uses ElevenLabs TTS via Twilio ConversationRelay.
46
+ * The voice ID comes from the shared `elevenlabs.voiceId` config
47
+ * (defaults to Rachel — 21m00Tcm4TlvDq8ikWAM).
50
48
  */
51
49
  export function resolveVoiceQualityProfile(config?: ReturnType<typeof loadConfig>): VoiceQualityProfile {
52
50
  const cfg = config ?? loadConfig();
53
51
  const voice = cfg.calls.voice;
54
- const errors: string[] = [];
55
-
56
- // Default/standard profile
57
- const standardProfile: VoiceQualityProfile = {
58
- mode: 'twilio_standard',
52
+ return {
59
53
  language: voice.language,
60
54
  transcriptionProvider: voice.transcriptionProvider,
61
- ttsProvider: 'Google',
62
- voice: 'Google.en-US-Journey-O',
63
- fallbackToStandardOnError: voice.fallbackToStandardOnError,
64
- validationErrors: [],
55
+ ttsProvider: 'ElevenLabs',
56
+ voice: buildElevenLabsVoiceSpec(cfg.elevenlabs),
65
57
  };
66
-
67
- if (voice.mode === 'twilio_standard') {
68
- return standardProfile;
69
- }
70
-
71
- if (voice.mode === 'twilio_elevenlabs_tts') {
72
- if (!voice.elevenlabs.voiceId && !voice.fallbackToStandardOnError) {
73
- errors.push('calls.voice.elevenlabs.voiceId is required for twilio_elevenlabs_tts mode when fallback is disabled');
74
- }
75
- if (!voice.elevenlabs.voiceId && voice.fallbackToStandardOnError) {
76
- return { ...standardProfile, validationErrors: ['calls.voice.elevenlabs.voiceId is empty; falling back to twilio_standard'] };
77
- }
78
- return {
79
- mode: 'twilio_elevenlabs_tts',
80
- language: voice.language,
81
- transcriptionProvider: voice.transcriptionProvider,
82
- ttsProvider: 'ElevenLabs',
83
- voice: buildElevenLabsVoiceSpec(voice.elevenlabs),
84
- fallbackToStandardOnError: voice.fallbackToStandardOnError,
85
- validationErrors: errors,
86
- };
87
- }
88
-
89
- if (voice.mode === 'elevenlabs_agent') {
90
- if (!voice.elevenlabs.agentId && !voice.fallbackToStandardOnError) {
91
- errors.push('calls.voice.elevenlabs.agentId is required for elevenlabs_agent mode when fallback is disabled');
92
- }
93
- if (!voice.elevenlabs.agentId && voice.fallbackToStandardOnError) {
94
- return { ...standardProfile, validationErrors: ['calls.voice.elevenlabs.agentId is empty; falling back to twilio_standard'] };
95
- }
96
- return {
97
- mode: 'elevenlabs_agent',
98
- language: voice.language,
99
- transcriptionProvider: voice.transcriptionProvider,
100
- ttsProvider: 'ElevenLabs',
101
- voice: buildElevenLabsVoiceSpec(voice.elevenlabs),
102
- agentId: voice.elevenlabs.agentId,
103
- fallbackToStandardOnError: voice.fallbackToStandardOnError,
104
- validationErrors: errors,
105
- };
106
- }
107
-
108
- return standardProfile;
109
- }
110
-
111
- /** Returns false when the profile has any validation errors. */
112
- export function isVoiceProfileValid(profile: VoiceQualityProfile): boolean {
113
- return profile.validationErrors.length === 0;
114
58
  }
@@ -1,11 +1,11 @@
1
1
  ---
2
2
  name: "Guardian Verify Setup"
3
- description: "Set up guardian verification for SMS, voice, or Telegram channels via outbound verification flow"
3
+ description: "Set up guardian verification for voice or Telegram channels via outbound verification flow"
4
4
  user-invocable: true
5
5
  metadata: {"vellum": {"emoji": "\ud83d\udd10"}}
6
6
  ---
7
7
 
8
- You are helping your user set up guardian verification for a messaging channel (SMS, voice, or Telegram). This links their identity as the trusted guardian for the chosen channel. All API calls go through the gateway HTTP API using `curl` with bearer auth.
8
+ You are helping your user set up guardian verification for a messaging channel (voice or Telegram). This links their identity as the trusted guardian for the chosen channel. All API calls go through the gateway HTTP API using `curl` with bearer auth.
9
9
 
10
10
  ## Prerequisites
11
11
 
@@ -19,17 +19,16 @@ You are helping your user set up guardian verification for a messaging channel (
19
19
 
20
20
  Ask the user which channel they want to verify:
21
21
 
22
- - **sms** -- verify a phone number for SMS messaging
23
22
  - **voice** -- verify a phone number for voice calls
24
23
  - **telegram** -- verify a Telegram account
25
24
 
26
- If the user's intent already specifies a channel (e.g. "verify my phone number for SMS"), skip the prompt and proceed.
25
+ If the user's intent already specifies a channel (e.g. "verify my phone number for voice calls"), skip the prompt and proceed.
27
26
 
28
27
  ## Step 2: Collect Destination
29
28
 
30
29
  Based on the chosen channel, ask for the required destination:
31
30
 
32
- - **SMS or voice**: Ask for their phone number. Accept any common format (e.g. +15551234567, (555) 123-4567, 555-123-4567). The API normalizes it to E.164.
31
+ - **Voice**: Ask for their phone number. Accept any common format (e.g. +15551234567, (555) 123-4567, 555-123-4567). The API normalizes it to E.164.
33
32
  - **Telegram**: Ask for their Telegram chat ID (numeric) or @handle. Explain:
34
33
  - If they know their numeric chat ID, provide it directly. The bot will send the code to that chat.
35
34
  - If they only know their @handle, the flow uses a bootstrap deep-link that they must click first.
@@ -45,13 +44,12 @@ curl -s -X POST "$INTERNAL_GATEWAY_BASE_URL/v1/integrations/guardian/outbound/st
45
44
  -d '{"channel": "<channel>", "destination": "<destination>"}'
46
45
  ```
47
46
 
48
- Replace `<channel>` with `sms`, `voice`, or `telegram`, and `<destination>` with the phone number or Telegram destination.
47
+ Replace `<channel>` with `voice` or `telegram`, and `<destination>` with the phone number or Telegram destination.
49
48
 
50
49
  ### On success (`success: true`)
51
50
 
52
51
  Report the exact next action based on the channel:
53
52
 
54
- - **SMS**: "I've sent a 6-digit verification code to [number]. Reply with the code from that SMS conversation (not here) to complete verification — the code can only be consumed through the SMS channel."
55
53
  - **Voice**: The response includes a `secret` field with the verification code. Tell the user the code BEFORE the call connects: "I'm calling [number] now. Your verification code is [secret]. When you answer the call, enter this code using your phone's keypad." The `/outbound/start` API call already initiates the voice call. Do NOT place a separate `call_start` call. **After delivering the code, immediately begin the voice auto-check polling loop** (see [Voice Auto-Check Polling](#voice-auto-check-polling) below).
56
54
  - **Telegram with chat ID** (no `telegramBootstrapUrl` in response): The response includes a `secret` field. Show it in the current chat: "Your verification code is **[secret]**. I've also sent it to your Telegram. Open the Telegram bot chat and reply with that 6-digit code to complete verification." If the response does not contain a `secret` field, treat this as a control-plane error: tell the user something went wrong and ask them to retry from Step 3 or resend (Step 4).
57
55
  - **Telegram with handle** (`telegramBootstrapUrl` present in response): "Tap this deep-link first: [telegramBootstrapUrl]. After Telegram binds your identity, I'll send your verification code."
@@ -68,7 +66,7 @@ Handle each error code:
68
66
  | `invalid_destination` | Tell the user the format is invalid. For phone: suggest E.164 format (+15551234567). For Telegram: explain that group chat IDs (negative numbers) are not supported. |
69
67
  | `already_bound` | Tell the user a guardian is already bound for this channel. Ask if they want to replace it. If yes, re-run the start request with `"rebind": true` added to the JSON body. |
70
68
  | `rate_limited` | Tell the user they have sent too many verification attempts to this destination. Ask them to wait and try again later. |
71
- | `unsupported_channel` | Tell the user the channel is not supported. Only sms, voice, and telegram are valid. |
69
+ | `unsupported_channel` | Tell the user the channel is not supported. Only voice and telegram are valid. |
72
70
  | `no_bot_username` | Telegram bot is not configured. Load and run the `telegram-setup` skill first. |
73
71
 
74
72
  ## Step 4: Handle Resend
@@ -84,7 +82,6 @@ curl -s -X POST "$INTERNAL_GATEWAY_BASE_URL/v1/integrations/guardian/outbound/re
84
82
 
85
83
  On success, report the next action based on the channel:
86
84
 
87
- - **SMS**: "I've sent a new verification code to [number]. Reply with the code from that SMS conversation to complete verification."
88
85
  - **Voice**: The resend response includes a fresh `secret` field with a new verification code. Tell the user the new code BEFORE the call connects — just like the initial start flow: "I'm calling [number] again. Your new verification code is [secret]. When you answer the call, enter this code using your phone's keypad." The `/outbound/resend` API call already initiates the voice call. Do NOT place a separate `call_start` call. **After delivering the code, immediately begin the voice auto-check polling loop** (see [Voice Auto-Check Polling](#voice-auto-check-polling) below).
89
86
  - **Telegram**: The resend response includes a fresh `secret` field. Show the new code in the current chat: "Your new verification code is **[secret]**. I've also sent it to your Telegram. Open the Telegram bot chat and reply with that 6-digit code to complete verification." If the response does not contain a `secret` field, treat this as a control-plane error: tell the user something went wrong and ask them to retry from Step 3.
90
87
 
@@ -140,7 +137,7 @@ When in a **rebind flow** (i.e., the `start_outbound` request included `"rebind"
140
137
  - Non-rebind flows (fresh verification with no prior binding) are unaffected — the first `bound: true` is trustworthy.
141
138
 
142
139
  **Important polling rules:**
143
- - This polling loop is voice-only. Do NOT poll for SMS or Telegram channels (SMS codes are entered through the SMS channel itself; Telegram has its own bot-driven flow).
140
+ - This polling loop is voice-only. Do NOT poll for Telegram channels (Telegram has its own bot-driven flow).
144
141
  - Do NOT require the user to ask "did it work?" — the whole point is proactive confirmation.
145
142
  - If the user sends a message while polling is in progress, handle their message normally. If their message is about verification status, the next poll iteration will provide the answer.
146
143
 
@@ -84,15 +84,13 @@ SMS messaging uses Twilio as the telephony provider. Twilio credentials and phon
84
84
 
85
85
  The sms-setup skill handles: Twilio credential storage (Account SID + Auth Token), phone number provisioning or assignment, public ingress setup, SMS compliance verification, and end-to-end test sending. Once SMS is set up, messaging is available automatically — no additional feature flag is needed.
86
86
 
87
- The sms-setup skill also includes optional **guardian verification** for SMS, which links your phone number as the trusted guardian.
87
+ ### Guardian Verification (Voice or Telegram)
88
88
 
89
- ### Guardian Verification (SMS, Voice, or Telegram)
90
-
91
- If the user asks to verify their guardian identity for any channel (SMS, voice, or Telegram), load the **guardian-verify-setup** skill:
89
+ If the user asks to verify their guardian identity for voice or Telegram, load the **guardian-verify-setup** skill:
92
90
 
93
91
  - Call `skill_load` with `skill: "guardian-verify-setup"` to load the dependency skill.
94
92
 
95
- The guardian-verify-setup skill handles the full outbound verification flow for all supported channels. It collects the user's destination (phone number or Telegram chat ID/handle), initiates an outbound verification session, and guides the user through entering or replying with the verification code. This is the single source of truth for guardian verification setup -- do not duplicate the verification flow inline.
93
+ The guardian-verify-setup skill handles the full outbound verification flow for voice and Telegram channels. It collects the user's destination (phone number or Telegram chat ID/handle), initiates an outbound verification session, and guides the user through entering or replying with the verification code. This is the single source of truth for guardian verification setup -- do not duplicate the verification flow inline.
96
94
 
97
95
  ## Error Recovery
98
96
 
@@ -11,7 +11,7 @@ You are helping the user set up and manage phone calls via Twilio. This skill co
11
11
 
12
12
  ## Overview
13
13
 
14
- The calling system uses Twilio's ConversationRelay for both **outbound** and **inbound** voice calls. Twilio works out of the box as the default voice provider. Optionally, you can enable ElevenLabs integration for higher-quality, more natural-sounding voices but this is entirely optional.
14
+ The calling system uses Twilio's ConversationRelay for both **outbound** and **inbound** voice calls with **ElevenLabs** providing the text-to-speech voice. After Twilio setup, the assistant configures ElevenLabs as the TTS provider and prompts the user to choose a voice from a curated list of supported options.
15
15
 
16
16
  ### Outbound calls
17
17
 
@@ -34,14 +34,6 @@ When someone dials the assistant's Twilio phone number:
34
34
  5. Once verified (or if no challenge is pending), the LLM orchestrator greets the caller in a receptionist style: "Hello, this is [user]'s assistant. How can I help you today?"
35
35
  6. The assistant converses naturally, using ASK_GUARDIAN to consult the user when needed, just like outbound calls.
36
36
 
37
- Three voice quality modes are available:
38
-
39
- - **`twilio_standard`** (default) — Fully supported. Standard Twilio TTS with Google voices. No extra setup required.
40
- - **`twilio_elevenlabs_tts`** — Fully supported. Uses ElevenLabs voices through Twilio ConversationRelay for more natural speech.
41
- - **`elevenlabs_agent`** — **Experimental/restricted.** Full ElevenLabs conversational agent mode. Consultation bridging (`waiting_on_user`) is not yet supported in this mode; the runtime guard blocks it before any ElevenLabs API calls are made. See the "Runtime behavior" section below for fallback and strict-fail details.
42
-
43
- You can keep using Twilio only — no changes needed. Enabling ElevenLabs can improve naturalness and quality.
44
-
45
37
  The user's assistant gets its own personal phone number through Twilio. All implicit calls (without an explicit mode) always use this assistant number. Optionally, users can call from their own phone number if it's authorized with the Twilio account — this must be explicitly requested per call via `caller_identity_mode="user_number"`.
46
38
 
47
39
  ## Step 1: Verify Twilio Setup
@@ -79,18 +71,105 @@ Verify:
79
71
  vellum config get calls.enabled
80
72
  ```
81
73
 
82
- ## Step 3: Verify Setup (Test Call)
74
+ ## Step 3: Choose a Voice
75
+
76
+ After enabling calls, let the user choose an ElevenLabs voice. Twilio has a native ElevenLabs integration — no separate ElevenLabs account or API key is needed.
77
+
78
+ ### Voice consistency with in-app TTS
79
+
80
+ The shared config key `elevenlabs.voiceId` is the single source of truth for ElevenLabs voice identity. Both in-app TTS and phone calls read from it (defaulting to **Rachel** — `21m00Tcm4TlvDq8ikWAM`).
81
+
82
+ Before presenting the voice list, check the current shared voice:
83
+
84
+ ```bash
85
+ vellum config get elevenlabs.voiceId
86
+ ```
87
+
88
+ **If a non-default voice is already set**, the user chose it during voice-setup or a previous session. Tell them:
89
+
90
+ > "Your assistant currently uses [voice name] for both in-app chat and phone calls. I'll keep the same voice for calls. You can change it if you'd like."
91
+
92
+ Skip the selection prompt unless the user wants to change.
93
+
94
+ **If the default (Rachel) is set or no override exists**, present the curated voice list below and let them pick. When they choose, set the shared config with `voice_config_update` (shown after the voice tables) so both in-app TTS and phone calls use it.
95
+
96
+ ### Voice selection
97
+
98
+ Present the user with a list of supported ElevenLabs voices. These are pre-made voices with stable IDs that work with Twilio ConversationRelay out of the box.
99
+
100
+ **Ask the user: "Which voice would you like your assistant to use on phone calls?"**
101
+
102
+ Present these voices grouped by category:
103
+
104
+ #### Female voices
105
+
106
+ | Voice | Style | Voice ID |
107
+ | --------- | ------------------------------ | ------------------------------ |
108
+ | Rachel | Calm, warm, conversational | `21m00Tcm4TlvDq8ikWAM` |
109
+ | Sarah | Soft, young, approachable | `EXAVITQu4vr4xnSDxMaL` |
110
+ | Charlotte | Warm, Swedish-accented | `XB0fDUnXU5powFXDhCwa` |
111
+ | Alice | Confident, British | `Xb7hH8MSUJpSbSDYk0k2` |
112
+ | Matilda | Warm, friendly, young | `XrExE9yKIg1WjnnlVkGX` |
113
+ | Lily | Warm, British | `pFZP5JQG7iQjIQuC4Bku` |
114
+
115
+ #### Male voices
116
+
117
+ | Voice | Style | Voice ID |
118
+ | ------- | -------------------------------- | ------------------------------ |
119
+ | Antoni | Warm, well-rounded | `ErXwobaYiN019PkySvjV` |
120
+ | Josh | Deep, young, clear | `TxGEqnHWrfWFTfGW9XjX` |
121
+ | Arnold | Crisp, narrative | `VR6AewLTigWG4xSOukaG` |
122
+ | Adam | Deep, middle-aged, professional | `pNInz6obpgDQGcFmaJgB` |
123
+ | Bill | Trustworthy, American | `pqHfZKP75CvOlQylNhV4` |
124
+ | George | Warm, British, distinguished | `JBFqnCBsd6RMkjVDRZzb` |
125
+ | Daniel | Authoritative, British | `onwK4e9ZLuTAKqWW03F9` |
126
+ | Charlie | Casual, Australian | `IKne3meq5aSn9XLyUdCD` |
127
+ | Liam | Young, articulate | `TX3LPaxmHKxFdv7VOQHJ` |
128
+
129
+ After the user picks a voice, use `voice_config_update` to set the shared voice ID. This writes to the config file (`elevenlabs.voiceId`) for phone calls **and** pushes to the macOS app via IPC (`ttsVoiceId`) for in-app TTS in one call:
130
+
131
+ ```
132
+ voice_config_update setting="tts_voice_id" value="<selected-voice-id>"
133
+ ```
134
+
135
+ **If the user wants a voice not on this list**, they can browse more voices at https://elevenlabs.io/voice-library and provide the voice ID manually.
136
+
137
+ ## Step 4: Verify Setup (Test Call)
83
138
 
84
139
  Before making real calls, offer a quick verification:
85
140
 
86
141
  1. Confirm credentials are stored: check the Twilio config endpoint for `hasCredentials: true` and `phoneNumber`
87
142
  2. Confirm ingress is running: `ingress.publicBaseUrl` must be set and the tunnel active
88
143
  3. Confirm calls are enabled: `calls.enabled` must be `true`
144
+ 4. Confirm voice is configured: `elevenlabs.voiceId` should be set
89
145
 
90
- Suggest a test call to the user's own phone: **"Want to do a quick test call to your phone to make sure everything works?"**
146
+ Suggest a test call to the user's own phone: **"Want to do a quick test call to your phone to make sure everything works? This is a good way to hear how your chosen voice sounds."**
91
147
 
92
148
  If they agree, ask for their personal phone number and place a test call with a simple task like "Introduce yourself and confirm the call system is working."
93
149
 
150
+ ## Step 5: Verify Guardian Identity (Voice)
151
+
152
+ Now link the user's phone number as the trusted voice guardian. Tell the user: "Now let's verify your guardian identity for voice. This links your phone number so the assistant can verify inbound callers."
153
+
154
+ Load the **guardian-verify-setup** skill to handle the verification flow:
155
+
156
+ - Call `skill_load` with `skill: "guardian-verify-setup"` to load the dependency skill.
157
+
158
+ When invoking the skill, indicate the channel is `voice`. The guardian-verify-setup skill manages the full outbound verification flow, including:
159
+
160
+ - Collecting the user's phone number as the destination
161
+ - Starting the outbound verification session via the gateway endpoint `POST /v1/integrations/guardian/outbound/start` with `channel: "voice"`
162
+ - Calling the phone number and providing a code for the user to enter via their phone's keypad
163
+ - Proactively polling for completion (voice auto-check) so the user gets instant confirmation
164
+ - Checking guardian status to confirm the binding was created
165
+ - Handling resend, cancel, and error cases
166
+
167
+ Tell the user: _"I've loaded the guardian verification guide. It will walk you through linking your phone number as the trusted voice guardian."_
168
+
169
+ After the guardian-verify-setup skill completes (or the user skips), continue to the next sections.
170
+
171
+ **Note:** Guardian verification is optional but recommended. If the user declines or wants to skip, proceed without blocking. Once verified, inbound callers can be prompted for voice verification before calls proceed (see the **Guardian voice verification for inbound calls** section below).
172
+
94
173
  ## Caller Identity
95
174
 
96
175
  All implicit calls (calls without an explicit `caller_identity_mode`) always use the assistant's Twilio phone number. This is the number that appears on the recipient's caller ID.
@@ -133,88 +212,83 @@ An optional verification step where the callee must enter a numeric code via the
133
212
  | `calls.verification.enabled` | Enable DTMF callee verification | `false` |
134
213
  | `calls.verification.codeLength` | Number of digits in the verification code | `6` |
135
214
 
136
- ## Optional: Higher Quality Voice with ElevenLabs
215
+ ## Advanced Voice Configuration
137
216
 
138
- ElevenLabs integration is entirely optional. The standard Twilio-only setup works unchanged — this section is only relevant if you want to improve voice quality.
217
+ ElevenLabs is the TTS provider for all calls. This section covers advanced voice selection and tuning.
139
218
 
140
- ### Mode: `twilio_elevenlabs_tts`
219
+ ### Changing the voice
141
220
 
142
- Uses ElevenLabs voices through Twilio's ConversationRelay. Speech is more natural-sounding than the default Google TTS voices.
221
+ To switch to a different voice after initial setup, use `voice_config_update` to set the shared voice ID. This writes to the config file (`elevenlabs.voiceId`) for phone calls **and** pushes to the macOS app via IPC for in-app TTS:
143
222
 
144
- **Recommended user-friendly workflow (no technical IDs required):**
145
-
146
- 1. Ask what kind of voice the user wants (examples: "warm", "professional", "playful", "calm", "deeper", "brighter")
147
- 2. If the user doesn't care, keep `twilio_standard` (simplest path)
148
- 3. If they want higher-quality voice, switch to `twilio_elevenlabs_tts` and choose a matching ElevenLabs voice on their behalf
223
+ ```
224
+ voice_config_update setting="tts_voice_id" value="<new-voice-id>"
225
+ ```
149
226
 
150
- The user should not need to know what a `voiceId` is unless they explicitly want advanced/manual control.
227
+ Browse more voices at https://elevenlabs.io/voice-library.
151
228
 
152
- **Manual/advanced setup (optional):**
229
+ ### Advanced voice selection with an ElevenLabs account
153
230
 
154
- ```bash
155
- vellum config set calls.voice.mode twilio_elevenlabs_tts
156
- vellum config set calls.voice.elevenlabs.voiceId "<your-voice-id>"
157
- ```
231
+ Users who have an ElevenLabs account and API key (e.g., from the **voice-setup** skill) can go beyond the curated voice list. With an API key, they can:
158
232
 
159
- By default, the system sends a **bare** `voiceId` to Twilio ConversationRelay (no model/tuning suffix). This is the safest default across voice IDs.
233
+ - **Browse the full ElevenLabs voice library programmatically** — the ElevenLabs API (`GET https://api.elevenlabs.io/v2/voices`) supports searching by name, category, language, and accent. This returns voice IDs, names, labels, and preview URLs.
234
+ - **Use custom or cloned voices** — if the user has created a custom voice or voice clone in their ElevenLabs account, they can use its voice ID here. These voices are available in Twilio ConversationRelay just like pre-made voices.
235
+ - **Preview voices before choosing** — each voice in the API response includes a `preview_url` with an audio sample.
160
236
 
161
- If you want to force Twilio's extended voice spec, you can optionally set a model ID:
237
+ To check if the user has an API key stored:
162
238
 
163
239
  ```bash
164
- vellum config set calls.voice.elevenlabs.voiceModelId "flash_v2_5"
240
+ credential_store action=get service=elevenlabs field=api_key
165
241
  ```
166
242
 
167
- When `voiceModelId` is set, the emitted voice string becomes:
168
- `voiceId-model-speed_stability_similarity`.
169
-
170
- ### Mode: `elevenlabs_agent` (experimental/restricted)
243
+ If they have a key and want to browse voices, fetch the voice list:
171
244
 
172
- Full ElevenLabs conversational agent mode. This requires an ElevenLabs account with an agent configured on their platform.
245
+ ```bash
246
+ curl -s "https://api.elevenlabs.io/v2/voices?category=premade&page_size=50" \
247
+ -H "xi-api-key: <api_key_from_credential_store>" | python3 -m json.tool
248
+ ```
173
249
 
174
- **Restriction:** This mode is currently restricted because consultation bridging (`waiting_on_user`) is not yet supported. A runtime guard in `handleVoiceWebhook` blocks `elevenlabs_agent` before any ElevenLabs API calls are made.
250
+ To search for a specific voice style:
175
251
 
176
- **Setup:**
252
+ ```bash
253
+ curl -s "https://api.elevenlabs.io/v2/voices?search=warm+female&page_size=10" \
254
+ -H "xi-api-key: <api_key_from_credential_store>" | python3 -m json.tool
255
+ ```
177
256
 
178
- 1. Store your ElevenLabs API key securely:
257
+ After the user picks a voice, set the shared voice ID:
179
258
 
180
259
  ```
181
- credential_store action=store service=elevenlabs field=api_key value=<your_api_key>
260
+ voice_config_update setting="tts_voice_id" value="<selected-voice-id>"
182
261
  ```
183
262
 
184
- 2. Set the voice mode and agent ID:
263
+ ### Voice tuning parameters
185
264
 
186
- ```bash
187
- vellum config set calls.voice.mode elevenlabs_agent
188
- vellum config set calls.voice.elevenlabs.agentId "<your-agent-id>"
189
- ```
265
+ Fine-tune how the selected voice sounds. These parameters apply to all ElevenLabs modes:
190
266
 
191
- ### Fallback behavior and `fallbackToStandardOnError`
267
+ ```bash
268
+ # Playback speed (0.7 = slower, 1.0 = normal, 1.2 = faster)
269
+ vellum config set elevenlabs.speed 1.0
192
270
 
193
- By default, `calls.voice.fallbackToStandardOnError` is `true`. This setting controls what happens when an ElevenLabs mode encounters errors or is restricted.
271
+ # Stability (0.0 = more expressive/variable, 1.0 = more consistent/monotone)
272
+ vellum config set elevenlabs.stability 0.5
194
273
 
195
- #### Invalid configuration (e.g., missing voiceId or agentId)
274
+ # Similarity boost (0.0 = more creative, 1.0 = closer to original voice)
275
+ vellum config set elevenlabs.similarityBoost 0.75
276
+ ```
196
277
 
197
- - **`true` (default):** The profile resolver silently falls back to `twilio_standard` mode and logs a warning. The call proceeds with standard Twilio TTS.
198
- - **`false`:** The voice webhook returns **HTTP 500** with the specific configuration error details (e.g., `"Voice quality configuration error: calls.voice.elevenlabs.voiceId is required..."`).
278
+ Lower stability makes the voice more expressive but less predictable — good for conversational calls. Higher stability is better for scripted/formal calls.
199
279
 
200
- #### `elevenlabs_agent` mode guard (consultation bridging unsupported)
280
+ ### Voice model tuning
201
281
 
202
- - **`true` (default):** The `elevenlabs_agent` mode is silently downgraded to standard ConversationRelay TwiML with a warning log. The call proceeds normally with standard Twilio TTS. No ElevenLabs API calls are made.
203
- - **`false`:** The voice webhook returns **HTTP 501** with the message: `"elevenlabs_agent mode is restricted: consultation bridging (waiting_on_user) is not yet supported."`. No ElevenLabs API calls are made.
282
+ By default, the system sends a **bare** `voiceId` to Twilio ConversationRelay (no model/tuning suffix). This is the safest default across voice IDs.
204
283
 
205
- You can disable fallback if you want strict ElevenLabs-only behavior:
284
+ If you want to force Twilio's extended voice spec, you can optionally set a model ID:
206
285
 
207
286
  ```bash
208
- vellum config set calls.voice.fallbackToStandardOnError false
287
+ vellum config set elevenlabs.voiceModelId "flash_v2_5"
209
288
  ```
210
289
 
211
- ### Reverting to standard Twilio
212
-
213
- To go back to the default voice at any time:
214
-
215
- ```bash
216
- vellum config set calls.voice.mode twilio_standard
217
- ```
290
+ When `voiceModelId` is set, the emitted voice string becomes:
291
+ `voiceId-model-speed_stability_similarity`.
218
292
 
219
293
  ## Making Outbound Calls
220
294
 
@@ -477,16 +551,13 @@ All call-related settings can be managed via `vellum config`:
477
551
  | `calls.model` | Override LLM model for call orchestration | _(uses default model)_ |
478
552
  | `calls.callerIdentity.allowPerCallOverride` | Allow per-call caller identity selection | `true` |
479
553
  | `calls.callerIdentity.userNumber` | E.164 phone number for user-number mode | _(empty)_ |
480
- | `calls.voice.mode` | Voice quality mode (`twilio_standard`, `twilio_elevenlabs_tts`, `elevenlabs_agent`) | `twilio_standard` |
481
554
  | `calls.voice.language` | Language code for TTS and transcription | `en-US` |
482
555
  | `calls.voice.transcriptionProvider` | Speech-to-text provider (`Deepgram`, `Google`) | `Deepgram` |
483
- | `calls.voice.fallbackToStandardOnError` | Auto-fallback to standard Twilio TTS on ElevenLabs errors | `true` |
484
- | `calls.voice.elevenlabs.voiceId` | Advanced/internal ElevenLabs voice identifier. Usually set by the assistant based on requested voice style | _(empty)_ |
485
- | `calls.voice.elevenlabs.voiceModelId` | Optional Twilio ConversationRelay model suffix. Leave empty to send bare `voiceId` | _(empty)_ |
486
- | `calls.voice.elevenlabs.agentId` | ElevenLabs agent ID (for `elevenlabs_agent` mode) | _(empty)_ |
487
- | `calls.voice.elevenlabs.speed` | Playback speed (`0.7` – `1.2`) | `1.0` |
488
- | `calls.voice.elevenlabs.stability` | Voice stability (`0.0` – `1.0`) | `0.5` |
489
- | `calls.voice.elevenlabs.similarityBoost` | Voice similarity boost (`0.0` – `1.0`) | `0.75` |
556
+ | `elevenlabs.voiceId` | ElevenLabs voice ID used by both in-app TTS and phone calls. Set during setup from the curated voice list. Defaults to Rachel | `21m00Tcm4TlvDq8ikWAM` |
557
+ | `elevenlabs.voiceModelId` | Optional Twilio ConversationRelay model suffix. Leave empty to send bare `voiceId` | _(empty)_ |
558
+ | `elevenlabs.speed` | Playback speed (`0.7` – `1.2`) | `1.0` |
559
+ | `elevenlabs.stability` | Voice stability (`0.0` – `1.0`) | `0.5` |
560
+ | `elevenlabs.similarityBoost` | Voice similarity boost (`0.0` – `1.0`) | `0.75` |
490
561
 
491
562
  ### Adjusting settings
492
563
 
@@ -558,27 +629,17 @@ Or re-run the public-ingress skill to auto-detect and save the new URL.
558
629
 
559
630
  ### Call drops after 30 seconds of silence
560
631
 
561
- The system has a 30-second silence timeout. If nobody speaks for 30 seconds, the agent will ask "Are you still there?" This is expected behavior.
632
+ The system has a 30-second silence timeout. If nobody speaks for 30 seconds during normal conversation, the agent will ask "Are you still there?" This is expected behavior. During guardian wait states (inbound access-request wait or in-call guardian consultation wait), this generic silence nudge is suppressed — the guardian-wait heartbeat messaging is used instead.
562
633
 
563
- ### Call quality didn't improve after enabling ElevenLabs
634
+ ### Call quality sounds off
564
635
 
565
- - Verify `calls.voice.mode` is set to `twilio_elevenlabs_tts` or `elevenlabs_agent` (not still `twilio_standard`)
636
+ - Verify `elevenlabs.voiceId` is set to a valid ElevenLabs voice ID
566
637
  - Ask for the desired voice style again and try a different voice selection
567
- - If configuring manually: check that `calls.voice.elevenlabs.voiceId` contains a valid ElevenLabs voice ID
568
- - If mode is `elevenlabs_agent`, ensure `calls.voice.elevenlabs.agentId` is also set
569
638
 
570
639
  ### Twilio says "application error" right after answer
571
640
 
572
641
  - This often means ConversationRelay rejected voice configuration after TwiML fetch
573
- - Keep `calls.voice.elevenlabs.voiceModelId` empty first (bare `voiceId` mode)
642
+ - Keep `elevenlabs.voiceModelId` empty first (bare `voiceId` mode)
574
643
  - If you set `voiceModelId`, try clearing it and retesting:
575
- `vellum config set calls.voice.elevenlabs.voiceModelId ""`
576
-
577
- ### ElevenLabs mode falls back to standard
578
-
579
- When `calls.voice.fallbackToStandardOnError` is `true` (the default), the system silently falls back to standard Twilio TTS if ElevenLabs encounters an error or restriction. Check:
644
+ `vellum config set elevenlabs.voiceModelId ""`
580
645
 
581
- - For `elevenlabs_agent` mode: this mode is currently restricted (consultation bridging not yet supported) and will always fall back to standard when fallback is enabled. If fallback is disabled, the voice webhook returns HTTP 501.
582
- - For `twilio_elevenlabs_tts` mode: verify `calls.voice.elevenlabs.voiceId` is set to a valid voice ID
583
- - For invalid configs (missing voiceId/agentId): if fallback is disabled, the voice webhook returns HTTP 500 with the config error
584
- - Review daemon logs for warning messages about fallback or guard activation
@@ -144,26 +144,6 @@ After deletion, return to Step 3b to collect information and resubmit. Warn the
144
144
 
145
145
  **On failure:** Report the exact error message and guide the user through resolution.
146
146
 
147
- ## Step 3.5: Guardian Verification (SMS)
148
-
149
- Now link the user's phone number as the trusted SMS guardian. Tell the user: "Now let's verify your guardian identity for SMS. This links your phone number as the trusted guardian for SMS messaging."
150
-
151
- Load the **guardian-verify-setup** skill to handle the verification flow:
152
-
153
- - Call `skill_load` with `skill: "guardian-verify-setup"` to load the dependency skill.
154
-
155
- When invoking the skill, indicate the channel is `sms`. The guardian-verify-setup skill manages the full outbound verification flow, including:
156
-
157
- - Collecting the user's phone number as the destination (accepts any common format -- the API normalizes to E.164)
158
- - Starting the outbound verification session via the gateway endpoint `POST /v1/integrations/guardian/outbound/start` with `channel: "sms"`
159
- - Sending a 6-digit code to the phone number that the user must reply with from the SMS channel
160
- - Checking guardian status to confirm the binding was created
161
- - Handling resend, cancel, and error cases
162
-
163
- Tell the user: _"I've loaded the guardian verification guide. It will walk you through linking your phone number as the trusted SMS guardian."_
164
-
165
- **Note:** Guardian verification is optional but recommended. If the user declines or wants to skip, proceed to Step 4 without blocking.
166
-
167
147
  ## Step 4: Test Send
168
148
 
169
149
  Run a test SMS to verify end-to-end delivery: