npm - @vellumai/assistant - Versions diffs - 0.4.23 → 0.4.26 - Mend

@vellumai/assistant 0.4.23 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/bun.lock +3 -0
package/package.json +2 -1
package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +0 -15
package/src/__tests__/assistant-events-sse-hardening.test.ts +9 -3
package/src/__tests__/call-controller.test.ts +80 -0
package/src/__tests__/config-schema.test.ts +38 -178
package/src/__tests__/conversation-routes-guardian-reply.test.ts +4 -1
package/src/__tests__/credential-security-invariants.test.ts +0 -2
package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +2 -2
package/src/__tests__/ipc-snapshot.test.ts +0 -9
package/src/__tests__/onboarding-template-contract.test.ts +10 -20
package/src/__tests__/relay-server.test.ts +3 -3
package/src/__tests__/runtime-events-sse-parity.test.ts +10 -0
package/src/__tests__/runtime-events-sse.test.ts +7 -0
package/src/__tests__/session-runtime-assembly.test.ts +34 -8
package/src/__tests__/system-prompt.test.ts +7 -1
package/src/__tests__/trusted-contact-approval-notifier.test.ts +12 -8
package/src/__tests__/twilio-routes-twiml.test.ts +2 -2
package/src/__tests__/twilio-routes.test.ts +2 -3
package/src/__tests__/voice-quality.test.ts +21 -132
package/src/calls/call-controller.ts +34 -29
package/src/calls/relay-server.ts +11 -5
package/src/calls/twilio-routes.ts +4 -38
package/src/calls/voice-quality.ts +7 -63
package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +7 -10
package/src/config/bundled-skills/messaging/SKILL.md +3 -5
package/src/config/bundled-skills/phone-calls/SKILL.md +144 -83
package/src/config/bundled-skills/sms-setup/SKILL.md +0 -20
package/src/config/bundled-skills/twilio-setup/SKILL.md +9 -17
package/src/config/bundled-skills/voice-setup/SKILL.md +36 -1
package/src/config/bundled-skills/voice-setup/icon.svg +20 -0
package/src/config/calls-schema.ts +3 -53
package/src/config/elevenlabs-schema.ts +33 -0
package/src/config/schema.ts +183 -137
package/src/config/types.ts +0 -1
package/src/daemon/handlers/browser.ts +1 -6
package/src/daemon/ipc-contract/browser.ts +5 -14
package/src/daemon/ipc-contract-inventory.json +0 -2
package/src/daemon/session-agent-loop-handlers.ts +3 -0
package/src/daemon/session-runtime-assembly.ts +9 -7
package/src/mcp/client.ts +2 -1
package/src/memory/conversation-crud.ts +339 -166
package/src/runtime/auth/middleware.ts +87 -26
package/src/runtime/routes/events-routes.ts +7 -0
package/src/runtime/routes/inbound-message-handler.ts +3 -4
package/src/schedule/scheduler.ts +159 -45
package/src/security/secure-keys.ts +3 -3
package/src/tools/browser/browser-manager.ts +72 -228
package/src/tools/browser/browser-screencast.ts +0 -5
package/src/tools/network/script-proxy/certs.ts +7 -237
package/src/tools/network/script-proxy/connect-tunnel.ts +1 -82
package/src/tools/network/script-proxy/http-forwarder.ts +2 -151
package/src/tools/network/script-proxy/logging.ts +12 -196
package/src/tools/network/script-proxy/mitm-handler.ts +2 -270
package/src/tools/network/script-proxy/policy.ts +4 -152
package/src/tools/network/script-proxy/router.ts +2 -60
package/src/tools/network/script-proxy/server.ts +5 -137
package/src/tools/network/script-proxy/types.ts +19 -125
package/src/tools/system/voice-config.ts +23 -1
package/src/util/logger.ts +4 -1
package/src/__tests__/elevenlabs-config.test.ts +0 -95
package/src/__tests__/twilio-routes-elevenlabs.test.ts +0 -407
package/src/calls/elevenlabs-config.ts +0 -32

package/src/calls/twilio-routes.ts CHANGED Viewed

@@ -29,7 +29,7 @@ import {
 } from './call-store.js';
 import { getTwilioConfig } from './twilio-config.js';
 import type { CallStatus } from './types.js';
-import { isVoiceProfileValid,resolveVoiceQualityProfile } from './voice-quality.js';
+import { resolveVoiceQualityProfile } from './voice-quality.js';
 const log = getLogger('twilio-routes');
@@ -144,7 +144,7 @@ function mapTwilioStatus(twilioStatus: string): CallStatus | null {
  * Receives the initial voice webhook when Twilio connects the call.
  * Returns TwiML XML that tells Twilio to open a ConversationRelay WebSocket.
  *
- * Supports two modes:
+ * Supports two flows:
  * - **Outbound** (callSessionId present in query): uses the existing session
  * - **Inbound** (callSessionId absent): creates or reuses a session keyed
  *   by the Twilio CallSid. Uses daemon internal scope for assistant identity.
@@ -214,43 +214,9 @@ function buildVoiceWebhookTwiml(
   task: string | null,
   guardianVerificationSessionId?: string | null,
 ): Response {
-  let profile = resolveVoiceQualityProfile(loadConfig());
+  const profile = resolveVoiceQualityProfile(loadConfig());
-  log.info({ callSessionId, mode: profile.mode, ttsProvider: profile.ttsProvider, voice: profile.voice }, 'Voice quality profile resolved');
-  if (profile.validationErrors.length > 0) {
-    log.warn({ callSessionId, errors: profile.validationErrors }, 'Voice quality profile has validation warnings');
-  }
-  // WS-A: Enforce strict fallback semantics — reject invalid profiles when fallback is disabled
-  if (!isVoiceProfileValid(profile)) {
-    if (!profile.fallbackToStandardOnError) {
-      const errorMsg = `Voice quality configuration error: ${profile.validationErrors.join('; ')}`;
-      log.error({ callSessionId, errors: profile.validationErrors }, errorMsg);
-      return new Response(errorMsg, { status: 500 });
-    }
-    // Fallback is enabled — profile already resolved to standard; log explicitly
-    log.info({ callSessionId }, 'Profile invalid with fallback enabled; proceeding with standard mode');
-  }
-  // WS-B: Guard elevenlabs_agent until consultation bridge exists.
-  // This fires BEFORE any ElevenLabs API calls, blocking the entire mode.
-  if (profile.mode === 'elevenlabs_agent') {
-    if (!profile.fallbackToStandardOnError) {
-      const msg = 'elevenlabs_agent mode is restricted: consultation bridging (waiting_on_user) is not yet supported. Set calls.voice.fallbackToStandardOnError=true to fall back to standard mode.';
-      log.error({ callSessionId }, msg);
-      return new Response(msg, { status: 501 });
-    }
-    log.warn({ callSessionId }, 'elevenlabs_agent mode is restricted/experimental — consultation bridging is not yet supported; falling back to standard ConversationRelay TwiML');
-    const standardConfig = loadConfig();
-    profile = resolveVoiceQualityProfile({
-      ...standardConfig,
-      calls: {
-        ...standardConfig.calls,
-        voice: { ...standardConfig.calls.voice, mode: 'twilio_standard' },
-      },
-    });
-  }
+  log.info({ callSessionId, ttsProvider: profile.ttsProvider, voice: profile.voice }, 'Voice quality profile resolved');
   const twilioConfig = getTwilioConfig();
   let relayUrl: string;

package/src/calls/voice-quality.ts CHANGED Viewed

@@ -1,14 +1,10 @@
 import { loadConfig } from '../config/loader.js';
 export interface VoiceQualityProfile {
-  mode: 'twilio_standard' | 'twilio_elevenlabs_tts' | 'elevenlabs_agent';
   language: string;
   transcriptionProvider: string;
   ttsProvider: string;
   voice: string;
-  agentId?: string;
-  fallbackToStandardOnError: boolean;
-  validationErrors: string[];
 }
 /**
@@ -45,70 +41,18 @@ export function buildElevenLabsVoiceSpec(config: {
 /**
  * Resolve the effective voice quality profile from config.
- * Returns a profile with all resolved values ready for use by TwiML generation
- * and call orchestration.
+ *
+ * Always uses ElevenLabs TTS via Twilio ConversationRelay.
+ * The voice ID comes from the shared `elevenlabs.voiceId` config
+ * (defaults to Rachel — 21m00Tcm4TlvDq8ikWAM).
  */
 export function resolveVoiceQualityProfile(config?: ReturnType<typeof loadConfig>): VoiceQualityProfile {
   const cfg = config ?? loadConfig();
   const voice = cfg.calls.voice;
-  const errors: string[] = [];
-  // Default/standard profile
-  const standardProfile: VoiceQualityProfile = {
-    mode: 'twilio_standard',
+  return {
     language: voice.language,
     transcriptionProvider: voice.transcriptionProvider,
-    ttsProvider: 'Google',
-    voice: 'Google.en-US-Journey-O',
-    fallbackToStandardOnError: voice.fallbackToStandardOnError,
-    validationErrors: [],
+    ttsProvider: 'ElevenLabs',
+    voice: buildElevenLabsVoiceSpec(cfg.elevenlabs),
   };
-  if (voice.mode === 'twilio_standard') {
-    return standardProfile;
-  }
-  if (voice.mode === 'twilio_elevenlabs_tts') {
-    if (!voice.elevenlabs.voiceId && !voice.fallbackToStandardOnError) {
-      errors.push('calls.voice.elevenlabs.voiceId is required for twilio_elevenlabs_tts mode when fallback is disabled');
-    }
-    if (!voice.elevenlabs.voiceId && voice.fallbackToStandardOnError) {
-      return { ...standardProfile, validationErrors: ['calls.voice.elevenlabs.voiceId is empty; falling back to twilio_standard'] };
-    }
-    return {
-      mode: 'twilio_elevenlabs_tts',
-      language: voice.language,
-      transcriptionProvider: voice.transcriptionProvider,
-      ttsProvider: 'ElevenLabs',
-      voice: buildElevenLabsVoiceSpec(voice.elevenlabs),
-      fallbackToStandardOnError: voice.fallbackToStandardOnError,
-      validationErrors: errors,
-    };
-  }
-  if (voice.mode === 'elevenlabs_agent') {
-    if (!voice.elevenlabs.agentId && !voice.fallbackToStandardOnError) {
-      errors.push('calls.voice.elevenlabs.agentId is required for elevenlabs_agent mode when fallback is disabled');
-    }
-    if (!voice.elevenlabs.agentId && voice.fallbackToStandardOnError) {
-      return { ...standardProfile, validationErrors: ['calls.voice.elevenlabs.agentId is empty; falling back to twilio_standard'] };
-    }
-    return {
-      mode: 'elevenlabs_agent',
-      language: voice.language,
-      transcriptionProvider: voice.transcriptionProvider,
-      ttsProvider: 'ElevenLabs',
-      voice: buildElevenLabsVoiceSpec(voice.elevenlabs),
-      agentId: voice.elevenlabs.agentId,
-      fallbackToStandardOnError: voice.fallbackToStandardOnError,
-      validationErrors: errors,
-    };
-  }
-  return standardProfile;
-}
-/** Returns false when the profile has any validation errors. */
-export function isVoiceProfileValid(profile: VoiceQualityProfile): boolean {
-  return profile.validationErrors.length === 0;
 }

package/src/config/bundled-skills/guardian-verify-setup/SKILL.md CHANGED Viewed

@@ -1,11 +1,11 @@
 ---
 name: "Guardian Verify Setup"
-description: "Set up guardian verification for SMS, voice, or Telegram channels via outbound verification flow"
+description: "Set up guardian verification for voice or Telegram channels via outbound verification flow"
 user-invocable: true
 metadata: {"vellum": {"emoji": "\ud83d\udd10"}}
 ---
-You are helping your user set up guardian verification for a messaging channel (SMS, voice, or Telegram). This links their identity as the trusted guardian for the chosen channel. All API calls go through the gateway HTTP API using `curl` with bearer auth.
+You are helping your user set up guardian verification for a messaging channel (voice or Telegram). This links their identity as the trusted guardian for the chosen channel. All API calls go through the gateway HTTP API using `curl` with bearer auth.
 ## Prerequisites
@@ -19,17 +19,16 @@ You are helping your user set up guardian verification for a messaging channel (
 Ask the user which channel they want to verify:
-- **sms** -- verify a phone number for SMS messaging
 - **voice** -- verify a phone number for voice calls
 - **telegram** -- verify a Telegram account
-If the user's intent already specifies a channel (e.g. "verify my phone number for SMS"), skip the prompt and proceed.
+If the user's intent already specifies a channel (e.g. "verify my phone number for voice calls"), skip the prompt and proceed.
 ## Step 2: Collect Destination
 Based on the chosen channel, ask for the required destination:
-- **SMS or voice**: Ask for their phone number. Accept any common format (e.g. +15551234567, (555) 123-4567, 555-123-4567). The API normalizes it to E.164.
+- **Voice**: Ask for their phone number. Accept any common format (e.g. +15551234567, (555) 123-4567, 555-123-4567). The API normalizes it to E.164.
 - **Telegram**: Ask for their Telegram chat ID (numeric) or @handle. Explain:
   - If they know their numeric chat ID, provide it directly. The bot will send the code to that chat.
   - If they only know their @handle, the flow uses a bootstrap deep-link that they must click first.
@@ -45,13 +44,12 @@ curl -s -X POST "$INTERNAL_GATEWAY_BASE_URL/v1/integrations/guardian/outbound/st
   -d '{"channel": "<channel>", "destination": "<destination>"}'
 ```
-Replace `<channel>` with `sms`, `voice`, or `telegram`, and `<destination>` with the phone number or Telegram destination.
+Replace `<channel>` with `voice` or `telegram`, and `<destination>` with the phone number or Telegram destination.
 ### On success (`success: true`)
 Report the exact next action based on the channel:
-- **SMS**: "I've sent a 6-digit verification code to [number]. Reply with the code from that SMS conversation (not here) to complete verification — the code can only be consumed through the SMS channel."
 - **Voice**: The response includes a `secret` field with the verification code. Tell the user the code BEFORE the call connects: "I'm calling [number] now. Your verification code is [secret]. When you answer the call, enter this code using your phone's keypad." The `/outbound/start` API call already initiates the voice call. Do NOT place a separate `call_start` call. **After delivering the code, immediately begin the voice auto-check polling loop** (see [Voice Auto-Check Polling](#voice-auto-check-polling) below).
 - **Telegram with chat ID** (no `telegramBootstrapUrl` in response): The response includes a `secret` field. Show it in the current chat: "Your verification code is **[secret]**. I've also sent it to your Telegram. Open the Telegram bot chat and reply with that 6-digit code to complete verification." If the response does not contain a `secret` field, treat this as a control-plane error: tell the user something went wrong and ask them to retry from Step 3 or resend (Step 4).
 - **Telegram with handle** (`telegramBootstrapUrl` present in response): "Tap this deep-link first: [telegramBootstrapUrl]. After Telegram binds your identity, I'll send your verification code."
@@ -68,7 +66,7 @@ Handle each error code:
 | `invalid_destination` | Tell the user the format is invalid. For phone: suggest E.164 format (+15551234567). For Telegram: explain that group chat IDs (negative numbers) are not supported. |
 | `already_bound` | Tell the user a guardian is already bound for this channel. Ask if they want to replace it. If yes, re-run the start request with `"rebind": true` added to the JSON body. |
 | `rate_limited` | Tell the user they have sent too many verification attempts to this destination. Ask them to wait and try again later. |
-| `unsupported_channel` | Tell the user the channel is not supported. Only sms, voice, and telegram are valid. |
+| `unsupported_channel` | Tell the user the channel is not supported. Only voice and telegram are valid. |
 | `no_bot_username` | Telegram bot is not configured. Load and run the `telegram-setup` skill first. |
 ## Step 4: Handle Resend
@@ -84,7 +82,6 @@ curl -s -X POST "$INTERNAL_GATEWAY_BASE_URL/v1/integrations/guardian/outbound/re
 On success, report the next action based on the channel:
-- **SMS**: "I've sent a new verification code to [number]. Reply with the code from that SMS conversation to complete verification."
 - **Voice**: The resend response includes a fresh `secret` field with a new verification code. Tell the user the new code BEFORE the call connects — just like the initial start flow: "I'm calling [number] again. Your new verification code is [secret]. When you answer the call, enter this code using your phone's keypad." The `/outbound/resend` API call already initiates the voice call. Do NOT place a separate `call_start` call. **After delivering the code, immediately begin the voice auto-check polling loop** (see [Voice Auto-Check Polling](#voice-auto-check-polling) below).
 - **Telegram**: The resend response includes a fresh `secret` field. Show the new code in the current chat: "Your new verification code is **[secret]**. I've also sent it to your Telegram. Open the Telegram bot chat and reply with that 6-digit code to complete verification." If the response does not contain a `secret` field, treat this as a control-plane error: tell the user something went wrong and ask them to retry from Step 3.
@@ -140,7 +137,7 @@ When in a **rebind flow** (i.e., the `start_outbound` request included `"rebind"
 - Non-rebind flows (fresh verification with no prior binding) are unaffected — the first `bound: true` is trustworthy.
 **Important polling rules:**
-- This polling loop is voice-only. Do NOT poll for SMS or Telegram channels (SMS codes are entered through the SMS channel itself; Telegram has its own bot-driven flow).
+- This polling loop is voice-only. Do NOT poll for the Telegram channel (Telegram has its own bot-driven flow).
 - Do NOT require the user to ask "did it work?" — the whole point is proactive confirmation.
 - If the user sends a message while polling is in progress, handle their message normally. If their message is about verification status, the next poll iteration will provide the answer.

package/src/config/bundled-skills/messaging/SKILL.md CHANGED Viewed

@@ -84,15 +84,13 @@ SMS messaging uses Twilio as the telephony provider. Twilio credentials and phon
 The sms-setup skill handles: Twilio credential storage (Account SID + Auth Token), phone number provisioning or assignment, public ingress setup, SMS compliance verification, and end-to-end test sending. Once SMS is set up, messaging is available automatically — no additional feature flag is needed.
-The sms-setup skill also includes optional **guardian verification** for SMS, which links your phone number as the trusted guardian.
+### Guardian Verification (Voice or Telegram)
-### Guardian Verification (SMS, Voice, or Telegram)
-If the user asks to verify their guardian identity for any channel (SMS, voice, or Telegram), load the **guardian-verify-setup** skill:
+If the user asks to verify their guardian identity for voice or Telegram, load the **guardian-verify-setup** skill:
 - Call `skill_load` with `skill: "guardian-verify-setup"` to load the dependency skill.
-The guardian-verify-setup skill handles the full outbound verification flow for all supported channels. It collects the user's destination (phone number or Telegram chat ID/handle), initiates an outbound verification session, and guides the user through entering or replying with the verification code. This is the single source of truth for guardian verification setup -- do not duplicate the verification flow inline.
+The guardian-verify-setup skill handles the full outbound verification flow for voice and Telegram channels. It collects the user's destination (phone number or Telegram chat ID/handle), initiates an outbound verification session, and guides the user through entering or replying with the verification code. This is the single source of truth for guardian verification setup -- do not duplicate the verification flow inline.
 ## Error Recovery

package/src/config/bundled-skills/phone-calls/SKILL.md CHANGED Viewed

@@ -11,7 +11,7 @@ You are helping the user set up and manage phone calls via Twilio. This skill co
 ## Overview
-The calling system uses Twilio's ConversationRelay for both **outbound** and **inbound** voice calls. Twilio works out of the box as the default voice provider. Optionally, you can enable ElevenLabs integration for higher-quality, more natural-sounding voices — but this is entirely optional.
+The calling system uses Twilio's ConversationRelay for both **outbound** and **inbound** voice calls with **ElevenLabs** providing the text-to-speech voice. After Twilio setup, the assistant configures ElevenLabs as the TTS provider and prompts the user to choose a voice from a curated list of supported options.
 ### Outbound calls
@@ -34,14 +34,6 @@ When someone dials the assistant's Twilio phone number:
 5. Once verified (or if no challenge is pending), the LLM orchestrator greets the caller in a receptionist style: "Hello, this is [user]'s assistant. How can I help you today?"
 6. The assistant converses naturally, using ASK_GUARDIAN to consult the user when needed, just like outbound calls.
-Three voice quality modes are available:
-- **`twilio_standard`** (default) — Fully supported. Standard Twilio TTS with Google voices. No extra setup required.
-- **`twilio_elevenlabs_tts`** — Fully supported. Uses ElevenLabs voices through Twilio ConversationRelay for more natural speech.
-- **`elevenlabs_agent`** — **Experimental/restricted.** Full ElevenLabs conversational agent mode. Consultation bridging (`waiting_on_user`) is not yet supported in this mode; the runtime guard blocks it before any ElevenLabs API calls are made. See the "Runtime behavior" section below for fallback and strict-fail details.
-You can keep using Twilio only — no changes needed. Enabling ElevenLabs can improve naturalness and quality.
 The user's assistant gets its own personal phone number through Twilio. All implicit calls (without an explicit mode) always use this assistant number. Optionally, users can call from their own phone number if it's authorized with the Twilio account — this must be explicitly requested per call via `caller_identity_mode="user_number"`.
 ## Step 1: Verify Twilio Setup
@@ -79,18 +71,105 @@ Verify:
 vellum config get calls.enabled
 ```
-## Step 3: Verify Setup (Test Call)
+## Step 3: Choose a Voice
+After enabling calls, let the user choose an ElevenLabs voice. Twilio has a native ElevenLabs integration — no separate ElevenLabs account or API key is needed.
+### Voice consistency with in-app TTS
+The shared config key `elevenlabs.voiceId` is the single source of truth for ElevenLabs voice identity. Both in-app TTS and phone calls read from it (defaulting to **Rachel** — `21m00Tcm4TlvDq8ikWAM`).
+Before presenting the voice list, check the current shared voice:
+```bash
+vellum config get elevenlabs.voiceId
+```
+**If a non-default voice is already set**, the user chose it during voice-setup or a previous session. Tell them:
+> "Your assistant currently uses [voice name] for both in-app chat and phone calls. I'll keep the same voice for calls. You can change it if you'd like."
+Skip the selection prompt unless the user wants to change.
+**If the default (Rachel) is set or no override exists**, present the curated voice list below and let them pick. When they choose, set the shared config with `voice_config_update` (shown after the voice tables) so both in-app TTS and phone calls use it.
+### Voice selection
+Present the user with a list of supported ElevenLabs voices. These are pre-made voices with stable IDs that work with Twilio ConversationRelay out of the box.
+**Ask the user: "Which voice would you like your assistant to use on phone calls?"**
+Present these voices grouped by category:
+#### Female voices
+| Voice     | Style                          | Voice ID                       |
+| --------- | ------------------------------ | ------------------------------ |
+| Rachel    | Calm, warm, conversational     | `21m00Tcm4TlvDq8ikWAM`        |
+| Sarah     | Soft, young, approachable      | `EXAVITQu4vr4xnSDxMaL`        |
+| Charlotte | Warm, Swedish-accented         | `XB0fDUnXU5powFXDhCwa`        |
+| Alice     | Confident, British             | `Xb7hH8MSUJpSbSDYk0k2`        |
+| Matilda   | Warm, friendly, young          | `XrExE9yKIg1WjnnlVkGX`        |
+| Lily      | Warm, British                  | `pFZP5JQG7iQjIQuC4Bku`        |
+#### Male voices
+| Voice   | Style                            | Voice ID                       |
+| ------- | -------------------------------- | ------------------------------ |
+| Antoni  | Warm, well-rounded               | `ErXwobaYiN019PkySvjV`        |
+| Josh    | Deep, young, clear               | `TxGEqnHWrfWFTfGW9XjX`       |
+| Arnold  | Crisp, narrative                  | `VR6AewLTigWG4xSOukaG`        |
+| Adam    | Deep, middle-aged, professional  | `pNInz6obpgDQGcFmaJgB`        |
+| Bill    | Trustworthy, American            | `pqHfZKP75CvOlQylNhV4`        |
+| George  | Warm, British, distinguished     | `JBFqnCBsd6RMkjVDRZzb`        |
+| Daniel  | Authoritative, British           | `onwK4e9ZLuTAKqWW03F9`        |
+| Charlie | Casual, Australian               | `IKne3meq5aSn9XLyUdCD`        |
+| Liam    | Young, articulate                | `TX3LPaxmHKxFdv7VOQHJ`        |
+After the user picks a voice, use `voice_config_update` to set the shared voice ID. This writes to the config file (`elevenlabs.voiceId`) for phone calls **and** pushes to the macOS app via IPC (`ttsVoiceId`) for in-app TTS in one call:
+```
+voice_config_update setting="tts_voice_id" value="<selected-voice-id>"
+```
+**If the user wants a voice not on this list**, they can browse more voices at https://elevenlabs.io/voice-library and provide the voice ID manually.
+## Step 4: Verify Setup (Test Call)
 Before making real calls, offer a quick verification:
 1. Confirm credentials are stored: check the Twilio config endpoint for `hasCredentials: true` and `phoneNumber`
 2. Confirm ingress is running: `ingress.publicBaseUrl` must be set and the tunnel active
 3. Confirm calls are enabled: `calls.enabled` must be `true`
+4. Confirm voice is configured: `elevenlabs.voiceId` should hold the voice the user chose (or the default, Rachel — `21m00Tcm4TlvDq8ikWAM` — if they kept it)
-Suggest a test call to the user's own phone: **"Want to do a quick test call to your phone to make sure everything works?"**
+Suggest a test call to the user's own phone: **"Want to do a quick test call to your phone to make sure everything works? This is a good way to hear how your chosen voice sounds."**
 If they agree, ask for their personal phone number and place a test call with a simple task like "Introduce yourself and confirm the call system is working."
+## Step 5: Verify Guardian Identity (Voice)
+Now link the user's phone number as the trusted voice guardian. Tell the user: "Now let's verify your guardian identity for voice. This links your phone number so the assistant can verify inbound callers."
+Load the **guardian-verify-setup** skill to handle the verification flow:
+- Call `skill_load` with `skill: "guardian-verify-setup"` to load the dependency skill.
+When invoking the skill, indicate the channel is `voice`. The guardian-verify-setup skill manages the full outbound verification flow, including:
+- Collecting the user's phone number as the destination
+- Starting the outbound verification session via the gateway endpoint `POST /v1/integrations/guardian/outbound/start` with `channel: "voice"`
+- Calling the phone number and providing a code for the user to enter via their phone's keypad
+- Proactively polling for completion (voice auto-check) so the user gets instant confirmation
+- Checking guardian status to confirm the binding was created
+- Handling resend, cancel, and error cases
+Tell the user: _"I've loaded the guardian verification guide. It will walk you through linking your phone number as the trusted voice guardian."_
+After the guardian-verify-setup skill completes (or the user skips), continue to the next sections.
+**Note:** Guardian verification is optional but recommended. If the user declines or wants to skip, proceed without blocking. Once verified, inbound callers can be prompted for voice verification before calls proceed (see the **Guardian voice verification for inbound calls** section below).
 ## Caller Identity
 All implicit calls (calls without an explicit `caller_identity_mode`) always use the assistant's Twilio phone number. This is the number that appears on the recipient's caller ID.
@@ -133,88 +212,83 @@ An optional verification step where the callee must enter a numeric code via the
 | `calls.verification.enabled`    | Enable DTMF callee verification           | `false` |
 | `calls.verification.codeLength` | Number of digits in the verification code | `6`     |
-## Optional: Higher Quality Voice with ElevenLabs
+## Advanced Voice Configuration
-ElevenLabs integration is entirely optional. The standard Twilio-only setup works unchanged — this section is only relevant if you want to improve voice quality.
+ElevenLabs is the TTS provider for all calls. This section covers advanced voice selection and tuning.
-### Mode: `twilio_elevenlabs_tts`
+### Changing the voice
-Uses ElevenLabs voices through Twilio's ConversationRelay. Speech is more natural-sounding than the default Google TTS voices.
+To switch to a different voice after initial setup, use `voice_config_update` to set the shared voice ID. This writes to the config file (`elevenlabs.voiceId`) for phone calls **and** pushes to the macOS app via IPC for in-app TTS:
-**Recommended user-friendly workflow (no technical IDs required):**
-1. Ask what kind of voice the user wants (examples: "warm", "professional", "playful", "calm", "deeper", "brighter")
-2. If the user doesn't care, keep `twilio_standard` (simplest path)
-3. If they want higher-quality voice, switch to `twilio_elevenlabs_tts` and choose a matching ElevenLabs voice on their behalf
+```
+voice_config_update setting="tts_voice_id" value="<new-voice-id>"
+```
-The user should not need to know what a `voiceId` is unless they explicitly want advanced/manual control.
+Browse more voices at https://elevenlabs.io/voice-library.
-**Manual/advanced setup (optional):**
+### Advanced voice selection with an ElevenLabs account
-```bash
-vellum config set calls.voice.mode twilio_elevenlabs_tts
-vellum config set calls.voice.elevenlabs.voiceId "<your-voice-id>"
-```
+Users who have an ElevenLabs account and API key (e.g., from the **voice-setup** skill) can go beyond the curated voice list. With an API key, they can:
-By default, the system sends a **bare** `voiceId` to Twilio ConversationRelay (no model/tuning suffix). This is the safest default across voice IDs.
+- **Browse the full ElevenLabs voice library programmatically** — the ElevenLabs API (`GET https://api.elevenlabs.io/v2/voices`) supports searching by name, category, language, and accent. This returns voice IDs, names, labels, and preview URLs.
+- **Use custom or cloned voices** — if the user has created a custom voice or voice clone in their ElevenLabs account, they can use its voice ID here. These voices are available in Twilio ConversationRelay just like pre-made voices.
+- **Preview voices before choosing** — each voice in the API response includes a `preview_url` with an audio sample.
-If you want to force Twilio's extended voice spec, you can optionally set a model ID:
+To check if the user has an API key stored:
 ```bash
-vellum config set calls.voice.elevenlabs.voiceModelId "flash_v2_5"
+credential_store action=get service=elevenlabs field=api_key
 ```
-When `voiceModelId` is set, the emitted voice string becomes:
-`voiceId-model-speed_stability_similarity`.
-### Mode: `elevenlabs_agent` (experimental/restricted)
+If they have a key and want to browse voices, fetch the voice list:
-Full ElevenLabs conversational agent mode. This requires an ElevenLabs account with an agent configured on their platform.
+```bash
+curl -s "https://api.elevenlabs.io/v2/voices?category=premade&page_size=50" \
+  -H "xi-api-key: <api_key_from_credential_store>" | python3 -m json.tool
+```
-**Restriction:** This mode is currently restricted because consultation bridging (`waiting_on_user`) is not yet supported. A runtime guard in `handleVoiceWebhook` blocks `elevenlabs_agent` before any ElevenLabs API calls are made.
+To search for a specific voice style:
-**Setup:**
+```bash
+curl -s "https://api.elevenlabs.io/v2/voices?search=warm+female&page_size=10" \
+  -H "xi-api-key: <api_key_from_credential_store>" | python3 -m json.tool
+```
-1. Store your ElevenLabs API key securely:
+After the user picks a voice, set the shared voice ID:
 ```
-credential_store action=store service=elevenlabs field=api_key value=<your_api_key>
+voice_config_update setting="tts_voice_id" value="<selected-voice-id>"
 ```
-2. Set the voice mode and agent ID:
+### Voice tuning parameters
-```bash
-vellum config set calls.voice.mode elevenlabs_agent
-vellum config set calls.voice.elevenlabs.agentId "<your-agent-id>"
-```
+Fine-tune how the selected voice sounds. These parameters apply to all calls that use ElevenLabs TTS:
-### Fallback behavior and `fallbackToStandardOnError`
+```bash
+# Playback speed (0.7 = slower, 1.0 = normal, 1.2 = faster)
+vellum config set elevenlabs.speed 1.0
-By default, `calls.voice.fallbackToStandardOnError` is `true`. This setting controls what happens when an ElevenLabs mode encounters errors or is restricted.
+# Stability (0.0 = more expressive/variable, 1.0 = more consistent/monotone)
+vellum config set elevenlabs.stability 0.5
-#### Invalid configuration (e.g., missing voiceId or agentId)
+# Similarity boost (0.0 = more creative, 1.0 = closer to original voice)
+vellum config set elevenlabs.similarityBoost 0.75
+```
-- **`true` (default):** The profile resolver silently falls back to `twilio_standard` mode and logs a warning. The call proceeds with standard Twilio TTS.
-- **`false`:** The voice webhook returns **HTTP 500** with the specific configuration error details (e.g., `"Voice quality configuration error: calls.voice.elevenlabs.voiceId is required..."`).
+Lower stability makes the voice more expressive but less predictable — good for conversational calls. Higher stability is better for scripted/formal calls.
-#### `elevenlabs_agent` mode guard (consultation bridging unsupported)
+### Voice model tuning
-- **`true` (default):** The `elevenlabs_agent` mode is silently downgraded to standard ConversationRelay TwiML with a warning log. The call proceeds normally with standard Twilio TTS. No ElevenLabs API calls are made.
-- **`false`:** The voice webhook returns **HTTP 501** with the message: `"elevenlabs_agent mode is restricted: consultation bridging (waiting_on_user) is not yet supported."`. No ElevenLabs API calls are made.
+By default, the system sends a **bare** `voiceId` to Twilio ConversationRelay (no model/tuning suffix). This is the safest default across voice IDs.
-You can disable fallback if you want strict ElevenLabs-only behavior:
+If you want to force Twilio's extended voice spec, you can optionally set a model ID:
 ```bash
-vellum config set calls.voice.fallbackToStandardOnError false
+vellum config set elevenlabs.voiceModelId "flash_v2_5"
 ```
-### Reverting to standard Twilio
-To go back to the default voice at any time:
-```bash
-vellum config set calls.voice.mode twilio_standard
-```
+When `voiceModelId` is set, the emitted voice string becomes:
+`voiceId-model-speed_stability_similarity`.
 ## Making Outbound Calls
@@ -477,16 +551,13 @@ All call-related settings can be managed via `vellum config`:
 | `calls.model`                               | Override LLM model for call orchestration                                                                  | _(uses default model)_                                                                                   |
 | `calls.callerIdentity.allowPerCallOverride` | Allow per-call caller identity selection                                                                   | `true`                                                                                                   |
 | `calls.callerIdentity.userNumber`           | E.164 phone number for user-number mode                                                                    | _(empty)_                                                                                                |
-| `calls.voice.mode`                          | Voice quality mode (`twilio_standard`, `twilio_elevenlabs_tts`, `elevenlabs_agent`)                        | `twilio_standard`                                                                                        |
 | `calls.voice.language`                      | Language code for TTS and transcription                                                                    | `en-US`                                                                                                  |
 | `calls.voice.transcriptionProvider`         | Speech-to-text provider (`Deepgram`, `Google`)                                                             | `Deepgram`                                                                                               |
-| `calls.voice.fallbackToStandardOnError`     | Auto-fallback to standard Twilio TTS on ElevenLabs errors                                                  | `true`                                                                                                   |
-| `calls.voice.elevenlabs.voiceId`            | Advanced/internal ElevenLabs voice identifier. Usually set by the assistant based on requested voice style | _(empty)_                                                                                                |
-| `calls.voice.elevenlabs.voiceModelId`       | Optional Twilio ConversationRelay model suffix. Leave empty to send bare `voiceId`                         | _(empty)_                                                                                                |
-| `calls.voice.elevenlabs.agentId`            | ElevenLabs agent ID (for `elevenlabs_agent` mode)                                                          | _(empty)_                                                                                                |
-| `calls.voice.elevenlabs.speed`              | Playback speed (`0.7` – `1.2`)                                                                             | `1.0`                                                                                                    |
-| `calls.voice.elevenlabs.stability`          | Voice stability (`0.0` – `1.0`)                                                                            | `0.5`                                                                                                    |
-| `calls.voice.elevenlabs.similarityBoost`    | Voice similarity boost (`0.0` – `1.0`)                                                                     | `0.75`                                                                                                   |
+| `elevenlabs.voiceId`                        | ElevenLabs voice ID used by both in-app TTS and phone calls. Set during setup from the curated voice list. Defaults to the "Rachel" voice. | `21m00Tcm4TlvDq8ikWAM`                                                                                  |
+| `elevenlabs.voiceModelId`                   | Optional Twilio ConversationRelay model suffix. Leave empty to send bare `voiceId`                         | _(empty)_                                                                                                |
+| `elevenlabs.speed`                          | Playback speed (`0.7` – `1.2`)                                                                             | `1.0`                                                                                                    |
+| `elevenlabs.stability`                      | Voice stability (`0.0` – `1.0`)                                                                            | `0.5`                                                                                                    |
+| `elevenlabs.similarityBoost`                | Voice similarity boost (`0.0` – `1.0`)                                                                     | `0.75`                                                                                                   |
 ### Adjusting settings
@@ -558,27 +629,17 @@ Or re-run the public-ingress skill to auto-detect and save the new URL.
 ### Call drops after 30 seconds of silence
-The system has a 30-second silence timeout. If nobody speaks for 30 seconds, the agent will ask "Are you still there?" This is expected behavior.
+The system has a 30-second silence timeout. If nobody speaks for 30 seconds during normal conversation, the agent will ask "Are you still there?" This is expected behavior. During guardian wait states (inbound access-request wait or in-call guardian consultation wait), this generic silence nudge is suppressed — the guardian-wait heartbeat messaging is used instead.
-### Call quality didn't improve after enabling ElevenLabs
+### Call quality sounds off
-- Verify `calls.voice.mode` is set to `twilio_elevenlabs_tts` or `elevenlabs_agent` (not still `twilio_standard`)
+- Verify `elevenlabs.voiceId` is set to a valid ElevenLabs voice ID
 - Ask for the desired voice style again and try a different voice selection
-- If configuring manually: check that `calls.voice.elevenlabs.voiceId` contains a valid ElevenLabs voice ID
-- If mode is `elevenlabs_agent`, ensure `calls.voice.elevenlabs.agentId` is also set
 ### Twilio says "application error" right after answer
 - This often means ConversationRelay rejected voice configuration after TwiML fetch
-- Keep `calls.voice.elevenlabs.voiceModelId` empty first (bare `voiceId` mode)
+- Keep `elevenlabs.voiceModelId` empty first (bare `voiceId` mode)
 - If you set `voiceModelId`, try clearing it and retesting:
-  `vellum config set calls.voice.elevenlabs.voiceModelId ""`
-### ElevenLabs mode falls back to standard
-When `calls.voice.fallbackToStandardOnError` is `true` (the default), the system silently falls back to standard Twilio TTS if ElevenLabs encounters an error or restriction. Check:
+  `vellum config set elevenlabs.voiceModelId ""`
-- For `elevenlabs_agent` mode: this mode is currently restricted (consultation bridging not yet supported) and will always fall back to standard when fallback is enabled. If fallback is disabled, the voice webhook returns HTTP 501.
-- For `twilio_elevenlabs_tts` mode: verify `calls.voice.elevenlabs.voiceId` is set to a valid voice ID
-- For invalid configs (missing voiceId/agentId): if fallback is disabled, the voice webhook returns HTTP 500 with the config error
-- Review daemon logs for warning messages about fallback or guard activation

package/src/config/bundled-skills/sms-setup/SKILL.md CHANGED Viewed

@@ -144,26 +144,6 @@ After deletion, return to Step 3b to collect information and resubmit. Warn the
 **On failure:** Report the exact error message and guide the user through resolution.
-## Step 3.5: Guardian Verification (SMS)
-Now link the user's phone number as the trusted SMS guardian. Tell the user: "Now let's verify your guardian identity for SMS. This links your phone number as the trusted guardian for SMS messaging."
-Load the **guardian-verify-setup** skill to handle the verification flow:
-- Call `skill_load` with `skill: "guardian-verify-setup"` to load the dependency skill.
-When invoking the skill, indicate the channel is `sms`. The guardian-verify-setup skill manages the full outbound verification flow, including:
-- Collecting the user's phone number as the destination (accepts any common format -- the API normalizes to E.164)
-- Starting the outbound verification session via the gateway endpoint `POST /v1/integrations/guardian/outbound/start` with `channel: "sms"`
-- Sending a 6-digit code to the phone number that the user must reply with from the SMS channel
-- Checking guardian status to confirm the binding was created
-- Handling resend, cancel, and error cases
-Tell the user: _"I've loaded the guardian verification guide. It will walk you through linking your phone number as the trusted SMS guardian."_
-**Note:** Guardian verification is optional but recommended. If the user declines or wants to skip, proceed to Step 4 without blocking.
 ## Step 4: Test Send
 Run a test SMS to verify end-to-end delivery: