@vellumai/assistant 0.4.23 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/bun.lock +3 -0
  2. package/package.json +2 -1
  3. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +0 -15
  4. package/src/__tests__/assistant-events-sse-hardening.test.ts +9 -3
  5. package/src/__tests__/call-controller.test.ts +80 -0
  6. package/src/__tests__/config-schema.test.ts +38 -178
  7. package/src/__tests__/conversation-routes-guardian-reply.test.ts +4 -1
  8. package/src/__tests__/credential-security-invariants.test.ts +0 -2
  9. package/src/__tests__/guardian-verify-setup-skill-regression.test.ts +2 -2
  10. package/src/__tests__/ipc-snapshot.test.ts +0 -9
  11. package/src/__tests__/onboarding-template-contract.test.ts +10 -20
  12. package/src/__tests__/relay-server.test.ts +3 -3
  13. package/src/__tests__/runtime-events-sse-parity.test.ts +10 -0
  14. package/src/__tests__/runtime-events-sse.test.ts +7 -0
  15. package/src/__tests__/session-runtime-assembly.test.ts +34 -8
  16. package/src/__tests__/system-prompt.test.ts +7 -1
  17. package/src/__tests__/trusted-contact-approval-notifier.test.ts +12 -8
  18. package/src/__tests__/twilio-routes-twiml.test.ts +2 -2
  19. package/src/__tests__/twilio-routes.test.ts +2 -3
  20. package/src/__tests__/voice-quality.test.ts +21 -132
  21. package/src/calls/call-controller.ts +34 -29
  22. package/src/calls/relay-server.ts +11 -5
  23. package/src/calls/twilio-routes.ts +4 -38
  24. package/src/calls/voice-quality.ts +7 -63
  25. package/src/config/bundled-skills/guardian-verify-setup/SKILL.md +7 -10
  26. package/src/config/bundled-skills/messaging/SKILL.md +3 -5
  27. package/src/config/bundled-skills/phone-calls/SKILL.md +144 -83
  28. package/src/config/bundled-skills/sms-setup/SKILL.md +0 -20
  29. package/src/config/bundled-skills/twilio-setup/SKILL.md +9 -17
  30. package/src/config/bundled-skills/voice-setup/SKILL.md +36 -1
  31. package/src/config/bundled-skills/voice-setup/icon.svg +20 -0
  32. package/src/config/calls-schema.ts +3 -53
  33. package/src/config/elevenlabs-schema.ts +33 -0
  34. package/src/config/schema.ts +183 -137
  35. package/src/config/types.ts +0 -1
  36. package/src/daemon/handlers/browser.ts +1 -6
  37. package/src/daemon/ipc-contract/browser.ts +5 -14
  38. package/src/daemon/ipc-contract-inventory.json +0 -2
  39. package/src/daemon/session-agent-loop-handlers.ts +3 -0
  40. package/src/daemon/session-runtime-assembly.ts +9 -7
  41. package/src/mcp/client.ts +2 -1
  42. package/src/memory/conversation-crud.ts +339 -166
  43. package/src/runtime/auth/middleware.ts +87 -26
  44. package/src/runtime/routes/events-routes.ts +7 -0
  45. package/src/runtime/routes/inbound-message-handler.ts +3 -4
  46. package/src/schedule/scheduler.ts +159 -45
  47. package/src/security/secure-keys.ts +3 -3
  48. package/src/tools/browser/browser-manager.ts +72 -228
  49. package/src/tools/browser/browser-screencast.ts +0 -5
  50. package/src/tools/network/script-proxy/certs.ts +7 -237
  51. package/src/tools/network/script-proxy/connect-tunnel.ts +1 -82
  52. package/src/tools/network/script-proxy/http-forwarder.ts +2 -151
  53. package/src/tools/network/script-proxy/logging.ts +12 -196
  54. package/src/tools/network/script-proxy/mitm-handler.ts +2 -270
  55. package/src/tools/network/script-proxy/policy.ts +4 -152
  56. package/src/tools/network/script-proxy/router.ts +2 -60
  57. package/src/tools/network/script-proxy/server.ts +5 -137
  58. package/src/tools/network/script-proxy/types.ts +19 -125
  59. package/src/tools/system/voice-config.ts +23 -1
  60. package/src/util/logger.ts +4 -1
  61. package/src/__tests__/elevenlabs-config.test.ts +0 -95
  62. package/src/__tests__/twilio-routes-elevenlabs.test.ts +0 -407
  63. package/src/calls/elevenlabs-config.ts +0 -32
@@ -215,44 +215,36 @@ Confirm:
215
215
 
216
216
  Tell the user: **"Twilio is configured. Your assistant's phone number is {phoneNumber}. This number is used for both voice calls and SMS messaging."**
217
217
 
218
- ## Step 5.5: Guardian Verification (SMS and Voice)
218
+ ## Step 5.5: Guardian Verification (Voice)
219
219
 
220
- Now link the user's phone number as the trusted guardian for SMS and/or voice channels. Tell the user: "Now let's verify your guardian identity. This links your phone number as the trusted guardian for messaging and calls."
220
+ Now link the user's phone number as the trusted voice guardian. Tell the user: "Now let's verify your guardian identity for voice. This links your phone number so the assistant can verify inbound callers."
221
221
 
222
222
  Load the **guardian-verify-setup** skill to handle the verification flow:
223
223
 
224
224
  - Call `skill_load` with `skill: "guardian-verify-setup"` to load the dependency skill.
225
225
 
226
- The guardian-verify-setup skill manages the full outbound verification flow for **one channel at a time** (sms, voice, or telegram). Each invocation handles:
226
+ When invoking the skill, indicate the channel is `voice`. The guardian-verify-setup skill manages the full outbound verification flow, including:
227
227
 
228
228
  - Collecting the user's phone number as the destination (accepts any common format -- the API normalizes to E.164)
229
- - Starting the outbound verification session via the gateway endpoint `POST /v1/integrations/guardian/outbound/start`
230
- - For **SMS**: sending a 6-digit code to the phone number that the user must reply with from the SMS channel
231
- - For **voice**: calling the phone number and providing a code for the user to enter via their phone's keypad
229
+ - Starting the outbound verification session via the gateway endpoint `POST /v1/integrations/guardian/outbound/start` with `channel: "voice"`
230
+ - Calling the phone number and providing a code for the user to enter via their phone's keypad
231
+ - Proactively polling for completion (voice auto-check) so the user gets instant confirmation
232
232
  - Checking guardian status to confirm the binding was created
233
233
  - Handling resend, cancel, and error cases
234
234
 
235
- **If the user wants to verify both SMS and voice**, load the skill twice -- once for SMS and once for voice. Each channel requires its own separate verification session.
235
+ Tell the user: _"I've loaded the guardian verification guide. It will walk you through linking your phone number as the trusted voice guardian."_
236
236
 
237
- Tell the user: _"I've loaded the guardian verification guide. It will walk you through linking your phone number as the trusted guardian. We'll verify one channel at a time."_
238
-
239
- After the guardian-verify-setup skill completes verification for a channel, load it again for the next channel if needed. Once all desired channels are verified (or the user skips), continue to Step 6.
237
+ After the guardian-verify-setup skill completes (or the user skips), continue to Step 6.
240
238
 
241
239
  **Note:** Guardian verification is optional but recommended. If the user declines or wants to skip, proceed to Step 6 without blocking.
242
240
 
243
- To re-check guardian status later, query the channel(s) that were verified:
241
+ To re-check guardian status later:
244
242
 
245
243
  ```bash
246
- # Check SMS guardian status
247
- curl -s "$INTERNAL_GATEWAY_BASE_URL/v1/integrations/guardian/status?channel=sms" \
248
- -H "Authorization: Bearer $GATEWAY_AUTH_TOKEN"
249
- # Check voice guardian status
250
244
  curl -s "$INTERNAL_GATEWAY_BASE_URL/v1/integrations/guardian/status?channel=voice" \
251
245
  -H "Authorization: Bearer $GATEWAY_AUTH_TOKEN"
252
246
  ```
253
247
 
254
- Check the status for whichever channel(s) the user actually verified (SMS, voice, or both). Report the guardian verification result per channel: **"Guardian identity — SMS: {verified | not configured}, Voice: {verified | not configured}."**
255
-
256
248
  ## Step 6: Enable Features
257
249
 
258
250
  Now that Twilio is configured, the user can enable the features that depend on it:
@@ -9,7 +9,7 @@ You are helping the user set up and troubleshoot voice features (push-to-talk, w
9
9
 
10
10
  ## Available Tools
11
11
 
12
- - `voice_config_update` — Change any voice setting (PTT key, wake word enabled/keyword/timeout)
12
+ - `voice_config_update` — Change any voice setting (PTT key, wake word enabled/keyword/timeout, TTS voice ID)
13
13
  - `open_system_settings` — Open macOS System Settings to a specific privacy pane
14
14
  - `navigate_settings_tab` — Open the Vellum settings panel to the Voice tab
15
15
  - `credential_store` — Collect API keys securely (for ElevenLabs TTS)
@@ -66,6 +66,41 @@ Ask if they want high-quality text-to-speech voices via ElevenLabs (optional —
66
66
  2. Use `credential_store` with `action: "prompt"`, `service: "elevenlabs"`, `field: "api_key"` to show a secure input dialog.
67
67
  3. After the key is stored, confirm success.
68
68
 
69
+ #### Choose an ElevenLabs voice
70
+
71
+ After storing the API key, let the user pick their preferred voice. The shared config key `elevenlabs.voiceId` controls the voice for **both** in-app TTS and phone calls (defaulting to Rachel).
72
+
73
+ Check the current voice:
74
+
75
+ ```bash
76
+ vellum config get elevenlabs.voiceId
77
+ ```
78
+
79
+ Ask the user if they want to change their TTS voice. If yes, use `voice_config_update` with `setting: "tts_voice_id"` and the chosen voice ID. This single call both writes the value to the config file (`elevenlabs.voiceId`) and pushes it to the macOS app via IPC.
80
+
81
+ Common choices from the curated ElevenLabs list:
82
+ - **Rachel** (`21m00Tcm4TlvDq8ikWAM`) — Calm, warm, conversational (default)
83
+ - **Sarah** (`EXAVITQu4vr4xnSDxMaL`) — Soft, young, approachable
84
+ - **Charlotte** (`XB0fDUnXU5powFXDhCwa`) — Warm, Swedish-accented
85
+ - **Josh** (`TxGEqnHWrfWFTfGW9XjX`) — Deep, young, clear
86
+ - **Adam** (`pNInz6obpgDQGcFmaJgB`) — Deep, middle-aged, professional
87
+
88
+ If the user wants to browse more voices, they can search at https://elevenlabs.io/voice-library or use the ElevenLabs API with their key.
89
+
90
+ #### Sync with phone calls
91
+
92
+ After setting the voice, check whether phone calls are configured:
93
+
94
+ ```bash
95
+ vellum config get calls.enabled
96
+ ```
97
+
98
+ **If phone calls are enabled** (`calls.enabled` is `true`):
99
+ - Tell the user their phone calls will automatically use the same voice they just chose, since both in-app TTS and phone calls read from `elevenlabs.voiceId`.
100
+
101
+ **If phone calls are not yet configured** (`calls.enabled` is `false` or not set):
102
+ - Tell the user: "When you set up phone calls later, they'll automatically use the same voice for a consistent experience."
103
+
69
104
  ### 5. Verification
70
105
 
71
106
  After setup is complete:
@@ -0,0 +1,20 @@
1
+ <svg viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg">
2
+ <rect x="7" y="1" width="2" height="2" fill="#4A90E2"/>
3
+ <rect x="6" y="3" width="4" height="1" fill="#4A90E2"/>
4
+ <rect x="5" y="4" width="6" height="1" fill="#7B68EE"/>
5
+ <rect x="4" y="5" width="8" height="1" fill="#7B68EE"/>
6
+ <rect x="4" y="6" width="8" height="1" fill="#4A90E2"/>
7
+ <rect x="5" y="7" width="6" height="1" fill="#4A90E2"/>
8
+ <rect x="6" y="8" width="4" height="1" fill="#7B68EE"/>
9
+ <rect x="7" y="9" width="2" height="1" fill="#7B68EE"/>
10
+ <rect x="3" y="10" width="2" height="4" fill="#50C878"/>
11
+ <rect x="4" y="10" width="1" height="1" fill="#50C878"/>
12
+ <rect x="5" y="11" width="1" height="1" fill="#50C878"/>
13
+ <rect x="11" y="10" width="2" height="4" fill="#E74C3C"/>
14
+ <rect x="10" y="10" width="1" height="1" fill="#E74C3C"/>
15
+ <rect x="9" y="11" width="1" height="1" fill="#E74C3C"/>
16
+ <rect x="6" y="11" width="4" height="1" fill="#FFD700"/>
17
+ <rect x="5" y="12" width="6" height="1" fill="#FFD700"/>
18
+ <rect x="4" y="13" width="8" height="1" fill="#FFD700"/>
19
+ <rect x="7" y="14" width="2" height="2" fill="#4A90E2"/>
20
+ </svg>
@@ -1,7 +1,6 @@
1
1
  import { z } from 'zod';
2
2
 
3
3
  const VALID_CALL_PROVIDERS = ['twilio'] as const;
4
- const VALID_CALL_VOICE_MODES = ['twilio_standard', 'twilio_elevenlabs_tts', 'elevenlabs_agent'] as const;
5
4
  export const VALID_CALLER_IDENTITY_MODES = ['assistant_number', 'user_number'] as const;
6
5
  const VALID_CALL_TRANSCRIPTION_PROVIDERS = ['Deepgram', 'Google'] as const;
7
6
 
@@ -20,51 +19,7 @@ export const CallsSafetyConfigSchema = z.object({
20
19
  .default([]),
21
20
  });
22
21
 
23
- export const CallsElevenLabsConfigSchema = z.object({
24
- voiceId: z
25
- .string({ error: 'calls.voice.elevenlabs.voiceId must be a string' })
26
- .default(''),
27
- voiceModelId: z
28
- .string({ error: 'calls.voice.elevenlabs.voiceModelId must be a string' })
29
- .default(''),
30
- speed: z
31
- .number({ error: 'calls.voice.elevenlabs.speed must be a number' })
32
- .min(0.7, 'calls.voice.elevenlabs.speed must be >= 0.7')
33
- .max(1.2, 'calls.voice.elevenlabs.speed must be <= 1.2')
34
- .default(1.0),
35
- stability: z
36
- .number({ error: 'calls.voice.elevenlabs.stability must be a number' })
37
- .min(0, 'calls.voice.elevenlabs.stability must be >= 0')
38
- .max(1, 'calls.voice.elevenlabs.stability must be <= 1')
39
- .default(0.5),
40
- similarityBoost: z
41
- .number({ error: 'calls.voice.elevenlabs.similarityBoost must be a number' })
42
- .min(0, 'calls.voice.elevenlabs.similarityBoost must be >= 0')
43
- .max(1, 'calls.voice.elevenlabs.similarityBoost must be <= 1')
44
- .default(0.75),
45
- useSpeakerBoost: z
46
- .boolean({ error: 'calls.voice.elevenlabs.useSpeakerBoost must be a boolean' })
47
- .default(true),
48
- agentId: z
49
- .string({ error: 'calls.voice.elevenlabs.agentId must be a string' })
50
- .default(''),
51
- apiBaseUrl: z
52
- .string({ error: 'calls.voice.elevenlabs.apiBaseUrl must be a string' })
53
- .default('https://api.elevenlabs.io'),
54
- registerCallTimeoutMs: z
55
- .number({ error: 'calls.voice.elevenlabs.registerCallTimeoutMs must be a number' })
56
- .int('calls.voice.elevenlabs.registerCallTimeoutMs must be an integer')
57
- .min(1000, 'calls.voice.elevenlabs.registerCallTimeoutMs must be >= 1000')
58
- .max(15000, 'calls.voice.elevenlabs.registerCallTimeoutMs must be <= 15000')
59
- .default(5000),
60
- });
61
-
62
22
  export const CallsVoiceConfigSchema = z.object({
63
- mode: z
64
- .enum(VALID_CALL_VOICE_MODES, {
65
- error: `calls.voice.mode must be one of: ${VALID_CALL_VOICE_MODES.join(', ')}`,
66
- })
67
- .default('twilio_standard'),
68
23
  language: z
69
24
  .string({ error: 'calls.voice.language must be a string' })
70
25
  .default('en-US'),
@@ -73,10 +28,6 @@ export const CallsVoiceConfigSchema = z.object({
73
28
  error: `calls.voice.transcriptionProvider must be one of: ${VALID_CALL_TRANSCRIPTION_PROVIDERS.join(', ')}`,
74
29
  })
75
30
  .default('Deepgram'),
76
- fallbackToStandardOnError: z
77
- .boolean({ error: 'calls.voice.fallbackToStandardOnError must be a boolean' })
78
- .default(true),
79
- elevenlabs: CallsElevenLabsConfigSchema.default(CallsElevenLabsConfigSchema.parse({})),
80
31
  });
81
32
 
82
33
  export const CallerIdentityConfigSchema = z.object({
@@ -142,7 +93,7 @@ export const CallsConfigSchema = z.object({
142
93
  .int('calls.guardianWaitUpdateInitialIntervalMs must be an integer')
143
94
  .min(1000, 'calls.guardianWaitUpdateInitialIntervalMs must be >= 1000')
144
95
  .max(60_000, 'calls.guardianWaitUpdateInitialIntervalMs must be at most 60000')
145
- .default(5000),
96
+ .default(15_000),
146
97
  guardianWaitUpdateInitialWindowMs: z
147
98
  .number({ error: 'calls.guardianWaitUpdateInitialWindowMs must be a number' })
148
99
  .int('calls.guardianWaitUpdateInitialWindowMs must be an integer')
@@ -154,13 +105,13 @@ export const CallsConfigSchema = z.object({
154
105
  .int('calls.guardianWaitUpdateSteadyMinIntervalMs must be an integer')
155
106
  .min(1000, 'calls.guardianWaitUpdateSteadyMinIntervalMs must be >= 1000')
156
107
  .max(60_000, 'calls.guardianWaitUpdateSteadyMinIntervalMs must be at most 60000')
157
- .default(7000),
108
+ .default(20_000),
158
109
  guardianWaitUpdateSteadyMaxIntervalMs: z
159
110
  .number({ error: 'calls.guardianWaitUpdateSteadyMaxIntervalMs must be a number' })
160
111
  .int('calls.guardianWaitUpdateSteadyMaxIntervalMs must be an integer')
161
112
  .min(1000, 'calls.guardianWaitUpdateSteadyMaxIntervalMs must be >= 1000')
162
113
  .max(60_000, 'calls.guardianWaitUpdateSteadyMaxIntervalMs must be at most 60000')
163
- .default(10_000),
114
+ .default(30_000),
164
115
  disclosure: CallsDisclosureConfigSchema.default(CallsDisclosureConfigSchema.parse({})),
165
116
  safety: CallsSafetyConfigSchema.default(CallsSafetyConfigSchema.parse({})),
166
117
  voice: CallsVoiceConfigSchema.default(CallsVoiceConfigSchema.parse({})),
@@ -175,6 +126,5 @@ export type CallsConfig = z.infer<typeof CallsConfigSchema>;
175
126
  export type CallsDisclosureConfig = z.infer<typeof CallsDisclosureConfigSchema>;
176
127
  export type CallsSafetyConfig = z.infer<typeof CallsSafetyConfigSchema>;
177
128
  export type CallsVoiceConfig = z.infer<typeof CallsVoiceConfigSchema>;
178
- export type CallsElevenLabsConfig = z.infer<typeof CallsElevenLabsConfigSchema>;
179
129
  export type CallerIdentityConfig = z.infer<typeof CallerIdentityConfigSchema>;
180
130
  export type CallsVerificationConfig = z.infer<typeof CallsVerificationConfigSchema>;
@@ -0,0 +1,33 @@
1
+ import { z } from 'zod';
2
+
3
+ // Default ElevenLabs voice — "Rachel" (calm, warm, conversational).
4
+ // Used by both in-app TTS and phone calls (via Twilio ConversationRelay).
5
+ // Mirrored in: clients/macos/.../OpenAIVoiceService.swift (defaultVoiceId)
6
+ export const DEFAULT_ELEVENLABS_VOICE_ID = '21m00Tcm4TlvDq8ikWAM';
7
+
8
+ export const ElevenLabsConfigSchema = z.object({
9
+ voiceId: z
10
+ .string({ error: 'elevenlabs.voiceId must be a string' })
11
+ .min(1, 'elevenlabs.voiceId must not be empty')
12
+ .default(DEFAULT_ELEVENLABS_VOICE_ID),
13
+ voiceModelId: z
14
+ .string({ error: 'elevenlabs.voiceModelId must be a string' })
15
+ .default(''),
16
+ speed: z
17
+ .number({ error: 'elevenlabs.speed must be a number' })
18
+ .min(0.7, 'elevenlabs.speed must be >= 0.7')
19
+ .max(1.2, 'elevenlabs.speed must be <= 1.2')
20
+ .default(1.0),
21
+ stability: z
22
+ .number({ error: 'elevenlabs.stability must be a number' })
23
+ .min(0, 'elevenlabs.stability must be >= 0')
24
+ .max(1, 'elevenlabs.stability must be <= 1')
25
+ .default(0.5),
26
+ similarityBoost: z
27
+ .number({ error: 'elevenlabs.similarityBoost must be a number' })
28
+ .min(0, 'elevenlabs.similarityBoost must be >= 0')
29
+ .max(1, 'elevenlabs.similarityBoost must be <= 1')
30
+ .default(0.75),
31
+ });
32
+
33
+ export type ElevenLabsConfig = z.infer<typeof ElevenLabsConfigSchema>;