vellum 0.2.10 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bun.lock +6 -2
- package/package.json +2 -2
- package/src/__tests__/gateway-only-enforcement.test.ts +9 -35
- package/src/__tests__/oauth2-gateway-transport.test.ts +14 -33
- package/src/__tests__/skills.test.ts +2 -2
- package/src/__tests__/twilio-routes.test.ts +78 -153
- package/src/__tests__/twitter-auth-handler.test.ts +1 -1
- package/src/cli/main-screen.tsx +15 -117
- package/src/config/bundled-skills/macos-automation/SKILL.md +66 -0
- package/src/config/bundled-skills/phone-calls/SKILL.md +334 -0
- package/src/config/system-prompt.ts +9 -59
- package/src/daemon/lifecycle.ts +36 -7
- package/src/home-base/prebuilt/seed.ts +1 -1
- package/src/memory/db.ts +36 -0
- package/src/security/oauth2.ts +8 -8
- package/src/util/logger.ts +4 -4
package/src/cli/main-screen.tsx
CHANGED
|
@@ -1,19 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { Box, render as inkRender, Text } from "ink";
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
4
3
|
import { getSocketPath, getWorkspaceDir } from "../util/platform.js";
|
|
5
|
-
import { APP_VERSION } from "../version.js";
|
|
6
4
|
|
|
7
5
|
const LEFT_PANEL_WIDTH = 36;
|
|
8
|
-
|
|
9
|
-
const VELLY_ART = [
|
|
10
|
-
" ,___,",
|
|
11
|
-
" ( O O )",
|
|
12
|
-
" /)V(\\",
|
|
13
|
-
" // \\\\",
|
|
14
|
-
' /" "\\',
|
|
15
|
-
" ^ ^",
|
|
16
|
-
];
|
|
6
|
+
const RIGHT_LINE_COUNT = 11;
|
|
17
7
|
|
|
18
8
|
export interface MainScreenLayout {
|
|
19
9
|
height: number;
|
|
@@ -21,116 +11,24 @@ export interface MainScreenLayout {
|
|
|
21
11
|
statusCol: number;
|
|
22
12
|
}
|
|
23
13
|
|
|
24
|
-
function
|
|
14
|
+
export function renderMainScreen(): MainScreenLayout {
|
|
25
15
|
const socketPath = getSocketPath();
|
|
26
16
|
const workspace = getWorkspaceDir();
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
const tips = [
|
|
30
|
-
"Send a message to start chatting",
|
|
31
|
-
"Use /help to see available commands",
|
|
32
|
-
];
|
|
33
|
-
|
|
34
|
-
const leftLines = [
|
|
35
|
-
" ",
|
|
36
|
-
" Meet your Assistant!",
|
|
37
|
-
" ",
|
|
38
|
-
...VELLY_ART.map((l) => ` ${l}`),
|
|
39
|
-
" ",
|
|
40
|
-
` ${socketPath}`,
|
|
41
|
-
` ~/${dirName}`,
|
|
42
|
-
];
|
|
43
|
-
|
|
44
|
-
const rightLines = [
|
|
45
|
-
" ",
|
|
46
|
-
"Tips for getting started",
|
|
47
|
-
...tips,
|
|
48
|
-
" ",
|
|
49
|
-
"Daemon",
|
|
50
|
-
"connecting...",
|
|
51
|
-
"Version",
|
|
52
|
-
APP_VERSION,
|
|
53
|
-
"Status",
|
|
54
|
-
"checking...",
|
|
55
|
-
];
|
|
17
|
+
const assistantId = workspace.split("/").pop() ?? "vellum";
|
|
56
18
|
|
|
57
|
-
const
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
const line = leftLines[i] ?? " ";
|
|
66
|
-
if (i === 1) {
|
|
67
|
-
return (
|
|
68
|
-
<Text key={i} bold>
|
|
69
|
-
{line}
|
|
70
|
-
</Text>
|
|
71
|
-
);
|
|
72
|
-
}
|
|
73
|
-
if (i > 2 && i <= 2 + VELLY_ART.length) {
|
|
74
|
-
return (
|
|
75
|
-
<Text key={i} color="magenta">
|
|
76
|
-
{line}
|
|
77
|
-
</Text>
|
|
78
|
-
);
|
|
79
|
-
}
|
|
80
|
-
if (i > 2 + VELLY_ART.length) {
|
|
81
|
-
return (
|
|
82
|
-
<Text key={i} dimColor>
|
|
83
|
-
{line}
|
|
84
|
-
</Text>
|
|
85
|
-
);
|
|
86
|
-
}
|
|
87
|
-
return <Text key={i}>{line}</Text>;
|
|
88
|
-
})}
|
|
89
|
-
</Box>
|
|
90
|
-
<Box flexDirection="column">
|
|
91
|
-
{Array.from({ length: maxLines }, (_, i) => {
|
|
92
|
-
const line = rightLines[i] ?? " ";
|
|
93
|
-
const isHeading = i === 1 || i === 6;
|
|
94
|
-
const isDim = i === 5 || i === 7 || i === 9;
|
|
95
|
-
if (isHeading) {
|
|
96
|
-
return (
|
|
97
|
-
<Text key={i} color="magenta">
|
|
98
|
-
{line}
|
|
99
|
-
</Text>
|
|
100
|
-
);
|
|
101
|
-
}
|
|
102
|
-
if (isDim) {
|
|
103
|
-
return (
|
|
104
|
-
<Text key={i} dimColor>
|
|
105
|
-
{line}
|
|
106
|
-
</Text>
|
|
107
|
-
);
|
|
108
|
-
}
|
|
109
|
-
return <Text key={i}>{line}</Text>;
|
|
110
|
-
})}
|
|
111
|
-
</Box>
|
|
112
|
-
</Box>
|
|
113
|
-
<Text dimColor>{"─".repeat(72)}</Text>
|
|
114
|
-
<Text> </Text>
|
|
115
|
-
<Text dimColor> ? for shortcuts</Text>
|
|
116
|
-
<Text> </Text>
|
|
117
|
-
</Box>
|
|
118
|
-
);
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
export function renderMainScreen(): MainScreenLayout {
|
|
122
|
-
const leftLineCount = 3 + VELLY_ART.length + 3;
|
|
123
|
-
const rightLineCount = 11;
|
|
124
|
-
const maxLines = Math.max(leftLineCount, rightLineCount);
|
|
19
|
+
const require = createRequire(import.meta.url);
|
|
20
|
+
const cliPkgPath = require.resolve("@vellumai/cli/package.json");
|
|
21
|
+
const cliRoot = dirname(cliPkgPath);
|
|
22
|
+
// Dynamic require to bypass NodeNext strict module resolution for the
|
|
23
|
+
// CLI package which ships raw TypeScript with bundler-style imports.
|
|
24
|
+
const { render } = require(join(cliRoot, "src", "components", "DefaultMainScreen.tsx")) as {
|
|
25
|
+
render: (runtimeUrl: string, assistantId: string, species: string) => number;
|
|
26
|
+
};
|
|
125
27
|
|
|
126
|
-
const
|
|
127
|
-
exitOnCtrlC: false,
|
|
128
|
-
});
|
|
129
|
-
unmount();
|
|
28
|
+
const height = render(socketPath, assistantId, "vellum");
|
|
130
29
|
|
|
131
|
-
const statusCanvasLine =
|
|
30
|
+
const statusCanvasLine = RIGHT_LINE_COUNT + 1;
|
|
132
31
|
const statusCol = LEFT_PANEL_WIDTH + 1;
|
|
133
|
-
const height = 1 + maxLines + 4;
|
|
134
32
|
|
|
135
33
|
return { height, statusLine: statusCanvasLine, statusCol };
|
|
136
34
|
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "macOS Automation"
|
|
3
|
+
description: "Automate native macOS apps and system interactions via osascript (AppleScript)"
|
|
4
|
+
user-invocable: false
|
|
5
|
+
disable-model-invocation: false
|
|
6
|
+
metadata: {"vellum": {"emoji": "🍎", "os": ["darwin"]}}
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Use this skill to interact with native macOS apps and system-level features via `osascript` (AppleScript) through `host_bash`. Always prefer osascript over browser automation or computer-use for anything involving a native macOS app.
|
|
10
|
+
|
|
11
|
+
## Supported Apps
|
|
12
|
+
|
|
13
|
+
**Communication:** Messages, Mail, Microsoft Outlook, FaceTime
|
|
14
|
+
**Contacts & Calendar:** Contacts, Calendar, Reminders
|
|
15
|
+
**Notes & Writing:** Notes, TextEdit, Pages, BBEdit, CotEditor
|
|
16
|
+
**Files:** Finder, Path Finder
|
|
17
|
+
**Browsers:** Safari, Google Chrome
|
|
18
|
+
**Music & Media:** Music (iTunes), Spotify, VLC, Podcasts, TV
|
|
19
|
+
**Productivity:** OmniFocus, Things 3, OmniOutliner, OmniPlan, OmniGraffle
|
|
20
|
+
**Office:** Microsoft Word, Microsoft Excel, Numbers, Keynote
|
|
21
|
+
**Developer tools:** Xcode, Terminal, iTerm2, Script Editor
|
|
22
|
+
**System:** System Events (UI scripting for any app), System Settings
|
|
23
|
+
**Automation:** Keyboard Maestro, Alfred, Automator
|
|
24
|
+
**Creative:** Adobe Photoshop, Final Cut Pro
|
|
25
|
+
|
|
26
|
+
For any unlisted app, check scriptability first:
|
|
27
|
+
```bash
|
|
28
|
+
osascript -e 'tell application "AppName" to get name'
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Examples
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Send an iMessage
|
|
35
|
+
osascript -e 'tell application "Messages" to send "Hello!" to buddy "user@example.com"'
|
|
36
|
+
|
|
37
|
+
# Look up a contact
|
|
38
|
+
osascript -e 'tell application "Contacts" to get {name, phones} of every person whose name contains "Marina"'
|
|
39
|
+
|
|
40
|
+
# Read upcoming calendar events
|
|
41
|
+
osascript -e 'tell application "Calendar" to get summary of every event of calendar "Home" whose start date > (current date)'
|
|
42
|
+
|
|
43
|
+
# Create a reminder
|
|
44
|
+
osascript -e 'tell application "Reminders" to make new reminder with properties {name:"Buy milk", due date:((current date) + 1 * hours)}'
|
|
45
|
+
|
|
46
|
+
# Send an email
|
|
47
|
+
osascript -e 'tell application "Mail" to send (make new outgoing message with properties {subject:"Hi", content:"Hello", visible:true})'
|
|
48
|
+
|
|
49
|
+
# Create a note
|
|
50
|
+
osascript -e 'tell application "Notes" to make new note at folder "Notes" with properties {body:"My note"}'
|
|
51
|
+
|
|
52
|
+
# Open a URL in Safari
|
|
53
|
+
osascript -e 'tell application "Safari" to open location "https://example.com"'
|
|
54
|
+
|
|
55
|
+
# Play/pause Music
|
|
56
|
+
osascript -e 'tell application "Music" to playpause'
|
|
57
|
+
|
|
58
|
+
# Display a system notification
|
|
59
|
+
osascript -e 'display notification "Done!" with title "Vellum"'
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Tips
|
|
63
|
+
|
|
64
|
+
- For multi-line scripts, write them to a `.applescript` file and run with `osascript path/to/script.applescript`
|
|
65
|
+
- Use `System Events` to UI-script apps that don't have their own AppleScript dictionary
|
|
66
|
+
- AppleScript permissions are gated by macOS TCC — if a command fails with a permission error, use `request_system_permission` to prompt the user
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "Phone Calls"
|
|
3
|
+
description: "Set up Twilio for outgoing phone calls and place AI-powered voice calls on behalf of the user"
|
|
4
|
+
user-invocable: true
|
|
5
|
+
metadata: {"vellum": {"emoji": "📞", "requires": {"config": ["calls.enabled"]}}}
|
|
6
|
+
includes: ["public-ingress"]
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
You are helping the user set up and make outgoing phone calls via Twilio. This skill covers the full lifecycle: Twilio account setup, credential storage, public ingress configuration, enabling the calls feature, placing calls, and monitoring live transcripts.
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
|
|
13
|
+
The calling system uses Twilio's ConversationRelay to place outbound phone calls. When a call is placed:
|
|
14
|
+
|
|
15
|
+
1. The assistant initiates an outbound call via the Twilio REST API
|
|
16
|
+
2. Twilio connects to the gateway's voice webhook, which returns TwiML
|
|
17
|
+
3. Twilio opens a ConversationRelay WebSocket for real-time voice streaming
|
|
18
|
+
4. An LLM-driven orchestrator manages the conversation — receiving caller speech (transcribed by Deepgram), generating responses via Claude, and streaming text back for TTS playback
|
|
19
|
+
5. The transcript is relayed live to the user's conversation thread
|
|
20
|
+
|
|
21
|
+
The user's assistant gets its own personal phone number through Twilio.
|
|
22
|
+
|
|
23
|
+
## Step 1: Check Current Configuration
|
|
24
|
+
|
|
25
|
+
First, check whether Twilio is already configured:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
vellum config get calls.enabled
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Also check for existing credentials:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
credential_store action=get service=credential:twilio:account_sid
|
|
35
|
+
credential_store action=get service=credential:twilio:auth_token
|
|
36
|
+
credential_store action=get service=credential:twilio:phone_number
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
If all three credentials exist and `calls.enabled` is `true`, skip to the **Making Calls** section. If credentials are partially configured, skip to whichever step is still needed.
|
|
40
|
+
|
|
41
|
+
## Step 2: Create a Twilio Account
|
|
42
|
+
|
|
43
|
+
If the user doesn't have a Twilio account yet, guide them through setup:
|
|
44
|
+
|
|
45
|
+
1. Tell the user: **"You'll need a Twilio account to make phone calls. Sign up at https://www.twilio.com/try-twilio — it's free to start and includes trial credit."**
|
|
46
|
+
2. Once they have an account, they need three pieces of information:
|
|
47
|
+
- **Account SID** — found on the Twilio Console dashboard at https://console.twilio.com
|
|
48
|
+
- **Auth Token** — found on the same dashboard (click "Show" to reveal it)
|
|
49
|
+
- **Phone Number** — a Twilio phone number capable of making voice calls
|
|
50
|
+
|
|
51
|
+
### Getting a Twilio Phone Number
|
|
52
|
+
|
|
53
|
+
If the user doesn't have a Twilio phone number yet:
|
|
54
|
+
|
|
55
|
+
1. Direct them to https://console.twilio.com/us1/develop/phone-numbers/manage/incoming
|
|
56
|
+
2. Click **"Buy a Number"**
|
|
57
|
+
3. Select a number with **Voice** capability enabled
|
|
58
|
+
4. For trial accounts, Twilio provides one free number automatically — check "Active Numbers" first
|
|
59
|
+
|
|
60
|
+
Tell the user: **"This will be your assistant's personal phone number — the number that shows up on caller ID when calls are placed."**
|
|
61
|
+
|
|
62
|
+
## Step 3: Store Twilio Credentials
|
|
63
|
+
|
|
64
|
+
Once the user provides their credentials, store them securely using the `credential_store` tool. Ask the user to paste each value, then store them one at a time:
|
|
65
|
+
|
|
66
|
+
**Account SID:**
|
|
67
|
+
```
|
|
68
|
+
credential_store action=set service=credential:twilio:account_sid value=<their_account_sid>
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**Auth Token:**
|
|
72
|
+
```
|
|
73
|
+
credential_store action=set service=credential:twilio:auth_token value=<their_auth_token>
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Phone Number** (must be in E.164 format, e.g. `+14155551234`):
|
|
77
|
+
```
|
|
78
|
+
credential_store action=set service=credential:twilio:phone_number value=<their_phone_number>
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
After storing, verify each credential was saved:
|
|
82
|
+
```
|
|
83
|
+
credential_store action=get service=credential:twilio:account_sid
|
|
84
|
+
credential_store action=get service=credential:twilio:auth_token
|
|
85
|
+
credential_store action=get service=credential:twilio:phone_number
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
**Important:** Credentials are stored in the OS keychain (macOS Keychain / Linux secret-service) or encrypted at rest. They are never logged or exposed in plaintext.
|
|
89
|
+
|
|
90
|
+
## Step 4: Set Up Public Ingress
|
|
91
|
+
|
|
92
|
+
Twilio needs a publicly reachable URL to send voice webhooks and establish the ConversationRelay WebSocket. The **public-ingress** skill handles this via ngrok.
|
|
93
|
+
|
|
94
|
+
Check if ingress is already configured:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
vellum config get ingress.publicBaseUrl
|
|
98
|
+
vellum config get ingress.enabled
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
If not configured, load and run the public-ingress skill:
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
skill_load skill=public-ingress
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Follow the public-ingress skill's instructions to set up the ngrok tunnel. Once complete, the gateway will be reachable at the configured `ingress.publicBaseUrl`.
|
|
108
|
+
|
|
109
|
+
**Twilio needs these webhook endpoints (handled automatically by the gateway):**
|
|
110
|
+
- Voice webhook: `{publicBaseUrl}/webhooks/twilio/voice`
|
|
111
|
+
- Status callback: `{publicBaseUrl}/webhooks/twilio/status`
|
|
112
|
+
- ConversationRelay WebSocket: `{publicBaseUrl}/webhooks/twilio/relay` (wss://)
|
|
113
|
+
|
|
114
|
+
No manual Twilio webhook configuration is needed — the assistant registers webhook URLs dynamically when placing each call.
|
|
115
|
+
|
|
116
|
+
## Step 5: Enable Calls
|
|
117
|
+
|
|
118
|
+
Enable the calls feature:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
vellum config set calls.enabled true
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Verify:
|
|
125
|
+
```bash
|
|
126
|
+
vellum config get calls.enabled
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Step 6: Verify Setup (Test Call)
|
|
130
|
+
|
|
131
|
+
Before making real calls, offer a quick verification:
|
|
132
|
+
|
|
133
|
+
1. Confirm credentials are stored: all three `credential:twilio:*` keys must be present
|
|
134
|
+
2. Confirm ingress is running: `ingress.publicBaseUrl` must be set and the tunnel active
|
|
135
|
+
3. Confirm calls are enabled: `calls.enabled` must be `true`
|
|
136
|
+
|
|
137
|
+
Suggest a test call to the user's own phone: **"Want to do a quick test call to your phone to make sure everything works?"**
|
|
138
|
+
|
|
139
|
+
If they agree, ask for their personal phone number and place a test call with a simple task like "Introduce yourself and confirm the call system is working."
|
|
140
|
+
|
|
141
|
+
## Making Calls
|
|
142
|
+
|
|
143
|
+
Use the `call_start` tool to place outbound calls. Every call requires:
|
|
144
|
+
- **phone_number**: The number to call in E.164 format (e.g. `+14155551234`)
|
|
145
|
+
- **task**: What the call should accomplish — this becomes the AI voice agent's objective
|
|
146
|
+
- **context** (optional): Additional background information for the conversation
|
|
147
|
+
|
|
148
|
+
### Example calls:
|
|
149
|
+
|
|
150
|
+
**Making a reservation:**
|
|
151
|
+
```
|
|
152
|
+
call_start phone_number="+14155551234" task="Make a dinner reservation for 2 people tonight at 7pm" context="The user's name is John Smith. Prefer a table by the window if available."
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Calling a business:**
|
|
156
|
+
```
|
|
157
|
+
call_start phone_number="+18005551234" task="Check if they have a specific product in stock" context="Looking for a 65-inch Samsung OLED TV, model QN65S95D. Ask about availability and price."
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**Following up on an appointment:**
|
|
161
|
+
```
|
|
162
|
+
call_start phone_number="+12125551234" task="Confirm the dentist appointment scheduled for next Tuesday at 2pm" context="The appointment is under the name Jane Doe, DOB 03/15/1990."
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Phone number format
|
|
166
|
+
|
|
167
|
+
Phone numbers MUST be in E.164 format: `+` followed by country code and number with no spaces, dashes, or parentheses.
|
|
168
|
+
- US/Canada: `+1XXXXXXXXXX` (e.g. `+14155551234`)
|
|
169
|
+
- UK: `+44XXXXXXXXXX` (e.g. `+442071234567`)
|
|
170
|
+
- International: `+{country_code}{number}`
|
|
171
|
+
|
|
172
|
+
If the user provides a number in a different format, convert it to E.164 before calling. If the country is ambiguous, ask.
|
|
173
|
+
|
|
174
|
+
### Trial account limitations
|
|
175
|
+
|
|
176
|
+
On Twilio trial accounts, outbound calls can ONLY be made to **verified numbers**. If a call fails with a "not verified" error:
|
|
177
|
+
1. Tell the user they need to verify the number at https://console.twilio.com/us1/develop/phone-numbers/manage/verified
|
|
178
|
+
2. Or upgrade to a paid Twilio account to call any number
|
|
179
|
+
|
|
180
|
+
## Live Call Monitoring
|
|
181
|
+
|
|
182
|
+
### Showing the live transcript
|
|
183
|
+
|
|
184
|
+
By default, always show the live transcript of the call as it happens. When a call is in progress:
|
|
185
|
+
|
|
186
|
+
1. After placing the call with `call_start`, immediately begin polling with `call_status` to track the call state
|
|
187
|
+
2. The system fires transcript notifications as the conversation unfolds — both caller speech and assistant responses appear in real time in the conversation thread
|
|
188
|
+
3. Present each transcript entry clearly as it arrives:
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
📞 Call in progress...
|
|
192
|
+
|
|
193
|
+
🗣️ Assistant: "Hi, I'm calling on behalf of John to make a dinner reservation for tonight."
|
|
194
|
+
👤 Caller: "Sure, what time would you like?"
|
|
195
|
+
🗣️ Assistant: "We'd like a table for two at 7pm, please."
|
|
196
|
+
👤 Caller: "Let me check... yes, we have availability at 7pm."
|
|
197
|
+
🗣️ Assistant: "Wonderful! The reservation would be under John Smith."
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
4. Continue monitoring until the call completes or fails
|
|
201
|
+
|
|
202
|
+
### Handling questions during a call
|
|
203
|
+
|
|
204
|
+
The AI voice agent may encounter situations where it needs input from the user. When this happens:
|
|
205
|
+
|
|
206
|
+
1. The call status changes to `waiting_on_user`
|
|
207
|
+
2. A **pending question** appears in `call_status` output
|
|
208
|
+
3. Present the question prominently to the user:
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
❓ The person on the call asked something the assistant needs your help with:
|
|
212
|
+
"They're asking if you'd prefer the smoking or non-smoking section?"
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
4. The user can reply directly in the chat — their response is automatically routed to the live call via the call bridge
|
|
216
|
+
5. The AI voice agent receives the answer and continues the conversation naturally
|
|
217
|
+
|
|
218
|
+
**Important:** Pending questions need a quick answer from the user. There is a consultation timeout (default: 2 minutes). If no answer is provided in time, the AI voice agent will move on.
|
|
219
|
+
|
|
220
|
+
### Call status values
|
|
221
|
+
|
|
222
|
+
- **initiated** — Call is being placed
|
|
223
|
+
- **ringing** — Phone is ringing on the other end
|
|
224
|
+
- **in_progress** — Call is connected, conversation is active
|
|
225
|
+
- **waiting_on_user** — AI agent needs input from the user (check pending question)
|
|
226
|
+
- **completed** — Call ended successfully
|
|
227
|
+
- **failed** — Call failed (check lastError for details)
|
|
228
|
+
- **cancelled** — Call was manually cancelled
|
|
229
|
+
|
|
230
|
+
### Ending a call early
|
|
231
|
+
|
|
232
|
+
Use `call_end` with the call session ID to terminate an active call:
|
|
233
|
+
```
|
|
234
|
+
call_end call_session_id="<session_id>" reason="User requested to end the call"
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Call Quality Tips
|
|
238
|
+
|
|
239
|
+
When crafting tasks for the AI voice agent, follow these guidelines for the best call experience:
|
|
240
|
+
|
|
241
|
+
### Writing good task descriptions
|
|
242
|
+
|
|
243
|
+
- **Be specific about the objective**: "Make a dinner reservation for 2 at 7pm tonight" is better than "Call the restaurant"
|
|
244
|
+
- **Include relevant context**: Names, account numbers, appointment details — anything the agent might need
|
|
245
|
+
- **Specify what information to collect**: "Ask about their return policy and store hours" tells the agent what to gather
|
|
246
|
+
- **Set clear completion criteria**: The agent knows to end the call when the task is fulfilled
|
|
247
|
+
|
|
248
|
+
### Providing context
|
|
249
|
+
|
|
250
|
+
The `context` field is powerful — use it to give the agent background that helps it sound natural:
|
|
251
|
+
|
|
252
|
+
- User's name and identifying details (for making appointments, verifying accounts)
|
|
253
|
+
- Preferences and constraints (dietary restrictions, budget limits, scheduling conflicts)
|
|
254
|
+
- Previous interaction history ("I called last week and spoke with Sarah about...")
|
|
255
|
+
- Special instructions ("If they put you on hold for more than 5 minutes, hang up and we'll try again later")
|
|
256
|
+
|
|
257
|
+
### Things the AI voice agent handles well
|
|
258
|
+
|
|
259
|
+
- Making reservations and appointments
|
|
260
|
+
- Checking business hours, availability, or pricing
|
|
261
|
+
- Confirming or rescheduling existing appointments
|
|
262
|
+
- Gathering information (store policies, product availability)
|
|
263
|
+
- Simple customer service interactions
|
|
264
|
+
- Leaving voicemails (it will speak the message if voicemail picks up)
|
|
265
|
+
|
|
266
|
+
### Things to be aware of
|
|
267
|
+
|
|
268
|
+
- Calls have a maximum duration (configurable via `calls.maxDurationSeconds`, default: 1 hour)
|
|
269
|
+
- The agent gives a 2-minute warning before the time limit
|
|
270
|
+
- Emergency numbers (911, 112, 999, etc.) are blocked and cannot be called
|
|
271
|
+
- The AI disclosure setting (`calls.disclosure.enabled`) controls whether the agent announces it's an AI at the start of the call
|
|
272
|
+
|
|
273
|
+
## Configuration Reference
|
|
274
|
+
|
|
275
|
+
All call-related settings can be managed via `vellum config`:
|
|
276
|
+
|
|
277
|
+
| Setting | Description | Default |
|
|
278
|
+
|---|---|---|
|
|
279
|
+
| `calls.enabled` | Master switch for the calling feature | `false` |
|
|
280
|
+
| `calls.provider` | Voice provider (currently only `twilio`) | `twilio` |
|
|
281
|
+
| `calls.maxDurationSeconds` | Maximum call length in seconds | `3600` (1 hour) |
|
|
282
|
+
| `calls.userConsultTimeoutSeconds` | How long to wait for user answers | `120` (2 min) |
|
|
283
|
+
| `calls.disclosure.enabled` | Whether the AI announces itself at call start | `true` |
|
|
284
|
+
| `calls.disclosure.text` | The disclosure message spoken at call start | `"I should let you know that I'm an AI assistant calling on behalf of my user."` |
|
|
285
|
+
|
|
286
|
+
### Adjusting settings
|
|
287
|
+
|
|
288
|
+
```bash
|
|
289
|
+
# Increase max call duration to 2 hours
|
|
290
|
+
vellum config set calls.maxDurationSeconds 7200
|
|
291
|
+
|
|
292
|
+
# Disable AI disclosure (check local regulations first)
|
|
293
|
+
vellum config set calls.disclosure.enabled false
|
|
294
|
+
|
|
295
|
+
# Custom disclosure message
|
|
296
|
+
vellum config set calls.disclosure.text "Just so you know, this is an AI assistant calling for my user."
|
|
297
|
+
|
|
298
|
+
# Give more time for user consultation
|
|
299
|
+
vellum config set calls.userConsultTimeoutSeconds 300
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
## Troubleshooting
|
|
303
|
+
|
|
304
|
+
### "Twilio credentials not configured"
|
|
305
|
+
Run Step 3 to store your Account SID, Auth Token, and Phone Number via `credential_store`.
|
|
306
|
+
|
|
307
|
+
### "Calls feature is disabled"
|
|
308
|
+
Run `vellum config set calls.enabled true`.
|
|
309
|
+
|
|
310
|
+
### "No public base URL configured"
|
|
311
|
+
Run the **public-ingress** skill to set up ngrok and configure `ingress.publicBaseUrl`.
|
|
312
|
+
|
|
313
|
+
### Call fails immediately after initiating
|
|
314
|
+
- Check that the phone number is in E.164 format
|
|
315
|
+
- Verify Twilio credentials are correct (wrong auth token causes API errors)
|
|
316
|
+
- On trial accounts, ensure the destination number is verified
|
|
317
|
+
- Check that the ngrok tunnel is still running (`curl -s http://127.0.0.1:4040/api/tunnels`)
|
|
318
|
+
|
|
319
|
+
### Call connects but no audio / one-way audio
|
|
320
|
+
- The ConversationRelay WebSocket may not be connecting. Check that `ingress.publicBaseUrl` is correct and the tunnel is active
|
|
321
|
+
- Verify the gateway is running on `http://127.0.0.1:${GATEWAY_PORT:-7830}`
|
|
322
|
+
|
|
323
|
+
### "This phone number is not allowed to be called"
|
|
324
|
+
Emergency numbers (911, 112, 999, 000, 110, 119) are permanently blocked for safety.
|
|
325
|
+
|
|
326
|
+
### ngrok tunnel URL changed
|
|
327
|
+
If you restarted ngrok, the public URL has changed. Update it:
|
|
328
|
+
```bash
|
|
329
|
+
vellum config set ingress.publicBaseUrl "<new-url>"
|
|
330
|
+
```
|
|
331
|
+
Or re-run the public-ingress skill to auto-detect and save the new URL.
|
|
332
|
+
|
|
333
|
+
### Call drops after 30 seconds of silence
|
|
334
|
+
The system has a 30-second silence timeout. If nobody speaks for 30 seconds, the agent will ask "Are you still there?" This is expected behavior.
|
|
@@ -410,63 +410,8 @@ function buildAccessPreferenceSection(): string {
|
|
|
410
410
|
'If yes to any of these, use that path instead of the browser.',
|
|
411
411
|
...(isMacOS() ? [
|
|
412
412
|
'',
|
|
413
|
-
'
|
|
414
|
-
'',
|
|
415
|
-
'When interacting with native macOS apps or performing system-level actions, prefer **osascript**',
|
|
416
|
-
'via host_bash over browser automation or computer-use.',
|
|
417
|
-
'',
|
|
418
|
-
'The following apps support AppleScript and should be automated via osascript:',
|
|
419
|
-
'',
|
|
420
|
-
'**Communication:** Messages, Mail, Microsoft Outlook, FaceTime',
|
|
421
|
-
'**Contacts & Calendar:** Contacts, Calendar, Reminders',
|
|
422
|
-
'**Notes & Writing:** Notes, TextEdit, Pages, BBEdit, CotEditor',
|
|
423
|
-
'**Files & Finder:** Finder, Path Finder',
|
|
424
|
-
'**Browsers:** Safari, Google Chrome',
|
|
425
|
-
'**Music & Media:** Music (iTunes), Spotify, VLC, Podcasts, TV',
|
|
426
|
-
'**Productivity:** OmniFocus, Things 3, OmniOutliner, OmniPlan, OmniGraffle',
|
|
427
|
-
'**Office:** Microsoft Word, Microsoft Excel, Numbers, Keynote',
|
|
428
|
-
'**Developer tools:** Xcode, Terminal, iTerm2, Script Editor',
|
|
429
|
-
'**System:** Finder, System Events (UI scripting for any app), System Settings',
|
|
430
|
-
'**Automation:** Keyboard Maestro, Alfred, Automator',
|
|
431
|
-
'**Creative:** Adobe Photoshop, Final Cut Pro',
|
|
432
|
-
'',
|
|
433
|
-
'For any other app, try osascript first — check scriptability with:',
|
|
434
|
-
'```bash',
|
|
435
|
-
'osascript -e \'tell application "AppName" to get name\'',
|
|
436
|
-
'```',
|
|
437
|
-
'',
|
|
438
|
-
'Common examples:',
|
|
439
|
-
'```bash',
|
|
440
|
-
'# Send an iMessage',
|
|
441
|
-
'osascript -e \'tell application "Messages" to send "Hello!" to buddy "user@example.com"\'',
|
|
442
|
-
'',
|
|
443
|
-
'# Look up a contact',
|
|
444
|
-
'osascript -e \'tell application "Contacts" to get {name, phones} of every person whose name contains "Marina"\'',
|
|
445
|
-
'',
|
|
446
|
-
'# Read upcoming calendar events',
|
|
447
|
-
'osascript -e \'tell application "Calendar" to get summary of every event of calendar "Home" whose start date > (current date)\'',
|
|
448
|
-
'',
|
|
449
|
-
'# Create a reminder',
|
|
450
|
-
'osascript -e \'tell application "Reminders" to make new reminder with properties {name:"Buy milk", due date:((current date) + 1 * hours)}\'',
|
|
451
|
-
'',
|
|
452
|
-
'# Send an email',
|
|
453
|
-
'osascript -e \'tell application "Mail" to send (make new outgoing message with properties {subject:"Hi", content:"Hello", visible:true})\'',
|
|
454
|
-
'',
|
|
455
|
-
'# Create a note',
|
|
456
|
-
'osascript -e \'tell application "Notes" to make new note at folder "Notes" with properties {body:"My note"}\'',
|
|
457
|
-
'',
|
|
458
|
-
'# Open a URL in Safari',
|
|
459
|
-
'osascript -e \'tell application "Safari" to open location "https://example.com"\'',
|
|
460
|
-
'',
|
|
461
|
-
'# Play/pause Music',
|
|
462
|
-
'osascript -e \'tell application "Music" to playpause\'',
|
|
463
|
-
'',
|
|
464
|
-
'# Display a system notification',
|
|
465
|
-
'osascript -e \'display notification "Done!" with title "Vellum"\'',
|
|
466
|
-
'```',
|
|
467
|
-
'',
|
|
468
|
-
'osascript (AppleScript/JXA) has direct, reliable access to macOS app APIs and system events.',
|
|
469
|
-
'Use it whenever the task involves a native macOS app or system-level interaction.',
|
|
413
|
+
'On macOS, also consider the `macos-automation` skill for interacting with native apps',
|
|
414
|
+
'(Messages, Contacts, Calendar, Mail, Reminders, Music, Finder, etc.) via osascript.',
|
|
470
415
|
] : []),
|
|
471
416
|
].join('\n');
|
|
472
417
|
}
|
|
@@ -702,8 +647,13 @@ function escapeXml(str: string): string {
|
|
|
702
647
|
}
|
|
703
648
|
|
|
704
649
|
function formatSkillsCatalog(skills: SkillSummary[]): string {
|
|
705
|
-
// Filter out skills with disableModelInvocation
|
|
706
|
-
const visible = skills.filter(s =>
|
|
650
|
+
// Filter out skills with disableModelInvocation or unsupported OS
|
|
651
|
+
const visible = skills.filter(s => {
|
|
652
|
+
if (s.disableModelInvocation) return false;
|
|
653
|
+
const os = s.metadata?.os;
|
|
654
|
+
if (os && os.length > 0 && !os.includes(process.platform)) return false;
|
|
655
|
+
return true;
|
|
656
|
+
});
|
|
707
657
|
if (visible.length === 0) return '';
|
|
708
658
|
|
|
709
659
|
const lines = ['<available_skills>'];
|