vellum 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bun.lock +4 -4
- package/package.json +4 -3
- package/src/__tests__/asset-materialize-tool.test.ts +2 -2
- package/src/__tests__/checker.test.ts +104 -0
- package/src/__tests__/config-schema.test.ts +0 -6
- package/src/__tests__/forbidden-legacy-symbols.test.ts +69 -0
- package/src/__tests__/gateway-only-enforcement.test.ts +538 -0
- package/src/__tests__/ingress-url-consistency.test.ts +214 -0
- package/src/__tests__/ipc-snapshot.test.ts +17 -5
- package/src/__tests__/oauth-callback-registry.test.ts +85 -0
- package/src/__tests__/oauth2-gateway-transport.test.ts +304 -0
- package/src/__tests__/provider-commit-message-generator.test.ts +51 -12
- package/src/__tests__/public-ingress-urls.test.ts +222 -0
- package/src/__tests__/runtime-events-sse-parity.test.ts +343 -0
- package/src/__tests__/runtime-events-sse.test.ts +162 -0
- package/src/__tests__/tool-executor.test.ts +88 -0
- package/src/__tests__/turn-commit.test.ts +64 -0
- package/src/__tests__/twilio-provider.test.ts +1 -1
- package/src/__tests__/twilio-routes.test.ts +4 -4
- package/src/__tests__/twitter-auth-handler.test.ts +87 -2
- package/src/calls/call-domain.ts +8 -6
- package/src/calls/twilio-config.ts +18 -3
- package/src/calls/twilio-routes.ts +10 -2
- package/src/config/bundled-skills/tasks/TOOLS.json +25 -0
- package/src/config/bundled-skills/tasks/tools/task-queue-run.ts +9 -0
- package/src/config/bundled-skills/transcribe/SKILL.md +25 -0
- package/src/config/bundled-skills/transcribe/TOOLS.json +32 -0
- package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +370 -0
- package/src/config/defaults.ts +4 -1
- package/src/config/schema.ts +30 -6
- package/src/config/system-prompt.ts +1 -1
- package/src/config/types.ts +1 -0
- package/src/config/vellum-skills/google-oauth-setup/SKILL.md +5 -4
- package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +4 -2
- package/src/config/vellum-skills/telegram-setup/SKILL.md +3 -3
- package/src/daemon/computer-use-session.ts +2 -1
- package/src/daemon/handlers/config.ts +49 -17
- package/src/daemon/handlers/sessions.ts +2 -2
- package/src/daemon/handlers/shared.ts +1 -0
- package/src/daemon/handlers/subagents.ts +85 -2
- package/src/daemon/handlers/twitter-auth.ts +31 -2
- package/src/daemon/handlers/work-items.ts +1 -1
- package/src/daemon/ipc-contract-inventory.json +8 -4
- package/src/daemon/ipc-contract.ts +34 -15
- package/src/daemon/lifecycle.ts +9 -4
- package/src/daemon/server.ts +7 -0
- package/src/daemon/session-tool-setup.ts +8 -1
- package/src/inbound/public-ingress-urls.ts +112 -0
- package/src/memory/attachments-store.ts +0 -1
- package/src/memory/channel-delivery-store.ts +0 -1
- package/src/memory/conversation-key-store.ts +0 -1
- package/src/memory/db.ts +472 -148
- package/src/memory/llm-usage-store.ts +0 -1
- package/src/memory/runs-store.ts +51 -6
- package/src/memory/schema.ts +2 -6
- package/src/runtime/gateway-client.ts +7 -1
- package/src/runtime/http-server.ts +174 -7
- package/src/runtime/routes/channel-routes.ts +7 -2
- package/src/runtime/routes/events-routes.ts +79 -0
- package/src/runtime/routes/run-routes.ts +43 -0
- package/src/runtime/run-orchestrator.ts +64 -7
- package/src/security/oauth-callback-registry.ts +66 -0
- package/src/security/oauth2.ts +208 -58
- package/src/subagent/manager.ts +3 -1
- package/src/swarm/backend-claude-code.ts +1 -1
- package/src/tools/assets/search.ts +1 -36
- package/src/tools/claude-code/claude-code.ts +3 -3
- package/src/tools/tasks/work-item-list.ts +16 -2
- package/src/tools/tasks/work-item-run.ts +78 -0
- package/src/util/platform.ts +1 -1
- package/src/work-items/work-item-runner.ts +171 -0
- package/src/workspace/provider-commit-message-generator.ts +39 -23
- package/src/workspace/turn-commit.ts +6 -2
- package/src/__tests__/handlers-twilio-config.test.ts +0 -221
- package/src/calls/__tests__/twilio-webhook-urls.test.ts +0 -162
- package/src/calls/twilio-webhook-urls.ts +0 -50
package/src/config/bundled-skills/transcribe/TOOLS.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "version": 1,
+  "tools": [
+    {
+      "name": "transcribe_media",
+      "description": "Transcribe an audio or video file using Whisper. Provide either a file_path to a local file or an attachment_id for an uploaded attachment. Set mode to 'api' (OpenAI cloud) or 'local' (whisper.cpp on-device). Ask the user which mode they prefer before calling.",
+      "category": "transcribe",
+      "risk": "low",
+      "input_schema": {
+        "type": "object",
+        "properties": {
+          "file_path": {
+            "type": "string",
+            "description": "Absolute path to a local audio or video file to transcribe"
+          },
+          "attachment_id": {
+            "type": "string",
+            "description": "The ID of an attached audio or video file to transcribe"
+          },
+          "mode": {
+            "type": "string",
+            "enum": ["api", "local"],
+            "description": "Transcription backend: 'api' for OpenAI Whisper API (cloud), 'local' for whisper.cpp (on-device)"
+          }
+        },
+        "required": ["mode"]
+      },
+      "executor": "tools/transcribe-media.ts",
+      "execution_target": "host"
+    }
+  ]
+}
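
For orientation, a conforming call to the new tool could look like the sketch below. The tool name and input fields come from the TOOLS.json schema above; the envelope shape wrapping them is an assumption, not taken from the package.

```ts
// Hypothetical invocation payload. The tool name and input fields are
// defined by the TOOLS.json schema above; the envelope shape is assumed.
const call = {
  name: 'transcribe_media',
  input: {
    file_path: '/Users/me/recordings/standup.mp4', // or attachment_id instead
    mode: 'local', // required: 'api' (OpenAI cloud) or 'local' (whisper.cpp)
  },
};
```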
package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts
ADDED
@@ -0,0 +1,370 @@
+import { tmpdir } from 'node:os';
+import { join, extname } from 'node:path';
+import { writeFile, unlink, access, readFile, mkdir, readdir } from 'node:fs/promises';
+import { randomUUID } from 'node:crypto';
+import type { ToolContext, ToolExecutionResult } from '../../../../tools/types.js';
+import { getAttachmentsByIds } from '../../../../memory/attachments-store.js';
+import { getConfig } from '../../../../config/loader.js';
+
+const VIDEO_EXTENSIONS = new Set(['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v', '.mpeg', '.mpg']);
+const AUDIO_EXTENSIONS = new Set(['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.aiff', '.wma']);
+
+/** Timeout for ffmpeg operations. */
+const FFMPEG_TIMEOUT_MS = 120_000;
+
+/** Max file size for a single OpenAI Whisper API request (25MB). */
+const WHISPER_API_MAX_BYTES = 25 * 1024 * 1024;
+
+/** Duration per chunk when splitting for the API (10 minutes — stays well under 25MB as WAV). */
+const API_CHUNK_DURATION_SECS = 600;
+
+/** Timeout for a single Whisper API request. */
+const API_REQUEST_TIMEOUT_MS = 300_000;
+
+/** Timeout for a single whisper.cpp chunk transcription. */
+const LOCAL_CHUNK_TIMEOUT_MS = 600_000;
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function spawnWithTimeout(
+  cmd: string[],
+  timeoutMs: number,
+): Promise<{ exitCode: number; stdout: string; stderr: string }> {
+  return new Promise((resolve, reject) => {
+    const proc = Bun.spawn(cmd, { stdout: 'pipe', stderr: 'pipe' });
+    const timer = setTimeout(() => {
+      proc.kill();
+      reject(new Error(`Process timed out after ${timeoutMs}ms: ${cmd[0]}`));
+    }, timeoutMs);
+    proc.exited.then(async (exitCode) => {
+      clearTimeout(timer);
+      const stdout = await new Response(proc.stdout).text();
+      const stderr = await new Response(proc.stderr).text();
+      resolve({ exitCode, stdout, stderr });
+    });
+  });
+}
+
+async function getAudioDuration(audioPath: string): Promise<number> {
+  const result = await spawnWithTimeout([
+    'ffprobe', '-v', 'error',
+    '-show_entries', 'format=duration',
+    '-of', 'csv=p=0',
+    audioPath,
+  ], 10_000);
+  if (result.exitCode !== 0) return 0;
+  return parseFloat(result.stdout.trim()) || 0;
+}
+
+async function splitAudio(
+  audioPath: string,
+  chunkDir: string,
+  chunkDurationSecs: number,
+): Promise<string[]> {
+  const chunkPattern = join(chunkDir, 'chunk-%03d.wav');
+  const result = await spawnWithTimeout([
+    'ffmpeg', '-y',
+    '-i', audioPath,
+    '-f', 'segment',
+    '-segment_time', String(chunkDurationSecs),
+    '-acodec', 'pcm_s16le',
+    '-ar', '16000',
+    '-ac', '1',
+    chunkPattern,
+  ], FFMPEG_TIMEOUT_MS);
+  if (result.exitCode !== 0) {
+    throw new Error(`Failed to split audio: ${result.stderr.slice(0, 300)}`);
+  }
+  const files = await readdir(chunkDir);
+  return files
+    .filter(f => f.startsWith('chunk-') && f.endsWith('.wav'))
+    .sort()
+    .map(f => join(chunkDir, f));
+}
+
+// ---------------------------------------------------------------------------
+// Source resolution
+// ---------------------------------------------------------------------------
+
+async function resolveSource(
+  input: Record<string, unknown>,
+): Promise<{ inputPath: string; isVideo: boolean; tempFile: string | null } | ToolExecutionResult> {
+  const filePath = input.file_path as string | undefined;
+  const attachmentId = input.attachment_id as string | undefined;
+
+  if (filePath) {
+    try { await access(filePath); } catch {
+      return { content: `File not found: ${filePath}`, isError: true };
+    }
+    const ext = extname(filePath).toLowerCase();
+    const isVideo = VIDEO_EXTENSIONS.has(ext);
+    const isAudio = AUDIO_EXTENSIONS.has(ext);
+    if (!isVideo && !isAudio) {
+      return { content: `Unsupported file type: ${ext}. Only video and audio files can be transcribed.`, isError: true };
+    }
+    return { inputPath: filePath, isVideo, tempFile: null };
+  }
+
+  if (attachmentId) {
+    const attachments = getAttachmentsByIds([attachmentId]);
+    if (attachments.length === 0) {
+      return { content: `Attachment not found: ${attachmentId}`, isError: true };
+    }
+    const attachment = attachments[0];
+    const mime = attachment.mimeType;
+    if (!mime.startsWith('video/') && !mime.startsWith('audio/')) {
+      return { content: `Unsupported file type: ${mime}. Only video and audio files can be transcribed.`, isError: true };
+    }
+    const ext = mime.startsWith('video/') ? '.mp4' : '.m4a';
+    const tempPath = join(tmpdir(), `vellum-transcribe-in-${randomUUID()}${ext}`);
+    await writeFile(tempPath, Buffer.from(attachment.dataBase64, 'base64'));
+    return { inputPath: tempPath, isVideo: mime.startsWith('video/'), tempFile: tempPath };
+  }
+
+  return { content: 'Provide either file_path or attachment_id.', isError: true };
+}
+
+/** Convert source to 16kHz mono WAV for consistent processing. */
+async function toWav(inputPath: string, isVideo: boolean): Promise<string> {
+  const wavPath = join(tmpdir(), `vellum-transcribe-${randomUUID()}.wav`);
+  const args = ['ffmpeg', '-y', '-i', inputPath];
+  if (isVideo) args.push('-vn');
+  args.push('-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', wavPath);
+  const result = await spawnWithTimeout(args, FFMPEG_TIMEOUT_MS);
+  if (result.exitCode !== 0) {
+    throw new Error(`ffmpeg failed: ${result.stderr.slice(0, 500)}`);
+  }
+  return wavPath;
+}
+
+// ---------------------------------------------------------------------------
+// API mode — OpenAI Whisper API
+// ---------------------------------------------------------------------------
+
+async function transcribeViaApi(
+  audioPath: string,
+  apiKey: string,
+  context: ToolContext,
+): Promise<string> {
+  const duration = await getAudioDuration(audioPath);
+  const fileSize = Bun.file(audioPath).size;
+
+  // If small enough, send directly
+  if (fileSize <= WHISPER_API_MAX_BYTES) {
+    return await whisperApiRequest(audioPath, apiKey);
+  }
+
+  // Split into chunks for large files
+  const chunkDir = join(tmpdir(), `vellum-transcribe-api-chunks-${randomUUID()}`);
+  await mkdir(chunkDir, { recursive: true });
+
+  try {
+    context.onOutput?.(`Large file (${Math.round(duration / 60)}min) — splitting into chunks...\n`);
+    const chunks = await splitAudio(audioPath, chunkDir, API_CHUNK_DURATION_SECS);
+    const parts: string[] = [];
+
+    for (let i = 0; i < chunks.length; i++) {
+      if (context.signal?.aborted) throw new Error('Cancelled');
+      context.onOutput?.(` Transcribing chunk ${i + 1}/${chunks.length}...\n`);
+      const text = await whisperApiRequest(chunks[i], apiKey);
+      if (text) parts.push(text);
+    }
+
+    return parts.join(' ');
+  } finally {
+    const { rm } = await import('node:fs/promises');
+    await rm(chunkDir, { recursive: true, force: true }).catch(() => {});
+  }
+}
+
+async function whisperApiRequest(audioPath: string, apiKey: string): Promise<string> {
+  const audioData = await readFile(audioPath);
+  const formData = new FormData();
+  formData.append('file', new Blob([audioData], { type: 'audio/wav' }), 'audio.wav');
+  formData.append('model', 'whisper-1');
+
+  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
+    method: 'POST',
+    headers: { 'Authorization': `Bearer ${apiKey}` },
+    body: formData,
+    signal: AbortSignal.timeout(API_REQUEST_TIMEOUT_MS),
+  });
+
+  if (!response.ok) {
+    const body = await response.text().catch(() => '');
+    throw new Error(`Whisper API error (${response.status}): ${body.slice(0, 300)}`);
+  }
+
+  const result = await response.json() as { text?: string };
+  return result.text?.trim() ?? '';
+}
+
+// ---------------------------------------------------------------------------
+// Local mode — whisper.cpp
+// ---------------------------------------------------------------------------
+
+async function transcribeViaLocal(
+  audioPath: string,
+  context: ToolContext,
+): Promise<string> {
+  // Check if whisper-cpp is installed
+  const whichResult = await spawnWithTimeout(['which', 'whisper-cpp'], 5_000);
+  if (whichResult.exitCode !== 0) {
+    throw new Error(
+      'whisper-cpp is not installed. Install it with: brew install whisper-cpp'
+    );
+  }
+
+  // Resolve model path — use the base model, download if needed
+  const modelPath = await resolveWhisperModel(context);
+
+  const duration = await getAudioDuration(audioPath);
+
+  if (duration > 0 && duration <= 1800) {
+    // Under 30 minutes — transcribe directly (whisper.cpp handles long files well)
+    context.onOutput?.(`Transcribing ${Math.round(duration / 60)}min of audio locally...\n`);
+    return await whisperCppRun(audioPath, modelPath);
+  }
+
+  // Very long files — split into 10-minute chunks to show progress
+  const chunkDir = join(tmpdir(), `vellum-transcribe-local-chunks-${randomUUID()}`);
+  await mkdir(chunkDir, { recursive: true });
+
+  try {
+    context.onOutput?.(`Large file (${Math.round(duration / 60)}min) — splitting into chunks...\n`);
+    const chunks = await splitAudio(audioPath, chunkDir, 600);
+    const parts: string[] = [];
+
+    for (let i = 0; i < chunks.length; i++) {
+      if (context.signal?.aborted) throw new Error('Cancelled');
+      context.onOutput?.(` Transcribing chunk ${i + 1}/${chunks.length}...\n`);
+      const text = await whisperCppRun(chunks[i], modelPath);
+      if (text) parts.push(text);
+    }
+
+    return parts.join(' ');
+  } finally {
+    const { rm } = await import('node:fs/promises');
+    await rm(chunkDir, { recursive: true, force: true }).catch(() => {});
+  }
+}
+
+async function resolveWhisperModel(context: ToolContext): Promise<string> {
+  // Check common locations for the base model
+  const homeDir = process.env.HOME ?? '/tmp';
+  const candidates = [
+    join(homeDir, '.vellum', 'models', 'ggml-base.en.bin'),
+    join(homeDir, '.vellum', 'models', 'ggml-base.bin'),
+    '/usr/local/share/whisper-cpp/models/ggml-base.en.bin',
+    '/opt/homebrew/share/whisper-cpp/models/ggml-base.en.bin',
+  ];
+
+  for (const p of candidates) {
+    try { await access(p); return p; } catch { /* next */ }
+  }
+
+  // Download the base.en model (~140MB)
+  const modelDir = join(homeDir, '.vellum', 'models');
+  await mkdir(modelDir, { recursive: true });
+  const modelPath = join(modelDir, 'ggml-base.en.bin');
+  const modelUrl = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin';
+
+  context.onOutput?.('Downloading Whisper base.en model (~140MB)...\n');
+
+  const response = await fetch(modelUrl);
+  if (!response.ok) {
+    throw new Error(`Failed to download model: ${response.status}`);
+  }
+
+  const data = Buffer.from(await response.arrayBuffer());
+  await writeFile(modelPath, data);
+  context.onOutput?.('Model downloaded.\n');
+
+  return modelPath;
+}
+
+async function whisperCppRun(audioPath: string, modelPath: string): Promise<string> {
+  const result = await spawnWithTimeout([
+    'whisper-cpp',
+    '-m', modelPath,
+    '-f', audioPath,
+    '--no-timestamps',
+  ], LOCAL_CHUNK_TIMEOUT_MS);
+
+  if (result.exitCode !== 0) {
+    throw new Error(`whisper-cpp failed: ${result.stderr.slice(0, 300)}`);
+  }
+
+  // whisper-cpp outputs transcription to stderr with some logging, and
+  // the actual text lines to stdout. Clean up whitespace.
+  return result.stdout
+    .split('\n')
+    .map(l => l.trim())
+    .filter(l => l.length > 0)
+    .join(' ')
+    .trim();
+}
+
+// ---------------------------------------------------------------------------
+// Main entry point
+// ---------------------------------------------------------------------------
+
+export async function run(
+  input: Record<string, unknown>,
+  context: ToolContext,
+): Promise<ToolExecutionResult> {
+  const mode = input.mode as 'api' | 'local';
+  if (!mode || (mode !== 'api' && mode !== 'local')) {
+    return {
+      content: "Please specify mode: 'api' (OpenAI cloud) or 'local' (whisper.cpp on-device). Ask the user which they prefer.",
+      isError: true,
+    };
+  }
+
+  // Validate API key for api mode
+  if (mode === 'api') {
+    const config = getConfig();
+    const apiKey = config.apiKeys.openai;
+    if (!apiKey) {
+      return {
+        content: 'No OpenAI API key configured. Set your OpenAI API key to use cloud transcription, or use mode "local" for on-device transcription with whisper.cpp.',
+        isError: true,
+      };
+    }
+  }
+
+  const source = await resolveSource(input);
+  if ('isError' in source) return source;
+
+  const { inputPath, isVideo, tempFile } = source;
+  let wavPath: string | null = null;
+
+  try {
+    // Convert to WAV
+    wavPath = await toWav(inputPath, isVideo);
+
+    let text: string;
+    if (mode === 'api') {
+      const config = getConfig();
+      text = await transcribeViaApi(wavPath, config.apiKeys.openai!, context);
+    } else {
+      text = await transcribeViaLocal(wavPath, context);
+    }
+
+    if (!text.trim()) {
+      return { content: 'No speech detected in the audio.', isError: false };
+    }

+    return { content: text, isError: false };
+  } catch (err) {
+    return {
+      content: `Transcription failed: ${(err as Error).message}`,
+      isError: true,
+    };
+  } finally {
+    if (tempFile) { try { await unlink(tempFile); } catch { /* ignore */ } }
+    if (wavPath) { try { await unlink(wavPath); } catch { /* ignore */ } }
+  }
+}
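
A rough sketch of exercising the new executor's `run` export directly, for example from a test. The stub context below only supplies `onOutput` and `signal`, which are the only `ToolContext` members this file reads, so the cast is a shortcut and not the real contract.

```ts
// Sketch only: drives run() with a minimal stub context. Assumption: the
// remaining ToolContext members are not needed for this particular tool.
import { run } from './transcribe-media.js';
import type { ToolContext } from '../../../../tools/types.js';

const controller = new AbortController();
const context = {
  onOutput: (chunk: string) => process.stdout.write(chunk),
  signal: controller.signal,
} as unknown as ToolContext;

const result = await run({ file_path: '/tmp/interview.m4a', mode: 'local' }, context);
console.log(result.isError ? `failed: ${result.content}` : result.content);
```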
package/src/config/defaults.ts
CHANGED
@@ -217,7 +217,6 @@ export const DEFAULT_CONFIG: AssistantConfig = {
   calls: {
     enabled: true,
     provider: 'twilio' as const,
-    webhookBaseUrl: '',
     maxDurationSeconds: 3600,
     userConsultTimeoutSeconds: 120,
     disclosure: {
@@ -228,4 +227,8 @@ export const DEFAULT_CONFIG: AssistantConfig = {
       denyCategories: [],
     },
   },
+  ingress: {
+    publicBaseUrl: '',
+    mode: 'gateway_only' as const,
+  },
 };
package/src/config/schema.ts
CHANGED
@@ -9,6 +9,7 @@ const VALID_SANDBOX_BACKENDS = ['native', 'docker'] as const;
 const VALID_DOCKER_NETWORKS = ['none', 'bridge'] as const;
 const VALID_PERMISSIONS_MODES = ['legacy', 'strict'] as const;
 const VALID_CALL_PROVIDERS = ['twilio'] as const;
+const VALID_INGRESS_MODES = ['gateway_only', 'compat'] as const;
 
 export const TimeoutConfigSchema = z.object({
   shellMaxTimeoutSec: z
@@ -780,8 +781,19 @@ export const WorkspaceGitConfigSchema = z.object({
       .int().positive().default(2000),
     backoffMaxMs: z.number({ error: 'workspaceGit.commitMessageLLM.breaker.backoffMaxMs must be a number' })
       .int().positive().default(60000),
-  }).default({}),
-}).default({
+  }).default({ openAfterFailures: 3, backoffBaseMs: 2000, backoffMaxMs: 60000 }),
+}).default({
+  enabled: false,
+  useConfiguredProvider: true,
+  providerFastModelOverrides: {},
+  timeoutMs: 600,
+  maxTokens: 120,
+  temperature: 0.2,
+  maxFilesInPrompt: 30,
+  maxDiffBytes: 12000,
+  minRemainingTurnBudgetMs: 1000,
+  breaker: { openAfterFailures: 3, backoffBaseMs: 2000, backoffMaxMs: 60000 },
+}),
 });
 
 export const AgentHeartbeatConfigSchema = z.object({
@@ -883,9 +895,6 @@ export const CallsConfigSchema = z.object({
       error: `calls.provider must be one of: ${VALID_CALL_PROVIDERS.join(', ')}`,
     })
     .default('twilio'),
-  webhookBaseUrl: z
-    .string({ error: 'calls.webhookBaseUrl must be a string' })
-    .default(''),
   maxDurationSeconds: z
     .number({ error: 'calls.maxDurationSeconds must be a number' })
     .int('calls.maxDurationSeconds must be an integer')
@@ -914,6 +923,17 @@ export const SkillsConfigSchema = z.object({
   allowBundled: z.array(z.string()).nullable().default(null),
 });
 
+export const IngressConfigSchema = z.object({
+  publicBaseUrl: z
+    .string({ error: 'ingress.publicBaseUrl must be a string' })
+    .default(''),
+  mode: z
+    .enum(VALID_INGRESS_MODES, {
+      error: `ingress.mode must be one of: ${VALID_INGRESS_MODES.join(', ')}`,
+    })
+    .default('gateway_only'),
+});
+
 export const AssistantConfigSchema = z.object({
   provider: z
     .enum(VALID_PROVIDERS, {
@@ -1152,7 +1172,6 @@ export const AssistantConfigSchema = z.object({
   calls: CallsConfigSchema.default({
     enabled: true,
     provider: 'twilio',
-    webhookBaseUrl: '',
     maxDurationSeconds: 3600,
     userConsultTimeoutSeconds: 120,
     disclosure: {
@@ -1163,6 +1182,10 @@
       denyCategories: [],
     },
   }),
+  ingress: IngressConfigSchema.default({
+    publicBaseUrl: '',
+    mode: 'gateway_only',
+  }),
 }).superRefine((config, ctx) => {
   if (config.contextWindow.targetInputTokens >= config.contextWindow.maxInputTokens) {
     ctx.addIssue({
@@ -1223,3 +1246,4 @@ export type WorkspaceGitConfig = z.infer<typeof WorkspaceGitConfigSchema>;
 export type CallsConfig = z.infer<typeof CallsConfigSchema>;
 export type CallsDisclosureConfig = z.infer<typeof CallsDisclosureConfigSchema>;
 export type CallsSafetyConfig = z.infer<typeof CallsSafetyConfigSchema>;
+export type IngressConfig = z.infer<typeof IngressConfigSchema>;
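
In practice the new schema behaves like the others in this file: omitted fields fall back to their defaults, and an invalid mode fails validation. A minimal sketch, with the custom `error` messages from the source dropped for brevity:

```ts
import { z } from 'zod';

const VALID_INGRESS_MODES = ['gateway_only', 'compat'] as const;

const IngressConfigSchema = z.object({
  publicBaseUrl: z.string().default(''),
  mode: z.enum(VALID_INGRESS_MODES).default('gateway_only'),
});

IngressConfigSchema.parse({});
// => { publicBaseUrl: '', mode: 'gateway_only' }  (defaults apply)

IngressConfigSchema.parse({ publicBaseUrl: 'https://abc123.ngrok-free.app', mode: 'compat' });
// explicit compat opt-in

IngressConfigSchema.safeParse({ mode: 'direct' }).success;
// => false: 'direct' is not a valid ingress mode
```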
package/src/config/system-prompt.ts
CHANGED
@@ -218,7 +218,7 @@ function buildTaskScheduleReminderRoutingSection(): string {
   '',
   'You can create ad-hoc work items by providing just a `title` to `task_list_add` — no existing task template is needed. A lightweight template is auto-created behind the scenes. For reusable task definitions with templates and input schemas, use `task_save` first.',
   '',
-  '**IMPORTANT:** When you call `task_list_show`, the Tasks window opens automatically on the client. Do NOT also create a separate surface/UI (via `ui_show` or `app_create`) to display the task queue
+  '**IMPORTANT:** When you call `task_list_show`, the Tasks window opens automatically on the client AND the tool returns the current task list. Present a brief summary of the tasks in your chat response so the user can see them inline. Do NOT also create a separate surface/UI (via `ui_show` or `app_create`) to display the task queue — that causes duplicate windows.',
   '',
   '### Schedules (schedule_create / schedule_list / schedule_update / schedule_delete)',
   'For recurring automated jobs that run on a recurrence schedule (cron or RRULE). Use ONLY when the user explicitly wants:',
package/src/config/types.ts
CHANGED

package/src/config/vellum-skills/google-oauth-setup/SKILL.md
CHANGED
@@ -124,7 +124,7 @@ Tell the user: "Consent screen is configured! Almost there — just need to crea
 
 > **Create OAuth Credentials**
 >
-> I'm about to create OAuth
+> I'm about to create OAuth Web Application credentials for Vellum Assistant. This generates a client ID that Vellum uses to initiate the authorization flow. The redirect URI will point to the gateway's OAuth callback endpoint.
 
 Wait for the user to approve. If they decline, explain that credentials are the final step needed and offer to try again or cancel.
 
@@ -133,8 +133,9 @@ Once approved, navigate to `https://console.cloud.google.com/apis/credentials?pr
 Use `browser_click` on "+ Create Credentials" at the top, then select "OAuth client ID" from the dropdown.
 
 Take a `browser_snapshot` and fill in:
-1. **Application type:** Select "
-2. **Name:** "Vellum Assistant
+1. **Application type:** Select "Web application" from the dropdown
+2. **Name:** "Vellum Assistant"
+3. **Authorized redirect URIs:** Click "Add URI" and enter `${ingress.publicBaseUrl}/webhooks/oauth/callback` (e.g. `https://abc123.ngrok-free.app/webhooks/oauth/callback`). Read the `ingress.publicBaseUrl` value from the assistant's workspace config (Settings > Public Ingress) or the `INGRESS_PUBLIC_BASE_URL` environment variable.
 
 Use `browser_click` on the "Create" button.
 
@@ -179,7 +180,7 @@ Summarize what was accomplished:
 - Created a Google Cloud project (or used an existing one)
 - Enabled the Gmail API and Google Calendar API
 - Configured the OAuth consent screen with appropriate scopes (including calendar)
-- Created OAuth
+- Created OAuth Web Application credentials with gateway callback redirect URI
 - Connected your Gmail and Google Calendar accounts
 
 ## Error Handling
package/src/config/vellum-skills/slack-oauth-setup/SKILL.md
CHANGED
@@ -85,14 +85,16 @@ Tell the user: "Permissions configured! Now let's set up the redirect URL and ge
 
 Navigate to the "OAuth & Permissions" page if not already there.
 
+The redirect URL must point to the gateway's OAuth callback endpoint. Determine the URL by reading the `ingress.publicBaseUrl` value from the assistant's workspace config (Settings > Public Ingress) or the `INGRESS_PUBLIC_BASE_URL` environment variable. The callback path is `/webhooks/oauth/callback`.
+
 In the "Redirect URLs" section:
 1. Click "Add New Redirect URL"
-2. Enter `
+2. Enter `${ingress.publicBaseUrl}/webhooks/oauth/callback` (e.g. `https://abc123.ngrok-free.app/webhooks/oauth/callback`)
 3. Click "Add" then "Save URLs"
 
 Take a `browser_snapshot` to confirm.
 
-Tell the user: "Redirect URL configured."
+Tell the user: "Redirect URL configured. Make sure your tunnel is running and `ingress.publicBaseUrl` is set in Settings so the callback can reach the gateway."
 
 ## Step 5: Extract Client ID and Client Secret
 
package/src/config/vellum-skills/telegram-setup/SKILL.md
CHANGED
@@ -5,14 +5,14 @@ user-invocable: true
 metadata: {"vellum": {"emoji": "\ud83e\udd16"}}
 ---
 
-You are helping your user connect a Telegram bot to the Vellum Assistant gateway. When this skill is invoked, walk through each step below using only existing tools.
+You are helping your user connect a Telegram bot to the Vellum Assistant gateway. Telegram webhooks are received exclusively by the gateway (the public ingress boundary) — they never hit the assistant runtime directly. When this skill is invoked, walk through each step below using only existing tools.
 
 ## What You Need
 
 1. **Bot token** from Telegram's @BotFather (the user provides this)
-2. **Gateway webhook URL**
+2. **Gateway webhook URL** — derived from the canonical ingress setting: `${ingress.publicBaseUrl}/webhooks/telegram`. The gateway is the only publicly reachable endpoint; Telegram sends webhooks to the gateway, which validates and forwards them to the assistant runtime internally. If `ingress.publicBaseUrl` is configured (Settings UI > Public Ingress, or `INGRESS_PUBLIC_BASE_URL` env var), use it to auto-derive the webhook URL. If it is not configured, ask the user to set it before proceeding.
 
-If the user has already provided the bot token in the conversation, use it directly. Otherwise, ask for it.
+If the user has already provided the bot token in the conversation, use it directly. Otherwise, ask for it.
 
 ## Setup Steps
 
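
The skill updates above all derive public URLs the same way: `ingress.publicBaseUrl` plus a fixed path (`/webhooks/oauth/callback` for the OAuth skills, `/webhooks/telegram` for Telegram). A sketch of that derivation follows; the helper name is invented here for illustration and is not part of the package.

```ts
// Hypothetical helper mirroring the derivation the skills describe.
// Only the paths and the ingress.publicBaseUrl setting come from the diff.
function publicIngressUrl(publicBaseUrl: string, path: string): string {
  if (!publicBaseUrl) {
    throw new Error('ingress.publicBaseUrl is not configured (Settings > Public Ingress)');
  }
  return new URL(path, publicBaseUrl).toString();
}

publicIngressUrl('https://abc123.ngrok-free.app', '/webhooks/telegram');
// => 'https://abc123.ngrok-free.app/webhooks/telegram'
publicIngressUrl('https://abc123.ngrok-free.app', '/webhooks/oauth/callback');
// => 'https://abc123.ngrok-free.app/webhooks/oauth/callback'
```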
package/src/daemon/computer-use-session.ts
CHANGED
@@ -15,6 +15,7 @@ import { AgentLoop } from '../agent/loop.js';
 import { ToolExecutor } from '../tools/executor.js';
 import { PermissionPrompter } from '../permissions/prompter.js';
 import { SecretPrompter } from '../permissions/secret-prompter.js';
+import type { UserDecision } from '../permissions/types.js';
 import { allUiSurfaceTools } from '../tools/ui-surface/definitions.js';
 import { allComputerUseTools } from '../tools/computer-use/definitions.js';
 import { registerSkillTools } from '../tools/registry.js';
@@ -893,7 +894,7 @@ export class ComputerUseSession {
 
   handleConfirmationResponse(
     requestId: string,
-    decision:
+    decision: UserDecision,
     selectedPattern?: string,
     selectedScope?: string,
   ): void {