vellum 0.2.2 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/bun.lock +68 -100
  2. package/package.json +3 -3
  3. package/src/__tests__/asset-materialize-tool.test.ts +2 -2
  4. package/src/__tests__/checker.test.ts +104 -0
  5. package/src/__tests__/config-schema.test.ts +6 -0
  6. package/src/__tests__/gateway-only-enforcement.test.ts +458 -0
  7. package/src/__tests__/handlers-twilio-config.test.ts +221 -0
  8. package/src/__tests__/ipc-snapshot.test.ts +20 -0
  9. package/src/__tests__/memory-regressions.test.ts +100 -2
  10. package/src/__tests__/oauth-callback-registry.test.ts +85 -0
  11. package/src/__tests__/oauth2-gateway-transport.test.ts +298 -0
  12. package/src/__tests__/provider-commit-message-generator.test.ts +342 -0
  13. package/src/__tests__/public-ingress-urls.test.ts +206 -0
  14. package/src/__tests__/session-conflict-gate.test.ts +28 -25
  15. package/src/__tests__/tool-executor.test.ts +88 -0
  16. package/src/__tests__/turn-commit.test.ts +64 -0
  17. package/src/calls/__tests__/twilio-webhook-urls.test.ts +162 -0
  18. package/src/calls/call-domain.ts +3 -3
  19. package/src/calls/twilio-config.ts +25 -9
  20. package/src/calls/twilio-provider.ts +4 -4
  21. package/src/calls/twilio-routes.ts +10 -2
  22. package/src/calls/twilio-webhook-urls.ts +47 -0
  23. package/src/cli/map.ts +30 -6
  24. package/src/config/defaults.ts +5 -0
  25. package/src/config/schema.ts +34 -2
  26. package/src/config/system-prompt.ts +1 -1
  27. package/src/config/types.ts +1 -0
  28. package/src/config/vellum-skills/telegram-setup/SKILL.md +1 -5
  29. package/src/daemon/computer-use-session.ts +2 -1
  30. package/src/daemon/handlers/config.ts +95 -4
  31. package/src/daemon/handlers/sessions.ts +2 -2
  32. package/src/daemon/handlers/work-items.ts +1 -1
  33. package/src/daemon/ipc-contract-inventory.json +8 -0
  34. package/src/daemon/ipc-contract.ts +39 -1
  35. package/src/daemon/ride-shotgun-handler.ts +2 -1
  36. package/src/daemon/session-agent-loop.ts +37 -2
  37. package/src/daemon/session-conflict-gate.ts +18 -109
  38. package/src/daemon/session-tool-setup.ts +7 -0
  39. package/src/inbound/public-ingress-urls.ts +106 -0
  40. package/src/memory/attachments-store.ts +0 -1
  41. package/src/memory/channel-delivery-store.ts +0 -1
  42. package/src/memory/conflict-intent.ts +114 -0
  43. package/src/memory/conversation-key-store.ts +0 -1
  44. package/src/memory/db.ts +346 -149
  45. package/src/memory/job-handlers/conflict.ts +23 -1
  46. package/src/memory/runs-store.ts +0 -3
  47. package/src/memory/schema.ts +0 -4
  48. package/src/runtime/gateway-client.ts +36 -0
  49. package/src/runtime/http-server.ts +140 -2
  50. package/src/runtime/routes/channel-routes.ts +121 -79
  51. package/src/security/oauth-callback-registry.ts +56 -0
  52. package/src/security/oauth2.ts +174 -58
  53. package/src/swarm/backend-claude-code.ts +1 -1
  54. package/src/tools/assets/search.ts +1 -36
  55. package/src/tools/browser/api-map.ts +123 -50
  56. package/src/tools/claude-code/claude-code.ts +131 -1
  57. package/src/tools/tasks/work-item-list.ts +16 -2
  58. package/src/workspace/commit-message-enrichment-service.ts +3 -3
  59. package/src/workspace/provider-commit-message-generator.ts +57 -14
  60. package/src/workspace/turn-commit.ts +6 -2
@@ -28,6 +28,25 @@ const VALID_PROFILES: readonly WorkerProfile[] = ['general', 'researcher', 'code
28
28
  const MAX_CLAUDE_CODE_DEPTH = 1;
29
29
  const DEPTH_ENV_VAR = 'VELLUM_CLAUDE_CODE_DEPTH';
30
30
 
31
/**
 * Primary input field(s) to surface for each known sub-tool, keyed by the
 * lower-cased tool name. Fields are listed in priority order; the first one
 * that is present (not null/undefined) wins.
 *
 * A Map is used instead of a plain object so that tool names such as
 * "constructor" or "toString" can never resolve to prototype properties.
 */
const TOOL_SUMMARY_FIELDS: ReadonlyMap<string, readonly string[]> = new Map<string, readonly string[]>([
  ['bash', ['command']],
  ['read', ['file_path', 'path']],
  ['file_read', ['file_path', 'path']],
  ['edit', ['file_path', 'path']],
  ['file_edit', ['file_path', 'path']],
  ['write', ['file_path', 'path']],
  ['file_write', ['file_path', 'path']],
  ['glob', ['pattern']],
  ['grep', ['pattern']],
  ['websearch', ['query']],
  ['web_search', ['query']],
  ['webfetch', ['url']],
  ['web_fetch', ['url']],
  ['task', ['description']],
]);

/** Upper bound (exclusive) on the length of a string picked by the generic fallback. */
const MAX_FALLBACK_SUMMARY_LENGTH = 200;

/**
 * Produces a short, human-readable summary of a sub-tool invocation's input.
 *
 * Resolution order:
 *  1. For a known tool, the first present primary field. Strings are returned
 *     unchanged; numbers and booleans are stringified.
 *  2. Otherwise (unknown tool, primary field absent, or primary field holding
 *     an object/array that would stringify to "[object Object]"), the first
 *     non-empty string value in `input` shorter than 200 characters.
 *  3. The empty string when nothing suitable is found.
 *
 * @param toolName - Tool name as reported by the agent; matched case-insensitively.
 * @param input - Raw tool input. Tolerates null / non-object values at runtime
 *   because callers obtain it through a type assertion.
 * @returns The summary text, or '' when no suitable value exists.
 */
function summarizeToolInput(toolName: string, input: Record<string, unknown>): string {
  // The static type promises an object, but the value arrives via a cast.
  if (input === null || typeof input !== 'object') return '';

  const primaryFields = TOOL_SUMMARY_FIELDS.get(toolName.toLowerCase());
  if (primaryFields !== undefined) {
    // Same precedence as `a ?? b`: skip only null/undefined, keep '' as a real value.
    const primary = primaryFields
      .map((field) => input[field])
      .find((value) => value !== undefined && value !== null);
    if (typeof primary === 'string') return primary;
    if (typeof primary === 'number' || typeof primary === 'boolean') return String(primary);
    // Objects/arrays (or a missing field) fall through to the generic fallback.
  }

  // Fallback: first short, non-empty string value.
  for (const value of Object.values(input)) {
    if (typeof value === 'string' && value.length > 0 && value.length < MAX_FALLBACK_SUMMARY_LENGTH) {
      return value;
    }
  }
  return '';
}
49
+
31
50
  export const claudeCodeTool: Tool = {
32
51
  name: 'claude_code',
33
52
  description: 'Delegate a coding task to Claude Code, an AI-powered coding agent that can read, write, and edit files, run shell commands, and perform complex multi-step software engineering tasks autonomously.',
@@ -203,12 +222,21 @@ export const claudeCodeTool: Tool = {
203
222
  queryOptions.resume = resumeSessionId;
204
223
  }
205
224
 
225
+ // Declared outside try so the catch block can emit a final tool_complete on error.
226
+ let lastSubToolName: string | null = null;
227
+ let activeToolUseId: string | null = null;
228
+
206
229
  try {
207
230
  const conversation = query({ prompt, options: queryOptions });
208
231
  let resultText = '';
209
232
  let sessionId = '';
210
233
  let hasError = false;
211
234
 
235
+ // Track tool_use_id → {name, inputSummary} for enriching progress events.
236
+ const toolUseIdInfo = new Map<string, { name: string; inputSummary: string }>();
237
+ // Track tool_use_ids that we've already emitted tool_start for (to avoid duplicates).
238
+ const emittedToolUseIds = new Set<string>();
239
+
212
240
  for await (const message of conversation) {
213
241
  switch (message.type) {
214
242
  case 'assistant': {
@@ -225,12 +253,103 @@ export const claudeCodeTool: Tool = {
225
253
  context.onOutput?.(block.text);
226
254
  resultText += block.text;
227
255
  }
256
+ if (block.type === 'tool_use') {
257
+ // Capture info keyed by tool_use_id for enriching tool_progress events.
258
+ const inputSummary = summarizeToolInput(block.name, block.input as Record<string, unknown>);
259
+ toolUseIdInfo.set(block.id, { name: block.name, inputSummary });
260
+
261
+ // Emit tool_start if we haven't already (tool_progress may have fired first).
262
+ // NOTE: Do NOT emit tool_complete for the previous tool here. An assistant
263
+ // message may contain multiple tool_use blocks (parallel tool use) and none
264
+ // of them have executed yet at this point. Completions are handled by
265
+ // tool_use_summary and tool_progress events.
266
+ if (!emittedToolUseIds.has(block.id)) {
267
+ context.onOutput?.(JSON.stringify({
268
+ subType: 'tool_start',
269
+ subToolName: block.name,
270
+ subToolInput: inputSummary,
271
+ subToolId: block.id,
272
+ }));
273
+ emittedToolUseIds.add(block.id);
274
+ lastSubToolName = block.name;
275
+ activeToolUseId = block.id;
276
+ }
277
+ }
228
278
  }
229
279
  }
230
280
  sessionId = message.session_id;
231
281
  break;
232
282
  }
283
+ case 'tool_progress': {
284
+ // The SDK fires tool_progress periodically DURING tool execution.
285
+ // This is our primary signal for live sub-tool progress.
286
+ const toolUseId = message.tool_use_id;
287
+ const toolName = message.tool_name;
288
+ sessionId = message.session_id;
289
+
290
+ // Record tool name if we don't have it yet (tool_progress fires before assistant sometimes).
291
+ if (!toolUseIdInfo.has(toolUseId)) {
292
+ toolUseIdInfo.set(toolUseId, { name: toolName, inputSummary: '' });
293
+ }
294
+
295
+ if (!emittedToolUseIds.has(toolUseId)) {
296
+ // New tool — mark previous as complete and emit tool_start.
297
+ if (lastSubToolName && activeToolUseId !== toolUseId) {
298
+ context.onOutput?.(JSON.stringify({
299
+ subType: 'tool_complete',
300
+ subToolName: lastSubToolName,
301
+ subToolId: activeToolUseId,
302
+ }));
303
+ }
304
+ const inputSummary = toolUseIdInfo.get(toolUseId)?.inputSummary ?? '';
305
+ context.onOutput?.(JSON.stringify({
306
+ subType: 'tool_start',
307
+ subToolName: toolName,
308
+ subToolInput: inputSummary,
309
+ subToolId: toolUseId,
310
+ }));
311
+ emittedToolUseIds.add(toolUseId);
312
+ lastSubToolName = toolName;
313
+ }
314
+ activeToolUseId = toolUseId;
315
+ break;
316
+ }
317
+ case 'tool_use_summary': {
318
+ // The SDK fires tool_use_summary after tool execution with a summary
319
+ // and the IDs of tools that were executed.
320
+ sessionId = message.session_id;
321
+ for (const completedId of message.preceding_tool_use_ids) {
322
+ const info = toolUseIdInfo.get(completedId);
323
+ const completedName: string | null = info?.name ?? lastSubToolName;
324
+ if (completedName && emittedToolUseIds.has(completedId)) {
325
+ context.onOutput?.(JSON.stringify({
326
+ subType: 'tool_complete',
327
+ subToolName: completedName,
328
+ subToolId: completedId,
329
+ }));
330
+ if (lastSubToolName === completedName) {
331
+ lastSubToolName = null;
332
+ }
333
+ }
334
+ // Prune completed entries to keep memory flat across long sessions.
335
+ toolUseIdInfo.delete(completedId);
336
+ emittedToolUseIds.delete(completedId);
337
+ }
338
+ activeToolUseId = null;
339
+ break;
340
+ }
233
341
  case 'result': {
342
+ // Mark the final sub-tool as complete (flag error if the session failed).
343
+ if (lastSubToolName) {
344
+ const isFailure = message.subtype !== 'success';
345
+ context.onOutput?.(JSON.stringify({
346
+ subType: 'tool_complete',
347
+ subToolName: lastSubToolName,
348
+ subToolId: activeToolUseId,
349
+ ...(isFailure && { subToolIsError: true }),
350
+ }));
351
+ lastSubToolName = null;
352
+ }
234
353
  sessionId = message.session_id;
235
354
  const resultMeta = {
236
355
  subtype: message.subtype,
@@ -259,7 +378,7 @@ export const claudeCodeTool: Tool = {
259
378
  parts.push(`Errors: ${errors.join('; ')}`);
260
379
  }
261
380
  if (denials.length > 0) {
262
- const denialSummary = denials.map(d => `${d.tool_name}`).join(', ');
381
+ const denialSummary = denials.map((d: { tool_name: string }) => `${d.tool_name}`).join(', ');
263
382
  parts.push(`Permission denied: ${denialSummary}`);
264
383
  }
265
384
  resultText += `\n\n${parts.join('\n')}`;
@@ -281,6 +400,17 @@ export const claudeCodeTool: Tool = {
281
400
  isError: hasError,
282
401
  };
283
402
  } catch (err) {
403
+ // Mark the last sub-tool as failed so the UI shows an error icon.
404
+ if (lastSubToolName) {
405
+ context.onOutput?.(JSON.stringify({
406
+ subType: 'tool_complete',
407
+ subToolName: lastSubToolName,
408
+ subToolId: activeToolUseId,
409
+ subToolIsError: true,
410
+ }));
411
+ lastSubToolName = null;
412
+ }
413
+
284
414
  const errMessage = err instanceof Error ? err.message : String(err);
285
415
  const recentStderr = stderrLines.slice(-20);
286
416
  log.error({ err, stderrTail: recentStderr }, 'Claude Code execution failed');
@@ -1,5 +1,17 @@
1
1
  import type { ToolContext, ToolExecutionResult } from '../types.js';
2
- import { listWorkItems, type WorkItemStatus } from '../../work-items/work-item-store.js';
2
+ import { listWorkItems, type WorkItem, type WorkItemStatus } from '../../work-items/work-item-store.js';
3
+
4
+ const PRIORITY_LABELS: Record<number, string> = { 0: 'High', 1: 'Medium', 2: 'Low' };
5
+
6
/**
 * Renders work items as a plain-text bullet list, one item per line, in the
 * form `- [Priority] title (status)`.
 *
 * The priority label is looked up in PRIORITY_LABELS by the item's
 * `priorityTier`, defaulting to 'Medium' for tiers without a label.
 * Underscores in the status are replaced with spaces for readability.
 *
 * @param items - Work items to render, in display order.
 * @returns The newline-joined list; an empty string when `items` is empty.
 */
function formatTaskList(items: WorkItem[]): string {
  return items
    .map((entry) => {
      const tierLabel = PRIORITY_LABELS[entry.priorityTier] ?? 'Medium';
      const readableStatus = entry.status.replace(/_/g, ' ');
      return `- [${tierLabel}] ${entry.title} (${readableStatus})`;
    })
    .join('\n');
}
3
15
 
4
16
  export async function executeTaskListShow(
5
17
  input: Record<string, unknown>,
@@ -33,7 +45,9 @@ export async function executeTaskListShow(
33
45
  ? `${count} ${Array.isArray(statusFilter) ? 'matching' : statusFilter} item${count === 1 ? '' : 's'}`
34
46
  : `${count} item${count === 1 ? '' : 's'}`;
35
47
 
36
- return { content: `Opened Tasks window (${label}).`, isError: false };
48
+ const taskList = formatTaskList(items);
49
+
50
+ return { content: `Opened Tasks window (${label}).\n\nCurrent tasks:\n${taskList}`, isError: false };
37
51
  } catch (err) {
38
52
  const msg = err instanceof Error ? err.message : String(err);
39
53
  return { content: `Error: ${msg}`, isError: true };
@@ -183,6 +183,9 @@ export class CommitEnrichmentService {
183
183
  // has already settled with the timeout error, that rejection is orphaned.
184
184
  // The .catch() swallows it to prevent an unhandled promise rejection.
185
185
  const enrichmentPromise = this.doEnrichment(job, controller.signal);
186
+ enrichmentPromise.catch(() => {
187
+ // Intentionally swallowed — the timeout branch already handled the error
188
+ });
186
189
  await Promise.race([
187
190
  enrichmentPromise,
188
191
  new Promise<never>((_, reject) => {
@@ -192,9 +195,6 @@ export class CommitEnrichmentService {
192
195
  }, this.jobTimeoutMs);
193
196
  }),
194
197
  ]);
195
- enrichmentPromise.catch(() => {
196
- // Intentionally swallowed — the timeout branch already handled the error
197
- });
198
198
  this.succeededCount++;
199
199
  log.debug(
200
200
  { commitHash: job.commitHash, attempts: job.attempts },
@@ -9,9 +9,11 @@ const log = getLogger('commit-message-llm');
9
9
  export type CommitMessageSource = 'llm' | 'deterministic';
10
10
  export type LLMFallbackReason =
11
11
  | 'disabled'
12
- | 'provider_not_initialized'
12
+ | 'missing_provider_api_key'
13
13
  | 'breaker_open'
14
14
  | 'insufficient_budget'
15
+ | 'missing_fast_model'
16
+ | 'provider_not_initialized'
15
17
  | 'timeout'
16
18
  | 'provider_error'
17
19
  | 'invalid_output';
@@ -36,6 +38,15 @@ Rules:
36
38
  - Total output must be under 300 characters
37
39
  - If you cannot determine a meaningful message, respond with exactly: FALLBACK`;
38
40
 
41
+ const PROVIDER_DEFAULT_FAST_MODELS: Record<string, string> = {
42
+ anthropic: 'claude-haiku-4-5-20251001',
43
+ openai: 'gpt-4o-mini',
44
+ gemini: 'gemini-2.0-flash',
45
+ };
46
+
47
+ // Providers that can be initialized without an API key (e.g., Ollama runs locally)
48
+ const KEYLESS_PROVIDERS = new Set(['ollama']);
49
+
39
50
  const deterministicProvider = new DefaultCommitMessageProvider();
40
51
 
41
52
  function buildDeterministicResult(
@@ -93,16 +104,33 @@ export class ProviderCommitMessageGenerator {
93
104
  const config = getConfig();
94
105
  const llmConfig = config.workspaceGit.commitMessageLLM;
95
106
 
107
+ // ── Fallback check order (canonical) ──────────────────────────────
108
+ // 1. disabled
109
+ // 2. missing_provider_api_key (except keyless providers like ollama)
110
+ // 3. breaker_open
111
+ // 4. insufficient_budget
112
+ // 5. missing_fast_model
113
+ // 6. provider_not_initialized
114
+ // 7. call provider → timeout / provider_error / invalid_output
115
+ // ──────────────────────────────────────────────────────────────────
116
+
96
117
  // Step 1: Feature gate
97
118
  if (!llmConfig.enabled) {
98
119
  return buildDeterministicResult(context, 'disabled');
99
120
  }
100
-
101
- // Step 2: Provider gate
102
121
  if (!llmConfig.useConfiguredProvider) {
103
122
  return buildDeterministicResult(context, 'disabled');
104
123
  }
105
124
 
125
+ // Step 2: API key preflight (skip for providers that run without a key)
126
+ if (!KEYLESS_PROVIDERS.has(config.provider)) {
127
+ const providerApiKey = config.apiKeys[config.provider];
128
+ if (!providerApiKey || providerApiKey === '') {
129
+ log.debug('Provider API key missing; falling back to deterministic');
130
+ return buildDeterministicResult(context, 'missing_provider_api_key');
131
+ }
132
+ }
133
+
106
134
  // Step 3: Circuit breaker
107
135
  if (this.isBreakerOpen()) {
108
136
  log.debug(
@@ -124,7 +152,19 @@ export class ProviderCommitMessageGenerator {
124
152
  }
125
153
  }
126
154
 
127
- // Step 5: Call the provider
155
+ // Step 5: Fast model preflight — resolve before any provider call
156
+ const fastModel = llmConfig.providerFastModelOverrides[config.provider]
157
+ ?? PROVIDER_DEFAULT_FAST_MODELS[config.provider];
158
+
159
+ if (!fastModel) {
160
+ log.debug(
161
+ { provider: config.provider },
162
+ 'No fast model resolvable for provider; falling back to deterministic',
163
+ );
164
+ return buildDeterministicResult(context, 'missing_fast_model');
165
+ }
166
+
167
+ // Step 6 + 7: Call the provider
128
168
  try {
129
169
  const { getProvider } = await import('../providers/registry.js');
130
170
 
@@ -172,7 +212,11 @@ export class ProviderCommitMessageGenerator {
172
212
  SYSTEM_PROMPT,
173
213
  {
174
214
  signal: ac.signal,
175
- config: { max_tokens: llmConfig.maxTokens, temperature: llmConfig.temperature },
215
+ config: {
216
+ model: fastModel,
217
+ max_tokens: llmConfig.maxTokens,
218
+ temperature: llmConfig.temperature,
219
+ },
176
220
  },
177
221
  );
178
222
  } catch (err: unknown) {
@@ -203,21 +247,20 @@ export class ProviderCommitMessageGenerator {
203
247
  return buildDeterministicResult(context, 'invalid_output');
204
248
  }
205
249
 
206
- // Validate single-line subject: first line must be <= 72 chars
207
- const firstLine = text.split('\n')[0];
208
- if (firstLine.length > 72) {
250
+ // Cap subject line to 72 chars deterministically (no fallback, no breaker failure)
251
+ const lines = text.split('\n');
252
+ if (lines[0].length > 72) {
209
253
  log.debug(
210
- { subjectLength: firstLine.length },
211
- 'LLM subject line too long; falling back to deterministic',
254
+ { originalLength: lines[0].length },
255
+ 'Capping LLM subject line to 72 chars',
212
256
  );
213
- this.recordFailure();
214
- return buildDeterministicResult(context, 'invalid_output');
257
+ lines[0] = lines[0].slice(0, 72);
215
258
  }
259
+ const finalMessage = lines.join('\n');
216
260
 
217
261
  this.recordSuccess();
218
- return { message: text, source: 'llm' };
262
+ return { message: finalMessage, source: 'llm' };
219
263
  } catch (err: unknown) {
220
- // Step 6: Any error -> deterministic fallback
221
264
  log.warn(
222
265
  { err: err instanceof Error ? err.message : String(err) },
223
266
  'Commit message LLM provider error; falling back to deterministic',
@@ -72,10 +72,14 @@ export async function commitTurnChanges(
72
72
  if (!provider) {
73
73
  // Guard: skip pre-check if deadline already elapsed to avoid unnecessary mutex contention
74
74
  let preClean = false;
75
+ let candidateChangedFiles: string[] = [];
75
76
  if (!deadlineMs || Date.now() < deadlineMs) {
76
77
  try {
77
78
  const preStatus = await gitService.getStatus();
78
79
  preClean = preStatus.clean;
80
+ if (!preClean) {
81
+ candidateChangedFiles = [...new Set([...preStatus.staged, ...preStatus.modified, ...preStatus.untracked])];
82
+ }
79
83
  } catch {
80
84
  // If we can't determine status, assume dirty so we don't skip the commit
81
85
  }
@@ -90,10 +94,10 @@ export async function commitTurnChanges(
90
94
  trigger: 'turn',
91
95
  sessionId,
92
96
  turnNumber,
93
- changedFiles: [], // File list unavailable outside the git mutex; generator handles empty arrays
97
+ changedFiles: candidateChangedFiles,
94
98
  timestampMs: Date.now(),
95
99
  },
96
- { deadlineMs, changedFiles: [] },
100
+ { deadlineMs, changedFiles: candidateChangedFiles },
97
101
  );
98
102
  commitMessageSource = result.source;
99
103
  llmFallbackReason = result.reason;