npm - vellum - Versions diffs - 0.2.2 → 0.2.8 - Mend

vellum 0.2.2 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/bun.lock +68 -100
package/package.json +3 -3
package/src/__tests__/asset-materialize-tool.test.ts +2 -2
package/src/__tests__/checker.test.ts +104 -0
package/src/__tests__/config-schema.test.ts +6 -0
package/src/__tests__/gateway-only-enforcement.test.ts +458 -0
package/src/__tests__/handlers-twilio-config.test.ts +221 -0
package/src/__tests__/ipc-snapshot.test.ts +20 -0
package/src/__tests__/memory-regressions.test.ts +100 -2
package/src/__tests__/oauth-callback-registry.test.ts +85 -0
package/src/__tests__/oauth2-gateway-transport.test.ts +298 -0
package/src/__tests__/provider-commit-message-generator.test.ts +342 -0
package/src/__tests__/public-ingress-urls.test.ts +206 -0
package/src/__tests__/session-conflict-gate.test.ts +28 -25
package/src/__tests__/tool-executor.test.ts +88 -0
package/src/__tests__/turn-commit.test.ts +64 -0
package/src/calls/__tests__/twilio-webhook-urls.test.ts +162 -0
package/src/calls/call-domain.ts +3 -3
package/src/calls/twilio-config.ts +25 -9
package/src/calls/twilio-provider.ts +4 -4
package/src/calls/twilio-routes.ts +10 -2
package/src/calls/twilio-webhook-urls.ts +47 -0
package/src/cli/map.ts +30 -6
package/src/config/defaults.ts +5 -0
package/src/config/schema.ts +34 -2
package/src/config/system-prompt.ts +1 -1
package/src/config/types.ts +1 -0
package/src/config/vellum-skills/telegram-setup/SKILL.md +1 -5
package/src/daemon/computer-use-session.ts +2 -1
package/src/daemon/handlers/config.ts +95 -4
package/src/daemon/handlers/sessions.ts +2 -2
package/src/daemon/handlers/work-items.ts +1 -1
package/src/daemon/ipc-contract-inventory.json +8 -0
package/src/daemon/ipc-contract.ts +39 -1
package/src/daemon/ride-shotgun-handler.ts +2 -1
package/src/daemon/session-agent-loop.ts +37 -2
package/src/daemon/session-conflict-gate.ts +18 -109
package/src/daemon/session-tool-setup.ts +7 -0
package/src/inbound/public-ingress-urls.ts +106 -0
package/src/memory/attachments-store.ts +0 -1
package/src/memory/channel-delivery-store.ts +0 -1
package/src/memory/conflict-intent.ts +114 -0
package/src/memory/conversation-key-store.ts +0 -1
package/src/memory/db.ts +346 -149
package/src/memory/job-handlers/conflict.ts +23 -1
package/src/memory/runs-store.ts +0 -3
package/src/memory/schema.ts +0 -4
package/src/runtime/gateway-client.ts +36 -0
package/src/runtime/http-server.ts +140 -2
package/src/runtime/routes/channel-routes.ts +121 -79
package/src/security/oauth-callback-registry.ts +56 -0
package/src/security/oauth2.ts +174 -58
package/src/swarm/backend-claude-code.ts +1 -1
package/src/tools/assets/search.ts +1 -36
package/src/tools/browser/api-map.ts +123 -50
package/src/tools/claude-code/claude-code.ts +131 -1
package/src/tools/tasks/work-item-list.ts +16 -2
package/src/workspace/commit-message-enrichment-service.ts +3 -3
package/src/workspace/provider-commit-message-generator.ts +57 -14
package/src/workspace/turn-commit.ts +6 -2

package/src/tools/claude-code/claude-code.ts CHANGED Viewed

@@ -28,6 +28,25 @@ const VALID_PROFILES: readonly WorkerProfile[] = ['general', 'researcher', 'code
 const MAX_CLAUDE_CODE_DEPTH = 1;
 const DEPTH_ENV_VAR = 'VELLUM_CLAUDE_CODE_DEPTH';
/**
 * Input fields that best describe a call to each known sub-tool, in order of
 * preference. Keys are lower-cased tool names. A Map (rather than a plain
 * object) is used so names such as "constructor" never hit the prototype.
 */
const TOOL_SUMMARY_FIELDS: ReadonlyMap<string, readonly string[]> = new Map([
  ['bash', ['command']],
  ['read', ['file_path', 'path']],
  ['file_read', ['file_path', 'path']],
  ['edit', ['file_path', 'path']],
  ['file_edit', ['file_path', 'path']],
  ['write', ['file_path', 'path']],
  ['file_write', ['file_path', 'path']],
  ['glob', ['pattern']],
  ['grep', ['pattern']],
  ['websearch', ['query']],
  ['web_search', ['query']],
  ['webfetch', ['url']],
  ['web_fetch', ['url']],
  ['task', ['description']],
]);

/** Fallback string values of this length or longer are not used as a summary. */
const FALLBACK_SUMMARY_MAX_LENGTH = 200;

/**
 * Produces a short, human-readable summary of a sub-tool invocation.
 *
 * @param toolName - Name of the sub-tool; matched case-insensitively.
 * @param input - Raw tool input. Values are untyped, so only string values
 *   are ever returned.
 * @returns For a known tool, the first non-empty string among its preferred
 *   fields, or `''` if none is present. For an unknown tool, the first
 *   non-empty string value shorter than 200 characters, or `''`.
 */
function summarizeToolInput(toolName: string, input: Record<string, unknown>): string {
  // The caller passes externally supplied data through a cast, so guard
  // against a missing input instead of throwing on property access.
  if (input == null) return '';

  const preferredFields = TOOL_SUMMARY_FIELDS.get(toolName.toLowerCase());
  if (preferredFields !== undefined) {
    for (const field of preferredFields) {
      const value = input[field];
      // Only accept real strings: String() on an object would yield
      // "[object Object]". Empty strings fall through to the next candidate.
      if (typeof value === 'string' && value.length > 0) return value;
    }
    return '';
  }

  // Unknown tool: use the first reasonably short string value.
  for (const value of Object.values(input)) {
    if (
      typeof value === 'string' &&
      value.length > 0 &&
      value.length < FALLBACK_SUMMARY_MAX_LENGTH
    ) {
      return value;
    }
  }
  return '';
}
 export const claudeCodeTool: Tool = {
   name: 'claude_code',
   description: 'Delegate a coding task to Claude Code, an AI-powered coding agent that can read, write, and edit files, run shell commands, and perform complex multi-step software engineering tasks autonomously.',
@@ -203,12 +222,21 @@ export const claudeCodeTool: Tool = {
       queryOptions.resume = resumeSessionId;
     }
+    // Declared outside try so the catch block can emit a final tool_complete on error.
+    let lastSubToolName: string | null = null;
+    let activeToolUseId: string | null = null;
     try {
       const conversation = query({ prompt, options: queryOptions });
       let resultText = '';
       let sessionId = '';
       let hasError = false;
+      // Track tool_use_id → {name, inputSummary} for enriching progress events.
+      const toolUseIdInfo = new Map<string, { name: string; inputSummary: string }>();
+      // Track tool_use_ids that we've already emitted tool_start for (to avoid duplicates).
+      const emittedToolUseIds = new Set<string>();
       for await (const message of conversation) {
         switch (message.type) {
           case 'assistant': {
@@ -225,12 +253,103 @@ export const claudeCodeTool: Tool = {
                   context.onOutput?.(block.text);
                   resultText += block.text;
                 }
+                if (block.type === 'tool_use') {
+                  // Capture info keyed by tool_use_id for enriching tool_progress events.
+                  const inputSummary = summarizeToolInput(block.name, block.input as Record<string, unknown>);
+                  toolUseIdInfo.set(block.id, { name: block.name, inputSummary });
+                  // Emit tool_start if we haven't already (tool_progress may have fired first).
+                  // NOTE: Do NOT emit tool_complete for the previous tool here. An assistant
+                  // message may contain multiple tool_use blocks (parallel tool use) and none
+                  // of them have executed yet at this point. Completions are handled by
+                  // tool_use_summary and tool_progress events.
+                  if (!emittedToolUseIds.has(block.id)) {
+                    context.onOutput?.(JSON.stringify({
+                      subType: 'tool_start',
+                      subToolName: block.name,
+                      subToolInput: inputSummary,
+                      subToolId: block.id,
+                    }));
+                    emittedToolUseIds.add(block.id);
+                    lastSubToolName = block.name;
+                    activeToolUseId = block.id;
+                  }
+                }
               }
             }
             sessionId = message.session_id;
             break;
           }
+          case 'tool_progress': {
+            // The SDK fires tool_progress periodically DURING tool execution.
+            // This is our primary signal for live sub-tool progress.
+            const toolUseId = message.tool_use_id;
+            const toolName = message.tool_name;
+            sessionId = message.session_id;
+            // Record tool name if we don't have it yet (tool_progress fires before assistant sometimes).
+            if (!toolUseIdInfo.has(toolUseId)) {
+              toolUseIdInfo.set(toolUseId, { name: toolName, inputSummary: '' });
+            }
+            if (!emittedToolUseIds.has(toolUseId)) {
+              // New tool — mark previous as complete and emit tool_start.
+              if (lastSubToolName && activeToolUseId !== toolUseId) {
+                context.onOutput?.(JSON.stringify({
+                  subType: 'tool_complete',
+                  subToolName: lastSubToolName,
+                  subToolId: activeToolUseId,
+                }));
+              }
+              const inputSummary = toolUseIdInfo.get(toolUseId)?.inputSummary ?? '';
+              context.onOutput?.(JSON.stringify({
+                subType: 'tool_start',
+                subToolName: toolName,
+                subToolInput: inputSummary,
+                subToolId: toolUseId,
+              }));
+              emittedToolUseIds.add(toolUseId);
+              lastSubToolName = toolName;
+            }
+            activeToolUseId = toolUseId;
+            break;
+          }
+          case 'tool_use_summary': {
+            // The SDK fires tool_use_summary after tool execution with a summary
+            // and the IDs of tools that were executed.
+            sessionId = message.session_id;
+            for (const completedId of message.preceding_tool_use_ids) {
+              const info = toolUseIdInfo.get(completedId);
+              const completedName: string | null = info?.name ?? lastSubToolName;
+              if (completedName && emittedToolUseIds.has(completedId)) {
+                context.onOutput?.(JSON.stringify({
+                  subType: 'tool_complete',
+                  subToolName: completedName,
+                  subToolId: completedId,
+                }));
+                if (lastSubToolName === completedName) {
+                  lastSubToolName = null;
+                }
+              }
+              // Prune completed entries to keep memory flat across long sessions.
+              toolUseIdInfo.delete(completedId);
+              emittedToolUseIds.delete(completedId);
+            }
+            activeToolUseId = null;
+            break;
+          }
           case 'result': {
+            // Mark the final sub-tool as complete (flag error if the session failed).
+            if (lastSubToolName) {
+              const isFailure = message.subtype !== 'success';
+              context.onOutput?.(JSON.stringify({
+                subType: 'tool_complete',
+                subToolName: lastSubToolName,
+                subToolId: activeToolUseId,
+                ...(isFailure && { subToolIsError: true }),
+              }));
+              lastSubToolName = null;
+            }
             sessionId = message.session_id;
             const resultMeta = {
               subtype: message.subtype,
@@ -259,7 +378,7 @@ export const claudeCodeTool: Tool = {
                 parts.push(`Errors: ${errors.join('; ')}`);
               }
               if (denials.length > 0) {
-                const denialSummary = denials.map(d => `${d.tool_name}`).join(', ');
+                const denialSummary = denials.map((d: { tool_name: string }) => `${d.tool_name}`).join(', ');
                 parts.push(`Permission denied: ${denialSummary}`);
               }
               resultText += `\n\n${parts.join('\n')}`;
@@ -281,6 +400,17 @@ export const claudeCodeTool: Tool = {
         isError: hasError,
       };
     } catch (err) {
+      // Mark the last sub-tool as failed so the UI shows an error icon.
+      if (lastSubToolName) {
+        context.onOutput?.(JSON.stringify({
+          subType: 'tool_complete',
+          subToolName: lastSubToolName,
+          subToolId: activeToolUseId,
+          subToolIsError: true,
+        }));
+        lastSubToolName = null;
+      }
       const errMessage = err instanceof Error ? err.message : String(err);
       const recentStderr = stderrLines.slice(-20);
       log.error({ err, stderrTail: recentStderr }, 'Claude Code execution failed');

package/src/tools/tasks/work-item-list.ts CHANGED Viewed

@@ -1,5 +1,17 @@
 import type { ToolContext, ToolExecutionResult } from '../types.js';
-import { listWorkItems, type WorkItemStatus } from '../../work-items/work-item-store.js';
+import { listWorkItems, type WorkItem, type WorkItemStatus } from '../../work-items/work-item-store.js';
/** Display labels for work-item priority tiers; unlisted tiers render as "Medium". */
const PRIORITY_LABELS: Record<number, string> = { 0: 'High', 1: 'Medium', 2: 'Low' };

/**
 * Renders work items as a bulleted list, one item per line.
 *
 * Each line has the form `- [Priority] title (status)`, with underscores in
 * the status replaced by spaces.
 *
 * @param items - Work items to render; the array is not modified.
 * @returns The lines joined with `\n`, or `''` when `items` is empty.
 */
function formatTaskList(items: readonly WorkItem[]): string {
  return items
    .map((item) => {
      const priority = PRIORITY_LABELS[item.priorityTier] ?? 'Medium';
      const status = item.status.replace(/_/g, ' ');
      return `- [${priority}] ${item.title} (${status})`;
    })
    .join('\n');
}
 export async function executeTaskListShow(
   input: Record<string, unknown>,
@@ -33,7 +45,9 @@ export async function executeTaskListShow(
       ? `${count} ${Array.isArray(statusFilter) ? 'matching' : statusFilter} item${count === 1 ? '' : 's'}`
       : `${count} item${count === 1 ? '' : 's'}`;
-    return { content: `Opened Tasks window (${label}).`, isError: false };
+    const taskList = formatTaskList(items);
+    return { content: `Opened Tasks window (${label}).\n\nCurrent tasks:\n${taskList}`, isError: false };
   } catch (err) {
     const msg = err instanceof Error ? err.message : String(err);
     return { content: `Error: ${msg}`, isError: true };

package/src/workspace/commit-message-enrichment-service.ts CHANGED Viewed

@@ -183,6 +183,9 @@ export class CommitEnrichmentService {
       // has already settled with the timeout error, that rejection is orphaned.
       // The .catch() swallows it to prevent an unhandled promise rejection.
       const enrichmentPromise = this.doEnrichment(job, controller.signal);
+      enrichmentPromise.catch(() => {
+        // Intentionally swallowed — the timeout branch already handled the error
+      });
       await Promise.race([
         enrichmentPromise,
         new Promise<never>((_, reject) => {
@@ -192,9 +195,6 @@ export class CommitEnrichmentService {
           }, this.jobTimeoutMs);
         }),
       ]);
-      enrichmentPromise.catch(() => {
-        // Intentionally swallowed — the timeout branch already handled the error
-      });
       this.succeededCount++;
       log.debug(
         { commitHash: job.commitHash, attempts: job.attempts },

package/src/workspace/provider-commit-message-generator.ts CHANGED Viewed

@@ -9,9 +9,11 @@ const log = getLogger('commit-message-llm');
 export type CommitMessageSource = 'llm' | 'deterministic';
 export type LLMFallbackReason =
   | 'disabled'
-  | 'provider_not_initialized'
+  | 'missing_provider_api_key'
   | 'breaker_open'
   | 'insufficient_budget'
+  | 'missing_fast_model'
+  | 'provider_not_initialized'
   | 'timeout'
   | 'provider_error'
   | 'invalid_output';
@@ -36,6 +38,15 @@ Rules:
 - Total output must be under 300 characters
 - If you cannot determine a meaningful message, respond with exactly: FALLBACK`;
+const PROVIDER_DEFAULT_FAST_MODELS: Record<string, string> = {
+  anthropic: 'claude-haiku-4-5-20251001',
+  openai: 'gpt-4o-mini',
+  gemini: 'gemini-2.0-flash',
+};
+// Providers that can be initialized without an API key (e.g., Ollama runs locally)
+const KEYLESS_PROVIDERS = new Set(['ollama']);
 const deterministicProvider = new DefaultCommitMessageProvider();
 function buildDeterministicResult(
@@ -93,16 +104,33 @@ export class ProviderCommitMessageGenerator {
     const config = getConfig();
     const llmConfig = config.workspaceGit.commitMessageLLM;
+    // ── Fallback check order (canonical) ──────────────────────────────
+    // 1. disabled
+    // 2. missing_provider_api_key  (except keyless providers like ollama)
+    // 3. breaker_open
+    // 4. insufficient_budget
+    // 5. missing_fast_model
+    // 6. provider_not_initialized
+    // 7. call provider → timeout / provider_error / invalid_output
+    // ──────────────────────────────────────────────────────────────────
     // Step 1: Feature gate
     if (!llmConfig.enabled) {
       return buildDeterministicResult(context, 'disabled');
     }
-    // Step 2: Provider gate
     if (!llmConfig.useConfiguredProvider) {
       return buildDeterministicResult(context, 'disabled');
     }
+    // Step 2: API key preflight (skip for providers that run without a key)
+    if (!KEYLESS_PROVIDERS.has(config.provider)) {
+      const providerApiKey = config.apiKeys[config.provider];
+      if (!providerApiKey || providerApiKey === '') {
+        log.debug('Provider API key missing; falling back to deterministic');
+        return buildDeterministicResult(context, 'missing_provider_api_key');
+      }
+    }
     // Step 3: Circuit breaker
     if (this.isBreakerOpen()) {
       log.debug(
@@ -124,7 +152,19 @@ export class ProviderCommitMessageGenerator {
       }
     }
-    // Step 5: Call the provider
+    // Step 5: Fast model preflight — resolve before any provider call
+    const fastModel = llmConfig.providerFastModelOverrides[config.provider]
+      ?? PROVIDER_DEFAULT_FAST_MODELS[config.provider];
+    if (!fastModel) {
+      log.debug(
+        { provider: config.provider },
+        'No fast model resolvable for provider; falling back to deterministic',
+      );
+      return buildDeterministicResult(context, 'missing_fast_model');
+    }
+    // Step 6 + 7: Call the provider
     try {
       const { getProvider } = await import('../providers/registry.js');
@@ -172,7 +212,11 @@ export class ProviderCommitMessageGenerator {
           SYSTEM_PROMPT,
           {
             signal: ac.signal,
-            config: { max_tokens: llmConfig.maxTokens, temperature: llmConfig.temperature },
+            config: {
+              model: fastModel,
+              max_tokens: llmConfig.maxTokens,
+              temperature: llmConfig.temperature,
+            },
           },
         );
       } catch (err: unknown) {
@@ -203,21 +247,20 @@ export class ProviderCommitMessageGenerator {
         return buildDeterministicResult(context, 'invalid_output');
       }
-      // Validate single-line subject: first line must be <= 72 chars
-      const firstLine = text.split('\n')[0];
-      if (firstLine.length > 72) {
+      // Cap subject line to 72 chars deterministically (no fallback, no breaker failure)
+      const lines = text.split('\n');
+      if (lines[0].length > 72) {
         log.debug(
-          { subjectLength: firstLine.length },
-          'LLM subject line too long; falling back to deterministic',
+          { originalLength: lines[0].length },
+          'Capping LLM subject line to 72 chars',
         );
-        this.recordFailure();
-        return buildDeterministicResult(context, 'invalid_output');
+        lines[0] = lines[0].slice(0, 72);
       }
+      const finalMessage = lines.join('\n');
       this.recordSuccess();
-      return { message: text, source: 'llm' };
+      return { message: finalMessage, source: 'llm' };
     } catch (err: unknown) {
-      // Step 6: Any error -> deterministic fallback
       log.warn(
         { err: err instanceof Error ? err.message : String(err) },
         'Commit message LLM provider error; falling back to deterministic',

package/src/workspace/turn-commit.ts CHANGED Viewed

@@ -72,10 +72,14 @@ export async function commitTurnChanges(
     if (!provider) {
       // Guard: skip pre-check if deadline already elapsed to avoid unnecessary mutex contention
       let preClean = false;
+      let candidateChangedFiles: string[] = [];
       if (!deadlineMs || Date.now() < deadlineMs) {
         try {
           const preStatus = await gitService.getStatus();
           preClean = preStatus.clean;
+          if (!preClean) {
+            candidateChangedFiles = [...new Set([...preStatus.staged, ...preStatus.modified, ...preStatus.untracked])];
+          }
         } catch {
           // If we can't determine status, assume dirty so we don't skip the commit
         }
@@ -90,10 +94,10 @@ export async function commitTurnChanges(
               trigger: 'turn',
               sessionId,
               turnNumber,
-              changedFiles: [], // File list unavailable outside the git mutex; generator handles empty arrays
+              changedFiles: candidateChangedFiles,
               timestampMs: Date.now(),
             },
-            { deadlineMs, changedFiles: [] },
+            { deadlineMs, changedFiles: candidateChangedFiles },
           );
           commitMessageSource = result.source;
           llmFallbackReason = result.reason;