npm - vellum - Versions diffs - 0.2.9 → 0.2.11 - Mend

vellum 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/bun.lock +2 -2
package/package.json +2 -2
package/scripts/capture-x-graphql.ts +1 -18
package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +110 -0
package/src/__tests__/call-bridge.test.ts +40 -0
package/src/__tests__/call-state.test.ts +41 -0
package/src/__tests__/forbidden-legacy-symbols.test.ts +8 -6
package/src/__tests__/gateway-only-enforcement.test.ts +13 -89
package/src/__tests__/home-base-bootstrap.test.ts +13 -8
package/src/__tests__/intent-routing.test.ts +2 -5
package/src/__tests__/ipc-snapshot.test.ts +49 -0
package/src/__tests__/onboarding-starter-tasks.test.ts +12 -2
package/src/__tests__/prebuilt-home-base-seed.test.ts +9 -5
package/src/__tests__/relay-server.test.ts +55 -0
package/src/__tests__/skills.test.ts +83 -0
package/src/__tests__/system-prompt.test.ts +2 -24
package/src/__tests__/twilio-provider.test.ts +36 -0
package/src/__tests__/twilio-routes.test.ts +108 -0
package/src/calls/call-orchestrator.ts +25 -5
package/src/calls/call-state.ts +23 -0
package/src/calls/relay-server.ts +56 -1
package/src/calls/twilio-config.ts +9 -13
package/src/calls/twilio-provider.ts +6 -1
package/src/calls/twilio-routes.ts +10 -1
package/src/cli/core-commands.ts +12 -4
package/src/config/bundled-skills/app-builder/SKILL.md +57 -1
package/src/config/bundled-skills/document/SKILL.md +11 -3
package/src/config/bundled-skills/followups/icon.svg +24 -0
package/src/config/bundled-skills/messaging/SKILL.md +7 -3
package/src/config/bundled-skills/public-ingress/SKILL.md +183 -0
package/src/config/bundled-skills/self-upgrade/SKILL.md +4 -10
package/src/config/defaults.ts +1 -1
package/src/config/schema.ts +4 -7
package/src/config/system-prompt.ts +64 -360
package/src/config/vellum-skills/google-oauth-setup/SKILL.md +5 -1
package/src/config/vellum-skills/slack-oauth-setup/SKILL.md +5 -1
package/src/config/vellum-skills/telegram-setup/SKILL.md +2 -1
package/src/daemon/handlers/config.ts +20 -9
package/src/daemon/handlers/home-base.ts +3 -2
package/src/daemon/handlers/identity.ts +127 -0
package/src/daemon/handlers/index.ts +4 -0
package/src/daemon/handlers/workspace-files.ts +75 -0
package/src/daemon/ipc-contract-inventory.json +16 -4
package/src/daemon/ipc-contract.ts +62 -2
package/src/daemon/lifecycle.ts +16 -0
package/src/daemon/session-notifiers.ts +29 -0
package/src/daemon/session-surfaces.ts +5 -2
package/src/daemon/session-tool-setup.ts +15 -4
package/src/home-base/bootstrap.ts +3 -1
package/src/home-base/prebuilt/seed.ts +16 -5
package/src/inbound/public-ingress-urls.ts +15 -4
package/src/runtime/http-server.ts +123 -20
package/src/security/oauth2.ts +19 -161
package/src/tools/browser/auto-navigate.ts +2 -2
package/src/tools/browser/x-auto-navigate.ts +1 -1
package/src/tools/claude-code/claude-code.ts +1 -1
package/src/tools/system/version.ts +43 -0
package/src/tools/tasks/work-item-run.ts +1 -1
package/src/tools/terminal/parser.ts +29 -7
package/src/tools/tool-manifest.ts +2 -0
package/src/tools/ui-surface/definitions.ts +9 -2

package/src/runtime/http-server.ts CHANGED Viewed

@@ -5,11 +5,13 @@
  * `RUNTIME_HTTP_PORT` is set (default: disabled).
  */
-import { existsSync, readFileSync, statfsSync } from 'node:fs';
-import { resolve } from 'node:path';
+import { existsSync, readFileSync, statSync, statfsSync } from 'node:fs';
+import { resolve, join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
 import { timingSafeEqual } from 'node:crypto';
 import { ConfigError, IngressBlockedError } from '../util/errors.js';
 import { getLogger } from '../util/logger.js';
+import { getWorkspacePromptPath } from '../util/platform.js';
 import { TwilioConversationRelayProvider } from '../calls/twilio-provider.js';
 import { loadConfig } from '../config/loader.js';
 import { getPublicBaseUrl } from '../inbound/public-ingress-urls.js';
@@ -378,6 +380,7 @@ export class RuntimeHttpServer {
           log.info({ callSessionId, code, reason: reason?.toString() }, 'ConversationRelay WebSocket closed');
           if (callSessionId) {
             const connection = activeRelayConnections.get(callSessionId);
+            connection?.handleTransportClosed(code, reason?.toString());
             connection?.destroy();
             activeRelayConnections.delete(callSessionId);
           }
@@ -395,16 +398,9 @@ export class RuntimeHttpServer {
     }
     // Startup guard: log gateway-only mode warnings
-    try {
-      const config = loadConfig();
-      if (config.ingress.mode === 'gateway_only') {
-        log.info('Running in gateway-only ingress mode. Direct webhook routes disabled.');
-        if (!isLoopbackHost(this.hostname)) {
-          log.warn('gateway-only mode is enabled but RUNTIME_HTTP_HOST is not bound to loopback. This may expose the runtime to direct public access.');
-        }
-      }
-    } catch {
-      // Config loading may fail during startup — don't block server start
+    log.info('Running in gateway-only ingress mode. Direct webhook routes disabled.');
+    if (!isLoopbackHost(this.hostname)) {
+      log.warn('RUNTIME_HTTP_HOST is not bound to loopback. This may expose the runtime to direct public access.');
     }
     log.info({ port: this.actualPort, hostname: this.hostname, auth: !!this.bearerToken }, 'Runtime HTTP server listening');
@@ -445,14 +441,13 @@ export class RuntimeHttpServer {
     // WebSocket upgrade for ConversationRelay — before auth check because
     // Twilio WebSocket connections don't use bearer tokens.
     if (path.startsWith('/v1/calls/relay') && req.headers.get('upgrade')?.toLowerCase() === 'websocket') {
-      // In gateway_only mode, only allow relay connections from private network peers.
+      // Only allow relay connections from private network peers.
       // Primary check: actual peer address (cannot be spoofed) — accepts loopback
       // and RFC 1918/4193 private addresses to support container deployments.
       // Secondary check: Origin header (defense in depth).
-      const config = loadConfig();
-      if (config.ingress.mode === 'gateway_only' && (!isPrivateNetworkPeer(server, req) || !isPrivateNetworkOrigin(req))) {
+      if (!isPrivateNetworkPeer(server, req) || !isPrivateNetworkOrigin(req)) {
         return Response.json(
-          { error: 'Direct relay access disabled in gateway-only mode', code: 'GATEWAY_ONLY' },
+          { error: 'Direct relay access disabled — only private network peers allowed', code: 'GATEWAY_ONLY' },
           { status: 403 },
         );
       }
@@ -486,11 +481,10 @@ export class RuntimeHttpServer {
     if (resolvedTwilioSubpath && req.method === 'POST') {
       const twilioSubpath = resolvedTwilioSubpath;
-      // In gateway_only mode, block direct Twilio webhook routes
-      const ingressConfig = loadConfig();
-      if (ingressConfig.ingress.mode === 'gateway_only' && GATEWAY_ONLY_BLOCKED_SUBPATHS.has(twilioSubpath)) {
+      // Block direct Twilio webhook routes — must go through the gateway
+      if (GATEWAY_ONLY_BLOCKED_SUBPATHS.has(twilioSubpath)) {
         return Response.json(
-          { error: 'Direct webhook access disabled in gateway-only mode. Use the gateway.', code: 'GATEWAY_ONLY' },
+          { error: 'Direct webhook access disabled. Use the gateway.', code: 'GATEWAY_ONLY' },
           { status: 410 },
         );
       }
@@ -619,6 +613,19 @@ export class RuntimeHttpServer {
         return this.handleHealth();
       }
+      if (endpoint === 'conversations' && req.method === 'GET') {
+        const limit = Number(url.searchParams.get('limit') ?? 50);
+        const conversations = conversationStore.listConversations(limit);
+        return Response.json({
+          sessions: conversations.map((c) => ({
+            id: c.id,
+            title: c.title ?? 'Untitled',
+            updatedAt: c.updatedAt,
+            threadType: c.threadType === 'private' ? 'private' : 'standard',
+          })),
+        });
+      }
       if (endpoint === 'messages' && req.method === 'GET') {
         return handleListMessages(url, this.interfacesDir);
       }
@@ -770,6 +777,10 @@ export class RuntimeHttpServer {
         return await handleConnectAction(fakeReq);
       }
+      if (endpoint === 'identity' && req.method === 'GET') {
+        return this.handleGetIdentity();
+      }
       if (endpoint === 'events' && req.method === 'GET') {
         return handleSubscribeAssistantEvents(req, url);
       }
@@ -925,6 +936,98 @@ export class RuntimeHttpServer {
     }
   }
+  /**
+   * Build the JSON response for the `identity` GET endpoint.
+   *
+   * The payload is assembled from four best-effort sources:
+   *   - `- **Key:** value` bullet lines in the workspace IDENTITY.md
+   *     (name, role, personality or vibe, emoji, home);
+   *   - `version` from the package.json two directories above this module;
+   *   - `createdAt` from the IDENTITY.md file birthtime;
+   *   - `assistantId` / `originSystem` from the most recently hatched entry
+   *     of the first lockfile that exists in the user's home directory.
+   *
+   * @returns A 404 JSON error when IDENTITY.md does not exist; otherwise a
+   *   JSON body whose identity fields default to `''` and whose remaining
+   *   fields are `undefined` when their source is missing or unreadable.
+   */
+  private handleGetIdentity(): Response {
+    const identityPath = getWorkspacePromptPath('IDENTITY.md');
+    if (!existsSync(identityPath)) {
+      return Response.json({ error: 'IDENTITY.md not found' }, { status: 404 });
+    }
+    // Lower-cased bullet prefixes and the response field each one fills.
+    // `vibe` is accepted as an alias for `personality`.
+    const fieldPrefixes: ReadonlyArray<readonly [string, string]> = [
+      ['- **name:**', 'name'],
+      ['- **role:**', 'role'],
+      ['- **personality:**', 'personality'],
+      ['- **vibe:**', 'personality'],
+      ['- **emoji:**', 'emoji'],
+      ['- **home:**', 'home'],
+    ];
+    const fields: Record<string, string> = {};
+    for (const line of readFileSync(identityPath, 'utf-8').split('\n')) {
+      const trimmed = line.trim();
+      for (const [prefix, field] of fieldPrefixes) {
+        if (trimmed.slice(0, prefix.length).toLowerCase() !== prefix) continue;
+        // Take everything after the matched prefix so a value that itself
+        // contains `:**` is kept intact instead of being cut at its last `:**`.
+        const value = trimmed.slice(prefix.length).trim();
+        // An empty value never overwrites a field; a later non-empty line does.
+        if (value) fields[field] = value;
+        break;
+      }
+    }
+    // Read version from package.json
+    let version: string | undefined;
+    try {
+      const pkgPath = join(dirname(fileURLToPath(import.meta.url)), '../../package.json');
+      const pkg: unknown = JSON.parse(readFileSync(pkgPath, 'utf-8'));
+      if (typeof pkg === 'object' && pkg !== null) {
+        const candidate = (pkg as Record<string, unknown>).version;
+        // Only a real string is reported; anything else leaves `version` unset.
+        if (typeof candidate === 'string') version = candidate;
+      }
+    } catch {
+      // Best effort: omit the version when package.json is missing or malformed.
+    }
+    // Read createdAt from IDENTITY.md file birthtime
+    let createdAt: string | undefined;
+    try {
+      createdAt = statSync(identityPath).birthtime.toISOString();
+    } catch {
+      // Best effort: omit createdAt when the file cannot be stat'ed.
+    }
+    // Read lockfile for assistantId and originSystem
+    let assistantId: string | undefined;
+    let originSystem: string | undefined;
+    try {
+      const homedir = process.env.HOME ?? process.env.USERPROFILE ?? '';
+      // With no home directory the candidate paths would resolve against the
+      // current working directory, so skip the lookup entirely. Otherwise only
+      // the first lockfile that exists is consulted.
+      const lockPath = homedir
+        ? [join(homedir, '.vellum.lock.json'), join(homedir, '.vellum.lockfile.json')].find((candidate) =>
+            existsSync(candidate),
+          )
+        : undefined;
+      if (lockPath) {
+        const lockData: unknown = JSON.parse(readFileSync(lockPath, 'utf-8'));
+        const rawAssistants: unknown =
+          typeof lockData === 'object' && lockData !== null
+            ? (lockData as Record<string, unknown>).assistants
+            : undefined;
+        const assistants = Array.isArray(rawAssistants)
+          ? rawAssistants.filter(
+              (entry): entry is Record<string, unknown> => typeof entry === 'object' && entry !== null,
+            )
+          : [];
+        // Use the most recently hatched assistant. A missing or unparseable
+        // `hatchedAt` counts as the epoch, and the first entry wins ties.
+        let latest: Record<string, unknown> | undefined;
+        let latestMs = Number.NEGATIVE_INFINITY;
+        for (const entry of assistants) {
+          const raw = entry.hatchedAt;
+          const parsed = typeof raw === 'string' || typeof raw === 'number' ? new Date(raw).getTime() : 0;
+          const hatchedMs = Number.isNaN(parsed) ? 0 : parsed;
+          if (hatchedMs > latestMs) {
+            latest = entry;
+            latestMs = hatchedMs;
+          }
+        }
+        if (latest) {
+          if (typeof latest.assistantId === 'string') assistantId = latest.assistantId;
+          // `originSystem` mirrors the lockfile's `cloud` value (e.g. 'local').
+          if (typeof latest.cloud === 'string') originSystem = latest.cloud;
+        }
+      }
+    } catch {
+      // Best effort: a missing or malformed lockfile leaves these fields unset.
+    }
+    return Response.json({
+      name: fields.name ?? '',
+      role: fields.role ?? '',
+      personality: fields.personality ?? '',
+      emoji: fields.emoji ?? '',
+      home: fields.home ?? '',
+      version,
+      assistantId,
+      createdAt,
+      originSystem,
+    });
+  }
   private handleHealth(): Response {
     return Response.json({
       status: 'healthy',

package/src/security/oauth2.ts CHANGED Viewed

@@ -1,9 +1,9 @@
 /**
  * General-purpose OAuth2 Authorization Code flow with PKCE.
  *
- * Supports two callback transports:
- *   - loopback: spins up a local HTTP server on 127.0.0.1 (default when no public URL configured)
- *   - gateway:  uses the gateway's OAuth callback route + in-memory registry (when ingress.publicBaseUrl is set)
+ * Uses the gateway callback transport: OAuth callbacks route through the
+ * gateway's OAuth callback route + in-memory registry (requires
+ * ingress.publicBaseUrl to be configured).
  *
  * Moved from integrations/oauth2.ts. Types that were in integrations/types.ts
  * are now inlined here since the integration framework is removed.
@@ -138,122 +138,6 @@ async function exchangeCodeForTokens(
   return { tokens, grantedScopes, rawTokenResponse: tokenData };
 }
-// ---------------------------------------------------------------------------
-// Transport auto-detection
-// ---------------------------------------------------------------------------
-/**
- * Determine which callback transport to use when not explicitly specified.
- * Uses gateway if a public base URL is configured (ingress.publicBaseUrl or
- * INGRESS_PUBLIC_BASE_URL), otherwise loopback.
- */
-function detectTransport(): 'loopback' | 'gateway' {
-  try {
-    const { loadConfig } = require('../config/loader.js') as typeof import('../config/loader.js');
-    const { getPublicBaseUrl } = require('../inbound/public-ingress-urls.js') as typeof import('../inbound/public-ingress-urls.js');
-    const appConfig = loadConfig();
-    getPublicBaseUrl(appConfig); // throws if no public URL configured
-    return 'gateway';
-  } catch {
-    log.debug('No public base URL configured for transport auto-detection, defaulting to loopback');
-  }
-  return 'loopback';
-}
-// ---------------------------------------------------------------------------
-// Loopback transport
-// ---------------------------------------------------------------------------
-async function runLoopbackFlow(
-  config: OAuth2Config,
-  callbacks: OAuth2FlowCallbacks,
-  codeVerifier: string,
-  codeChallenge: string,
-  state: string,
-): Promise<OAuth2FlowResult> {
-  let resolveCode: (value: { code: string; returnedState: string }) => void;
-  let rejectCode: (reason: Error) => void;
-  const codePromise = new Promise<{ code: string; returnedState: string }>((resolve, reject) => {
-    resolveCode = resolve;
-    rejectCode = reject;
-  });
-  const FLOW_TIMEOUT_MS = 120_000;
-  const timeout = setTimeout(() => {
-    rejectCode(new Error('OAuth2 flow timed out waiting for user authorization'));
-  }, FLOW_TIMEOUT_MS);
-  const server = Bun.serve({
-    hostname: '127.0.0.1',
-    port: 0,
-    fetch(req) {
-      const url = new URL(req.url);
-      if (url.pathname !== '/callback') {
-        return new Response('Not found', { status: 404 });
-      }
-      const error = url.searchParams.get('error');
-      if (error) {
-        const desc = url.searchParams.get('error_description') ?? error;
-        rejectCode(new Error(`OAuth2 authorization denied: ${desc}`));
-        return new Response(
-          '<html><body><h2>Authorization denied</h2><p>You can close this tab.</p></body></html>',
-          { headers: { 'Content-Type': 'text/html' } },
-        );
-      }
-      const code = url.searchParams.get('code');
-      const returnedState = url.searchParams.get('state');
-      if (!code || !returnedState) {
-        rejectCode(new Error('OAuth2 callback missing code or state'));
-        return new Response('Missing code or state', { status: 400 });
-      }
-      if (returnedState !== state) {
-        rejectCode(new Error('OAuth2 state mismatch — possible CSRF attack'));
-        return new Response('State mismatch', { status: 400 });
-      }
-      resolveCode({ code, returnedState });
-      return new Response(
-        '<html><body><h2>Authorization successful!</h2><p>You can close this tab and return to Vellum.</p></body></html>',
-        { headers: { 'Content-Type': 'text/html' } },
-      );
-    },
-  });
-  const redirectUri = `http://127.0.0.1:${server.port}/callback`;
-  try {
-    const usePKCE = !config.clientSecret;
-    const authParams = new URLSearchParams({
-      ...config.extraParams,
-      client_id: config.clientId,
-      redirect_uri: redirectUri,
-      response_type: 'code',
-      scope: config.scopes.join(' '),
-      state,
-      ...(usePKCE ? { code_challenge: codeChallenge, code_challenge_method: 'S256' } : {}),
-    });
-    const authUrl = `${config.authUrl}?${authParams}`;
-    callbacks.openUrl(authUrl);
-    const { code, returnedState } = await codePromise;
-    if (returnedState !== state) {
-      throw new Error('OAuth2 state mismatch — possible CSRF attack');
-    }
-    return await exchangeCodeForTokens(config, code, redirectUri, codeVerifier);
-  } finally {
-    clearTimeout(timeout);
-    server.stop(true);
-  }
-}
 // ---------------------------------------------------------------------------
 // Gateway transport
 // ---------------------------------------------------------------------------
@@ -302,12 +186,9 @@ async function runGatewayFlow(
 /**
  * Run a full OAuth2 authorization code flow with PKCE support.
  *
- * Supports two callback transports:
- *   - loopback (default): local HTTP server on 127.0.0.1
- *   - gateway: callback via the gateway's OAuth route + in-memory registry
- *
- * Transport is auto-detected based on ingress.publicBaseUrl config unless
- * explicitly specified via options.callbackTransport.
+ * Uses the gateway callback transport, which routes OAuth callbacks through
+ * the gateway's OAuth route + in-memory registry. Requires a public ingress
+ * URL to be configured.
  */
 export async function startOAuth2Flow(
   config: OAuth2Config,
@@ -318,49 +199,26 @@ export async function startOAuth2Flow(
   const codeChallenge = generateCodeChallenge(codeVerifier);
   const state = generateState();
-  // In gateway_only mode, enforce gateway transport and require a public ingress URL
-  let ingressMode: string | undefined;
+  // Always enforce gateway transport and require a public ingress URL
+  let hasPublicUrl = false;
   try {
     const { loadConfig } = require('../config/loader.js') as typeof import('../config/loader.js');
-    ingressMode = loadConfig().ingress.mode;
+    const { getPublicBaseUrl } = require('../inbound/public-ingress-urls.js') as typeof import('../inbound/public-ingress-urls.js');
+    getPublicBaseUrl(loadConfig());
+    hasPublicUrl = true;
   } catch {
-    // Fail closed: if config can't be loaded (e.g., malformed config.json), default to the
-    // most restrictive mode to prevent loopback fallback from creating a fail-open path.
-    log.warn('Failed to load config for OAuth ingress mode detection; defaulting to gateway_only (fail closed)');
-    ingressMode = 'gateway_only';
+    // No public URL configured
   }
-  if (ingressMode === 'gateway_only') {
-    // Verify a public ingress URL is configured; fail fast with actionable error if not
-    let hasPublicUrl = false;
-    try {
-      const { loadConfig } = require('../config/loader.js') as typeof import('../config/loader.js');
-      const { getPublicBaseUrl } = require('../inbound/public-ingress-urls.js') as typeof import('../inbound/public-ingress-urls.js');
-      getPublicBaseUrl(loadConfig());
-      hasPublicUrl = true;
-    } catch {
-      // No public URL configured
-    }
-    if (!hasPublicUrl) {
-      throw new Error(
-        'OAuth requires a public ingress URL in gateway-only mode. Set ingress.publicBaseUrl or INGRESS_PUBLIC_BASE_URL so OAuth callbacks can route through the gateway.',
-      );
-    }
-    // In gateway_only mode, always use gateway transport — never fall back to loopback
-    log.debug({ transport: 'gateway' }, 'OAuth2 flow starting (gateway_only mode)');
-    return runGatewayFlow(config, callbacks, codeVerifier, codeChallenge, state);
-  }
-  const transport = options?.callbackTransport ?? detectTransport();
-  log.debug({ transport }, 'OAuth2 flow starting');
-  if (transport === 'gateway') {
-    return runGatewayFlow(config, callbacks, codeVerifier, codeChallenge, state);
+  if (!hasPublicUrl) {
+    throw new Error(
+      'OAuth requires a public ingress URL. Set ingress.publicBaseUrl or INGRESS_PUBLIC_BASE_URL so OAuth callbacks can route through the gateway.',
+    );
   }
-  return runLoopbackFlow(config, callbacks, codeVerifier, codeChallenge, state);
+  // Always use gateway transport — never fall back to loopback
+  log.debug({ transport: 'gateway' }, 'OAuth2 flow starting');
+  return runGatewayFlow(config, callbacks, codeVerifier, codeChallenge, state);
 }
 /**

package/src/tools/browser/auto-navigate.ts CHANGED Viewed

@@ -38,7 +38,7 @@ class MiniCDP {
           const cb = this.callbacks.get(msg.id);
           if (cb) {
             this.callbacks.delete(msg.id);
-            msg.error ? cb.reject(new Error(msg.error.message)) : cb.resolve(msg.result);
+            if (msg.error) { cb.reject(new Error(msg.error.message)); } else { cb.resolve(msg.result); }
           }
         }
       };
@@ -130,7 +130,7 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
   await sleep(SCROLL_WAIT_MS);
   // Discover internal links from the current page
-  let discoveredLinks = await discoverInternalLinks(cdp, domain);
+  const discoveredLinks = await discoverInternalLinks(cdp, domain);
   log.info({ count: discoveredLinks.length }, 'Discovered internal links from root');
   // Visit discovered pages

package/src/tools/browser/x-auto-navigate.ts CHANGED Viewed

@@ -35,7 +35,7 @@ class MiniCDP {
           const cb = this.callbacks.get(msg.id);
           if (cb) {
             this.callbacks.delete(msg.id);
-            msg.error ? cb.reject(new Error(msg.error.message)) : cb.resolve(msg.result);
+            if (msg.error) { cb.reject(new Error(msg.error.message)); } else { cb.resolve(msg.result); }
           }
         }
       };

package/src/tools/claude-code/claude-code.ts CHANGED Viewed

@@ -156,7 +156,7 @@ export const claudeCodeTool: Tool = {
         return { behavior: 'allow' as const };
       }
-      // For tools that need approval, bridge to Velly's confirmation flow
+      // For tools that need approval, bridge to Vellum's confirmation flow
       if (!context.requestConfirmation) {
         log.warn({ toolName }, 'Claude Code tool requires approval but no requestConfirmation callback available');
         return { behavior: 'deny' as const, message: 'Tool approval not available in this context' };

package/src/tools/system/version.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { RiskLevel } from '../../permissions/types.js';
+import type { ToolDefinition } from '../../providers/types.js';
+import { registerTool } from '../registry.js';
+import type { Tool, ToolContext, ToolExecutionResult } from '../types.js';
+/**
+ * Read the `version` field from the package.json three directories above
+ * this module (`import.meta.dir` is Bun's directory-of-current-module value).
+ *
+ * @returns The version string, or `'unknown'` when the file cannot be read or
+ *   parsed, or when it has no string `version` field.
+ */
+function readPackageVersion(): string {
+  try {
+    const pkgPath = join(import.meta.dir, '../../../package.json');
+    const pkg: unknown = JSON.parse(readFileSync(pkgPath, 'utf-8'));
+    // JSON.parse output is unvalidated: accept only an actual string so the
+    // declared return type also holds at runtime.
+    if (typeof pkg === 'object' && pkg !== null) {
+      const version = (pkg as { version?: unknown }).version;
+      if (typeof version === 'string') return version;
+    }
+    return 'unknown';
+  } catch {
+    // Missing file or malformed JSON: report an unknown version, never throw.
+    return 'unknown';
+  }
+}
+/** Low-risk system tool that reports the daemon's package version; it takes no input. */
+class VersionTool implements Tool {
+  name = 'version';
+  description = 'Return the current version of the Vellum assistant daemon.';
+  category = 'system';
+  defaultRiskLevel = RiskLevel.Low;
+  /** Describe the tool: an object schema with no properties and nothing required. */
+  getDefinition(): ToolDefinition {
+    const { name, description } = this;
+    return {
+      name,
+      description,
+      input_schema: { type: 'object', properties: {}, required: [] },
+    };
+  }
+  /** Report the version from `readPackageVersion()`; both arguments are ignored. */
+  async execute(_input: Record<string, unknown>, _context: ToolContext): Promise<ToolExecutionResult> {
+    return {
+      content: `Vellum assistant version: ${readPackageVersion()}`,
+      isError: false,
+    };
+  }
+}
+// Importing this module registers the tool as a side effect.
+registerTool(new VersionTool());

package/src/tools/tasks/work-item-run.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ToolContext, ToolExecutionResult } from '../types.js';
-import { getWorkItem, listWorkItems, identifyEntityById, buildWorkItemMismatchError } from '../../work-items/work-item-store.js';
+import { getWorkItem, listWorkItems, identifyEntityById } from '../../work-items/work-item-store.js';
 import { runWorkItemInBackground } from '../../work-items/work-item-runner.js';
 import { getTask } from '../../tasks/task-store.js';

package/src/tools/terminal/parser.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { join } from 'node:path';
-import { readFileSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { readFileSync, existsSync } from 'node:fs';
 import { createHash } from 'node:crypto';
 import { getLogger } from '../../util/logger.js';
 import { IntegrityError } from '../../util/errors.js';
@@ -75,11 +75,31 @@ function verifyWasmChecksum(filePath: string, label: string): void {
 let parserInstance: Parser | null = null;
 let initPromise: Promise<void> | null = null;
+/**
+ * Locate a WASM file from a dependency package.
+ *
+ * In development / `bunx` the file lives under `node_modules/` relative
+ * to the source tree.  In compiled Bun binaries `import.meta.dirname`
+ * points into the virtual `/$bunfs/` filesystem where binary assets
+ * don't exist — fall back to:
+ *   1. `../Resources/<file>` (macOS .app bundle layout)
+ *   2. Next to the compiled binary (process.execPath)
+ * This matches the pattern used by docker.ts for Dockerfile.sandbox.
+ *
+ * The fallbacks are used only when the `node_modules` path is missing AND the
+ * module directory starts with `/$bunfs/`; in every other case the
+ * `node_modules` path is returned, whether or not it exists. The final
+ * next-to-the-binary fallback is likewise returned without an existence check,
+ * so callers must be prepared for a path that does not exist.
+ *
+ * NOTE(review): the prefix test is the POSIX-style string `/$bunfs/` — confirm
+ * whether compiled binaries on other platforms report a different prefix.
+ *
+ * @param pkg  Package directory name under `node_modules/`.
+ * @param file WASM file name inside that package; also the name looked up in
+ *             the fallback locations.
+ * @returns The path chosen by the rules above.
+ */
 function findWasmPath(pkg: string, file: string): string {
-  return join(
-    import.meta.dirname ?? __dirname,
-    '..', '..', '..', 'node_modules', pkg, file,
-  );
+  const dir = import.meta.dirname ?? __dirname;
+  const sourcePath = join(dir, '..', '..', '..', 'node_modules', pkg, file);
+  if (!existsSync(sourcePath) && dir.startsWith('/$bunfs/')) {
+    const execDir = dirname(process.execPath);
+    // macOS .app bundle: binary is in Contents/MacOS/, resources in Contents/Resources/
+    const resourcesPath = join(execDir, '..', 'Resources', file);
+    if (existsSync(resourcesPath)) return resourcesPath;
+    // Fallback: next to the binary itself (non-app-bundle deployments)
+    return join(execDir, file);
+  }
+  return sourcePath;
 }
 async function ensureParser(): Promise<Parser> {
@@ -93,7 +113,9 @@ async function ensureParser(): Promise<Parser> {
       verifyWasmChecksum(treeSitterWasm, 'web-tree-sitter.wasm');
       verifyWasmChecksum(bashWasmPath, 'tree-sitter-bash.wasm');
-      await Parser.init();
+      await Parser.init({
+        locateFile: () => treeSitterWasm,
+      });
       const Bash = await Language.load(bashWasmPath);
       const parser = new Parser();

package/src/tools/tool-manifest.ts CHANGED Viewed

@@ -35,6 +35,7 @@ export async function loadEagerModules(): Promise<void> {
   await import('./calls/call-start.js');
   await import('./calls/call-status.js');
   await import('./calls/call-end.js');
+  await import('./system/version.js');
 }
 // Tool names registered by the eager modules above.  Listed explicitly so
@@ -57,6 +58,7 @@ export const eagerModuleToolNames: string[] = [
   'call_start',
   'call_status',
   'call_end',
+  'version',
 ];
 // ── Explicit tool instances ─────────────────────────────────────────

package/src/tools/ui-surface/definitions.ts CHANGED Viewed

@@ -26,7 +26,7 @@ function proxyExecute(): Promise<ToolExecutionResult> {
 export const uiShowTool: Tool = {
   name: 'ui_show',
   description:
-    'Show a UI surface to the user. Use display: "inline" (default) to embed in chat, or "panel" for a floating window.\n\n' +
+    'Show structured data or UI to the user. Use for displaying weather, flights, stock prices, quick tables, cards, lists, forms, or any temporary data visualization. Use display: "inline" (default) to embed in chat, or "panel" for a floating window. For long-form writing use the document skill instead; for interactive apps use the app-builder skill instead.\n\n' +
     'Supported surface types:\n' +
     '- card: Informational card with title, subtitle, body text, and optional metadata key-value pairs. ' +
     'Cards support an optional template field for specialized native rendering. ' +
@@ -54,7 +54,14 @@ export const uiShowTool: Tool = {
     'data shape: { prompt: string, acceptedTypes?: string[], maxFiles?: number }\n\n' +
     'Action payload conventions:\n' +
     '- Multi-select tables: use `window.vellum.sendAction(actionId, { selectedIds: [...] })` to send selected row IDs\n' +
-    '- Bulk actions: include `selectedRows` array with full row data for context',
+    '- Bulk actions: include `selectedRows` array with full row data for context\n\n' +
+    'Presenting choices: When the user needs to make a choice or provide structured input, prefer interactive surfaces over plain text. ' +
+    'Use list (2-8 options, single select), form (structured input with typed fields), confirmation (destructive/important actions), or table (data review with selectable rows).\n\n' +
+    'Tool chaining: After gathering data via tools (web search, browser, APIs), synthesize results into a visual output. ' +
+    'Exception: get_weather automatically renders its own surface — do NOT call ui_show or app_create after get_weather, just respond with a brief summary.\n\n' +
+    'Task progress for multi-step workflows: Create a card with template "task_progress" and templateData containing steps. ' +
+    'As each step completes, call ui_update to patch data.templateData (not top-level fields). ' +
+    'Set templateData.status to "completed" or "failed" when done.',
   category: 'ui-surface',
   defaultRiskLevel: RiskLevel.Low,
   executionMode: 'proxy',