npm - @vellumai/assistant - Versions diffs - 0.3.3 → 0.3.4 - Mend

@vellumai/assistant 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

package/README.md +8 -16
package/package.json +1 -1
package/src/__tests__/call-orchestrator.test.ts +321 -0
package/src/__tests__/channel-approval-routes.test.ts +382 -124
package/src/__tests__/channel-approvals.test.ts +51 -2
package/src/__tests__/channel-delivery-store.test.ts +30 -4
package/src/__tests__/channel-guardian.test.ts +187 -0
package/src/__tests__/config-schema.test.ts +1 -1
package/src/__tests__/daemon-lifecycle.test.ts +635 -0
package/src/__tests__/gateway-only-enforcement.test.ts +19 -13
package/src/__tests__/handlers-twilio-config.test.ts +73 -0
package/src/__tests__/secret-scanner.test.ts +223 -0
package/src/__tests__/shell-parser-property.test.ts +357 -2
package/src/__tests__/system-prompt.test.ts +25 -1
package/src/__tests__/tool-executor-lifecycle-events.test.ts +34 -1
package/src/__tests__/user-reference.test.ts +68 -0
package/src/calls/call-orchestrator.ts +63 -11
package/src/cli/map.ts +6 -0
package/src/commands/__tests__/cc-command-registry.test.ts +67 -0
package/src/commands/cc-command-registry.ts +14 -1
package/src/config/bundled-skills/claude-code/TOOLS.json +10 -3
package/src/config/bundled-skills/messaging/SKILL.md +4 -0
package/src/config/defaults.ts +1 -1
package/src/config/schema.ts +3 -3
package/src/config/skills.ts +5 -32
package/src/config/system-prompt.ts +16 -0
package/src/config/user-reference.ts +29 -0
package/src/config/vellum-skills/catalog.json +52 -0
package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -1
package/src/config/vellum-skills/twilio-setup/SKILL.md +38 -0
package/src/daemon/auth-manager.ts +103 -0
package/src/daemon/computer-use-session.ts +8 -1
package/src/daemon/config-watcher.ts +253 -0
package/src/daemon/handlers/config.ts +36 -13
package/src/daemon/handlers/skills.ts +6 -7
package/src/daemon/ipc-contract.ts +6 -0
package/src/daemon/ipc-handler.ts +87 -0
package/src/daemon/lifecycle.ts +16 -4
package/src/daemon/ride-shotgun-handler.ts +11 -1
package/src/daemon/server.ts +105 -502
package/src/daemon/session-agent-loop.ts +5 -14
package/src/daemon/session-runtime-assembly.ts +60 -44
package/src/daemon/session.ts +8 -1
package/src/memory/db-connection.ts +28 -0
package/src/memory/db-init.ts +1019 -0
package/src/memory/db.ts +2 -2007
package/src/memory/embedding-backend.ts +79 -11
package/src/memory/indexer.ts +2 -0
package/src/memory/job-utils.ts +64 -4
package/src/memory/jobs-worker.ts +7 -1
package/src/memory/recall-cache.ts +107 -0
package/src/memory/retriever.ts +30 -1
package/src/memory/schema-migration.ts +984 -0
package/src/memory/schema.ts +1 -0
package/src/memory/search/types.ts +2 -0
package/src/permissions/prompter.ts +14 -3
package/src/permissions/trust-store.ts +7 -0
package/src/runtime/channel-approvals.ts +17 -3
package/src/runtime/gateway-client.ts +2 -1
package/src/runtime/http-server.ts +15 -4
package/src/runtime/routes/channel-routes.ts +172 -84
package/src/runtime/routes/run-routes.ts +7 -1
package/src/runtime/run-orchestrator.ts +8 -1
package/src/security/secret-scanner.ts +218 -0
package/src/skills/frontmatter.ts +63 -0
package/src/skills/slash-commands.ts +23 -0
package/src/skills/vellum-catalog-remote.ts +107 -0
package/src/tools/browser/auto-navigate.ts +132 -24
package/src/tools/browser/browser-manager.ts +67 -61
package/src/tools/claude-code/claude-code.ts +55 -3
package/src/tools/executor.ts +10 -2
package/src/tools/skills/vellum-catalog.ts +61 -156
package/src/tools/terminal/parser.ts +21 -5
package/src/util/platform.ts +8 -1
package/src/util/retry.ts +4 -4

package/src/security/secret-scanner.ts CHANGED Viewed

@@ -457,6 +457,216 @@ function scanEntropy(
   return matches;
 }
+// ---------------------------------------------------------------------------
+// Encoded secret detection — decode + re-scan pass
+// ---------------------------------------------------------------------------
+/**
+ * Collect every run of URL-safe characters and `%XX` escapes that holds at
+ * least three percent-encoded bytes.
+ *
+ * Implemented as a single left-to-right walk rather than a regex with nested
+ * quantifiers, which caused catastrophic backtracking on long near-miss inputs.
+ *
+ * @param text Text to scan.
+ * @returns One entry per qualifying run: `start` (inclusive) and `end`
+ *   (exclusive) offsets into `text`, plus the matched substring.
+ */
+function findPercentEncodedSegments(text: string): Array<{ start: number; end: number; match: string }> {
+  const urlCharRe = /[A-Za-z0-9_.~+/\-]/;
+  const hexDigitRe = /[0-9A-Fa-f]/;
+  const segments: Array<{ start: number; end: number; match: string }> = [];
+  const total = text.length;
+  let pos = 0;
+  while (pos < total) {
+    // Skip characters that cannot open a candidate run.
+    if (text[pos] !== '%' && !urlCharRe.test(text[pos])) {
+      pos += 1;
+      continue;
+    }
+    const runStart = pos;
+    let escapeCount = 0;
+    // Consume URL-safe characters and complete %XX escapes until anything else appears.
+    for (; pos < total; ) {
+      const ch = text[pos];
+      const isEscape =
+        ch === '%' && pos + 2 < total && hexDigitRe.test(text[pos + 1]) && hexDigitRe.test(text[pos + 2]);
+      if (isEscape) {
+        escapeCount += 1;
+        pos += 3;
+        continue;
+      }
+      if (!urlCharRe.test(ch)) break;
+      pos += 1;
+    }
+    if (escapeCount >= 3) {
+      segments.push({ start: runStart, end: pos, match: text.slice(runStart, pos) });
+    }
+    // A '%' that is not part of a valid escape consumes nothing above; step past it so the walk always advances.
+    if (pos === runStart) pos += 1;
+  }
+  return segments;
+}
+/**
+ * Hex-escape sequences: \xHH patterns (3+ consecutive).
+ * Carries the global flag, so `lastIndex` is stateful and must be reset before each scan.
+ */
+const HEX_ESCAPE_RE = /(?:\\x[0-9A-Fa-f]{2}){3,}/g;
+/**
+ * Candidate base64 segments — 24+ chars that could encode a secret (≥18 decoded bytes).
+ * Accepts both the standard (`+`, `/`) and URL-safe (`-`, `_`) alphabets followed by up
+ * to three `=`; the candidate itself is capture group 1.
+ */
+const ENCODED_BASE64_RE = /\b([A-Za-z0-9+/\-_]{24,}={0,3})(?=\W|$)/g;
+/** Continuous hex-encoded bytes — 32+ hex chars (16+ bytes decoded), delimited by word boundaries. */
+const CONTINUOUS_HEX_RE = /\b([0-9a-fA-F]{32,})\b/g;
+/**
+ * Report whether `s` is non-empty and made up solely of printable ASCII
+ * (0x20–0x7E) plus tab, line feed and carriage return.
+ */
+function isPrintableText(s: string): boolean {
+  if (s.length === 0) return false;
+  const printableOnly = /^[\x20-\x7E\t\n\r]+$/;
+  return printableOnly.test(s);
+}
+/**
+ * Decode a base64 candidate (standard or URL-safe alphabet) to text.
+ *
+ * @returns The decoded string, or `null` when the result is not printable
+ *   text or does not re-encode to the same base64 (ignoring `=` padding).
+ */
+function tryDecodeBase64(encoded: string): string | null {
+  const withoutPadding = (value: string): string => value.replace(/=+$/, '');
+  try {
+    // Map the URL-safe alphabet onto the standard one before decoding.
+    const canonical = encoded.replace(/-/g, '+').replace(/_/g, '/');
+    const text = Buffer.from(canonical, 'base64').toString('utf-8');
+    if (!isPrintableText(text)) return null;
+    // Re-encode and compare: a mismatch means the input was not clean base64 of this text.
+    const roundTripped = withoutPadding(Buffer.from(text, 'utf-8').toString('base64'));
+    return withoutPadding(canonical) === roundTripped ? text : null;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Decode a percent-encoded candidate with `decodeURIComponent`.
+ *
+ * @returns The decoded string, or `null` when decoding fails, changes
+ *   nothing, or yields something other than printable text.
+ */
+function tryDecodePercentEncoded(encoded: string): string | null {
+  try {
+    const plain = decodeURIComponent(encoded);
+    const changed = plain !== encoded;
+    return changed && isPrintableText(plain) ? plain : null;
+  } catch {
+    // Malformed escape sequences make decodeURIComponent throw.
+    return null;
+  }
+}
+/**
+ * Replace every `\xHH` escape in the candidate with the character it denotes.
+ *
+ * @returns The decoded string, or `null` when nothing was replaced or the
+ *   result is not printable text.
+ */
+function tryDecodeHexEscapes(encoded: string): string | null {
+  try {
+    const toChar = (_: string, hex: string): string => String.fromCharCode(parseInt(hex, 16));
+    const plain = encoded.replace(/\\x([0-9A-Fa-f]{2})/g, toChar);
+    const changed = plain !== encoded;
+    return changed && isPrintableText(plain) ? plain : null;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Decode a run of hex digits, two digits per byte, into a string of the
+ * corresponding character codes.
+ *
+ * @returns The decoded string, or `null` when the input has odd length or
+ *   the result is not printable text.
+ */
+function tryDecodeContinuousHex(encoded: string): string | null {
+  try {
+    // An odd number of digits cannot be split into whole bytes.
+    if (encoded.length % 2 !== 0) return null;
+    const byteCount = encoded.length / 2;
+    const codes = Array.from({ length: byteCount }, (_, k) =>
+      parseInt(encoded.slice(k * 2, k * 2 + 2), 16),
+    );
+    const plain = String.fromCharCode(...codes);
+    return isPrintableText(plain) ? plain : null;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Report whether the half-open span `[start, end)` intersects any range
+ * recorded in `ranges`, where each entry is a `"start:end"` key.
+ */
+function overlapsExisting(start: number, end: number, ranges: Set<string>): boolean {
+  return Array.from(ranges).some((entry) => {
+    const colonAt = entry.indexOf(':');
+    const lower = Number(entry.slice(0, colonAt));
+    const upper = Number(entry.slice(colonAt + 1));
+    return start < upper && end > lower;
+  });
+}
+/**
+ * Scan for encoded secrets by decoding candidate segments and running
+ * pattern matching on the decoded content. Catches base64-encoded,
+ * hex-encoded, and percent-encoded secrets that raw regex would miss.
+ *
+ * @param text Text to scan.
+ * @param existingRanges `"start:end"` keys of spans that already produced a
+ *   match. Candidates overlapping any of them are skipped, and every match
+ *   found here is added to the set (the set is mutated).
+ * @returns One match per encoded segment whose decoded form hits a pattern;
+ *   offsets refer to the encoded segment in `text`.
+ */
+function scanEncoded(
+  text: string,
+  existingRanges: Set<string>,
+): SecretMatch[] {
+  const matches: SecretMatch[] = [];
+  // Helper: try to match decoded content against known secret patterns
+  const tryMatchDecoded = (
+    encoded: string,
+    decoded: string,
+    startIndex: number,
+    endIndex: number,
+    encoding: string,
+  ) => {
+    for (const pattern of PATTERNS) {
+      // Reset the regex cursor before scanning this decoded string.
+      pattern.regex.lastIndex = 0;
+      let pm: RegExpExecArray | null;
+      while ((pm = pattern.regex.exec(decoded)) !== null) {
+        // Prefer the first capture group when the pattern defines one.
+        const value = pm[1] ?? pm[0];
+        if (isPlaceholder(value)) continue;
+        if (isAllowlisted(value)) continue;
+        if (pattern.type === 'AWS Secret Key' && !isLikelyAwsSecret(value)) continue;
+        const key = `${startIndex}:${endIndex}`;
+        existingRanges.add(key);
+        matches.push({
+          type: `${pattern.type} (${encoding})`,
+          startIndex,
+          endIndex,
+          // Redaction is derived from the encoded text as it appears in the input.
+          redactedValue: redact(encoded),
+        });
+        // Report at most one match per encoded segment.
+        return;
+      }
+    }
+  };
+  // Percent-encoded segments: use linear-time scanner instead of regex
+  if (text.includes('%')) {
+    for (const seg of findPercentEncodedSegments(text)) {
+      // Segments longer than 1000 characters are not decoded.
+      if (seg.match.length > 1000) continue;
+      if (overlapsExisting(seg.start, seg.end, existingRanges)) continue;
+      const decoded = tryDecodePercentEncoded(seg.match);
+      if (!decoded) continue;
+      tryMatchDecoded(seg.match, decoded, seg.start, seg.end, 'percent-encoded');
+    }
+  }
+  // Regex-based decoders for the remaining encodings
+  const decoders: Array<{
+    regex: RegExp;
+    decode: (s: string) => string | null;
+    encoding: string;
+    quickCheck?: (t: string) => boolean;
+  }> = [
+    { regex: HEX_ESCAPE_RE, decode: tryDecodeHexEscapes, encoding: 'hex-escaped', quickCheck: (t) => t.includes('\\x') },
+    { regex: ENCODED_BASE64_RE, decode: tryDecodeBase64, encoding: 'base64-encoded' },
+    { regex: CONTINUOUS_HEX_RE, decode: tryDecodeContinuousHex, encoding: 'hex-encoded' },
+  ];
+  // Decoders run in array order; a span matched by an earlier decoder is in
+  // existingRanges by the time a later decoder sees it, so it is not reported twice.
+  for (const { regex, decode, encoding, quickCheck } of decoders) {
+    // Skip the regex pass entirely when a cheap substring test rules it out.
+    if (quickCheck && !quickCheck(text)) continue;
+    // The decoder regexes are global; reset the cursor before each scan.
+    regex.lastIndex = 0;
+    let m: RegExpExecArray | null;
+    while ((m = regex.exec(text)) !== null) {
+      const encoded = m[1] ?? m[0];
+      if (encoded.length > 1000) continue;
+      // Offset of the candidate within `text` (the capture may start after the match start).
+      const startIndex = m.index + (m[0].indexOf(encoded));
+      const endIndex = startIndex + encoded.length;
+      if (overlapsExisting(startIndex, endIndex, existingRanges)) continue;
+      const decoded = decode(encoded);
+      if (!decoded) continue;
+      tryMatchDecoded(encoded, decoded, startIndex, endIndex, encoding);
+    }
+  }
+  return matches;
+}
 // ---------------------------------------------------------------------------
 // Scan function
 // ---------------------------------------------------------------------------
@@ -508,6 +718,10 @@ export function scanText(text: string, entropyConfig?: Partial<EntropyConfig>):
   const entropyMatches = scanEntropy(text, eConfig, seen);
   matches.push(...entropyMatches);
+  // Encoded secret detection — decode candidate segments and re-scan
+  const encodedMatches = scanEncoded(text, seen);
+  matches.push(...encodedMatches);
   // Sort by position; at same start, wider match first so redaction covers the full span
   matches.sort((a, b) => a.startIndex - b.startIndex || b.endIndex - a.endIndex);
   return matches;
@@ -547,4 +761,8 @@ export {
   redact as _redact,
   PATTERNS as _PATTERNS,
   hasSecretContext as _hasSecretContext,
+  tryDecodeBase64 as _tryDecodeBase64,
+  tryDecodePercentEncoded as _tryDecodePercentEncoded,
+  tryDecodeHexEscapes as _tryDecodeHexEscapes,
+  tryDecodeContinuousHex as _tryDecodeContinuousHex,
 };

package/src/skills/frontmatter.ts ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * Shared frontmatter parsing for SKILL.md files.
+ *
+ * Frontmatter is a YAML-like block delimited by `---` at the top of a file.
+ * This module provides a single implementation used by the skill catalog loader,
+ * the Vellum catalog installer, and the CC command registry.
+ */
+/**
+ * Matches a `---` delimited frontmatter block at the start of a file.
+ * Capture group 1 is the text between the delimiters; LF and CRLF line
+ * endings are both accepted.
+ */
+export const FRONTMATTER_REGEX = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/;
+export interface FrontmatterParseResult {
+  /** Key-value pairs extracted from the frontmatter block. */
+  fields: Record<string, string>;
+  /** The remaining file content after the frontmatter block. */
+  body: string;
+}
+/**
+ * Parse frontmatter fields from file content.
+ *
+ * Extracts key-value pairs from the `---` delimited block at the top of the
+ * file. Blank lines, lines starting with `#`, and lines without a `:` are
+ * ignored. Handles single- and double-quoted values, and unescapes common
+ * escape sequences (`\n`, `\r`, `\\`, `\"`) in double-quoted values.
+ *
+ * A value counts as quoted only when it has both an opening and a closing
+ * quote, so a value consisting of one lone quote character is kept verbatim.
+ *
+ * @param content Full file content.
+ * @returns The parsed fields and the content following the block, or `null`
+ *   if no frontmatter block is found.
+ */
+export function parseFrontmatterFields(content: string): FrontmatterParseResult | null {
+  const match = content.match(FRONTMATTER_REGEX);
+  if (!match) return null;
+  const frontmatter = match[1];
+  const fields: Record<string, string> = {};
+  for (const line of frontmatter.split(/\r?\n/)) {
+    const trimmed = line.trim();
+    if (!trimmed || trimmed.startsWith('#')) continue;
+    const separatorIndex = trimmed.indexOf(':');
+    if (separatorIndex === -1) continue;
+    const key = trimmed.slice(0, separatorIndex).trim();
+    let value = trimmed.slice(separatorIndex + 1).trim();
+    // A single `"` or `'` both starts and ends with a quote; require two
+    // characters so it is not mistaken for an (empty) quoted pair.
+    const hasQuotePair = value.length >= 2;
+    const isDoubleQuoted = hasQuotePair && value.startsWith('"') && value.endsWith('"');
+    const isSingleQuoted = hasQuotePair && value.startsWith("'") && value.endsWith("'");
+    if (isDoubleQuoted || isSingleQuoted) {
+      value = value.slice(1, -1);
+      if (isDoubleQuoted) {
+        // Unescape sequences produced by buildSkillMarkdown's esc().
+        // Only for double-quoted values — single-quoted YAML treats backslashes literally.
+        // Single-pass to avoid misinterpreting \\n (escaped backslash + n) as a newline.
+        value = value.replace(/\\(["\\nr])/g, (_, ch) => {
+          if (ch === 'n') return '\n';
+          if (ch === 'r') return '\r';
+          return ch; // handles \\ → \ and \" → "
+        });
+      }
+    }
+    fields[key] = value;
+  }
+  return { fields, body: content.slice(match[0].length) };
+}

package/src/skills/slash-commands.ts CHANGED Viewed

@@ -155,6 +155,10 @@ export function formatUnknownSlashSkillMessage(
 /**
  * Rewrite user input for a known slash command into a model-facing prompt
  * that explicitly instructs the model to invoke the skill.
+ *
+ * For the claude-code skill, trailing arguments are routed via the `command`
+ * input (not `prompt`) so that .claude/commands/*.md templates are loaded
+ * and $ARGUMENTS substitution is applied.
  */
 export function rewriteKnownSlashCommandPrompt(params: {
   rawInput: string;
@@ -162,6 +166,25 @@ export function rewriteKnownSlashCommandPrompt(params: {
   skillName: string;
   trailingArgs: string;
 }): string {
+  // For the claude-code skill, route trailing args through the `command` input
+  // so CC command templates (.claude/commands/*.md) are loaded and $ARGUMENTS
+  // substitution is applied, rather than sending them as a raw prompt.
+  if (params.skillId === 'claude-code' && params.trailingArgs) {
+    // Extract the command name (first word of trailing args) and remaining arguments
+    const parts = params.trailingArgs.split(/\s+/);
+    const commandName = parts[0];
+    const commandArgs = parts.slice(1).join(' ');
+    const lines = [
+      `The user invoked the slash command \`/${params.skillId}\`.`,
+      `Execute the Claude Code command "${commandName}" using the claude_code tool with command="${commandName}".`,
+    ];
+    if (commandArgs) {
+      lines.push(`Pass the following as the \`arguments\` input: ${commandArgs}`);
+    }
+    return lines.join('\n');
+  }
   const lines = [
     `The user invoked the slash command \`/${params.skillId}\`.`,
     `Please invoke the "${params.skillName}" skill (ID: ${params.skillId}).`,

package/src/skills/vellum-catalog-remote.ts ADDED Viewed

@@ -0,0 +1,107 @@
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import type { CatalogEntry } from '../tools/skills/vellum-catalog.js';
+import { getLogger } from '../util/logger.js';
+const log = getLogger('vellum-catalog-remote');
+/** URL prefix under which `catalog.json` and each `<skillId>/SKILL.md` are fetched. */
+const GITHUB_RAW_BASE =
+  'https://raw.githubusercontent.com/vellum-ai/vellum-assistant/main/assistant/src/config/vellum-skills';
+/** Maximum age of the in-memory catalog before it is fetched again. */
+const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
+/** Shape expected of `catalog.json` (remote or bundled). */
+interface CatalogManifest {
+  version: number;
+  skills: CatalogEntry[];
+}
+/** In-memory catalog cache; `null` until the first fetch attempt completes. */
+let cachedEntries: CatalogEntry[] | null = null;
+/** `Date.now()` value recorded when `cachedEntries` was last set. */
+let cacheTimestamp = 0;
+/** Build the path of the bundled `catalog.json`, resolved relative to `import.meta.dir`. */
+function getBundledCatalogPath(): string {
+  const relativeParts = ['..', 'config', 'vellum-skills', 'catalog.json'];
+  return join(import.meta.dir, ...relativeParts);
+}
+/**
+ * Read the catalog entries from the bundled `catalog.json`.
+ *
+ * @returns The manifest's `skills` array, or an empty array when the file
+ *   cannot be read or parsed, or when `skills` is missing or not an array.
+ */
+function loadBundledCatalog(): CatalogEntry[] {
+  try {
+    const raw = readFileSync(getBundledCatalogPath(), 'utf-8');
+    const manifest: unknown = JSON.parse(raw);
+    const skills: unknown =
+      typeof manifest === 'object' && manifest !== null
+        ? (manifest as { skills?: unknown }).skills
+        : undefined;
+    if (skills == null) return [];
+    // Mirror the remote path's validation: never hand a non-array to callers
+    // that go on to call array methods on the result.
+    if (!Array.isArray(skills)) {
+      log.warn('Bundled catalog.json has a non-array skills field');
+      return [];
+    }
+    return skills;
+  } catch (err) {
+    log.warn({ err }, 'Failed to read bundled catalog.json');
+    return [];
+  }
+}
+/**
+ * Read the bundled `SKILL.md` for a skill.
+ *
+ * The id is used as a single directory name beneath the bundled
+ * `vellum-skills` folder. Ids that are empty, are `.`/`..`, or contain a path
+ * separator or NUL are rejected so the lookup cannot leave that folder.
+ *
+ * @param skillId Skill identifier (directory name).
+ * @returns The file content, or `null` when the id is rejected or the file
+ *   cannot be read.
+ */
+function getBundledSkillContent(skillId: string): string | null {
+  const isSingleSegment =
+    skillId !== '' && skillId !== '.' && skillId !== '..' && !/[\\\/\0]/.test(skillId);
+  if (!isSingleSegment) return null;
+  try {
+    const skillPath = join(import.meta.dir, '..', 'config', 'vellum-skills', skillId, 'SKILL.md');
+    return readFileSync(skillPath, 'utf-8');
+  } catch {
+    return null;
+  }
+}
+/**
+ * Return the vellum-skills catalog, preferring the remote copy.
+ *
+ * A result cached less than `CACHE_TTL_MS` ago is returned as-is. Otherwise
+ * the remote `catalog.json` is fetched with a 5 s timeout; on any failure the
+ * bundled catalog is used instead. Either outcome refreshes the cache.
+ */
+export async function fetchCatalogEntries(): Promise<CatalogEntry[]> {
+  const startedAt = Date.now();
+  const cacheAge = startedAt - cacheTimestamp;
+  if (cachedEntries && cacheAge < CACHE_TTL_MS) {
+    return cachedEntries;
+  }
+  try {
+    const response = await fetch(`${GITHUB_RAW_BASE}/catalog.json`, {
+      signal: AbortSignal.timeout(5000),
+    });
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+    }
+    const { skills: remoteSkills }: CatalogManifest = await response.json();
+    const usable = Array.isArray(remoteSkills) && remoteSkills.length > 0;
+    if (!usable) {
+      throw new Error('Remote catalog has invalid or empty skills array');
+    }
+    cachedEntries = remoteSkills;
+    cacheTimestamp = startedAt;
+    log.info({ count: remoteSkills.length }, 'Fetched remote vellum-skills catalog');
+    return remoteSkills;
+  } catch (err) {
+    log.warn({ err }, 'Failed to fetch remote catalog, falling back to bundled copy');
+    // Cache the fallback as well so an outage does not trigger a fetch on every call.
+    const fallback = loadBundledCatalog();
+    cachedEntries = fallback;
+    cacheTimestamp = startedAt;
+    return fallback;
+  }
+}
+/**
+ * Fetch the `SKILL.md` for a skill from the remote repository (10 s timeout).
+ * On any failure the bundled copy is returned instead, which may be `null`.
+ */
+export async function fetchSkillContent(skillId: string): Promise<string | null> {
+  let markdown: string;
+  try {
+    const remoteUrl = `${GITHUB_RAW_BASE}/${encodeURIComponent(skillId)}/SKILL.md`;
+    const response = await fetch(remoteUrl, { signal: AbortSignal.timeout(10000) });
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+    }
+    markdown = await response.text();
+    log.info({ skillId }, 'Fetched remote SKILL.md');
+  } catch (err) {
+    log.warn({ err, skillId }, 'Failed to fetch remote SKILL.md, falling back to bundled copy');
+    return getBundledSkillContent(skillId);
+  }
+  return markdown;
+}
+/**
+ * Report whether `skillId` appears in the catalog returned by
+ * `fetchCatalogEntries` (the remote catalog, or the bundled one on fallback).
+ */
+export async function checkVellumSkill(skillId: string): Promise<boolean> {
+  for (const entry of await fetchCatalogEntries()) {
+    if (entry.id === skillId) return true;
+  }
+  return false;
+}

package/src/tools/browser/auto-navigate.ts CHANGED Viewed

@@ -10,9 +10,9 @@ import { getLogger } from '../../util/logger.js';
 const log = getLogger('auto-navigate');
 const CDP_BASE = 'http://localhost:9222';
-const MAX_PAGES = 15;
-const PAGE_WAIT_MS = 3500;
-const SCROLL_WAIT_MS = 2000;
+const MAX_PAGES = 10;
+const PAGE_WAIT_MS = 2500;
+const SCROLL_WAIT_MS = 1000;
 /** Minimal CDP client — connects to one page tab. */
 class MiniCDP {
@@ -57,15 +57,28 @@ class MiniCDP {
   close() { this.ws?.close(); }
 }
+/** Progress event passed to the optional `onProgress` callback of `autoNavigate`. */
+export interface AutoNavProgress {
+  /**
+   * Event kind: `visiting` is sent just before a page navigation, `discovered`
+   * after the root page's links have been ranked, and `done` after the CDP
+   * connection is closed at the end of the crawl.
+   */
+  type: 'visiting' | 'discovered' | 'done';
+  /** URL about to be visited (sent with `visiting`). */
+  url?: string;
+  /** Sent with `visiting`: 1 for the root page, `visited.size + 1` for later pages. */
+  pageNumber?: number;
+  /** Number of ranked links discovered from the root page. */
+  totalDiscovered?: number;
+  /** Number of URLs visited (sent with `done`). */
+  visitedCount?: number;
+}
 /**
  * Navigate Chrome through a domain's pages to trigger API calls.
  * Discovers internal links from the DOM and visits up to ~15 unique paths.
  *
  * @param domain The domain to crawl (e.g. "example.com").
  * @param abortSignal Optional signal to stop navigation early.
+ * @param onProgress Optional callback for live progress updates.
  * @returns List of visited page URLs.
  */
-export async function autoNavigate(domain: string, abortSignal?: { aborted: boolean }): Promise<string[]> {
+export async function autoNavigate(
+  domain: string,
+  abortSignal?: { aborted: boolean },
+  onProgress?: (p: AutoNavProgress) => void,
+): Promise<string[]> {
   let wsUrl: string | null = null;
   try {
     const res = await fetch(`${CDP_BASE}/json/list`);
@@ -108,6 +121,7 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
   // Navigate to the domain root first
   try {
+    onProgress?.({ type: 'visiting', url: rootUrl, pageNumber: 1 });
     await cdp.send('Page.navigate', { url: rootUrl });
     await sleep(PAGE_WAIT_MS);
     visited.add('/');
@@ -125,12 +139,11 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
   await scrollPage(cdp);
   await sleep(SCROLL_WAIT_MS);
-  // Click common interactive elements on the root page
-  await clickInteractiveElements(cdp);
-  await sleep(SCROLL_WAIT_MS);
   // Discover internal links from the current page
-  const discoveredLinks = await discoverInternalLinks(cdp, domain);
+  let discoveredLinks = await discoverInternalLinks(cdp, domain);
+  // Rank links: drop nav-chrome paths, keep one link per URL pattern, then order high-value paths first and deeper paths next
+  discoveredLinks = rankLinks(discoveredLinks);
+  onProgress?.({ type: 'discovered', totalDiscovered: discoveredLinks.length });
   log.info({ count: discoveredLinks.length }, 'Discovered internal links from root');
   // Visit discovered pages
@@ -140,6 +153,7 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
     if (visited.has(link.key)) continue;
     const url = link.url;
+    onProgress?.({ type: 'visiting', url, pageNumber: visited.size + 1, totalDiscovered: discoveredLinks.length });
     log.info({ url }, 'Auto-navigate visiting page');
     try {
@@ -152,9 +166,9 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
       await scrollPage(cdp);
       await sleep(SCROLL_WAIT_MS);
-      // Click interactive elements to trigger more API calls
-      await clickInteractiveElements(cdp);
-      await sleep(1500);
+      // Click tabs/buttons within the page (NOT nav links — those navigate away)
+      await clickPageTabs(cdp);
+      await sleep(800);
       // Discover more links from this page
       const newLinks = await discoverInternalLinks(cdp, domain);
@@ -171,6 +185,7 @@ export async function autoNavigate(domain: string, abortSignal?: { aborted: bool
   }
   cdp.close();
+  onProgress?.({ type: 'done', visitedCount: visitedUrls.length, totalDiscovered: discoveredLinks.length });
   log.info({ visited: visitedUrls.length, total: discoveredLinks.length + 1 }, 'Auto-navigation finished');
   return visitedUrls;
 }
@@ -180,6 +195,56 @@ interface DiscoveredLink {
   url: string;
   /** Deduplication key: origin + pathname. */
   key: string;
+  /** Path depth (number of segments). */
+  depth: number;
+}
+/**
+ * Paths that are typically navigation chrome, not content pages.
+ * Compared for exact equality (with or without a trailing slash) against the
+ * lower-cased pathname.
+ */
+const SKIP_PATHS = [
+  '/home', '/login', '/signup', '/register', '/sign-up', '/sign-in',
+  '/help', '/support', '/contact', '/about', '/terms', '/privacy',
+  '/careers', '/press', '/blog', '/faq', '/sitemap',
+];
+/**
+ * Path patterns that indicate high-value purchase/content flows.
+ * Unanchored, so they match anywhere in the pathname.
+ */
+const HIGH_VALUE_PATTERNS = [
+  /\/orders/i, /\/cart/i, /\/checkout/i, /\/account/i, /\/settings/i,
+  /\/store\//i, /\/restaurant\//i, /\/menu/i, /\/payment/i,
+  /\/profile/i, /\/history/i, /\/favorites/i, /\/saved/i,
+  /\/search/i, /\/category/i, /\/collection/i,
+];
+/**
+ * Filter, deduplicate and order discovered links.
+ *
+ * 1. Links whose path is one of `SKIP_PATHS` are dropped.
+ * 2. Links are deduplicated by hostname + path pattern, where every path
+ *    segment that is entirely numeric or an 8+ character hex string is
+ *    replaced by `{id}`. The first link seen for a pattern is kept, so only
+ *    one of `/store/123`, `/store/456` survives while different subdomains
+ *    stay distinct.
+ * 3. Survivors are sorted with `HIGH_VALUE_PATTERNS` matches first, then by
+ *    depth (deeper first, depth capped at 4). Ties keep discovery order.
+ *
+ * @param links Links as discovered; the array is not modified.
+ * @returns A new, ranked array.
+ */
+function rankLinks(links: DiscoveredLink[]): DiscoveredLink[] {
+  // Whole-segment test: classifying segments one by one keeps hex ids that
+  // begin with a digit (e.g. `1a2b3c4d5e`) from being half-rewritten by the
+  // numeric rule and then missed by the hex rule.
+  const idSegment = /^(?:\d+|[a-f0-9]{8,})$/i;
+  const byPattern = new Map<string, DiscoveredLink>();
+  for (const link of links) {
+    const parsed = new URL(link.url);
+    const path = parsed.pathname.toLowerCase();
+    if (SKIP_PATHS.some(skip => path === skip || path === skip + '/')) continue;
+    const pathPattern = parsed.pathname
+      .split('/')
+      .map(segment => (idSegment.test(segment) ? '{id}' : segment))
+      .join('/');
+    const pattern = parsed.hostname + pathPattern;
+    if (!byPattern.has(pattern)) {
+      byPattern.set(pattern, link);
+    }
+  }
+  // Compute each sort key once instead of re-parsing URLs inside the comparator.
+  const scored = [...byPattern.values()].map(link => ({
+    link,
+    highValue: HIGH_VALUE_PATTERNS.some(p => p.test(new URL(link.url).pathname.toLowerCase())),
+    depth: Math.min(link.depth, 4),
+  }));
+  scored.sort((a, b) => {
+    // High-value paths first
+    if (a.highValue !== b.highValue) return a.highValue ? -1 : 1;
+    // Then by depth (deeper = more specific)
+    return b.depth - a.depth;
+  });
+  return scored.map(entry => entry.link);
 }
 /** Extract internal links from the current page DOM, preserving subdomains. */
@@ -204,7 +269,11 @@ async function discoverInternalLinks(cdp: MiniCDP, domain: string): Promise<Disc
               const key = url.origin + url.pathname;
               if (!seen.has(key)) {
                 seen.add(key);
-                links.push({ url: url.origin + url.pathname, key });
+                links.push({
+                  url: url.origin + url.pathname,
+                  key,
+                  depth: path.split('/').filter(Boolean).length,
+                });
               }
             } catch { /* skip malformed URLs */ }
           }
@@ -222,25 +291,64 @@ async function discoverInternalLinks(cdp: MiniCDP, domain: string): Promise<Disc
 /** Scroll the page to trigger lazy-loaded content. */
 async function scrollPage(cdp: MiniCDP): Promise<void> {
-  await cdp.send('Runtime.evaluate', {
-    expression: 'window.scrollBy(0, 800)',
-    awaitPromise: false,
-  }).catch(() => {});
+  // Scroll in increments to trigger multiple lazy-load thresholds
+  for (let i = 0; i < 3; i++) {
+    await cdp.send('Runtime.evaluate', {
+      expression: 'window.scrollBy(0, 600)',
+      awaitPromise: false,
+    }).catch(() => {});
+    await sleep(500);
+  }
 }
-/** Click common interactive elements (tabs, nav buttons) to trigger API calls. */
-async function clickInteractiveElements(cdp: MiniCDP): Promise<void> {
+/**
+ * Click tabs, buttons, and flow-relevant elements within the current page.
+ * Avoids clicking navigation links (which would navigate away).
+ */
+async function clickPageTabs(cdp: MiniCDP): Promise<void> {
   const selectors = [
-    'nav a:not([href="/"])',
-    '[role="tab"]',
-    '[role="tablist"] button',
+    '[role="tab"]:not(:first-child)',
+    '[role="tablist"] button:not(:first-child)',
     'button[data-tab]',
-    '.tab, .nav-tab, .nav-link',
+    '[data-testid*="tab"]',
+    'button[aria-expanded="false"]',
   ];
   for (const selector of selectors) {
     await clickInPage(cdp, selector);
-    await sleep(800);
+    await sleep(600);
+  }
+  // Also try clicking purchase-flow buttons to trigger API calls
+  // (Add to Cart, etc. — these fire API requests even if we don't complete the flow)
+  await clickByText(cdp, 'Add to Cart');
+  await clickByText(cdp, 'Add to Order');
+  await clickByText(cdp, 'Add Item');
+}
+/**
+ * Click the first `button` / `[role="button"]` element on the page whose
+ * trimmed text contains `text`, compared case-insensitively.
+ *
+ * @returns `true` when the in-page script reports a click; `false` when no
+ *   element matched or the CDP call failed.
+ */
+async function clickByText(cdp: MiniCDP, text: string): Promise<boolean> {
+  try {
+    // JSON-encode the needle so it is embedded in the script as a safe string literal.
+    const needle = JSON.stringify(text.toLowerCase());
+    const expression = `
+        (function() {
+          const buttons = document.querySelectorAll('button, [role="button"]');
+          for (const btn of buttons) {
+            if (btn.textContent && btn.textContent.trim().toLowerCase().includes(${needle})) {
+              btn.scrollIntoView({ block: 'center' });
+              btn.click();
+              return true;
+            }
+          }
+          return false;
+        })()
+      `;
+    const response = (await cdp.send('Runtime.evaluate', {
+      expression,
+      awaitPromise: false,
+      returnByValue: true,
+    })) as { result?: { value?: boolean } };
+    const clicked = response?.result?.value;
+    return clicked === true;
+  } catch {
+    return false;
   }
 }