npm - vesper-wizard - Versions diffs - 2.0.6 → 2.0.8 - Mend

vesper-wizard 2.0.6 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +22 -0
package/build/index.js +8 -0
package/build/python/asset_downloader_engine.py +2 -0
package/build/python/vesper/core/asset_downloader.py +5 -1
package/package.json +2 -2
package/scripts/wizard.cjs +625 -0
package/src/python/asset_downloader_engine.py +2 -0
package/src/python/vesper/core/asset_downloader.py +5 -1

package/README.md CHANGED Viewed

@@ -222,6 +222,28 @@ export_dataset(
 ---
+#### `vesper_download_assets`
+Download image/media assets to a user-controlled local directory.
+**Parameters:**
+- `dataset_id` (string): Dataset identifier
+- `source` (string): `huggingface`, `kaggle`, or `url`
+- `target_dir` (string, optional): Exact local directory where assets should be written
+- `output_dir` (string, optional): Alias for `target_dir`
+- `output_format` (string, optional): `webdataset`, `imagefolder`, or `parquet`
+**Example:**
+```
+vesper_download_assets(
+  dataset_id="cats_vs_dogs",
+  source="kaggle",
+  target_dir="./datasets/cats_dogs_100",
+  output_format="imagefolder"
+)
+```
+---
 ### Quality Analysis
 #### `analyze_image_quality`

package/build/index.js CHANGED Viewed

@@ -960,6 +960,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
                         kaggle_ref: { type: "string", description: "Kaggle dataset ref (owner/dataset)." },
                         urls: { type: "array", items: { type: "string" }, description: "Direct asset URLs." },
                         output_format: { type: "string", enum: ["webdataset", "imagefolder", "parquet"], description: "Output asset format." },
+                        target_dir: { type: "string", description: "Optional local directory where downloaded assets should be written. If provided, Vesper writes directly to this directory instead of managed asset storage." },
+                        output_dir: { type: "string", description: "Alias for target_dir. When provided, downloaded assets are written directly to this local directory." },
                         max_items: { type: "number", description: "Optional cap on number of assets to fetch." },
                         workers: { type: "number", description: "Parallel worker count (default 8)." },
                         image_column: { type: "string", description: "Explicit image column name. If omitted, auto-detected from HF features, column names, and sample values." },
@@ -1521,6 +1523,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                 ? (request.params.arguments?.urls).map(v => String(v))
                 : undefined;
             const outputFormat = String(request.params.arguments?.output_format || "webdataset");
+            const requestedOutputDir = request.params.arguments?.target_dir
+                ? String(request.params.arguments.target_dir).trim()
+                : request.params.arguments?.output_dir
+                    ? String(request.params.arguments.output_dir).trim()
+                    : undefined;
             const maxItems = request.params.arguments?.max_items ? Number(request.params.arguments.max_items) : undefined;
             const workers = request.params.arguments?.workers ? Number(request.params.arguments.workers) : 8;
             const imageColumn = request.params.arguments?.image_column ? String(request.params.arguments.image_column) : undefined;
@@ -1563,6 +1570,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                 kaggle_ref: kaggleRef,
                 urls,
                 output_format: outputFormat,
+                output_dir: requestedOutputDir,
                 max_items: maxItems,
                 workers,
                 image_column: imageColumn,

package/build/python/asset_downloader_engine.py CHANGED Viewed

@@ -26,6 +26,7 @@ def _print(payload: Dict[str, Any]) -> None:
 async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
     payload = json.loads(args.payload)
     output_root = payload.get("output_root") or str(Path.home() / ".vesper" / "data" / "assets")
+    output_dir = payload.get("output_dir")
     workers = int(payload.get("workers") or 8)
     recipes_dir = payload.get("recipes_dir")
@@ -43,6 +44,7 @@ async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
         kaggle_ref=payload.get("kaggle_ref"),
         urls=payload.get("urls"),
         output_format=payload.get("output_format", "webdataset"),
+        output_dir=str(output_dir) if output_dir else None,
         max_items=payload.get("max_items"),
         image_column=payload.get("image_column"),
     )

package/build/python/vesper/core/asset_downloader.py CHANGED Viewed

@@ -191,6 +191,7 @@ class AssetDownloader:
         kaggle_ref: Optional[str] = None,
         urls: Optional[List[str]] = None,
         output_format: str = "webdataset",
+        output_dir: Optional[str] = None,
         max_items: Optional[int] = None,
         image_column: Optional[str] = None,
     ) -> Dict[str, Any]:
@@ -231,7 +232,10 @@ class AssetDownloader:
                 raise ValueError("urls are required for source=url")
         # --- Now safe to create directories ---
-        dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
+        if output_dir:
+            dataset_dir = Path(output_dir).expanduser().resolve()
+        else:
+            dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
         images_dir = dataset_dir / "images"
         dataset_dir.mkdir(parents=True, exist_ok=True)
         images_dir.mkdir(parents=True, exist_ok=True)

package/package.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "name": "vesper-wizard",
-  "version": "2.0.6",
+  "version": "2.0.8",
   "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
   "type": "module",
   "main": "build/index.js",
   "bin": {
     "mcp-server": "./build/index.js",
     "vespermcp": "./build/index.js",
-    "vesper-wizard": "scripts/wizard.js"
+    "vesper-wizard": "scripts/wizard.cjs"
   },
   "files": [
     "build/**/*",

package/scripts/wizard.cjs ADDED Viewed

@@ -0,0 +1,625 @@
+#!/usr/bin/env node
+// ─────────────────────────────────────────────────────────────
+//  vesper-wizard  —  Zero-friction local setup for Vesper MCP
+//  Run:  npx vesper-wizard@latest
+// ─────────────────────────────────────────────────────────────
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+const crypto = require('crypto');
+const { execSync, spawnSync } = require('child_process');
+const http = require('http');
+const https = require('https');
+const readline = require('readline');
+// ── Paths ────────────────────────────────────────────────────
+const HOME = os.homedir();
+const VESPER_DIR = path.join(HOME, '.vesper'); // root of everything the wizard creates: <home>/.vesper
+const CONFIG_TOML = path.join(VESPER_DIR, 'config.toml'); // read and written through readToml / writeToml below
+const DATA_DIR = path.join(VESPER_DIR, 'data');
+const IS_WIN = process.platform === 'win32';
+const APPDATA = process.env.APPDATA || path.join(HOME, 'AppData', 'Roaming'); // fallback when the APPDATA variable is unset; only used in the IS_WIN branches of getAllAgentConfigs
+// ── Helpers ──────────────────────────────────────────────────
+function ensureDir(dir) { // Create dir, including any missing parents, unless it already exists.
+  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+}
+function generateLocalKey() { // Build a new local-only key: fixed prefix followed by random hex.
+  const random = crypto.randomBytes(24).toString('hex'); // 24 random bytes -> 48 hex characters
+  return `vesper_sk_local_${random}`; // this prefix is what isCloudApiKey checks to tell local keys from cloud keys
+}
+function readToml(filePath) { // Minimal reader: returns a flat object built from the double-quoted string assignments found in filePath.
+  if (!fs.existsSync(filePath)) return {}; // a missing file reads as an empty config
+  const content = fs.readFileSync(filePath, 'utf8');
+  const obj = {};
+  for (const line of content.split('\n')) {
+    const m = line.match(/^\s*(\w+)\s*=\s*"(.*)"\s*$/); // only word-character keys with a double-quoted value match; table headers, comments, numbers, booleans and arrays are skipped
+    if (m) obj[m[1]] = m[2]; // value is taken verbatim between the outer quotes, no escape sequences are decoded; a repeated key keeps the last value
+  }
+  return obj;
+}
+function writeToml(filePath, data) { // Replace filePath with one double-quoted string assignment per entry of data; the parent directory is created first.
+  ensureDir(path.dirname(filePath));
+  const lines = Object.entries(data).map(([k, v]) => `${k} = "${v}"`); // NOTE(review): v is interpolated without escaping, so a value containing a double quote or backslash is not a valid TOML basic string for other parsers, and a newline would split the entry
+  fs.writeFileSync(filePath, lines.join('\n') + '\n', 'utf8'); // NOTE(review): whole-file rewrite; when a config is read with readToml and written back, every line readToml skipped (tables, comments, non-string values) is dropped
+}
+function dim(text) { return `\x1b[2m${text}\x1b[0m`; } // ANSI styling helpers: each wraps text in an SGR code and a trailing reset; emitted unconditionally (no TTY or NO_COLOR check in this file)
+function bold(text) { return `\x1b[1m${text}\x1b[0m`; } // SGR 1
+function green(text) { return `\x1b[32m${text}\x1b[0m`; } // SGR 32
+function cyan(text) { return `\x1b[36m${text}\x1b[0m`; } // SGR 36
+function yellow(text) { return `\x1b[33m${text}\x1b[0m`; } // SGR 33
+function red(text) { return `\x1b[31m${text}\x1b[0m`; } // SGR 31
+function magenta(text) { return `\x1b[35m${text}\x1b[0m`; } // SGR 35; not referenced elsewhere in the visible file
+// ── Vesper API URL resolution ────────────────────────────────
+const VESPER_API_URL = process.env.VESPER_API_URL || ''; // optional override; when set it is the only endpoint probed (see resolveVesperApiBaseUrl)
+const DEFAULT_VESPER_API_CANDIDATES = [ // probed in this order; the first endpoint that reports ready is used
+  'http://localhost:3000', // NOTE(review): plain-http local endpoints are tried before the hosted one, so whatever listens on local port 3000 gets the first chance to act as the auth server - confirm this is intended for published builds
+  'http://127.0.0.1:3000',
+  'https://vesper.dev',
+];
+// ── Device Auth Helpers ──────────────────────────────────────
+function httpJson(method, url, body) { // Send one HTTP(S) request and resolve with { status, body } for any status code; rejects on a request error event or if the URL cannot be parsed.
+  return new Promise((resolve, reject) => {
+    const parsed = new URL(url);
+    const lib = parsed.protocol === 'https:' ? https : http; // choose the client module from the URL scheme
+    const opts = { // NOTE(review): no timeout option and no req.setTimeout, so a server that accepts the connection but never answers leaves this promise pending
+      method,
+      hostname: parsed.hostname,
+      port: parsed.port || (parsed.protocol === 'https:' ? 443 : 80),
+      path: parsed.pathname + parsed.search,
+      headers: { 'Content-Type': 'application/json' },
+    };
+    const req = lib.request(opts, (res) => {
+      let data = '';
+      res.on('data', (chunk) => (data += chunk));
+      res.on('end', () => {
+        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
+        catch { resolve({ status: res.statusCode, body: data }); } // a non-JSON response is returned as raw text instead of failing
+      });
+    });
+    req.on('error', reject);
+    if (body) req.write(JSON.stringify(body)); // body is optional; no call in the visible file passes one
+    req.end();
+  });
+}
+async function probeDeviceAuth(baseUrl) { // Classify one candidate endpoint as ready, setup-required or unreachable; never throws.
+  try {
+    const res = await httpJson('POST', `${baseUrl}/api/auth/device/start`); // NOTE(review): the probe is a real device-start request, so a ready endpoint has presumably already issued a code by the time this returns - confirm server-side cost
+    if (res.status === 201 && !!res.body && !!res.body.code) {
+      return { baseUrl, status: 'ready', response: res.body };
+    }
+    if (res.status === 503 && res.body && res.body.requiresSetup) {
+      return {
+        baseUrl,
+        status: 'setup-required',
+        response: res.body,
+        message: res.body.error || 'Auth storage is not initialized.',
+      };
+    }
+    return { // any other status or body shape is reported as unreachable, even though the server did answer
+      baseUrl,
+      status: 'unreachable',
+      response: res.body,
+      message: typeof res.body === 'string' ? res.body : JSON.stringify(res.body),
+    };
+  } catch (error) { // request-level failure (httpJson rejected)
+    return {
+      baseUrl,
+      status: 'unreachable',
+      message: error && error.message ? error.message : 'Request failed',
+    };
+  }
+}
+async function resolveVesperApiBaseUrl() { // Probe candidates in order and return a probe result object (not a URL string): the first ready one, else the first setup-required one, else null.
+  const candidates = VESPER_API_URL
+    ? [VESPER_API_URL] // an explicit override disables the default candidate list
+    : DEFAULT_VESPER_API_CANDIDATES;
+  let setupRequiredProbe = null;
+  for (const candidate of candidates) { // probes run sequentially, one request at a time
+    const probe = await probeDeviceAuth(candidate);
+    if (probe.status === 'ready') {
+      return probe;
+    }
+    if (!setupRequiredProbe && probe.status === 'setup-required') {
+      setupRequiredProbe = probe; // remember only the first such endpoint; keep looking for a ready one
+    }
+  }
+  return setupRequiredProbe;
+}
+function openBrowser(url) { // Best-effort: hand url to the platform opener; returns nothing and reports no success or failure.
+  try {
+    if (process.platform === 'win32') {
+      spawnSync('cmd', ['/c', 'start', '', url], { stdio: 'ignore' }); // NOTE(review): url passes through cmd here; characters such as an ampersand may be interpreted by cmd rather than passed to the browser - verify, since the only visible caller passes a server-supplied loginUrl
+    } else if (process.platform === 'darwin') {
+      spawnSync('open', [url], { stdio: 'ignore' });
+    } else {
+      spawnSync('xdg-open', [url], { stdio: 'ignore' });
+    }
+  } catch { /* browser open is best-effort */ } // NOTE(review): the spawnSync return value (status / error) is also ignored, so a missing opener goes unnoticed
+}
+function askYesNo(question) { // Prompt on stdin and resolve true for an empty answer or one starting with y (any case), false otherwise. Not called anywhere in the visible file.
+  return new Promise((resolve) => {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    rl.question(`  ${question} ${dim('[Y/n]')} `, (answer) => {
+      rl.close();
+      resolve(!answer || answer.toLowerCase().startsWith('y')); // the answer is not trimmed, so leading whitespace before y counts as no
+    });
+  });
+}
+function askInput(question) { // Prompt on stdin and resolve with the trimmed answer (empty string when nothing was typed).
+  return new Promise((resolve) => {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); // a fresh interface per question, closed as soon as the answer arrives
+    rl.question(`  ${question} `, (answer) => {
+      rl.close();
+      resolve(String(answer || '').trim());
+    });
+  });
+}
+async function askChoice(question, choices, defaultValue) { // Print a numbered menu of { value, label } choices and resolve with the selected value; unrecognised input resolves to defaultValue (undefined if none was given).
+  console.log(`  ${question}`);
+  choices.forEach((choice, index) => {
+    console.log(`    ${dim(String(index + 1) + ')')} ${choice.label}`); // menu numbers are 1-based
+  });
+  const prompt = defaultValue ? `${dim('[default: ' + defaultValue + ']')}` : '';
+  const answer = await askInput(`${prompt} ${cyan('→')} Choose an option:`);
+  if (!answer && defaultValue) {
+    return defaultValue;
+  }
+  const numeric = Number(answer);
+  if (Number.isFinite(numeric) && numeric >= 1 && numeric <= choices.length) { // NOTE(review): a non-integer such as 1.5 passes this range check, and the lookup on the next line then reads .value of undefined and throws
+    return choices[numeric - 1].value;
+  }
+  const matched = choices.find((choice) => choice.value === answer); // the value itself (for example manual) may be typed instead of its number
+  return matched ? matched.value : defaultValue;
+}
+function isCloudApiKey(value) { // True for a non-empty key that has the vesper_sk_ prefix but not the local-key prefix; only the prefix is checked, not length or content.
+  return !!value && value.startsWith('vesper_sk_') && !value.startsWith('vesper_sk_local_');
+}
+async function promptForManualApiKey() { // Ask for a cloud API key until the input passes isCloudApiKey, then resolve with it.
+  console.log(`\n  ${cyan('■')} ${bold('Manual API Key')}`);
+  console.log(`  ${dim('Paste a Vesper cloud API key. It will be stored locally in config.toml.\n')}`);
+  while (true) { // NOTE(review): the only exit is a valid key; there is no cancel or back option, and the typed key is echoed by readline
+    const value = await askInput(`${cyan('→')} Vesper API key:`);
+    if (isCloudApiKey(value)) {
+      return value;
+    }
+    console.log(`  ${yellow('!')} ${yellow('Expected a Vesper key starting with vesper_sk_')}`); // a local key (vesper_sk_local_ prefix) is also rejected here, which this message does not mention
+  }
+}
+async function chooseAuthMode(existingKey, existingAuthMode) { // Show the current key state (if any) and ask how to authenticate; resolves with keep, manual, browser or local (or whatever default askChoice falls back to).
+  const hasExistingKey = !!existingKey;
+  if (hasExistingKey) {
+    console.log(`      ${dim('Current key:')} ${dim(existingKey.slice(0, 24) + '...')}`); // only the first 24 characters are shown
+    console.log(`      ${dim('Current mode:')} ${dim(existingAuthMode || (isCloudApiKey(existingKey) ? 'cloud' : 'local_unified'))}`); // when no mode is stored, the displayed mode is inferred from the key prefix
+  }
+  const choices = [];
+  if (hasExistingKey) {
+    choices.push({ value: 'keep', label: 'Keep current key as-is' }); // offered only when a key already exists
+  }
+  choices.push({ value: 'manual', label: 'Provide Vesper API key manually' });
+  choices.push({ value: 'browser', label: 'Sign in through the browser' });
+  choices.push({ value: 'local', label: 'Use local-only key' });
+  return await askChoice(`${cyan('→')} How do you want to authenticate Vesper?`, choices, hasExistingKey ? 'keep' : 'browser'); // default: keep when a key exists, otherwise browser
+}
+async function deviceAuthFlow() { // Browser device login: pick an endpoint, request a device code, open the login URL, then poll until confirmed, expired or timed out. Resolves with the API key, or null on any failure.
+  console.log(`\n  ${cyan('■')} ${bold('Device Authentication')}`);
+  console.log(`  ${dim('Link your CLI to a Vesper account for cloud features\n')}`);
+  const resolvedApiBaseUrl = await resolveVesperApiBaseUrl(); // despite the name this is a probe result object ({ baseUrl, status, ... }) or null
+  if (!resolvedApiBaseUrl) {
+    console.log(`  ${red('✗')} ${red('Could not reach any Vesper auth endpoint.')}`);
+    console.log(`      ${dim('Tried:')} ${dim((VESPER_API_URL ? [VESPER_API_URL] : DEFAULT_VESPER_API_CANDIDATES).join(', '))}`);
+    console.log(`      ${dim('If your landing app is running locally, start it on http://localhost:3000 or set VESPER_API_URL.')}`);
+    console.log(`      ${dim('Falling back to local-only mode.\n')}`);
+    return null;
+  }
+  if (resolvedApiBaseUrl.status === 'setup-required') {
+    console.log(`  ${yellow('!')} ${yellow('Reached Vesper auth endpoint, but local auth storage is not initialized.')}`);
+    console.log(`      ${dim('Endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
+    console.log(`      ${dim('Reason:')} ${dim(resolvedApiBaseUrl.message || 'Apply Supabase migrations first.')}`);
+    console.log(`      ${dim('Run the SQL in supabase/migrations/001_device_auth.sql and 002_rate_limits.sql, then retry.')}`);
+    console.log(`      ${dim('Falling back to local-only mode.\n')}`);
+    return null;
+  }
+  console.log(`  ${dim('Auth endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}\n`);
+  // Step 1: Call /api/auth/device/start
+  process.stdout.write(`  ${dim('Requesting device code...')}`);
+  let startRes;
+  try {
+    startRes = await httpJson('POST', `${resolvedApiBaseUrl.baseUrl}/api/auth/device/start`); // NOTE(review): second start request; the probe already received a code in resolvedApiBaseUrl.response, which is not reused here
+  } catch (err) {
+    console.log(` ${red('✗')}`);
+    console.log(`      ${red('Could not reach Vesper API at')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
+    console.log(`      ${dim('Falling back to local-only mode.\n')}`);
+    return null;
+  }
+  if (startRes.status !== 201 || !startRes.body.code) {
+    console.log(` ${red('✗')}`);
+    console.log(`      ${red('Unexpected response:')} ${dim(JSON.stringify(startRes.body))}`);
+    return null;
+  }
+  const { code, loginUrl } = startRes.body; // NOTE(review): only code was validated above; a response without loginUrl makes loginUrl.padEnd below throw
+  console.log(` ${green('✓')}\n`);
+  // Step 2: Display code and open browser
+  console.log(`  ┌───────────────────────────────────────────────┐`);
+  console.log(`  │                                               │`);
+  console.log(`  │   ${bold('Your device code:')}  ${cyan(bold(code))}              │`);
+  console.log(`  │                                               │`);
+  console.log(`  │   ${dim('Open this URL to sign in:')}                   │`);
+  console.log(`  │   ${cyan(loginUrl.padEnd(41))}│`);
+  console.log(`  │                                               │`);
+  console.log(`  └───────────────────────────────────────────────┘\n`);
+  openBrowser(loginUrl);
+  console.log(`  ${dim('Browser opened automatically.')}`); // printed unconditionally; openBrowser does not report whether anything was opened
+  console.log(`  ${dim('Waiting for you to sign in...')}\n`);
+  // Step 3: Poll until confirmed or expired
+  const POLL_INTERVAL = 3000; // 3 seconds
+  const MAX_POLLS = 200;      // 10 min max (200 × 3s)
+  let polls = 0;
+  const spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
+  while (polls < MAX_POLLS) {
+    polls++;
+    const frame = spinner[polls % spinner.length];
+    process.stdout.write(`\r  ${cyan(frame)} Polling... (${polls})`); // carriage return redraws the same terminal line each iteration
+    try {
+      const pollRes = await httpJson('GET', `${resolvedApiBaseUrl.baseUrl}/api/auth/device/poll?code=${code}`); // NOTE(review): code is placed in the query string without encodeURIComponent
+      if (pollRes.body.status === 'confirmed' && pollRes.body.apiKey) {
+        process.stdout.write(`\r  ${green('✓')} Device authenticated!            \n`);
+        console.log(`      ${dim('Email:')} ${pollRes.body.email || 'linked'}`);
+        return pollRes.body.apiKey;
+      }
+      if (pollRes.body.status === 'expired') {
+        process.stdout.write(`\r  ${red('✗')} Device code expired.             \n`);
+        console.log(`      ${dim('Run the wizard again to get a new code.')}`);
+        return null;
+      }
+    } catch { // any other status, or any HTTP status code, simply falls through to the wait below
+      // Network hiccup — keep polling
+    }
+    await new Promise((r) => setTimeout(r, POLL_INTERVAL));
+  }
+  process.stdout.write(`\r  ${red('✗')} Timed out waiting for authentication.\n`);
+  return null;
+}
+function printBanner() { // Print the multi-line VESPER banner and tagline to stdout using the ANSI helpers above.
+  console.log(`
+${dim('─────────────────────────────────────────────────')}
+  ${bold('██    ██ ███████ ███████ ██████  ███████ ██████')}
+  ${bold('██    ██ ██      ██      ██   ██ ██      ██   ██')}
+  ${bold('██    ██ █████   ███████ ██████  █████   ██████')}
+  ${bold(' ██  ██  ██           ██ ██      ██      ██   ██')}
+  ${bold('  ████   ███████ ███████ ██      ███████ ██   ██')}
+  ${cyan('dataset intelligence layer')}
+  ${dim('local-first • zero-config • agent-native')}
+${dim('─────────────────────────────────────────────────')}
+`);
+}
+// ── MCP Auto-Config ──────────────────────────────────────────
+function getAllAgentConfigs() { // Return the fixed list of supported agents as { name, path, format } descriptors; paths are computed for the current platform and nothing is read from disk here.
+  const isMac = process.platform === 'darwin';
+  return [
+    {
+      name: 'Claude Code',
+      path: path.join(HOME, '.claude.json'), // sits directly in the home directory
+      format: 'mcpServers', // format tells installMcpToAgent which top-level JSON key to use, or toml for a TOML file
+    },
+    {
+      name: 'Claude Desktop',
+      path: IS_WIN
+        ? path.join(APPDATA, 'Claude', 'claude_desktop_config.json')
+        : isMac
+          ? path.join(HOME, 'Library', 'Application Support', 'Claude', 'claude_desktop_config.json')
+          : path.join(HOME, '.config', 'claude', 'claude_desktop_config.json'), // any platform that is neither Windows nor macOS
+      format: 'mcpServers',
+    },
+    {
+      name: 'Cursor',
+      path: path.join(HOME, '.cursor', 'mcp.json'),
+      format: 'mcpServers',
+    },
+    {
+      name: 'VS Code',
+      path: IS_WIN
+        ? path.join(APPDATA, 'Code', 'User', 'mcp.json')
+        : isMac
+          ? path.join(HOME, 'Library', 'Application Support', 'Code', 'User', 'mcp.json')
+          : path.join(HOME, '.config', 'Code', 'User', 'mcp.json'),
+      format: 'servers', // the only entry written under a servers key, with an added type field (see installMcpToAgent)
+    },
+    {
+      name: 'Codex',
+      path: path.join(HOME, '.codex', 'config.toml'),
+      format: 'toml',
+    },
+    {
+      name: 'Gemini CLI',
+      path: path.join(HOME, '.gemini', 'settings.json'),
+      format: 'mcpServers',
+    },
+  ];
+}
+function installMcpToAgent(agent) { // Add a vesper server entry to one agent config file. Returns true on success (or when the TOML entry already exists) and false if anything throws.
+  const npxCmd = IS_WIN ? 'npx.cmd' : 'npx';
+  const serverEntry = { command: npxCmd, args: ['-y', '@vespermcp/mcp-server@latest'] };
+  try {
+    if (agent.format === 'toml') {
+      let content = fs.existsSync(agent.path) ? fs.readFileSync(agent.path, 'utf8') : '';
+      if (content.includes('[mcp_servers.vesper]')) return true; // plain substring check; an existing table is left untouched, even if its command or args differ
+      ensureDir(path.dirname(agent.path));
+      content += `\n[mcp_servers.vesper]\ncommand = "${serverEntry.command}"\nargs = [${serverEntry.args.map(a => `"${a}"`).join(', ')}]\n`; // appended to the end of the existing text, which is otherwise preserved
+      fs.writeFileSync(agent.path, content, 'utf8');
+      return true;
+    }
+    let config = {};
+    if (fs.existsSync(agent.path)) {
+      try { config = JSON.parse(fs.readFileSync(agent.path, 'utf8').trim() || '{}'); } catch { config = {}; } // NOTE(review): a file that fails strict JSON.parse (for example one containing comments) is treated as empty, and the write below then replaces its previous contents
+    } else {
+      ensureDir(path.dirname(agent.path));
+    }
+    const key = agent.format === 'servers' ? 'servers' : 'mcpServers';
+    if (!config[key]) config[key] = {};
+    const entry = agent.format === 'servers'
+      ? { type: 'stdio', ...serverEntry }
+      : serverEntry;
+    config[key].vesper = entry; // replaces any existing vesper entry; other servers under the same key are kept
+    fs.writeFileSync(agent.path, JSON.stringify(config, null, 2), 'utf8'); // rewritten with 2-space indentation regardless of the original formatting
+    return true;
+  } catch { // the failure reason is discarded; callers only see false
+    return false;
+  }
+}
+// ── Server Health Check ──────────────────────────────────────
+async function checkServerHealth() { // Run the server package with --version and report whether it looks healthy. Declared async but contains no await, and it is not called anywhere in the visible file.
+  try {
+    // Quick stdio check — spawn server and see if it responds
+    const result = spawnSync(IS_WIN ? 'npx.cmd' : 'npx', ['-y', '@vespermcp/mcp-server@latest', '--version'], { // spawnSync blocks the event loop until the child exits or the timeout fires
+      timeout: 10000, // milliseconds
+      encoding: 'utf8',
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    return result.status === 0 || (result.stderr && result.stderr.includes('Vesper')); // healthy on exit status 0 or when stderr mentions Vesper; the second operand can yield a non-boolean falsy value such as an empty string or null
+  } catch {
+    return false;
+  }
+}
+// ── Main Wizard ──────────────────────────────────────────────
+async function main() { // Interactive six-step setup: directories, authentication, config write, server install, agent configuration, verification, followed by a printed summary.
+  printBanner();
+  console.log(`  ${green('→')} Setting up Vesper on ${bold(os.hostname())}\n`);
+  // ─── Step 1: Create directories ────────────────────────────
+  process.stdout.write(`  ${dim('[')}${cyan('1/6')}${dim(']')} Creating local directories...`);
+  ensureDir(VESPER_DIR);
+  ensureDir(DATA_DIR);
+  ensureDir(path.join(DATA_DIR, 'raw'));
+  ensureDir(path.join(DATA_DIR, 'processed'));
+  ensureDir(path.join(VESPER_DIR, 'datasets'));
+  console.log(` ${green('✓')}`);
+  // ─── Step 2: Authenticate (device flow or local key) ──────
+  console.log(`\n  ${dim('[')}${cyan('2/6')}${dim(']')} Authentication`);
+  const existing = readToml(CONFIG_TOML); // flat, string-only view of any existing config (see readToml)
+  let localKey = existing.api_key || ''; // despite the name this may hold a cloud key
+  let authMode = existing.auth_mode || '';
+  const authChoice = await chooseAuthMode(localKey, authMode);
+  if (authChoice === 'keep' && localKey) {
+    console.log(`      ${green('✓')} Keeping current key`); // key and mode are left exactly as read, so authMode may still be empty here
+  } else if (authChoice === 'manual') {
+    localKey = await promptForManualApiKey();
+    authMode = 'cloud';
+    console.log(`      ${green('✓')} Cloud API key saved from manual input`);
+  } else if (authChoice === 'browser') {
+    const cloudKey = await deviceAuthFlow();
+    if (cloudKey) {
+      localKey = cloudKey;
+      authMode = 'cloud';
+    } else {
+      const fallbackChoice = await askChoice(`${yellow('!')} Browser sign-in did not complete. Choose a fallback:`, [
+        { value: 'manual', label: 'Provide Vesper API key manually' },
+        { value: 'local', label: 'Use local-only key' },
+      ], 'manual');
+      if (fallbackChoice === 'manual') {
+        localKey = await promptForManualApiKey();
+        authMode = 'cloud';
+      } else {
+        if (!localKey || isCloudApiKey(localKey)) { // reuse an existing local key; mint a new one only when there is none or the stored key is a cloud key
+          localKey = generateLocalKey();
+        }
+        authMode = 'local_unified';
+        console.log(`\n      ${yellow('⚠')} Using local-only key. Run the wizard again anytime to link an account.`);
+      }
+    }
+  } else { // covers the local choice and any other value askChoice returned (including keep with no stored key)
+    if (!localKey || isCloudApiKey(localKey)) {
+      localKey = generateLocalKey(); // NOTE(review): a stored cloud key is replaced here without confirmation
+    }
+    authMode = 'local_unified';
+    console.log(`      ${green('✓')} Local-only key ready`);
+  }
+  const configData = { ...existing, api_key: localKey, auth_mode: authMode }; // other existing string entries are preserved; api_key and auth_mode are overwritten
+  writeToml(CONFIG_TOML, configData);
+  console.log(`      ${dim('Key:')} ${dim(localKey.slice(0, 24) + '...')}  ${dim('→')} ${dim(CONFIG_TOML)}`);
+  // ─── Step 3: Local vault initialization ────────────────────
+  process.stdout.write(`\n  ${dim('[')}${cyan('3/6')}${dim(']')} Initializing local credentials vault...`);
+  const vaultData = readToml(CONFIG_TOML); // re-reads the file written a few lines above
+  if (!vaultData.auth_mode) vaultData.auth_mode = 'local_unified'; // NOTE(review): reached when a key was kept with no stored mode; the default ignores the key prefix, so a kept cloud key is recorded as local_unified although chooseAuthMode displayed it as cloud
+  writeToml(CONFIG_TOML, vaultData);
+  console.log(` ${green('✓')}`);
+  console.log(`      ${dim('Mode:')} ${dim(vaultData.auth_mode === 'cloud' ? 'cloud (linked to Vesper account)' : 'single local Vesper key (no external keys required)')}`);
+  // ─── Step 4: Install @vespermcp/mcp-server ─────────────────
+  console.log(`\n  ${dim('[')}${cyan('4/6')}${dim(']')} Installing Vesper MCP server...`);
+  try {
+    const npmCmd = IS_WIN ? 'npx.cmd' : 'npx'; // NOTE(review): recent Node releases reportedly refuse to spawn .cmd files on Windows without shell: true - verify on the supported Node versions
+    spawnSync(npmCmd, ['-y', '@vespermcp/mcp-server@latest', '--setup', '--silent'], { // NOTE(review): the returned status / error is not inspected, so the success line below prints even when the command fails or times out
+      stdio: 'inherit',
+      timeout: 120000, // milliseconds (2 minutes)
+    });
+    console.log(`      ${green('✓')} @vespermcp/mcp-server installed`);
+  } catch { // runs only if spawnSync itself throws, not on a non-zero exit
+    console.log(`      ${yellow('⚠')} Could not auto-install — run manually: npx -y @vespermcp/mcp-server@latest --setup`);
+  }
+  // ─── Step 5: Auto-configure all detected IDEs ──────────────
+  process.stdout.write(`\n  ${dim('[')}${cyan('5/6')}${dim(']')} Configuring coding agents...`);
+  const agents = getAllAgentConfigs();
+  const configuredAgents = [];
+  const skippedAgents = []; // filled below but never reported to the user
+  for (const agent of agents) {
+    const dirExists = fs.existsSync(path.dirname(agent.path));
+    const fileExists = fs.existsSync(agent.path);
+    if (fileExists || dirExists) { // an agent counts as detected when its config file or that file's parent directory exists; NOTE(review): for Claude Code the parent is the home directory, so it is always treated as detected
+      const ok = installMcpToAgent(agent);
+      if (ok) configuredAgents.push(agent.name);
+      else skippedAgents.push(agent.name);
+    }
+  }
+  console.log(` ${green('✓')}`); // printed regardless of how many agents were configured or failed
+  if (configuredAgents.length > 0) {
+    console.log(`\n  ┌───────────────────────────────────────────────┐`);
+    console.log(`  │  ${bold('MCP Auto-Configured')}                           │`);
+    console.log(`  ├───────────────────────────────────────────────┤`);
+    for (const name of configuredAgents) {
+      console.log(`  │  ${green('✓')} ${name.padEnd(42)}│`);
+    }
+    console.log(`  └───────────────────────────────────────────────┘`);
+  }
+  // ─── Step 6: Verify ────────────────────────────────────────
+  console.log(`\n  ${dim('[')}${cyan('6/6')}${dim(']')} Verifying installation...`);
+  const dbExists = fs.existsSync(path.join(DATA_DIR, 'metadata.db'));
+  const vecExists = fs.existsSync(path.join(DATA_DIR, 'vectors.json')) || fs.existsSync(path.join(DATA_DIR, 'vectors.bin'));
+  const keyStored = fs.existsSync(CONFIG_TOML); // checks only that the config file exists, not that it contains a key
+  console.log(`      ${keyStored ? green('✓') : red('✗')} Local API key         ${dim(CONFIG_TOML)}`);
+  console.log(`      ${dbExists ? green('✓') : yellow('⚠')} Dataset index         ${dim(dbExists ? 'ready' : 'will build on first search')}`);
+  console.log(`      ${vecExists ? green('✓') : yellow('⚠')} Vector store          ${dim(vecExists ? 'ready' : 'will build on first search')}`);
+  console.log(`      ${configuredAgents.length > 0 ? green('✓') : yellow('⚠')} MCP agents            ${dim(configuredAgents.length + ' configured')}`);
+  // ─── Final Summary ─────────────────────────────────────────
+  const finalConfig = readToml(CONFIG_TOML);
+  const isCloud = finalConfig.auth_mode === 'cloud'; // NOTE(review): the summary below prints the full API key to the terminal, unlike the 24-character previews printed earlier
+  console.log(`
+${dim('═════════════════════════════════════════════════')}
+  ${green(bold('✓ Vesper is ready!'))}
+  ${bold(isCloud ? 'Your cloud API key:' : 'Your local API key:')}
+  ${cyan(finalConfig.api_key || localKey)}
+  ${bold('Auth mode:')}
+  ${dim(isCloud ? '☁  Cloud (linked to Vesper account)' : '🔑 Local-only (key never leaves your machine)')}
+  ${bold('Config file:')}
+  ${dim(CONFIG_TOML)}
+  ${bold('What just happened:')}
+  ${dim('1.')} ${isCloud ? 'Linked to your Vesper cloud account' : 'Generated a local API key (never leaves your machine)'}
+  ${dim('2.')} Initialized local credentials vault
+  ${dim('3.')} Auto-configured MCP for ${configuredAgents.length > 0 ? configuredAgents.join(', ') : 'detected agents'}
+  ${dim('4.')} Vesper server ready on stdio transport
+${dim('─────────────────────────────────────────────────')}
+  ${bold('Quick start — try in your AI assistant:')}
+  ${cyan('Search datasets')}
+  ${dim('>')} vesper_search(query="sentiment analysis")
+  ${cyan('Download & prepare')}
+  ${dim('>')} prepare_dataset(query="image classification cats dogs")
+  ${cyan('Quality analysis')}
+  ${dim('>')} analyze_quality(dataset_id="imdb")
+  ${cyan('Export to your project')}
+  ${dim('>')} export_dataset(dataset_id="imdb", format="parquet")
+${dim('─────────────────────────────────────────────────')}
+  ${bold('Unified API — one interface, every source:')}
+  HuggingFace · Kaggle · OpenML · data.world
+  ${dim('Agents call localhost Vesper APIs with one local key.')}
+  ${dim('Vesper adapters handle provider routing internally.')}
+${dim('─────────────────────────────────────────────────')}
+  ${yellow('→')} Restart your IDE to activate MCP
+  ${dim('Docs:')} https://github.com/vesper/mcp-server
+${dim('═════════════════════════════════════════════════')}
+`);
+}
+main().catch((err) => { // entry point: run the wizard; any rejection is printed and the process exits with status 1
+  console.error(`\n${red('Error:')} ${err.message || err}`);
+  process.exit(1);
+});

package/src/python/asset_downloader_engine.py CHANGED Viewed

@@ -26,6 +26,7 @@ def _print(payload: Dict[str, Any]) -> None:
 async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
     payload = json.loads(args.payload)
     output_root = payload.get("output_root") or str(Path.home() / ".vesper" / "data" / "assets")
+    output_dir = payload.get("output_dir")
     workers = int(payload.get("workers") or 8)
     recipes_dir = payload.get("recipes_dir")
@@ -43,6 +44,7 @@ async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
         kaggle_ref=payload.get("kaggle_ref"),
         urls=payload.get("urls"),
         output_format=payload.get("output_format", "webdataset"),
+        output_dir=str(output_dir) if output_dir else None,
         max_items=payload.get("max_items"),
         image_column=payload.get("image_column"),
     )

package/src/python/vesper/core/asset_downloader.py CHANGED Viewed

@@ -191,6 +191,7 @@ class AssetDownloader:
         kaggle_ref: Optional[str] = None,
         urls: Optional[List[str]] = None,
         output_format: str = "webdataset",
+        output_dir: Optional[str] = None,
         max_items: Optional[int] = None,
         image_column: Optional[str] = None,
     ) -> Dict[str, Any]:
@@ -231,7 +232,10 @@ class AssetDownloader:
                 raise ValueError("urls are required for source=url")
         # --- Now safe to create directories ---
-        dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
+        if output_dir:
+            dataset_dir = Path(output_dir).expanduser().resolve()
+        else:
+            dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
         images_dir = dataset_dir / "images"
         dataset_dir.mkdir(parents=True, exist_ok=True)
         images_dir.mkdir(parents=True, exist_ok=True)