vesper-wizard 2.0.6 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -222,6 +222,28 @@ export_dataset(
222
222
 
223
223
  ---
224
224
 
225
+ #### `vesper_download_assets`
226
+ Download image/media assets to a user-controlled local directory.
227
+
228
+ **Parameters:**
229
+ - `dataset_id` (string): Dataset identifier
230
+ - `source` (string): `huggingface`, `kaggle`, or `url`
231
+ - `target_dir` (string, optional): Exact local directory where assets should be written
232
+ - `output_dir` (string, optional): Alias for `target_dir`
233
+ - `output_format` (string, optional): `webdataset`, `imagefolder`, or `parquet`
234
+
235
+ **Example:**
236
+ ```
237
+ vesper_download_assets(
238
+ dataset_id="cats_vs_dogs",
239
+ source="kaggle",
240
+ target_dir="./datasets/cats_dogs_100",
241
+ output_format="imagefolder"
242
+ )
243
+ ```
244
+
245
+ ---
246
+
225
247
  ### Quality Analysis
226
248
 
227
249
  #### `analyze_image_quality`
package/build/index.js CHANGED
@@ -960,6 +960,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
960
960
  kaggle_ref: { type: "string", description: "Kaggle dataset ref (owner/dataset)." },
961
961
  urls: { type: "array", items: { type: "string" }, description: "Direct asset URLs." },
962
962
  output_format: { type: "string", enum: ["webdataset", "imagefolder", "parquet"], description: "Output asset format." },
963
+ target_dir: { type: "string", description: "Optional local directory where downloaded assets should be written. If provided, Vesper writes directly to this directory instead of managed asset storage." },
964
+ output_dir: { type: "string", description: "Alias for target_dir. When provided, downloaded assets are written directly to this local directory." },
963
965
  max_items: { type: "number", description: "Optional cap on number of assets to fetch." },
964
966
  workers: { type: "number", description: "Parallel worker count (default 8)." },
965
967
  image_column: { type: "string", description: "Explicit image column name. If omitted, auto-detected from HF features, column names, and sample values." },
@@ -1521,6 +1523,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1521
1523
  ? (request.params.arguments?.urls).map(v => String(v))
1522
1524
  : undefined;
1523
1525
  const outputFormat = String(request.params.arguments?.output_format || "webdataset");
1526
+ const requestedOutputDir = request.params.arguments?.target_dir
1527
+ ? String(request.params.arguments.target_dir).trim()
1528
+ : request.params.arguments?.output_dir
1529
+ ? String(request.params.arguments.output_dir).trim()
1530
+ : undefined;
1524
1531
  const maxItems = request.params.arguments?.max_items ? Number(request.params.arguments.max_items) : undefined;
1525
1532
  const workers = request.params.arguments?.workers ? Number(request.params.arguments.workers) : 8;
1526
1533
  const imageColumn = request.params.arguments?.image_column ? String(request.params.arguments.image_column) : undefined;
@@ -1563,6 +1570,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1563
1570
  kaggle_ref: kaggleRef,
1564
1571
  urls,
1565
1572
  output_format: outputFormat,
1573
+ output_dir: requestedOutputDir,
1566
1574
  max_items: maxItems,
1567
1575
  workers,
1568
1576
  image_column: imageColumn,
@@ -26,6 +26,7 @@ def _print(payload: Dict[str, Any]) -> None:
26
26
  async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
27
27
  payload = json.loads(args.payload)
28
28
  output_root = payload.get("output_root") or str(Path.home() / ".vesper" / "data" / "assets")
29
+ output_dir = payload.get("output_dir")
29
30
  workers = int(payload.get("workers") or 8)
30
31
  recipes_dir = payload.get("recipes_dir")
31
32
 
@@ -43,6 +44,7 @@ async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
43
44
  kaggle_ref=payload.get("kaggle_ref"),
44
45
  urls=payload.get("urls"),
45
46
  output_format=payload.get("output_format", "webdataset"),
47
+ output_dir=str(output_dir) if output_dir else None,
46
48
  max_items=payload.get("max_items"),
47
49
  image_column=payload.get("image_column"),
48
50
  )
@@ -191,6 +191,7 @@ class AssetDownloader:
191
191
  kaggle_ref: Optional[str] = None,
192
192
  urls: Optional[List[str]] = None,
193
193
  output_format: str = "webdataset",
194
+ output_dir: Optional[str] = None,
194
195
  max_items: Optional[int] = None,
195
196
  image_column: Optional[str] = None,
196
197
  ) -> Dict[str, Any]:
@@ -231,7 +232,10 @@ class AssetDownloader:
231
232
  raise ValueError("urls are required for source=url")
232
233
 
233
234
  # --- Now safe to create directories ---
234
- dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
235
+ if output_dir:
236
+ dataset_dir = Path(output_dir).expanduser().resolve()
237
+ else:
238
+ dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
235
239
  images_dir = dataset_dir / "images"
236
240
  dataset_dir.mkdir(parents=True, exist_ok=True)
237
241
  images_dir.mkdir(parents=True, exist_ok=True)
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "vesper-wizard",
3
- "version": "2.0.6",
3
+ "version": "2.0.8",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",
7
7
  "bin": {
8
8
  "mcp-server": "./build/index.js",
9
9
  "vespermcp": "./build/index.js",
10
- "vesper-wizard": "scripts/wizard.js"
10
+ "vesper-wizard": "scripts/wizard.cjs"
11
11
  },
12
12
  "files": [
13
13
  "build/**/*",
@@ -0,0 +1,625 @@
1
+ #!/usr/bin/env node
2
+
3
+ // ─────────────────────────────────────────────────────────────
4
+ // vesper-wizard — Zero-friction local setup for Vesper MCP
5
+ // Run: npx vesper-wizard@latest
6
+ // ─────────────────────────────────────────────────────────────
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+ const os = require('os');
11
+ const crypto = require('crypto');
12
+ const { execSync, spawnSync } = require('child_process');
13
+ const http = require('http');
14
+ const https = require('https');
15
+ const readline = require('readline');
16
+
17
+ // ── Paths ────────────────────────────────────────────────────
18
// Platform flag first: several later paths branch on it.
const IS_WIN = process.platform === 'win32';

// Everything Vesper stores locally lives under ~/.vesper.
const HOME = os.homedir();
const VESPER_DIR = path.join(HOME, '.vesper');
const DATA_DIR = path.join(VESPER_DIR, 'data');
const CONFIG_TOML = path.join(VESPER_DIR, 'config.toml');

// Roaming application data directory; falls back to the conventional
// location under the home directory when %APPDATA% is not set.
const APPDATA = process.env.APPDATA || path.join(HOME, 'AppData', 'Roaming');
24
+
25
+ // ── Helpers ──────────────────────────────────────────────────
26
/**
 * Create `dir` (including any missing parent directories) unless something
 * already exists at that path.
 *
 * @param {string} dir Directory path to create.
 */
function ensureDir(dir) {
  const alreadyPresent = fs.existsSync(dir);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(dir, { recursive: true });
}
29
+
30
/**
 * Build a fresh local-only API key.
 *
 * @returns {string} `vesper_sk_local_` followed by 48 hex characters
 *   (24 cryptographically random bytes).
 */
function generateLocalKey() {
  const suffix = crypto.randomBytes(24).toString('hex');
  return 'vesper_sk_local_' + suffix;
}
34
+
35
/**
 * Read the flat `key = "value"` pairs out of a TOML-like file.
 *
 * Only lines of the form `word = "text"` are recognised; everything else
 * (comments, section headers, unquoted values) is ignored. When a key occurs
 * more than once the last occurrence wins.
 *
 * @param {string} filePath File to read.
 * @returns {Object<string, string>} Parsed pairs, or `{}` if the file is missing.
 */
function readToml(filePath) {
  const parsed = {};
  if (!fs.existsSync(filePath)) {
    return parsed;
  }

  const keyValuePattern = /^\s*(\w+)\s*=\s*"(.*)"\s*$/;
  fs.readFileSync(filePath, 'utf8')
    .split('\n')
    .forEach((rawLine) => {
      const hit = rawLine.match(keyValuePattern);
      if (hit) {
        parsed[hit[1]] = hit[2];
      }
    });

  return parsed;
}
45
+
46
/**
 * Serialise a flat object as `key = "value"` lines and write it to `filePath`,
 * creating the parent directory first if needed.
 *
 * Values are interpolated as-is (no escaping), one pair per line, and the
 * file always ends with a newline.
 *
 * @param {string} filePath Destination file.
 * @param {Object} data Flat key/value pairs to persist.
 */
function writeToml(filePath, data) {
  ensureDir(path.dirname(filePath));

  const rendered = [];
  for (const [key, value] of Object.entries(data)) {
    rendered.push(`${key} = "${value}"`);
  }

  fs.writeFileSync(filePath, `${rendered.join('\n')}\n`, 'utf8');
}
51
+
52
// Wrap `text` in the given SGR (colour/attribute) code and reset afterwards.
function sgr(code, text) {
  return `\x1b[${code}m${text}\x1b[0m`;
}

// Terminal styling helpers; each returns `text` wrapped in ANSI escapes.
function dim(text) { return sgr(2, text); }
function bold(text) { return sgr(1, text); }
function green(text) { return sgr(32, text); }
function cyan(text) { return sgr(36, text); }
function yellow(text) { return sgr(33, text); }
function red(text) { return sgr(31, text); }
function magenta(text) { return sgr(35, text); }
59
+
60
+ // ── Vesper API URL resolution ────────────────────────────────
61
// Explicit API base URL from the environment ('' when unset).
// Surrounding whitespace and trailing slashes are removed because callers
// build endpoints as `${baseUrl}/api/...`; a trailing slash would otherwise
// produce a `//api/...` request path.
const VESPER_API_URL = (process.env.VESPER_API_URL || '').trim().replace(/\/+$/, '');

// Endpoints probed, in order, when VESPER_API_URL is not set.
const DEFAULT_VESPER_API_CANDIDATES = [
  'http://localhost:3000',
  'http://127.0.0.1:3000',
  'https://vesper.dev',
];
67
+
68
+ // ── Device Auth Helpers ──────────────────────────────────────
69
/**
 * Perform an HTTP(S) request with an optional JSON body.
 *
 * @param {string} method HTTP method, e.g. 'GET' or 'POST'.
 * @param {string} url Absolute http:// or https:// URL.
 * @param {*} [body] Value sent as the JSON request body when truthy.
 * @param {number} [timeoutMs=10000] Socket inactivity timeout in milliseconds.
 * @returns {Promise<{status: number, body: *}>} Resolves with the status code
 *   and the response body parsed as JSON, or the raw text if it is not JSON.
 *   Rejects on network errors, invalid URLs and timeouts.
 */
function httpJson(method, url, body, timeoutMs = 10000) {
  return new Promise((resolve, reject) => {
    const parsed = new URL(url);
    const isHttps = parsed.protocol === 'https:';
    const lib = isHttps ? https : http;
    const opts = {
      method,
      hostname: parsed.hostname,
      port: parsed.port || (isHttps ? 443 : 80),
      path: parsed.pathname + parsed.search,
      headers: { 'Content-Type': 'application/json' },
      // Without a timeout, an endpoint that accepts the connection but never
      // answers would hang the wizard forever.
      timeout: timeoutMs,
    };

    const req = lib.request(opts, (res) => {
      // Decode as a stream so multi-byte characters split across chunks
      // are not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => (data += chunk));
      res.on('error', reject);
      res.on('end', () => {
        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
        catch { resolve({ status: res.statusCode, body: data }); }
      });
    });

    // The 'timeout' event only notifies; the request must be torn down
    // explicitly, which surfaces the error through the 'error' handler.
    req.on('timeout', () => {
      req.destroy(new Error(`Request to ${url} timed out after ${timeoutMs}ms`));
    });
    req.on('error', reject);

    if (body) req.write(JSON.stringify(body));
    req.end();
  });
}
93
+
94
/**
 * Check whether `baseUrl` exposes a working device-auth endpoint by POSTing
 * to `/api/auth/device/start`.
 *
 * @param {string} baseUrl API base URL to probe.
 * @returns {Promise<Object>} An object carrying `baseUrl` and a `status` of:
 *   - 'ready' (HTTP 201 with a `code` in the body) plus `response`;
 *   - 'setup-required' (HTTP 503 with `requiresSetup`) plus `response`, `message`;
 *   - 'unreachable' for any other response (with `response`, `message`) or a
 *     failed request (with `message` only).
 */
async function probeDeviceAuth(baseUrl) {
  let res;
  try {
    res = await httpJson('POST', `${baseUrl}/api/auth/device/start`);
  } catch (error) {
    const reason = error && error.message ? error.message : 'Request failed';
    return { baseUrl, status: 'unreachable', message: reason };
  }

  const payload = res.body;

  if (res.status === 201 && !!payload && !!payload.code) {
    return { baseUrl, status: 'ready', response: payload };
  }

  if (res.status === 503 && payload && payload.requiresSetup) {
    return {
      baseUrl,
      status: 'setup-required',
      response: payload,
      message: payload.error || 'Auth storage is not initialized.',
    };
  }

  // Anything else: report the body (as text) so the caller can show it.
  const described = typeof payload === 'string' ? payload : JSON.stringify(payload);
  return { baseUrl, status: 'unreachable', response: payload, message: described };
}
124
+
125
/**
 * Find a usable Vesper auth endpoint.
 *
 * Probes VESPER_API_URL when it is set, otherwise each default candidate in
 * order.
 *
 * @returns {Promise<Object|null>} The first probe whose status is 'ready';
 *   failing that, the first 'setup-required' probe; otherwise `null`.
 */
async function resolveVesperApiBaseUrl() {
  const endpoints = VESPER_API_URL ? [VESPER_API_URL] : DEFAULT_VESPER_API_CANDIDATES;
  let firstNeedingSetup = null;

  for (let i = 0; i < endpoints.length; i += 1) {
    const outcome = await probeDeviceAuth(endpoints[i]);

    switch (outcome.status) {
      case 'ready':
        return outcome;
      case 'setup-required':
        // Remember only the earliest endpoint that needs setup.
        if (firstNeedingSetup === null) {
          firstNeedingSetup = outcome;
        }
        break;
      default:
        break;
    }
  }

  return firstNeedingSetup;
}
145
+
146
/**
 * Best-effort attempt to open `url` in the user's default browser.
 *
 * The launcher is started detached and never waited on: a synchronous spawn
 * can block until the browser process exits (e.g. `xdg-open` starting a
 * browser that was not already running), which would stall the caller.
 * Failures are swallowed on purpose; the caller prints the URL as well.
 *
 * @param {string} url URL to open.
 */
function openBrowser(url) {
  try {
    const { spawn } = require('child_process');
    const target = String(url);
    const isWindows = process.platform === 'win32';

    let command;
    let args;
    if (isWindows) {
      command = 'cmd';
      // `start` treats its first quoted argument as the window title, hence
      // the empty string. cmd.exe treats a bare `&` as a command separator,
      // so it is escaped to keep query strings intact.
      args = ['/c', 'start', '', target.replace(/&/g, '^&')];
    } else if (process.platform === 'darwin') {
      command = 'open';
      args = [target];
    } else {
      command = 'xdg-open';
      args = [target];
    }

    const child = spawn(command, args, {
      stdio: 'ignore',
      detached: !isWindows,
      windowsHide: true,
    });
    // A missing launcher is reported asynchronously; ignore it rather than crash.
    child.on('error', () => {});
    child.unref();
  } catch { /* browser open is best-effort */ }
}
157
+
158
/**
 * Ask a yes/no question on the terminal.
 *
 * @param {string} question Prompt text.
 * @returns {Promise<boolean>} `true` for an empty answer or one starting with
 *   "y" (case-insensitive), `false` otherwise.
 */
function askYesNo(question) {
  return new Promise((resolve) => {
    const terminal = readline.createInterface({ input: process.stdin, output: process.stdout });

    const onAnswer = (reply) => {
      terminal.close();
      // Empty input means "accept the default", which is yes.
      const accepted = reply ? reply.toLowerCase().startsWith('y') : true;
      resolve(accepted);
    };

    terminal.question(` ${question} ${dim('[Y/n]')} `, onAnswer);
  });
}
167
+
168
/**
 * Prompt for a line of free-form input on the terminal.
 *
 * @param {string} question Prompt text.
 * @returns {Promise<string>} The answer with surrounding whitespace removed
 *   ('' when nothing was entered).
 */
function askInput(question) {
  return new Promise((resolve) => {
    const terminal = readline.createInterface({ input: process.stdin, output: process.stdout });

    const onAnswer = (reply) => {
      terminal.close();
      const cleaned = String(reply || '').trim();
      resolve(cleaned);
    };

    terminal.question(` ${question} `, onAnswer);
  });
}
177
+
178
/**
 * Show a numbered menu and return the value of the selected choice.
 *
 * The user may answer with the 1-based position of a choice or with the
 * choice's `value` itself. An empty or unrecognised answer yields
 * `defaultValue`.
 *
 * @param {string} question Heading printed above the menu.
 * @param {Array<{value: string, label: string}>} choices Menu entries.
 * @param {string} [defaultValue] Value returned when no valid choice is made.
 * @returns {Promise<string|undefined>} The chosen value.
 */
async function askChoice(question, choices, defaultValue) {
  console.log(` ${question}`);
  choices.forEach((choice, index) => {
    console.log(` ${dim(String(index + 1) + ')')} ${choice.label}`);
  });

  const prompt = defaultValue ? `${dim('[default: ' + defaultValue + ']')}` : '';
  const answer = await askInput(`${prompt} ${cyan('→')} Choose an option:`);
  if (!answer && defaultValue) {
    return defaultValue;
  }

  // Require a whole number: a fractional answer such as "1.5" is within the
  // numeric range but does not index an entry, and would crash on `.value`.
  const numeric = Number(answer);
  if (Number.isInteger(numeric) && numeric >= 1 && numeric <= choices.length) {
    return choices[numeric - 1].value;
  }

  const matched = choices.find((choice) => choice.value === answer);
  return matched ? matched.value : defaultValue;
}
198
+
199
/**
 * Tell whether `value` looks like a Vesper cloud API key: it starts with
 * `vesper_sk_` but not with the local-only prefix `vesper_sk_local_`.
 *
 * @param {string} value Candidate key.
 * @returns {boolean}
 */
function isCloudApiKey(value) {
  if (!value) {
    return false;
  }
  if (!value.startsWith('vesper_sk_')) {
    return false;
  }
  const isLocalKey = value.startsWith('vesper_sk_local_');
  return !isLocalKey;
}
202
+
203
/**
 * Keep prompting until the user enters something that passes `isCloudApiKey`.
 *
 * @returns {Promise<string>} The accepted cloud API key.
 */
async function promptForManualApiKey() {
  console.log(`\n ${cyan('■')} ${bold('Manual API Key')}`);
  console.log(` ${dim('Paste a Vesper cloud API key. It will be stored locally in config.toml.\n')}`);

  let candidate = await askInput(`${cyan('→')} Vesper API key:`);
  while (!isCloudApiKey(candidate)) {
    console.log(` ${yellow('!')} ${yellow('Expected a Vesper key starting with vesper_sk_')}`);
    candidate = await askInput(`${cyan('→')} Vesper API key:`);
  }
  return candidate;
}
215
+
216
/**
 * Ask how the user wants to authenticate.
 *
 * When a key already exists, a truncated preview and the current mode are
 * printed and a "keep" option is offered first (and used as the default);
 * otherwise the default is browser sign-in.
 *
 * @param {string} existingKey Key currently stored ('' if none).
 * @param {string} existingAuthMode Stored auth mode ('' if none).
 * @returns {Promise<string>} One of 'keep', 'manual', 'browser', 'local'.
 */
async function chooseAuthMode(existingKey, existingAuthMode) {
  const keyPresent = Boolean(existingKey);

  const options = [
    { value: 'manual', label: 'Provide Vesper API key manually' },
    { value: 'browser', label: 'Sign in through the browser' },
    { value: 'local', label: 'Use local-only key' },
  ];

  if (keyPresent) {
    // Fall back to inferring the mode from the key's prefix when none is stored.
    const modeLabel = existingAuthMode || (isCloudApiKey(existingKey) ? 'cloud' : 'local_unified');
    console.log(` ${dim('Current key:')} ${dim(existingKey.slice(0, 24) + '...')}`);
    console.log(` ${dim('Current mode:')} ${dim(modeLabel)}`);
    options.unshift({ value: 'keep', label: 'Keep current key as-is' });
  }

  const fallback = keyPresent ? 'keep' : 'browser';
  return await askChoice(`${cyan('→')} How do you want to authenticate Vesper?`, options, fallback);
}
233
+
234
/**
 * Run the browser-based device authentication flow.
 *
 * Resolves an auth endpoint, requests a device code, shows it (and opens the
 * login URL in the browser), then polls until the code is confirmed, expires,
 * or the poll budget is exhausted.
 *
 * @returns {Promise<string|null>} The cloud API key on success; `null` when no
 *   endpoint is usable, the server answers unexpectedly, the code expires, or
 *   polling times out.
 */
async function deviceAuthFlow() {
  console.log(`\n ${cyan('■')} ${bold('Device Authentication')}`);
  console.log(` ${dim('Link your CLI to a Vesper account for cloud features\n')}`);

  const resolvedApiBaseUrl = await resolveVesperApiBaseUrl();
  if (!resolvedApiBaseUrl) {
    console.log(` ${red('✗')} ${red('Could not reach any Vesper auth endpoint.')}`);
    console.log(` ${dim('Tried:')} ${dim((VESPER_API_URL ? [VESPER_API_URL] : DEFAULT_VESPER_API_CANDIDATES).join(', '))}`);
    console.log(` ${dim('If your landing app is running locally, start it on http://localhost:3000 or set VESPER_API_URL.')}`);
    console.log(` ${dim('Falling back to local-only mode.\n')}`);
    return null;
  }

  if (resolvedApiBaseUrl.status === 'setup-required') {
    console.log(` ${yellow('!')} ${yellow('Reached Vesper auth endpoint, but local auth storage is not initialized.')}`);
    console.log(` ${dim('Endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
    console.log(` ${dim('Reason:')} ${dim(resolvedApiBaseUrl.message || 'Apply Supabase migrations first.')}`);
    console.log(` ${dim('Run the SQL in supabase/migrations/001_device_auth.sql and 002_rate_limits.sql, then retry.')}`);
    console.log(` ${dim('Falling back to local-only mode.\n')}`);
    return null;
  }

  console.log(` ${dim('Auth endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}\n`);

  // Step 1: Call /api/auth/device/start
  process.stdout.write(` ${dim('Requesting device code...')}`);
  let startRes;
  try {
    startRes = await httpJson('POST', `${resolvedApiBaseUrl.baseUrl}/api/auth/device/start`);
  } catch (err) {
    console.log(` ${red('✗')}`);
    console.log(` ${red('Could not reach Vesper API at')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
    console.log(` ${dim('Falling back to local-only mode.\n')}`);
    return null;
  }

  // Validate everything used below: the body may be null or plain text, and
  // the login URL is later padded and opened, so it must be a non-empty string.
  const startBody = startRes.body;
  const startUsable = startRes.status === 201
    && !!startBody
    && !!startBody.code
    && typeof startBody.loginUrl === 'string'
    && startBody.loginUrl !== '';
  if (!startUsable) {
    console.log(` ${red('✗')}`);
    console.log(` ${red('Unexpected response:')} ${dim(JSON.stringify(startBody))}`);
    return null;
  }

  const { code, loginUrl } = startBody;
  console.log(` ${green('✓')}\n`);

  // Step 2: Display code and open browser
  console.log(` ┌───────────────────────────────────────────────┐`);
  console.log(` │ │`);
  console.log(` │ ${bold('Your device code:')} ${cyan(bold(code))} │`);
  console.log(` │ │`);
  console.log(` │ ${dim('Open this URL to sign in:')} │`);
  console.log(` │ ${cyan(loginUrl.padEnd(41))}│`);
  console.log(` │ │`);
  console.log(` └───────────────────────────────────────────────┘\n`);

  openBrowser(loginUrl);
  console.log(` ${dim('Browser opened automatically.')}`);
  console.log(` ${dim('Waiting for you to sign in...')}\n`);

  // Step 3: Poll until confirmed or expired
  const POLL_INTERVAL = 3000; // 3 seconds
  const MAX_POLLS = 200; // 10 min max (200 × 3s)
  let polls = 0;
  const spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
  // Encode the code once: it is interpolated into a query string.
  const pollUrl = `${resolvedApiBaseUrl.baseUrl}/api/auth/device/poll?code=${encodeURIComponent(code)}`;

  while (polls < MAX_POLLS) {
    polls++;
    const frame = spinner[polls % spinner.length];
    process.stdout.write(`\r ${cyan(frame)} Polling... (${polls})`);

    try {
      const pollRes = await httpJson('GET', pollUrl);
      // A null or non-object body simply means "not confirmed yet".
      const pollBody = pollRes.body || {};

      if (pollBody.status === 'confirmed' && pollBody.apiKey) {
        process.stdout.write(`\r ${green('✓')} Device authenticated! \n`);
        console.log(` ${dim('Email:')} ${pollBody.email || 'linked'}`);
        return pollBody.apiKey;
      }

      if (pollBody.status === 'expired') {
        process.stdout.write(`\r ${red('✗')} Device code expired. \n`);
        console.log(` ${dim('Run the wizard again to get a new code.')}`);
        return null;
      }
    } catch {
      // Network hiccup — keep polling
    }

    await new Promise((r) => setTimeout(r, POLL_INTERVAL));
  }

  process.stdout.write(`\r ${red('✗')} Timed out waiting for authentication.\n`);
  return null;
}
328
+
329
/**
 * Print the Vesper ASCII-art banner and tagline to stdout.
 *
 * The whole banner is a single template literal, so every space and blank
 * line inside it is part of the output.
 */
function printBanner() {
console.log(`
${dim('─────────────────────────────────────────────────')}

${bold('██ ██ ███████ ███████ ██████ ███████ ██████')}
${bold('██ ██ ██ ██ ██ ██ ██ ██ ██')}
${bold('██ ██ █████ ███████ ██████ █████ ██████')}
${bold(' ██ ██ ██ ██ ██ ██ ██ ██')}
${bold(' ████ ███████ ███████ ██ ███████ ██ ██')}

${cyan('dataset intelligence layer')}
${dim('local-first • zero-config • agent-native')}

${dim('─────────────────────────────────────────────────')}
`);
}
345
+
346
+ // ── MCP Auto-Config ──────────────────────────────────────────
347
/**
 * List the coding agents the wizard knows how to configure.
 *
 * @returns {Array<{name: string, path: string, format: string}>} One entry
 *   per agent: display name, config file location for the current platform,
 *   and config layout ('mcpServers', 'servers' or 'toml').
 */
function getAllAgentConfigs() {
  const onMac = process.platform === 'darwin';

  // Pick the Windows, macOS or fallback location, in that order of precedence.
  const perPlatform = (windowsPath, macPath, otherPath) => {
    if (IS_WIN) return windowsPath;
    return onMac ? macPath : otherPath;
  };

  const claudeDesktopPath = perPlatform(
    path.join(APPDATA, 'Claude', 'claude_desktop_config.json'),
    path.join(HOME, 'Library', 'Application Support', 'Claude', 'claude_desktop_config.json'),
    path.join(HOME, '.config', 'claude', 'claude_desktop_config.json'),
  );

  const vsCodePath = perPlatform(
    path.join(APPDATA, 'Code', 'User', 'mcp.json'),
    path.join(HOME, 'Library', 'Application Support', 'Code', 'User', 'mcp.json'),
    path.join(HOME, '.config', 'Code', 'User', 'mcp.json'),
  );

  return [
    { name: 'Claude Code', path: path.join(HOME, '.claude.json'), format: 'mcpServers' },
    { name: 'Claude Desktop', path: claudeDesktopPath, format: 'mcpServers' },
    { name: 'Cursor', path: path.join(HOME, '.cursor', 'mcp.json'), format: 'mcpServers' },
    { name: 'VS Code', path: vsCodePath, format: 'servers' },
    { name: 'Codex', path: path.join(HOME, '.codex', 'config.toml'), format: 'toml' },
    { name: 'Gemini CLI', path: path.join(HOME, '.gemini', 'settings.json'), format: 'mcpServers' },
  ];
}
390
+
391
/**
 * Register the Vesper MCP server in one agent's configuration file.
 *
 * For 'toml' agents a `[mcp_servers.vesper]` table is appended unless one is
 * already present. For JSON agents the `vesper` entry is set under
 * `mcpServers` (or `servers` for the 'servers' format, with `type: 'stdio'`).
 *
 * An existing JSON file that cannot be parsed as a JSON object (for example
 * because it contains comments) is left untouched and reported as a failure,
 * rather than being overwritten and losing the user's other settings.
 *
 * @param {{name: string, path: string, format: string}} agent Agent descriptor.
 * @returns {boolean} `true` if the entry is in place, `false` on any failure.
 */
function installMcpToAgent(agent) {
  const npxCmd = IS_WIN ? 'npx.cmd' : 'npx';
  const serverEntry = { command: npxCmd, args: ['-y', '@vespermcp/mcp-server@latest'] };

  try {
    if (agent.format === 'toml') {
      let content = fs.existsSync(agent.path) ? fs.readFileSync(agent.path, 'utf8') : '';
      if (content.includes('[mcp_servers.vesper]')) return true;
      ensureDir(path.dirname(agent.path));
      content += `\n[mcp_servers.vesper]\ncommand = "${serverEntry.command}"\nargs = [${serverEntry.args.map(a => `"${a}"`).join(', ')}]\n`;
      fs.writeFileSync(agent.path, content, 'utf8');
      return true;
    }

    let config = {};
    if (fs.existsSync(agent.path)) {
      const raw = fs.readFileSync(agent.path, 'utf8').trim();
      // An empty file is treated as an empty config; anything else must parse.
      if (raw) {
        try {
          config = JSON.parse(raw);
        } catch {
          // Do not clobber a file we cannot understand.
          return false;
        }
        if (config === null || typeof config !== 'object' || Array.isArray(config)) {
          return false;
        }
      }
    } else {
      ensureDir(path.dirname(agent.path));
    }

    const key = agent.format === 'servers' ? 'servers' : 'mcpServers';
    if (!config[key]) config[key] = {};

    const entry = agent.format === 'servers'
      ? { type: 'stdio', ...serverEntry }
      : serverEntry;

    config[key].vesper = entry;
    fs.writeFileSync(agent.path, JSON.stringify(config, null, 2), 'utf8');
    return true;
  } catch {
    return false;
  }
}
426
+
427
+ // ── Server Health Check ──────────────────────────────────────
428
/**
 * Quick check that the Vesper MCP server can be launched via npx.
 *
 * Runs `npx -y @vespermcp/mcp-server@latest --version` with a 10 s timeout.
 *
 * @returns {Promise<*>} Truthy when the process exits with status 0 or its
 *   stderr mentions "Vesper"; falsy otherwise (including when spawning fails).
 */
async function checkServerHealth() {
  try {
    // Quick stdio check — spawn server and see if it responds
    const result = spawnSync(IS_WIN ? 'npx.cmd' : 'npx', ['-y', '@vespermcp/mcp-server@latest', '--version'], {
      timeout: 10000,
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      // On Windows, .cmd shims must be run through a shell; current Node
      // releases refuse to spawn them directly (EINVAL). Arguments here are
      // fixed literals, so shell interpretation is safe.
      shell: IS_WIN,
    });
    return result.status === 0 || (result.stderr && result.stderr.includes('Vesper'));
  } catch {
    return false;
  }
}
441
+
442
+ // ── Main Wizard ──────────────────────────────────────────────
443
/**
 * Run the interactive six-step setup wizard:
 *   1. create the local ~/.vesper directories,
 *   2. choose/obtain an API key (keep, manual, browser device flow, or local),
 *   3. make sure an auth mode is recorded in config.toml,
 *   4. run the MCP server's setup through npx,
 *   5. register the server with every detected coding agent,
 *   6. report what exists on disk, followed by a summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  printBanner();

  console.log(` ${green('→')} Setting up Vesper on ${bold(os.hostname())}\n`);

  // ─── Step 1: Create directories ────────────────────────────
  process.stdout.write(` ${dim('[')}${cyan('1/6')}${dim(']')} Creating local directories...`);
  ensureDir(VESPER_DIR);
  ensureDir(DATA_DIR);
  ensureDir(path.join(DATA_DIR, 'raw'));
  ensureDir(path.join(DATA_DIR, 'processed'));
  ensureDir(path.join(VESPER_DIR, 'datasets'));
  console.log(` ${green('✓')}`);

  // ─── Step 2: Authenticate (device flow or local key) ──────
  console.log(`\n ${dim('[')}${cyan('2/6')}${dim(']')} Authentication`);

  const existing = readToml(CONFIG_TOML);
  let localKey = existing.api_key || '';
  let authMode = existing.auth_mode || '';

  const authChoice = await chooseAuthMode(localKey, authMode);

  if (authChoice === 'keep' && localKey) {
    console.log(` ${green('✓')} Keeping current key`);
  } else if (authChoice === 'manual') {
    localKey = await promptForManualApiKey();
    authMode = 'cloud';
    console.log(` ${green('✓')} Cloud API key saved from manual input`);
  } else if (authChoice === 'browser') {
    const cloudKey = await deviceAuthFlow();
    if (cloudKey) {
      localKey = cloudKey;
      authMode = 'cloud';
    } else {
      const fallbackChoice = await askChoice(`${yellow('!')} Browser sign-in did not complete. Choose a fallback:`, [
        { value: 'manual', label: 'Provide Vesper API key manually' },
        { value: 'local', label: 'Use local-only key' },
      ], 'manual');

      if (fallbackChoice === 'manual') {
        localKey = await promptForManualApiKey();
        authMode = 'cloud';
      } else {
        // Never keep a cloud key while switching to local-only mode.
        if (!localKey || isCloudApiKey(localKey)) {
          localKey = generateLocalKey();
        }
        authMode = 'local_unified';
        console.log(`\n ${yellow('⚠')} Using local-only key. Run the wizard again anytime to link an account.`);
      }
    }
  } else {
    if (!localKey || isCloudApiKey(localKey)) {
      localKey = generateLocalKey();
    }
    authMode = 'local_unified';
    console.log(` ${green('✓')} Local-only key ready`);
  }

  const configData = { ...existing, api_key: localKey, auth_mode: authMode };
  writeToml(CONFIG_TOML, configData);
  console.log(` ${dim('Key:')} ${dim(localKey.slice(0, 24) + '...')} ${dim('→')} ${dim(CONFIG_TOML)}`);

  // ─── Step 3: Local vault initialization ────────────────────
  process.stdout.write(`\n ${dim('[')}${cyan('3/6')}${dim(']')} Initializing local credentials vault...`);
  const vaultData = readToml(CONFIG_TOML);
  if (!vaultData.auth_mode) vaultData.auth_mode = 'local_unified';
  writeToml(CONFIG_TOML, vaultData);
  console.log(` ${green('✓')}`);
  console.log(` ${dim('Mode:')} ${dim(vaultData.auth_mode === 'cloud' ? 'cloud (linked to Vesper account)' : 'single local Vesper key (no external keys required)')}`);

  // ─── Step 4: Install @vespermcp/mcp-server ─────────────────
  console.log(`\n ${dim('[')}${cyan('4/6')}${dim(']')} Installing Vesper MCP server...`);
  let serverInstalled = false;
  try {
    const npmCmd = IS_WIN ? 'npx.cmd' : 'npx';
    const installResult = spawnSync(npmCmd, ['-y', '@vespermcp/mcp-server@latest', '--setup', '--silent'], {
      stdio: 'inherit',
      timeout: 120000,
      // On Windows, .cmd shims must be run through a shell; current Node
      // releases refuse to spawn them directly (EINVAL). Arguments here are
      // fixed literals, so shell interpretation is safe.
      shell: IS_WIN,
    });
    // spawnSync reports failures on the result object instead of throwing,
    // so success has to be checked explicitly.
    serverInstalled = !installResult.error && installResult.status === 0;
  } catch {
    serverInstalled = false;
  }
  if (serverInstalled) {
    console.log(` ${green('✓')} @vespermcp/mcp-server installed`);
  } else {
    console.log(` ${yellow('⚠')} Could not auto-install — run manually: npx -y @vespermcp/mcp-server@latest --setup`);
  }

  // ─── Step 5: Auto-configure all detected IDEs ──────────────
  process.stdout.write(`\n ${dim('[')}${cyan('5/6')}${dim(']')} Configuring coding agents...`);
  const agents = getAllAgentConfigs();
  const configuredAgents = [];
  const skippedAgents = [];

  for (const agent of agents) {
    // An agent counts as "detected" when its config file or its directory exists.
    const dirExists = fs.existsSync(path.dirname(agent.path));
    const fileExists = fs.existsSync(agent.path);
    if (fileExists || dirExists) {
      const ok = installMcpToAgent(agent);
      if (ok) configuredAgents.push(agent.name);
      else skippedAgents.push(agent.name);
    }
  }
  console.log(` ${green('✓')}`);

  if (configuredAgents.length > 0) {
    console.log(`\n ┌───────────────────────────────────────────────┐`);
    console.log(` │ ${bold('MCP Auto-Configured')} │`);
    console.log(` ├───────────────────────────────────────────────┤`);
    for (const name of configuredAgents) {
      console.log(` │ ${green('✓')} ${name.padEnd(42)}│`);
    }
    console.log(` └───────────────────────────────────────────────┘`);
  }

  // ─── Step 6: Verify ────────────────────────────────────────
  console.log(`\n ${dim('[')}${cyan('6/6')}${dim(']')} Verifying installation...`);

  const dbExists = fs.existsSync(path.join(DATA_DIR, 'metadata.db'));
  const vecExists = fs.existsSync(path.join(DATA_DIR, 'vectors.json')) || fs.existsSync(path.join(DATA_DIR, 'vectors.bin'));
  const keyStored = fs.existsSync(CONFIG_TOML);

  console.log(` ${keyStored ? green('✓') : red('✗')} Local API key ${dim(CONFIG_TOML)}`);
  console.log(` ${dbExists ? green('✓') : yellow('⚠')} Dataset index ${dim(dbExists ? 'ready' : 'will build on first search')}`);
  console.log(` ${vecExists ? green('✓') : yellow('⚠')} Vector store ${dim(vecExists ? 'ready' : 'will build on first search')}`);
  console.log(` ${configuredAgents.length > 0 ? green('✓') : yellow('⚠')} MCP agents ${dim(configuredAgents.length + ' configured')}`);

  // ─── Final Summary ─────────────────────────────────────────
  const finalConfig = readToml(CONFIG_TOML);
  const isCloud = finalConfig.auth_mode === 'cloud';
  console.log(`
${dim('═════════════════════════════════════════════════')}

${green(bold('✓ Vesper is ready!'))}

${bold(isCloud ? 'Your cloud API key:' : 'Your local API key:')}
${cyan(finalConfig.api_key || localKey)}

${bold('Auth mode:')}
${dim(isCloud ? '☁ Cloud (linked to Vesper account)' : '🔑 Local-only (key never leaves your machine)')}

${bold('Config file:')}
${dim(CONFIG_TOML)}

${bold('What just happened:')}
${dim('1.')} ${isCloud ? 'Linked to your Vesper cloud account' : 'Generated a local API key (never leaves your machine)'}
${dim('2.')} Initialized local credentials vault
${dim('3.')} Auto-configured MCP for ${configuredAgents.length > 0 ? configuredAgents.join(', ') : 'detected agents'}
${dim('4.')} Vesper server ready on stdio transport

${dim('─────────────────────────────────────────────────')}

${bold('Quick start — try in your AI assistant:')}

${cyan('Search datasets')}
${dim('>')} vesper_search(query="sentiment analysis")

${cyan('Download & prepare')}
${dim('>')} prepare_dataset(query="image classification cats dogs")

${cyan('Quality analysis')}
${dim('>')} analyze_quality(dataset_id="imdb")

${cyan('Export to your project')}
${dim('>')} export_dataset(dataset_id="imdb", format="parquet")

${dim('─────────────────────────────────────────────────')}

${bold('Unified API — one interface, every source:')}
HuggingFace · Kaggle · OpenML · data.world

${dim('Agents call localhost Vesper APIs with one local key.')}
${dim('Vesper adapters handle provider routing internally.')}

${dim('─────────────────────────────────────────────────')}

${yellow('→')} Restart your IDE to activate MCP
${dim('Docs:')} https://github.com/vesper/mcp-server

${dim('═════════════════════════════════════════════════')}
`);
}
621
+
622
// Entry point: run the wizard; on any unhandled failure print it and exit non-zero.
const reportFatal = (err) => {
  const detail = err.message || err;
  console.error(`\n${red('Error:')} ${detail}`);
  process.exit(1);
};

main().catch(reportFatal);
@@ -26,6 +26,7 @@ def _print(payload: Dict[str, Any]) -> None:
26
26
  async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
27
27
  payload = json.loads(args.payload)
28
28
  output_root = payload.get("output_root") or str(Path.home() / ".vesper" / "data" / "assets")
29
+ output_dir = payload.get("output_dir")
29
30
  workers = int(payload.get("workers") or 8)
30
31
  recipes_dir = payload.get("recipes_dir")
31
32
 
@@ -43,6 +44,7 @@ async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
43
44
  kaggle_ref=payload.get("kaggle_ref"),
44
45
  urls=payload.get("urls"),
45
46
  output_format=payload.get("output_format", "webdataset"),
47
+ output_dir=str(output_dir) if output_dir else None,
46
48
  max_items=payload.get("max_items"),
47
49
  image_column=payload.get("image_column"),
48
50
  )
@@ -191,6 +191,7 @@ class AssetDownloader:
191
191
  kaggle_ref: Optional[str] = None,
192
192
  urls: Optional[List[str]] = None,
193
193
  output_format: str = "webdataset",
194
+ output_dir: Optional[str] = None,
194
195
  max_items: Optional[int] = None,
195
196
  image_column: Optional[str] = None,
196
197
  ) -> Dict[str, Any]:
@@ -231,7 +232,10 @@ class AssetDownloader:
231
232
  raise ValueError("urls are required for source=url")
232
233
 
233
234
  # --- Now safe to create directories ---
234
- dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
235
+ if output_dir:
236
+ dataset_dir = Path(output_dir).expanduser().resolve()
237
+ else:
238
+ dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
235
239
  images_dir = dataset_dir / "images"
236
240
  dataset_dir.mkdir(parents=True, exist_ok=True)
237
241
  images_dir.mkdir(parents=True, exist_ok=True)