vesper-wizard 2.0.6 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/build/index.js +8 -0
- package/build/python/asset_downloader_engine.py +2 -0
- package/build/python/vesper/core/asset_downloader.py +5 -1
- package/package.json +2 -2
- package/scripts/wizard.cjs +625 -0
- package/src/python/asset_downloader_engine.py +2 -0
- package/src/python/vesper/core/asset_downloader.py +5 -1
package/README.md
CHANGED
|
@@ -222,6 +222,28 @@ export_dataset(
|
|
|
222
222
|
|
|
223
223
|
---
|
|
224
224
|
|
|
225
|
+
#### `vesper_download_assets`
|
|
226
|
+
Download image/media assets to a user-controlled local directory.
|
|
227
|
+
|
|
228
|
+
**Parameters:**
|
|
229
|
+
- `dataset_id` (string): Dataset identifier
|
|
230
|
+
- `source` (string): `huggingface`, `kaggle`, or `url`
|
|
231
|
+
- `target_dir` (string, optional): Exact local directory where assets should be written
|
|
232
|
+
- `output_dir` (string, optional): Alias for `target_dir`
|
|
233
|
+
- `output_format` (string, optional): `webdataset`, `imagefolder`, or `parquet`
|
|
234
|
+
|
|
235
|
+
**Example:**
|
|
236
|
+
```
|
|
237
|
+
vesper_download_assets(
|
|
238
|
+
dataset_id="cats_vs_dogs",
|
|
239
|
+
source="kaggle",
|
|
240
|
+
target_dir="./datasets/cats_dogs_100",
|
|
241
|
+
output_format="imagefolder"
|
|
242
|
+
)
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
225
247
|
### Quality Analysis
|
|
226
248
|
|
|
227
249
|
#### `analyze_image_quality`
|
package/build/index.js
CHANGED
|
@@ -960,6 +960,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
960
960
|
kaggle_ref: { type: "string", description: "Kaggle dataset ref (owner/dataset)." },
|
|
961
961
|
urls: { type: "array", items: { type: "string" }, description: "Direct asset URLs." },
|
|
962
962
|
output_format: { type: "string", enum: ["webdataset", "imagefolder", "parquet"], description: "Output asset format." },
|
|
963
|
+
target_dir: { type: "string", description: "Optional local directory where downloaded assets should be written. If provided, Vesper writes directly to this directory instead of managed asset storage." },
|
|
964
|
+
output_dir: { type: "string", description: "Alias for target_dir. When provided, downloaded assets are written directly to this local directory." },
|
|
963
965
|
max_items: { type: "number", description: "Optional cap on number of assets to fetch." },
|
|
964
966
|
workers: { type: "number", description: "Parallel worker count (default 8)." },
|
|
965
967
|
image_column: { type: "string", description: "Explicit image column name. If omitted, auto-detected from HF features, column names, and sample values." },
|
|
@@ -1521,6 +1523,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1521
1523
|
? (request.params.arguments?.urls).map(v => String(v))
|
|
1522
1524
|
: undefined;
|
|
1523
1525
|
const outputFormat = String(request.params.arguments?.output_format || "webdataset");
|
|
1526
|
+
const requestedOutputDir = request.params.arguments?.target_dir
|
|
1527
|
+
? String(request.params.arguments.target_dir).trim()
|
|
1528
|
+
: request.params.arguments?.output_dir
|
|
1529
|
+
? String(request.params.arguments.output_dir).trim()
|
|
1530
|
+
: undefined;
|
|
1524
1531
|
const maxItems = request.params.arguments?.max_items ? Number(request.params.arguments.max_items) : undefined;
|
|
1525
1532
|
const workers = request.params.arguments?.workers ? Number(request.params.arguments.workers) : 8;
|
|
1526
1533
|
const imageColumn = request.params.arguments?.image_column ? String(request.params.arguments.image_column) : undefined;
|
|
@@ -1563,6 +1570,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1563
1570
|
kaggle_ref: kaggleRef,
|
|
1564
1571
|
urls,
|
|
1565
1572
|
output_format: outputFormat,
|
|
1573
|
+
output_dir: requestedOutputDir,
|
|
1566
1574
|
max_items: maxItems,
|
|
1567
1575
|
workers,
|
|
1568
1576
|
image_column: imageColumn,
|
|
@@ -26,6 +26,7 @@ def _print(payload: Dict[str, Any]) -> None:
|
|
|
26
26
|
async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
|
|
27
27
|
payload = json.loads(args.payload)
|
|
28
28
|
output_root = payload.get("output_root") or str(Path.home() / ".vesper" / "data" / "assets")
|
|
29
|
+
output_dir = payload.get("output_dir")
|
|
29
30
|
workers = int(payload.get("workers") or 8)
|
|
30
31
|
recipes_dir = payload.get("recipes_dir")
|
|
31
32
|
|
|
@@ -43,6 +44,7 @@ async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
|
|
|
43
44
|
kaggle_ref=payload.get("kaggle_ref"),
|
|
44
45
|
urls=payload.get("urls"),
|
|
45
46
|
output_format=payload.get("output_format", "webdataset"),
|
|
47
|
+
output_dir=str(output_dir) if output_dir else None,
|
|
46
48
|
max_items=payload.get("max_items"),
|
|
47
49
|
image_column=payload.get("image_column"),
|
|
48
50
|
)
|
|
@@ -191,6 +191,7 @@ class AssetDownloader:
|
|
|
191
191
|
kaggle_ref: Optional[str] = None,
|
|
192
192
|
urls: Optional[List[str]] = None,
|
|
193
193
|
output_format: str = "webdataset",
|
|
194
|
+
output_dir: Optional[str] = None,
|
|
194
195
|
max_items: Optional[int] = None,
|
|
195
196
|
image_column: Optional[str] = None,
|
|
196
197
|
) -> Dict[str, Any]:
|
|
@@ -231,7 +232,10 @@ class AssetDownloader:
|
|
|
231
232
|
raise ValueError("urls are required for source=url")
|
|
232
233
|
|
|
233
234
|
# --- Now safe to create directories ---
|
|
234
|
-
|
|
235
|
+
if output_dir:
|
|
236
|
+
dataset_dir = Path(output_dir).expanduser().resolve()
|
|
237
|
+
else:
|
|
238
|
+
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
235
239
|
images_dir = dataset_dir / "images"
|
|
236
240
|
dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
237
241
|
images_dir.mkdir(parents=True, exist_ok=True)
|
package/package.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vesper-wizard",
|
|
3
|
-
"version": "2.0.6",
|
|
3
|
+
"version": "2.0.8",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"mcp-server": "./build/index.js",
|
|
9
9
|
"vespermcp": "./build/index.js",
|
|
10
|
-
"vesper-wizard": "scripts/wizard.
|
|
10
|
+
"vesper-wizard": "scripts/wizard.cjs"
|
|
11
11
|
},
|
|
12
12
|
"files": [
|
|
13
13
|
"build/**/*",
|
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// ─────────────────────────────────────────────────────────────
|
|
4
|
+
// vesper-wizard — Zero-friction local setup for Vesper MCP
|
|
5
|
+
// Run: npx vesper-wizard@latest
|
|
6
|
+
// ─────────────────────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
const fs = require('fs');
|
|
9
|
+
const path = require('path');
|
|
10
|
+
const os = require('os');
|
|
11
|
+
const crypto = require('crypto');
|
|
12
|
+
const { execSync, spawnSync } = require('child_process');
|
|
13
|
+
const http = require('http');
|
|
14
|
+
const https = require('https');
|
|
15
|
+
const readline = require('readline');
|
|
16
|
+
|
|
17
|
+
// ── Paths ────────────────────────────────────────────────────
|
|
18
|
+
// Home directory of the current user; the paths below are derived from it.
const HOME = os.homedir();
// Root directory for Vesper's local state.
const VESPER_DIR = path.join(HOME, '.vesper');
// Config file holding the API key and auth mode as flat `key = "value"` lines.
const CONFIG_TOML = path.join(VESPER_DIR, 'config.toml');
// Directory for local data files.
const DATA_DIR = path.join(VESPER_DIR, 'data');
// True when running on Windows.
const IS_WIN = process.platform === 'win32';
// %APPDATA% when set, otherwise the Roaming folder under HOME.
const APPDATA = process.env.APPDATA || path.join(HOME, 'AppData', 'Roaming');
|
|
24
|
+
|
|
25
|
+
// ── Helpers ──────────────────────────────────────────────────
|
|
26
|
+
// Create `dir` (including missing parents) unless something already exists
// at that path.
function ensureDir(dir) {
  if (fs.existsSync(dir)) {
    return;
  }
  fs.mkdirSync(dir, { recursive: true });
}
|
|
29
|
+
|
|
30
|
+
// Build a local-only API key: the `vesper_sk_local_` prefix followed by
// 24 random bytes rendered as 48 hex characters.
function generateLocalKey() {
  const suffix = crypto.randomBytes(24).toString('hex');
  return 'vesper_sk_local_' + suffix;
}
|
|
34
|
+
|
|
35
|
+
// Read a flat `key = "value"` file into a plain object.
// Only lines of exactly that shape are recognised; everything else
// (comments, section headers, non-string values) is ignored.
// Returns an empty object when the file does not exist.
function readToml(filePath) {
  const parsed = {};
  if (!fs.existsSync(filePath)) {
    return parsed;
  }

  const pattern = /^\s*(\w+)\s*=\s*"(.*)"\s*$/;
  const rows = fs.readFileSync(filePath, 'utf8').split('\n');
  rows.forEach((row) => {
    const hit = row.match(pattern);
    if (hit) {
      const [, key, value] = hit;
      parsed[key] = value;
    }
  });
  return parsed;
}
|
|
45
|
+
|
|
46
|
+
// Write `data` to `filePath` as one `key = "value"` line per entry, creating
// the parent directory first. The file always ends with a newline.
// NOTE(review): values are interpolated verbatim, so a value containing `"`
// or `\` is written unescaped.
function writeToml(filePath, data) {
  ensureDir(path.dirname(filePath));
  const rows = Object.keys(data).map((key) => `${key} = "${data[key]}"`);
  const text = rows.join('\n') + '\n';
  fs.writeFileSync(filePath, text, 'utf8');
}
|
|
51
|
+
|
|
52
|
+
// Wrap `text` in the ANSI SGR sequence for `code`, followed by a reset.
function ansiWrap(code, text) { return `\x1b[${code}m${text}\x1b[0m`; }

// Terminal styling helpers; each returns `text` wrapped in one SGR attribute.
function dim(text) { return ansiWrap(2, text); }
function bold(text) { return ansiWrap(1, text); }
function green(text) { return ansiWrap(32, text); }
function cyan(text) { return ansiWrap(36, text); }
function yellow(text) { return ansiWrap(33, text); }
function red(text) { return ansiWrap(31, text); }
function magenta(text) { return ansiWrap(35, text); }
|
|
59
|
+
|
|
60
|
+
// ── Vesper API URL resolution ────────────────────────────────
|
|
61
|
+
// Explicit API base URL taken from the environment; empty string when unset.
const VESPER_API_URL = process.env.VESPER_API_URL || '';
// Base URLs probed in order when VESPER_API_URL is not set.
const DEFAULT_VESPER_API_CANDIDATES = [
  'http://localhost:3000',
  'http://127.0.0.1:3000',
  'https://vesper.dev',
];
|
|
67
|
+
|
|
68
|
+
// ── Device Auth Helpers ──────────────────────────────────────
|
|
69
|
+
// Send an HTTP(S) request with an optional JSON body and resolve with
// `{ status, body }`.
//
//   method    - HTTP verb, e.g. 'GET' or 'POST'.
//   url       - absolute http:// or https:// URL.
//   body      - optional value; when truthy it is JSON-serialised and sent.
//   timeoutMs - socket inactivity timeout in milliseconds (default 15000,
//               0 disables it). Without it a stalled endpoint would block the
//               caller forever.
//
// `body` in the result is the parsed JSON response, or the raw text when the
// response is not valid JSON. The promise rejects on an invalid URL, a
// network/stream error, or a timeout.
function httpJson(method, url, body, timeoutMs = 15000) {
  return new Promise((resolve, reject) => {
    const parsed = new URL(url);
    const lib = parsed.protocol === 'https:' ? https : http;
    const opts = {
      method,
      hostname: parsed.hostname,
      port: parsed.port || (parsed.protocol === 'https:' ? 443 : 80),
      path: parsed.pathname + parsed.search,
      headers: { 'Content-Type': 'application/json' },
    };
    const req = lib.request(opts, (res) => {
      let data = '';
      // Decode as a stream so multi-byte characters split across chunks are
      // not corrupted by per-chunk Buffer-to-string conversion.
      res.setEncoding('utf8');
      res.on('data', (chunk) => (data += chunk));
      res.on('error', reject);
      res.on('end', () => {
        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
        catch { resolve({ status: res.statusCode, body: data }); }
      });
    });
    // Destroying the request with an error surfaces the timeout through the
    // 'error' handler below.
    req.setTimeout(timeoutMs, () => {
      req.destroy(new Error(`Request timed out after ${timeoutMs}ms`));
    });
    req.on('error', reject);
    if (body) req.write(JSON.stringify(body));
    req.end();
  });
}
|
|
93
|
+
|
|
94
|
+
// Probe `baseUrl` by POSTing to its device-auth start endpoint and classify
// the outcome:
//   'ready'          - HTTP 201 with a `code` in the body
//   'setup-required' - HTTP 503 with `requiresSetup` set in the body
//   'unreachable'    - any other response, or a failed request
// Always resolves with `{ baseUrl, status, ... }`; never rejects.
async function probeDeviceAuth(baseUrl) {
  const unreachable = (extra) => ({ baseUrl, status: 'unreachable', ...extra });

  try {
    const { status, body } = await httpJson('POST', `${baseUrl}/api/auth/device/start`);

    const issuedCode = status === 201 && !!body && !!body.code;
    if (issuedCode) {
      return { baseUrl, status: 'ready', response: body };
    }

    const needsSetup = status === 503 && body && body.requiresSetup;
    if (needsSetup) {
      const reason = body.error || 'Auth storage is not initialized.';
      return { baseUrl, status: 'setup-required', response: body, message: reason };
    }

    const text = typeof body === 'string' ? body : JSON.stringify(body);
    return unreachable({ response: body, message: text });
  } catch (error) {
    const reason = error && error.message ? error.message : 'Request failed';
    return unreachable({ message: reason });
  }
}
|
|
124
|
+
|
|
125
|
+
// Probe the candidate API base URLs in order and return the first probe whose
// status is 'ready'. If none is ready, return the first 'setup-required'
// probe, or null when every candidate was unreachable.
// Only VESPER_API_URL is tried when it is set.
async function resolveVesperApiBaseUrl() {
  let candidates = DEFAULT_VESPER_API_CANDIDATES;
  if (VESPER_API_URL) {
    candidates = [VESPER_API_URL];
  }

  let firstSetupRequired = null;
  for (const baseUrl of candidates) {
    const result = await probeDeviceAuth(baseUrl);
    switch (result.status) {
      case 'ready':
        return result;
      case 'setup-required':
        if (!firstSetupRequired) {
          firstSetupRequired = result;
        }
        break;
      default:
        break;
    }
  }

  return firstSetupRequired;
}
|
|
145
|
+
|
|
146
|
+
// Best-effort: open `url` in the user's default browser.
//
// The URL can come from a server response, so it is validated first: only
// well-formed http:/https: URLs are opened, and the normalised form is what
// gets passed to the platform opener. Anything else is silently ignored.
// Never throws; returns undefined.
function openBrowser(url) {
  try {
    const target = new URL(String(url));
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      return;
    }
    const href = target.href;

    if (process.platform === 'win32') {
      // cmd.exe parses & | < > ^ as operators even inside an argument, so a
      // query string like ?a=1&b=2 would be cut off (or run a command).
      // Caret-escape them before handing the URL to `start`.
      const escaped = href.replace(/[&|<>^]/g, '^$&');
      spawnSync('cmd', ['/c', 'start', '', escaped], { stdio: 'ignore' });
    } else if (process.platform === 'darwin') {
      spawnSync('open', [href], { stdio: 'ignore' });
    } else {
      spawnSync('xdg-open', [href], { stdio: 'ignore' });
    }
  } catch { /* browser open is best-effort */ }
}
|
|
157
|
+
|
|
158
|
+
// Ask a yes/no question on the terminal. Resolves true for an empty answer
// or one starting with "y" (case-insensitive), false otherwise.
function askYesNo(question) {
  return new Promise((resolve) => {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    const promptText = ` ${question} ${dim('[Y/n]')} `;
    rl.question(promptText, (reply) => {
      rl.close();
      if (!reply) {
        resolve(true);
        return;
      }
      resolve(reply.toLowerCase().startsWith('y'));
    });
  });
}
|
|
167
|
+
|
|
168
|
+
// Prompt for one line of free-form input and resolve with it, trimmed.
// An empty answer resolves to the empty string.
function askInput(question) {
  return new Promise((resolve) => {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    const promptText = ` ${question} `;
    rl.question(promptText, (reply) => {
      rl.close();
      const text = String(reply || '');
      resolve(text.trim());
    });
  });
}
|
|
177
|
+
|
|
178
|
+
// Print a numbered menu and resolve with the `value` of the selected choice.
//
//   question     - heading printed above the menu.
//   choices      - array of `{ value, label }` entries.
//   defaultValue - value returned for an empty or unrecognised answer.
//
// The user may answer with a 1-based menu number or with a choice's `value`
// typed verbatim. Returns `defaultValue` (possibly undefined) when the answer
// matches neither.
async function askChoice(question, choices, defaultValue) {
  console.log(` ${question}`);
  choices.forEach((choice, index) => {
    console.log(` ${dim(String(index + 1) + ')')} ${choice.label}`);
  });

  const prompt = defaultValue ? `${dim('[default: ' + defaultValue + ']')}` : '';
  const answer = await askInput(`${prompt} ${cyan('→')} Choose an option:`);
  if (!answer && defaultValue) {
    return defaultValue;
  }

  const numeric = Number(answer);
  // Require an integer: a fractional answer such as "1.5" is in range but
  // indexes a non-existent element and would throw on `.value`.
  if (Number.isInteger(numeric) && numeric >= 1 && numeric <= choices.length) {
    return choices[numeric - 1].value;
  }

  const matched = choices.find((choice) => choice.value === answer);
  return matched ? matched.value : defaultValue;
}
|
|
198
|
+
|
|
199
|
+
// True when `value` is a non-empty key that starts with `vesper_sk_` but is
// not a locally generated `vesper_sk_local_` key.
function isCloudApiKey(value) {
  if (!value) {
    return false;
  }
  if (!value.startsWith('vesper_sk_')) {
    return false;
  }
  return !value.startsWith('vesper_sk_local_');
}
|
|
202
|
+
|
|
203
|
+
// Keep prompting until the user enters a key accepted by isCloudApiKey,
// then resolve with that key.
async function promptForManualApiKey() {
  console.log(`\n ${cyan('■')} ${bold('Manual API Key')}`);
  console.log(` ${dim('Paste a Vesper cloud API key. It will be stored locally in config.toml.\n')}`);

  for (;;) {
    const candidate = await askInput(`${cyan('→')} Vesper API key:`);
    if (!isCloudApiKey(candidate)) {
      console.log(` ${yellow('!')} ${yellow('Expected a Vesper key starting with vesper_sk_')}`);
      continue;
    }
    return candidate;
  }
}
|
|
215
|
+
|
|
216
|
+
// Ask how to authenticate and resolve with 'keep', 'manual', 'browser' or
// 'local'. When a key already exists, a truncated preview and the current
// mode are shown, 'keep' is offered first and is the default; otherwise the
// default is 'browser'.
async function chooseAuthMode(existingKey, existingAuthMode) {
  const keyPresent = !!existingKey;

  const options = [
    { value: 'manual', label: 'Provide Vesper API key manually' },
    { value: 'browser', label: 'Sign in through the browser' },
    { value: 'local', label: 'Use local-only key' },
  ];

  if (keyPresent) {
    const preview = existingKey.slice(0, 24) + '...';
    console.log(` ${dim('Current key:')} ${dim(preview)}`);
    console.log(` ${dim('Current mode:')} ${dim(existingAuthMode || (isCloudApiKey(existingKey) ? 'cloud' : 'local_unified'))}`);
    options.unshift({ value: 'keep', label: 'Keep current key as-is' });
  }

  const fallback = keyPresent ? 'keep' : 'browser';
  const selected = await askChoice(`${cyan('→')} How do you want to authenticate Vesper?`, options, fallback);
  return selected;
}
|
|
233
|
+
|
|
234
|
+
// Run the browser-based device authentication flow.
//
// Steps: resolve a reachable auth endpoint, request a device code, show the
// code and login URL (and try to open the browser), then poll until the
// server reports the code as confirmed or expired, or the poll budget runs
// out.
//
// Resolves with the cloud API key on success, or null on any failure
// (no endpoint, setup required, unexpected response, expiry, timeout).
// Progress and errors are printed to stdout; the function does not throw for
// malformed server responses.
async function deviceAuthFlow() {
  console.log(`\n ${cyan('■')} ${bold('Device Authentication')}`);
  console.log(` ${dim('Link your CLI to a Vesper account for cloud features\n')}`);

  const resolvedApiBaseUrl = await resolveVesperApiBaseUrl();
  if (!resolvedApiBaseUrl) {
    console.log(` ${red('✗')} ${red('Could not reach any Vesper auth endpoint.')}`);
    console.log(` ${dim('Tried:')} ${dim((VESPER_API_URL ? [VESPER_API_URL] : DEFAULT_VESPER_API_CANDIDATES).join(', '))}`);
    console.log(` ${dim('If your landing app is running locally, start it on http://localhost:3000 or set VESPER_API_URL.')}`);
    console.log(` ${dim('Falling back to local-only mode.\n')}`);
    return null;
  }

  if (resolvedApiBaseUrl.status === 'setup-required') {
    console.log(` ${yellow('!')} ${yellow('Reached Vesper auth endpoint, but local auth storage is not initialized.')}`);
    console.log(` ${dim('Endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
    console.log(` ${dim('Reason:')} ${dim(resolvedApiBaseUrl.message || 'Apply Supabase migrations first.')}`);
    console.log(` ${dim('Run the SQL in supabase/migrations/001_device_auth.sql and 002_rate_limits.sql, then retry.')}`);
    console.log(` ${dim('Falling back to local-only mode.\n')}`);
    return null;
  }

  console.log(` ${dim('Auth endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}\n`);

  // Step 1: Call /api/auth/device/start
  // NOTE(review): the probe in resolveVesperApiBaseUrl already POSTed to this
  // endpoint, so a second device code is requested here — confirm whether the
  // probe's response should be reused instead.
  process.stdout.write(` ${dim('Requesting device code...')}`);
  let startRes;
  try {
    startRes = await httpJson('POST', `${resolvedApiBaseUrl.baseUrl}/api/auth/device/start`);
  } catch (err) {
    console.log(` ${red('✗')}`);
    console.log(` ${red('Could not reach Vesper API at')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
    console.log(` ${dim('Falling back to local-only mode.\n')}`);
    return null;
  }

  // Both `code` and a string `loginUrl` are needed below; a body that is
  // null or lacks either one is treated as an unexpected response instead of
  // crashing on property access / padEnd.
  const startBody = startRes.body;
  const startOk = startRes.status === 201
    && !!startBody
    && !!startBody.code
    && typeof startBody.loginUrl === 'string';
  if (!startOk) {
    console.log(` ${red('✗')}`);
    console.log(` ${red('Unexpected response:')} ${dim(JSON.stringify(startRes.body))}`);
    return null;
  }

  const { code, loginUrl } = startBody;
  console.log(` ${green('✓')}\n`);

  // Step 2: Display code and open browser
  console.log(` ┌───────────────────────────────────────────────┐`);
  console.log(` │ │`);
  console.log(` │ ${bold('Your device code:')} ${cyan(bold(code))} │`);
  console.log(` │ │`);
  console.log(` │ ${dim('Open this URL to sign in:')} │`);
  console.log(` │ ${cyan(loginUrl.padEnd(41))}│`);
  console.log(` │ │`);
  console.log(` └───────────────────────────────────────────────┘\n`);

  openBrowser(loginUrl);
  console.log(` ${dim('Browser opened automatically.')}`);
  console.log(` ${dim('Waiting for you to sign in...')}\n`);

  // Step 3: Poll until confirmed or expired
  const POLL_INTERVAL = 3000; // 3 seconds
  const MAX_POLLS = 200; // 10 min max (200 × 3s)
  let polls = 0;
  const spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
  // Encode the code so reserved characters cannot corrupt the query string.
  const pollUrl = `${resolvedApiBaseUrl.baseUrl}/api/auth/device/poll?code=${encodeURIComponent(code)}`;

  while (polls < MAX_POLLS) {
    polls++;
    const frame = spinner[polls % spinner.length];
    process.stdout.write(`\r ${cyan(frame)} Polling... (${polls})`);

    try {
      const pollRes = await httpJson('GET', pollUrl);
      const pollBody = pollRes.body || {};

      if (pollBody.status === 'confirmed' && pollBody.apiKey) {
        process.stdout.write(`\r ${green('✓')} Device authenticated! \n`);
        console.log(` ${dim('Email:')} ${pollBody.email || 'linked'}`);
        return pollBody.apiKey;
      }

      if (pollBody.status === 'expired') {
        process.stdout.write(`\r ${red('✗')} Device code expired. \n`);
        console.log(` ${dim('Run the wizard again to get a new code.')}`);
        return null;
      }
    } catch {
      // Network hiccup — keep polling
    }

    await new Promise((r) => setTimeout(r, POLL_INTERVAL));
  }

  process.stdout.write(`\r ${red('✗')} Timed out waiting for authentication.\n`);
  return null;
}
|
|
328
|
+
|
|
329
|
+
// Print the Vesper banner (block-letter logo plus tagline) to stdout.
// The template literal is whitespace-sensitive: every character between the
// backticks, including line breaks, is printed as-is.
function printBanner() {
  console.log(`
${dim('─────────────────────────────────────────────────')}

${bold('██ ██ ███████ ███████ ██████ ███████ ██████')}
${bold('██ ██ ██ ██ ██ ██ ██ ██ ██')}
${bold('██ ██ █████ ███████ ██████ █████ ██████')}
${bold(' ██ ██ ██ ██ ██ ██ ██ ██')}
${bold(' ████ ███████ ███████ ██ ███████ ██ ██')}

${cyan('dataset intelligence layer')}
${dim('local-first • zero-config • agent-native')}

${dim('─────────────────────────────────────────────────')}
`);
}
|
|
345
|
+
|
|
346
|
+
// ── MCP Auto-Config ──────────────────────────────────────────
|
|
347
|
+
// Describe every coding agent the wizard knows how to configure.
// Each entry is `{ name, path, format }`, where `path` is the agent's config
// file for the current platform and `format` selects how the server entry is
// written: 'mcpServers' or 'servers' (JSON keys) or 'toml'.
function getAllAgentConfigs() {
  const onMac = process.platform === 'darwin';

  // Claude Desktop and VS Code keep their config in a per-platform location.
  let claudeDesktopPath;
  let vsCodePath;
  if (IS_WIN) {
    claudeDesktopPath = path.join(APPDATA, 'Claude', 'claude_desktop_config.json');
    vsCodePath = path.join(APPDATA, 'Code', 'User', 'mcp.json');
  } else if (onMac) {
    claudeDesktopPath = path.join(HOME, 'Library', 'Application Support', 'Claude', 'claude_desktop_config.json');
    vsCodePath = path.join(HOME, 'Library', 'Application Support', 'Code', 'User', 'mcp.json');
  } else {
    claudeDesktopPath = path.join(HOME, '.config', 'claude', 'claude_desktop_config.json');
    vsCodePath = path.join(HOME, '.config', 'Code', 'User', 'mcp.json');
  }

  const agents = [];
  agents.push({ name: 'Claude Code', path: path.join(HOME, '.claude.json'), format: 'mcpServers' });
  agents.push({ name: 'Claude Desktop', path: claudeDesktopPath, format: 'mcpServers' });
  agents.push({ name: 'Cursor', path: path.join(HOME, '.cursor', 'mcp.json'), format: 'mcpServers' });
  agents.push({ name: 'VS Code', path: vsCodePath, format: 'servers' });
  agents.push({ name: 'Codex', path: path.join(HOME, '.codex', 'config.toml'), format: 'toml' });
  agents.push({ name: 'Gemini CLI', path: path.join(HOME, '.gemini', 'settings.json'), format: 'mcpServers' });
  return agents;
}
|
|
390
|
+
|
|
391
|
+
// Register the Vesper MCP server in one agent's config file.
//
//   agent - `{ name, path, format }` entry as produced by getAllAgentConfigs.
//
// For `format: 'toml'` an `[mcp_servers.vesper]` section is appended unless
// one is already present. For the JSON formats the `vesper` entry is set
// under `servers` (format 'servers', with `type: 'stdio'`) or `mcpServers`.
//
// Returns true when the config contains the entry afterwards, false when the
// file could not be updated. An existing file that cannot be parsed as a JSON
// object is left untouched and reported as false: overwriting it would
// destroy the user's other settings.
function installMcpToAgent(agent) {
  const npxCmd = IS_WIN ? 'npx.cmd' : 'npx';
  const serverEntry = { command: npxCmd, args: ['-y', '@vespermcp/mcp-server@latest'] };

  try {
    if (agent.format === 'toml') {
      let content = fs.existsSync(agent.path) ? fs.readFileSync(agent.path, 'utf8') : '';
      if (content.includes('[mcp_servers.vesper]')) return true;
      ensureDir(path.dirname(agent.path));
      content += `\n[mcp_servers.vesper]\ncommand = "${serverEntry.command}"\nargs = [${serverEntry.args.map(a => `"${a}"`).join(', ')}]\n`;
      fs.writeFileSync(agent.path, content, 'utf8');
      return true;
    }

    let config = {};
    if (fs.existsSync(agent.path)) {
      const raw = fs.readFileSync(agent.path, 'utf8').trim();
      if (raw) {
        // A parse failure (comments, trailing commas, corruption) throws and
        // is turned into `false` by the outer catch, before anything is
        // written.
        config = JSON.parse(raw);
      }
      if (config === null || typeof config !== 'object' || Array.isArray(config)) {
        return false;
      }
    } else {
      ensureDir(path.dirname(agent.path));
    }

    const key = agent.format === 'servers' ? 'servers' : 'mcpServers';
    if (!config[key]) config[key] = {};
    // An existing non-object value here cannot hold the entry; do not replace it.
    if (typeof config[key] !== 'object' || Array.isArray(config[key])) {
      return false;
    }

    const entry = agent.format === 'servers'
      ? { type: 'stdio', ...serverEntry }
      : serverEntry;

    config[key].vesper = entry;
    fs.writeFileSync(agent.path, JSON.stringify(config, null, 2), 'utf8');
    return true;
  } catch {
    return false;
  }
}
|
|
426
|
+
|
|
427
|
+
// ── Server Health Check ──────────────────────────────────────
|
|
428
|
+
// Check whether the Vesper MCP server can be launched through npx.
//
// Runs `npx -y @vespermcp/mcp-server@latest --version` with a 10 second
// timeout. Resolves true when the process exits with status 0 or its stderr
// mentions "Vesper"; resolves false otherwise, including when the process
// cannot be spawned.
async function checkServerHealth() {
  try {
    // Quick stdio check — spawn server and see if it responds
    const result = spawnSync(IS_WIN ? 'npx.cmd' : 'npx', ['-y', '@vespermcp/mcp-server@latest', '--version'], {
      timeout: 10000,
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      // .cmd shims can only be spawned through a shell on Windows; current
      // Node releases fail with EINVAL otherwise. The arguments are constants,
      // so shell interpretation is safe here.
      shell: IS_WIN,
    });
    // Coerce so callers always get a boolean rather than '' / null / undefined.
    return result.status === 0 || Boolean(result.stderr && result.stderr.includes('Vesper'));
  } catch {
    return false;
  }
}
|
|
441
|
+
|
|
442
|
+
// ── Main Wizard ──────────────────────────────────────────────
|
|
443
|
+
async function main() {
|
|
444
|
+
printBanner();
|
|
445
|
+
|
|
446
|
+
console.log(` ${green('→')} Setting up Vesper on ${bold(os.hostname())}\n`);
|
|
447
|
+
|
|
448
|
+
// ─── Step 1: Create directories ────────────────────────────
|
|
449
|
+
process.stdout.write(` ${dim('[')}${cyan('1/6')}${dim(']')} Creating local directories...`);
|
|
450
|
+
ensureDir(VESPER_DIR);
|
|
451
|
+
ensureDir(DATA_DIR);
|
|
452
|
+
ensureDir(path.join(DATA_DIR, 'raw'));
|
|
453
|
+
ensureDir(path.join(DATA_DIR, 'processed'));
|
|
454
|
+
ensureDir(path.join(VESPER_DIR, 'datasets'));
|
|
455
|
+
console.log(` ${green('✓')}`);
|
|
456
|
+
|
|
457
|
+
// ─── Step 2: Authenticate (device flow or local key) ──────
|
|
458
|
+
console.log(`\n ${dim('[')}${cyan('2/6')}${dim(']')} Authentication`);
|
|
459
|
+
|
|
460
|
+
const existing = readToml(CONFIG_TOML);
|
|
461
|
+
let localKey = existing.api_key || '';
|
|
462
|
+
let authMode = existing.auth_mode || '';
|
|
463
|
+
|
|
464
|
+
const authChoice = await chooseAuthMode(localKey, authMode);
|
|
465
|
+
|
|
466
|
+
if (authChoice === 'keep' && localKey) {
|
|
467
|
+
console.log(` ${green('✓')} Keeping current key`);
|
|
468
|
+
} else if (authChoice === 'manual') {
|
|
469
|
+
localKey = await promptForManualApiKey();
|
|
470
|
+
authMode = 'cloud';
|
|
471
|
+
console.log(` ${green('✓')} Cloud API key saved from manual input`);
|
|
472
|
+
} else if (authChoice === 'browser') {
|
|
473
|
+
const cloudKey = await deviceAuthFlow();
|
|
474
|
+
if (cloudKey) {
|
|
475
|
+
localKey = cloudKey;
|
|
476
|
+
authMode = 'cloud';
|
|
477
|
+
} else {
|
|
478
|
+
const fallbackChoice = await askChoice(`${yellow('!')} Browser sign-in did not complete. Choose a fallback:`, [
|
|
479
|
+
{ value: 'manual', label: 'Provide Vesper API key manually' },
|
|
480
|
+
{ value: 'local', label: 'Use local-only key' },
|
|
481
|
+
], 'manual');
|
|
482
|
+
|
|
483
|
+
if (fallbackChoice === 'manual') {
|
|
484
|
+
localKey = await promptForManualApiKey();
|
|
485
|
+
authMode = 'cloud';
|
|
486
|
+
} else {
|
|
487
|
+
if (!localKey || isCloudApiKey(localKey)) {
|
|
488
|
+
localKey = generateLocalKey();
|
|
489
|
+
}
|
|
490
|
+
authMode = 'local_unified';
|
|
491
|
+
console.log(`\n ${yellow('⚠')} Using local-only key. Run the wizard again anytime to link an account.`);
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
} else {
|
|
495
|
+
if (!localKey || isCloudApiKey(localKey)) {
|
|
496
|
+
localKey = generateLocalKey();
|
|
497
|
+
}
|
|
498
|
+
authMode = 'local_unified';
|
|
499
|
+
console.log(` ${green('✓')} Local-only key ready`);
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
const configData = { ...existing, api_key: localKey, auth_mode: authMode };
|
|
503
|
+
writeToml(CONFIG_TOML, configData);
|
|
504
|
+
console.log(` ${dim('Key:')} ${dim(localKey.slice(0, 24) + '...')} ${dim('→')} ${dim(CONFIG_TOML)}`);
|
|
505
|
+
|
|
506
|
+
// ─── Step 3: Local vault initialization ────────────────────
|
|
507
|
+
process.stdout.write(`\n ${dim('[')}${cyan('3/6')}${dim(']')} Initializing local credentials vault...`);
|
|
508
|
+
const vaultData = readToml(CONFIG_TOML);
|
|
509
|
+
if (!vaultData.auth_mode) vaultData.auth_mode = 'local_unified';
|
|
510
|
+
writeToml(CONFIG_TOML, vaultData);
|
|
511
|
+
console.log(` ${green('✓')}`);
|
|
512
|
+
console.log(` ${dim('Mode:')} ${dim(vaultData.auth_mode === 'cloud' ? 'cloud (linked to Vesper account)' : 'single local Vesper key (no external keys required)')}`);
|
|
513
|
+
|
|
514
|
+
// ─── Step 4: Install @vespermcp/mcp-server ─────────────────
|
|
515
|
+
console.log(`\n ${dim('[')}${cyan('4/6')}${dim(']')} Installing Vesper MCP server...`);
|
|
516
|
+
try {
|
|
517
|
+
const npmCmd = IS_WIN ? 'npx.cmd' : 'npx';
|
|
518
|
+
spawnSync(npmCmd, ['-y', '@vespermcp/mcp-server@latest', '--setup', '--silent'], {
|
|
519
|
+
stdio: 'inherit',
|
|
520
|
+
timeout: 120000,
|
|
521
|
+
});
|
|
522
|
+
console.log(` ${green('✓')} @vespermcp/mcp-server installed`);
|
|
523
|
+
} catch {
|
|
524
|
+
console.log(` ${yellow('⚠')} Could not auto-install — run manually: npx -y @vespermcp/mcp-server@latest --setup`);
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
// ─── Step 5: Auto-configure all detected IDEs ──────────────
|
|
528
|
+
process.stdout.write(`\n ${dim('[')}${cyan('5/6')}${dim(']')} Configuring coding agents...`);
|
|
529
|
+
const agents = getAllAgentConfigs();
|
|
530
|
+
const configuredAgents = [];
|
|
531
|
+
const skippedAgents = [];
|
|
532
|
+
|
|
533
|
+
for (const agent of agents) {
|
|
534
|
+
const dirExists = fs.existsSync(path.dirname(agent.path));
|
|
535
|
+
const fileExists = fs.existsSync(agent.path);
|
|
536
|
+
if (fileExists || dirExists) {
|
|
537
|
+
const ok = installMcpToAgent(agent);
|
|
538
|
+
if (ok) configuredAgents.push(agent.name);
|
|
539
|
+
else skippedAgents.push(agent.name);
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
console.log(` ${green('✓')}`);
|
|
543
|
+
|
|
544
|
+
if (configuredAgents.length > 0) {
|
|
545
|
+
console.log(`\n ┌───────────────────────────────────────────────┐`);
|
|
546
|
+
console.log(` │ ${bold('MCP Auto-Configured')} │`);
|
|
547
|
+
console.log(` ├───────────────────────────────────────────────┤`);
|
|
548
|
+
for (const name of configuredAgents) {
|
|
549
|
+
console.log(` │ ${green('✓')} ${name.padEnd(42)}│`);
|
|
550
|
+
}
|
|
551
|
+
console.log(` └───────────────────────────────────────────────┘`);
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
// ─── Step 6: Verify ────────────────────────────────────────
|
|
555
|
+
console.log(`\n ${dim('[')}${cyan('6/6')}${dim(']')} Verifying installation...`);
|
|
556
|
+
|
|
557
|
+
const dbExists = fs.existsSync(path.join(DATA_DIR, 'metadata.db'));
|
|
558
|
+
const vecExists = fs.existsSync(path.join(DATA_DIR, 'vectors.json')) || fs.existsSync(path.join(DATA_DIR, 'vectors.bin'));
|
|
559
|
+
const keyStored = fs.existsSync(CONFIG_TOML);
|
|
560
|
+
|
|
561
|
+
console.log(` ${keyStored ? green('✓') : red('✗')} Local API key ${dim(CONFIG_TOML)}`);
|
|
562
|
+
console.log(` ${dbExists ? green('✓') : yellow('⚠')} Dataset index ${dim(dbExists ? 'ready' : 'will build on first search')}`);
|
|
563
|
+
console.log(` ${vecExists ? green('✓') : yellow('⚠')} Vector store ${dim(vecExists ? 'ready' : 'will build on first search')}`);
|
|
564
|
+
console.log(` ${configuredAgents.length > 0 ? green('✓') : yellow('⚠')} MCP agents ${dim(configuredAgents.length + ' configured')}`);
|
|
565
|
+
|
|
566
|
+
// ─── Final Summary ─────────────────────────────────────────
|
|
567
|
+
const finalConfig = readToml(CONFIG_TOML);
|
|
568
|
+
const isCloud = finalConfig.auth_mode === 'cloud';
|
|
569
|
+
console.log(`
|
|
570
|
+
${dim('═════════════════════════════════════════════════')}
|
|
571
|
+
|
|
572
|
+
${green(bold('✓ Vesper is ready!'))}
|
|
573
|
+
|
|
574
|
+
${bold(isCloud ? 'Your cloud API key:' : 'Your local API key:')}
|
|
575
|
+
${cyan(finalConfig.api_key || localKey)}
|
|
576
|
+
|
|
577
|
+
${bold('Auth mode:')}
|
|
578
|
+
${dim(isCloud ? '☁ Cloud (linked to Vesper account)' : '🔑 Local-only (key never leaves your machine)')}
|
|
579
|
+
|
|
580
|
+
${bold('Config file:')}
|
|
581
|
+
${dim(CONFIG_TOML)}
|
|
582
|
+
|
|
583
|
+
${bold('What just happened:')}
|
|
584
|
+
${dim('1.')} ${isCloud ? 'Linked to your Vesper cloud account' : 'Generated a local API key (never leaves your machine)'}
|
|
585
|
+
${dim('2.')} Initialized local credentials vault
|
|
586
|
+
${dim('3.')} Auto-configured MCP for ${configuredAgents.length > 0 ? configuredAgents.join(', ') : 'detected agents'}
|
|
587
|
+
${dim('4.')} Vesper server ready on stdio transport
|
|
588
|
+
|
|
589
|
+
${dim('─────────────────────────────────────────────────')}
|
|
590
|
+
|
|
591
|
+
${bold('Quick start — try in your AI assistant:')}
|
|
592
|
+
|
|
593
|
+
${cyan('Search datasets')}
|
|
594
|
+
${dim('>')} vesper_search(query="sentiment analysis")
|
|
595
|
+
|
|
596
|
+
${cyan('Download & prepare')}
|
|
597
|
+
${dim('>')} prepare_dataset(query="image classification cats dogs")
|
|
598
|
+
|
|
599
|
+
${cyan('Quality analysis')}
|
|
600
|
+
${dim('>')} analyze_quality(dataset_id="imdb")
|
|
601
|
+
|
|
602
|
+
${cyan('Export to your project')}
|
|
603
|
+
${dim('>')} export_dataset(dataset_id="imdb", format="parquet")
|
|
604
|
+
|
|
605
|
+
${dim('─────────────────────────────────────────────────')}
|
|
606
|
+
|
|
607
|
+
${bold('Unified API — one interface, every source:')}
|
|
608
|
+
HuggingFace · Kaggle · OpenML · data.world
|
|
609
|
+
|
|
610
|
+
${dim('Agents call localhost Vesper APIs with one local key.')}
|
|
611
|
+
${dim('Vesper adapters handle provider routing internally.')}
|
|
612
|
+
|
|
613
|
+
${dim('─────────────────────────────────────────────────')}
|
|
614
|
+
|
|
615
|
+
${yellow('→')} Restart your IDE to activate MCP
|
|
616
|
+
${dim('Docs:')} https://github.com/vesper/mcp-server
|
|
617
|
+
|
|
618
|
+
${dim('═════════════════════════════════════════════════')}
|
|
619
|
+
`);
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
main().catch((err) => {
|
|
623
|
+
console.error(`\n${red('Error:')} ${err.message || err}`);
|
|
624
|
+
process.exit(1);
|
|
625
|
+
});
|
|
@@ -26,6 +26,7 @@ def _print(payload: Dict[str, Any]) -> None:
|
|
|
26
26
|
async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
|
|
27
27
|
payload = json.loads(args.payload)
|
|
28
28
|
output_root = payload.get("output_root") or str(Path.home() / ".vesper" / "data" / "assets")
|
|
29
|
+
output_dir = payload.get("output_dir")
|
|
29
30
|
workers = int(payload.get("workers") or 8)
|
|
30
31
|
recipes_dir = payload.get("recipes_dir")
|
|
31
32
|
|
|
@@ -43,6 +44,7 @@ async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
|
|
|
43
44
|
kaggle_ref=payload.get("kaggle_ref"),
|
|
44
45
|
urls=payload.get("urls"),
|
|
45
46
|
output_format=payload.get("output_format", "webdataset"),
|
|
47
|
+
output_dir=str(output_dir) if output_dir else None,
|
|
46
48
|
max_items=payload.get("max_items"),
|
|
47
49
|
image_column=payload.get("image_column"),
|
|
48
50
|
)
|
|
@@ -191,6 +191,7 @@ class AssetDownloader:
|
|
|
191
191
|
kaggle_ref: Optional[str] = None,
|
|
192
192
|
urls: Optional[List[str]] = None,
|
|
193
193
|
output_format: str = "webdataset",
|
|
194
|
+
output_dir: Optional[str] = None,
|
|
194
195
|
max_items: Optional[int] = None,
|
|
195
196
|
image_column: Optional[str] = None,
|
|
196
197
|
) -> Dict[str, Any]:
|
|
@@ -231,7 +232,10 @@ class AssetDownloader:
|
|
|
231
232
|
raise ValueError("urls are required for source=url")
|
|
232
233
|
|
|
233
234
|
# --- Now safe to create directories ---
|
|
234
|
-
|
|
235
|
+
if output_dir:
|
|
236
|
+
dataset_dir = Path(output_dir).expanduser().resolve()
|
|
237
|
+
else:
|
|
238
|
+
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
235
239
|
images_dir = dataset_dir / "images"
|
|
236
240
|
dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
237
241
|
images_dir.mkdir(parents=True, exist_ok=True)
|