@myvillage/cli 1.3.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1379 @@
1
+ import chalk from 'chalk';
2
+ import inquirer from 'inquirer';
3
+ import axios from 'axios';
4
+ import pLimit from 'p-limit';
5
+ import { spawn } from 'child_process';
6
+ import { createInterface } from 'readline';
7
+ import { createHash } from 'crypto';
8
+ import {
9
+ existsSync,
10
+ mkdirSync,
11
+ readFileSync,
12
+ writeFileSync,
13
+ readdirSync,
14
+ statSync,
15
+ createReadStream,
16
+ createWriteStream,
17
+ } from 'fs';
18
+ import { join, resolve, relative, extname, basename } from 'path';
19
+ import { pipeline } from 'stream/promises';
20
+ import { parse as parseYaml } from 'yaml';
21
+ import { createRequire } from 'module';
22
+ import { isAuthenticated } from '../utils/auth.js';
23
+ import { brand, villageSpinner, success, error, info, header, stripAnsi } from '../utils/brand.js';
24
+ import { relativeTime, truncate } from '../utils/formatters.js';
25
+ import {
26
+ listDatasets,
27
+ getDataset,
28
+ getDatasetDownload,
29
+ createJob,
30
+ listJobs,
31
+ getJob,
32
+ updateJobStatus,
33
+ completeJob,
34
+ failJob,
35
+ listModels,
36
+ getModel,
37
+ publishModel,
38
+ ingestText,
39
+ ingestStructured,
40
+ prepareIngestion,
41
+ completeIngestion,
42
+ getUploadUrls,
43
+ getScriptsManifest,
44
+ } from '../utils/soulprint-api.js';
45
+ import {
46
+ getSoulprintDir,
47
+ getDatasetsDir,
48
+ getModelsDir,
49
+ getConfigsDir,
50
+ getLogsDir,
51
+ getVenvDir,
52
+ getScriptsDir,
53
+ isWorkspaceInitialized,
54
+ readWorkspaceConfig,
55
+ writeWorkspaceConfig,
56
+ getLocalDatasetDir,
57
+ isDatasetDownloaded,
58
+ getPythonPath,
59
+ isPythonAvailable,
60
+ getPythonVersion,
61
+ detectGPU,
62
+ getMachineInfo,
63
+ getJobOutputDir,
64
+ getJobLogFile,
65
+ appendJobLog,
66
+ } from '../utils/soulprint-workspace.js';
67
+
68
+ const require = createRequire(import.meta.url);
69
+ const { version: cliVersion } = require('../../package.json');
70
+
71
+ // ── Helpers ────────────────────────────────────────────
72
+
73
/**
 * Right-pad a value with spaces so its visible width reaches `len`.
 *
 * Width is measured on the result of `stripAnsi`, so styled strings line up
 * in table columns. Values already at or beyond `len` are returned as-is
 * (stringified, never truncated).
 *
 * @param {*} str - Value to pad; coerced with `String()`.
 * @param {number} len - Target visible width.
 * @returns {string} The padded string.
 */
function padRight(str, len) {
  const text = String(str);
  const missing = len - stripAnsi(text).length;
  return missing > 0 ? text + ' '.repeat(missing) : text;
}
78
+
79
/**
 * Colorize a dataset/job/model status label for terminal output.
 *
 * Statuses are grouped by the color function applied to them; any status
 * that is not listed falls back to `brand.teal`.
 *
 * @param {string} status - Upper-case status name (e.g. `'READY'`).
 * @returns {string} The status passed through its color function.
 */
function formatStatus(status) {
  const groups = [
    [chalk.cyan, ['COLLECTING', 'DOWNLOADING', 'PREPARING']],
    [chalk.yellow, ['TRAINING', 'EVALUATING']],
    [chalk.red, ['FAILED', 'REJECTED']],
    [brand.green, ['READY', 'COMPLETED', 'VALIDATED', 'PUBLISHED']],
    [brand.teal, ['ARCHIVED', 'CREATED', 'CANCELLED', 'DRAFT']],
  ];
  // Flatten the groups into a plain status -> color-function lookup.
  const colorByStatus = Object.fromEntries(
    groups.flatMap(([paint, names]) => names.map((name) => [name, paint])),
  );
  const paint = colorByStatus[status] || brand.teal;
  return paint(status);
}
100
+
101
/**
 * Format a byte count as a short human-readable string (B / KB / MB / GB,
 * binary multiples, one decimal place above bytes).
 *
 * Missing or non-numeric input (`null`, `undefined`, `NaN`, `Infinity`)
 * yields the placeholder `'--'` instead of output such as "NaN GB".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Formatted size, or `'--'` when the size is unknown.
 */
function formatBytes(bytes) {
  if (bytes == null || !Number.isFinite(Number(bytes))) return '--';
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1048576) return `${(bytes / 1024).toFixed(1)} KB`;
  if (bytes < 1073741824) return `${(bytes / 1048576).toFixed(1)} MB`;
  return `${(bytes / 1073741824).toFixed(1)} GB`;
}
107
+
108
/**
 * Render a fractional progress value as a whole-number percentage.
 *
 * @param {number|null|undefined} progress - Fraction where 1 means 100%.
 * @returns {string} e.g. `'42%'`, or a teal `'--'` when progress is null/undefined.
 */
function formatProgress(progress) {
  // Loose equality on purpose: matches both null and undefined.
  return progress == null
    ? brand.teal('--')
    : `${Math.round(progress * 100)}%`;
}
113
+
114
// File extensions (lower-case, with leading dot) accepted for each data type.
// The keys double as the list of valid `--type` values.
const TYPE_EXTENSIONS = {
  text: [
    '.json',
    '.jsonl',
  ],
  image: [
    '.png',
    '.jpg',
    '.jpeg',
    '.webp',
    '.bmp',
    '.gif',
  ],
  audio: [
    '.wav',
    '.mp3',
    '.flac',
    '.ogg',
    '.m4a',
  ],
  structured: [
    '.json',
    '.jsonl',
    '.csv',
  ],
  multimodal: [
    '.json',
    '.jsonl',
  ],
};
121
+
122
// Lower-case file extension -> MIME type reported for that file.
// Lookups that miss fall back to 'application/octet-stream' at the call sites.
const MIME_TYPES = {
  // Images
  '.png': 'image/png',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.webp': 'image/webp',
  '.bmp': 'image/bmp',
  '.gif': 'image/gif',

  // Audio
  '.wav': 'audio/wav',
  '.mp3': 'audio/mpeg',
  '.flac': 'audio/flac',
  '.ogg': 'audio/ogg',
  '.m4a': 'audio/mp4',

  // Text / structured data
  '.json': 'application/json',
  '.jsonl': 'application/jsonl',
  '.csv': 'text/csv',
};
138
+
139
// Training methods selectable for each data type. The train command validates
// `--method` against this table and uses it to build the interactive prompt.
const VALID_METHODS = {
  text: [
    'openai-finetune',
    'full-finetune',
    'lora',
  ],
  image: [
    'lora',
    'dreambooth',
  ],
  audio: [
    'whisper-finetune',
    'wav2vec-finetune',
  ],
  structured: [
    'llm-finetune',
    'gradient-boost',
    'sklearn-pipeline',
  ],
  multimodal: [
    'clip-finetune',
  ],
};
146
+
147
// Training method -> script path relative to the workspace scripts directory.
// 'lora' exists for more than one data type, so it is keyed as `lora:<type>`.
const METHOD_SCRIPTS = {
  // Text
  'openai-finetune': 'text/finetune_openai.py',
  'full-finetune': 'text/finetune_local.py',
  'lora:text': 'text/finetune_local.py',

  // Image
  'lora:image': 'image/lora_sdxl.py',
  dreambooth: 'image/dreambooth.py',

  // Audio
  'whisper-finetune': 'audio/whisper_finetune.py',
  'wav2vec-finetune': 'audio/wav2vec_finetune.py',

  // Structured
  'llm-finetune': 'structured/llm_classifier.py',
  'gradient-boost': 'structured/tabular_train.py',
  'sklearn-pipeline': 'structured/tabular_train.py',

  // Multimodal
  'clip-finetune': 'multimodal/clip_finetune.py',
};
160
+
161
/**
 * List the files under `dir` whose (lower-cased) extension is in `extensions`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} extensions - Accepted extensions, lower-case with leading dot.
 * @param {boolean} [recursive=false] - Descend into sub-directories when true.
 * @returns {{filename: string, fullPath: string, relativePath: string, size: number, mimeType: string}[]}
 *   One descriptor per matching file, in directory-listing order.
 */
function collectFiles(dir, extensions, recursive = false) {
  const matches = [];

  const visit = (folder) => {
    for (const entry of readdirSync(folder, { withFileTypes: true })) {
      const fullPath = join(folder, entry.name);

      if (recursive && entry.isDirectory()) {
        visit(fullPath);
        continue;
      }
      if (!entry.isFile()) continue;

      const ext = extname(entry.name).toLowerCase();
      if (!extensions.includes(ext)) continue;

      const { size } = statSync(fullPath);
      matches.push({
        filename: entry.name,
        fullPath,
        relativePath: relative(dir, fullPath),
        size,
        mimeType: MIME_TYPES[ext] || 'application/octet-stream',
      });
    }
  };

  visit(dir);
  return matches;
}
187
+
188
/**
 * Stream a file through SHA-256 without loading it fully into memory.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Digest formatted as `sha256:<hex>`; rejects if
 *   the file cannot be read.
 */
async function computeSHA256(filePath) {
  const hasher = createHash('sha256');
  // Async iteration consumes the stream chunk by chunk and surfaces read
  // errors as a rejection of this function's promise.
  for await (const chunk of createReadStream(filePath)) {
    hasher.update(chunk);
  }
  return `sha256:${hasher.digest('hex')}`;
}
197
+
198
/**
 * Recursively list every regular file beneath `dir`.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {{path: string, fullPath: string, size: number}[]} One entry per
 *   file; `path` is relative to `dir`. Entries that are neither directories
 *   nor regular files are skipped.
 */
function scanArtifacts(dir) {
  const listFolder = (folder) =>
    readdirSync(folder, { withFileTypes: true }).flatMap((entry) => {
      const fullPath = join(folder, entry.name);
      if (entry.isDirectory()) {
        return listFolder(fullPath);
      }
      if (!entry.isFile()) {
        return [];
      }
      const { size } = statSync(fullPath);
      return [{ path: relative(dir, fullPath), fullPath, size }];
    });

  return listFolder(dir);
}
219
+
220
+ // ── AUTH GUARD ──────────────────────────────────────────
221
+
222
/**
 * Guard for commands that need a logged-in user.
 *
 * @returns {boolean} `true` when `isAuthenticated()` passes; otherwise prints
 *   a login hint and returns `false` so the caller can bail out.
 */
function requireAuth() {
  if (isAuthenticated()) {
    return true;
  }
  const hint = brand.gold("'myvillage login'");
  console.log(chalk.red(` \u2717 Authentication required. Run ${hint} first.\n`));
  return false;
}
229
+
230
/**
 * Guard for commands that need an initialized SoulPrint workspace.
 *
 * @returns {boolean} `true` when `isWorkspaceInitialized()` passes; otherwise
 *   prints an init hint and returns `false` so the caller can bail out.
 */
function requireWorkspace() {
  if (isWorkspaceInitialized()) {
    return true;
  }
  const hint = brand.gold("'myvillage soulprint init'");
  console.log(chalk.red(` \u2717 SoulPrint workspace not initialized. Run ${hint} first.\n`));
  return false;
}
237
+
238
+ // ════════════════════════════════════════════════════════
239
+ // INIT
240
+ // ════════════════════════════════════════════════════════
241
+
242
/**
 * `soulprint init` — create the local SoulPrint Studio workspace.
 *
 * Steps, in order: confirm re-initialization if a workspace already exists,
 * check for Python and a GPU, create the workspace directories, create a
 * Python virtual environment (unless `options.skipPython`), download the
 * training scripts listed in the remote manifest (unless
 * `options.skipScripts`), and write the workspace config.
 *
 * A failed venv creation aborts the command; a failed script download only
 * produces a warning. All errors are reported to the console, none are thrown.
 *
 * @param {{skipPython?: boolean, skipScripts?: boolean}} options - CLI flags.
 * @returns {Promise<void>}
 */
export async function soulprintInitCommand(options) {
  if (!requireAuth()) return;

  try {
    if (isWorkspaceInitialized()) {
      const { reinit } = await inquirer.prompt([{
        type: 'confirm',
        name: 'reinit',
        message: 'SoulPrint workspace already exists. Reinitialize?',
        default: false,
      }]);
      if (!reinit) {
        info('Cancelled.\n');
        return;
      }
    }

    header('Initializing SoulPrint Studio workspace');

    // Check prerequisites
    console.log(' Checking prerequisites:');
    const pythonAvailable = isPythonAvailable();
    const pythonVer = getPythonVersion();
    if (pythonAvailable) {
      success(`${pythonVer} found`);
    } else {
      error('Python 3.10+ required. Install it and try again.\n');
      return;
    }

    const gpu = detectGPU();
    if (gpu.type === 'cuda') {
      success(`CUDA detected (${gpu.name}, ${gpu.vram})`);
    } else if (gpu.type === 'mps') {
      success(`MPS available (${gpu.name}, ${gpu.vram})`);
    } else {
      info('\u2717 No GPU detected (CPU-only training available)');
    }

    console.log();

    // Create directory tree
    const spDir = getSoulprintDir();
    const dirs = [
      getDatasetsDir(),
      getModelsDir(),
      getConfigsDir(),
      getLogsDir(),
      getScriptsDir(),
    ];

    console.log(` Creating workspace at ${brand.teal(spDir)}/`);
    for (const dir of dirs) {
      mkdirSync(dir, { recursive: true });
      success(basename(dir) + '/');
    }

    console.log();

    // Python venv
    if (!options.skipPython) {
      const venvDir = getVenvDir();
      const spinner = villageSpinner('Creating Python virtual environment...').start();
      try {
        await new Promise((res, rej) => {
          const proc = spawn('python3', ['-m', 'venv', venvDir], { stdio: 'pipe' });
          proc.on('close', (code) => code === 0 ? res() : rej(new Error(`venv creation failed (exit ${code})`)));
          proc.on('error', rej);
        });
        spinner.succeed('Python environment created at ' + brand.teal(venvDir));
      } catch (err) {
        spinner.fail(`Python venv creation failed: ${err.message}`);
        info('You can skip this with --skip-python and install dependencies manually.\n');
        return;
      }
    } else {
      info('Skipping Python venv setup (--skip-python)');
    }

    // Download training scripts
    if (!options.skipScripts) {
      const spinner = villageSpinner('Downloading training scripts...').start();
      try {
        const manifest = await getScriptsManifest();
        const scriptsRoot = resolve(getScriptsDir());

        // Manifest paths come from the network: refuse anything that would
        // resolve to the scripts directory itself or to a location outside it
        // (e.g. "../../.bashrc" or an absolute path).
        const resolveInsideScripts = (relPath) => {
          const target = resolve(scriptsRoot, relPath);
          const rel = relative(scriptsRoot, target);
          const escapes = rel === '' || rel.split(/[\\/]/)[0] === '..' || resolve(rel) === rel;
          if (escapes) {
            throw new Error(`Refusing to write script outside workspace: ${relPath}`);
          }
          return target;
        };

        for (const file of manifest.files || []) {
          const dest = resolveInsideScripts(file.path);
          mkdirSync(resolve(dest, '..'), { recursive: true });
          const response = await axios.get(file.url, { responseType: 'arraybuffer' });
          writeFileSync(dest, response.data);
        }
        spinner.succeed(`Training scripts v${manifest.version || '1.0.0'} downloaded`);
      } catch (err) {
        // Best effort: the workspace is still usable without the scripts.
        spinner.warn(`Could not download training scripts: ${err.message}`);
        info('You can download them later or place scripts manually in ' + getScriptsDir());
      }
    } else {
      info('Skipping script download (--skip-scripts)');
    }

    // Write workspace config
    writeWorkspaceConfig({
      version: 1,
      initializedAt: new Date().toISOString(),
      cliVersion,
      pythonVersion: pythonVer,
      gpu: { type: gpu.type, name: gpu.name, vram: gpu.vram },
      venvPath: getVenvDir(),
    });

    console.log();
    success('SoulPrint Studio workspace initialized!');
    console.log();
    console.log(' Next steps:');
    console.log(` ${brand.gold('myvillage soulprint datasets list')} Browse available datasets`);
    console.log(` ${brand.gold('myvillage soulprint datasets pull <slug>')} Download a dataset`);
    console.log(` ${brand.gold('myvillage soulprint train --help')} See training options`);
    console.log();
  } catch (err) {
    // inquirer sets isTtyError when it cannot render prompts.
    if (err.isTtyError) {
      error('Prompts cannot be rendered in this environment.\n');
      return;
    }
    error(`Initialization failed: ${err.message}\n`);
  }
}
369
+
370
+ // ════════════════════════════════════════════════════════
371
+ // INGEST
372
+ // ════════════════════════════════════════════════════════
373
+
374
/**
 * Read a captions/transcriptions companion file.
 *
 * The file is treated as JSON when it parses to an object or array;
 * otherwise (invalid JSON, or a bare JSON primitive/null) it is read as one
 * entry per non-blank line.
 *
 * @param {string} filePath - Path of an existing companion file.
 * @returns {object|string[]} Parsed JSON value or the list of lines.
 */
function loadCompanionEntries(filePath) {
  const raw = readFileSync(filePath, 'utf-8');
  try {
    const parsed = JSON.parse(raw);
    if (parsed !== null && typeof parsed === 'object') return parsed;
  } catch {
    // Not JSON: fall through to the line-based format.
  }
  return raw.split('\n').filter((line) => line.trim());
}

/**
 * `soulprint ingest <path>` — validate local files and send them to a dataset.
 *
 * Resolves the files at `path` (a single file or a directory scan), loads
 * optional caption/transcription companion files, checks that the target
 * dataset has the same data type and is in COLLECTING status, then hands off
 * to `ingestInlineData` (text/structured) or `ingestFileData` (everything
 * else). With `options.dryRun` only a summary is printed.
 *
 * Errors are reported to the console; nothing is thrown to the caller.
 *
 * @param {string} path - File or directory to ingest.
 * @param {object} options - CLI options; this function reads `type`,
 *   `dataset`, `recursive`, `glob`, `captions`, `transcriptions` and `dryRun`,
 *   and forwards the object to the ingest helpers.
 * @returns {Promise<void>}
 */
export async function soulprintIngestCommand(path, options) {
  if (!requireAuth()) return;

  try {
    const dataType = options.type?.toUpperCase();
    const typeKey = options.type?.toLowerCase();
    if (!TYPE_EXTENSIONS[typeKey]) {
      error(`Invalid type "${options.type}". Must be: text, image, audio, structured, multimodal\n`);
      return;
    }

    const resolvedPath = resolve(path);
    if (!existsSync(resolvedPath)) {
      error(`Path not found: ${resolvedPath}\n`);
      return;
    }

    // Resolve files
    const extensions = TYPE_EXTENSIONS[typeKey];
    let files;
    const stat = statSync(resolvedPath);
    if (stat.isDirectory()) {
      console.log(`\n Scanning ${brand.teal(resolvedPath)} for ${typeKey} files...`);
      files = collectFiles(resolvedPath, extensions, !!options.recursive);
      if (options.glob) {
        // Only suffix-style patterns such as "*.png" are supported.
        const pattern = options.glob.toLowerCase();
        files = files.filter(f => f.filename.toLowerCase().endsWith(pattern.replace('*', '')));
      }
    } else {
      // Apply the same extension filter as the directory scan so a single
      // file of the wrong kind is rejected instead of being sent as-is.
      const ext = extname(resolvedPath).toLowerCase();
      files = extensions.includes(ext)
        ? [{
          filename: basename(resolvedPath),
          fullPath: resolvedPath,
          relativePath: basename(resolvedPath),
          size: stat.size,
          mimeType: MIME_TYPES[ext] || 'application/octet-stream',
        }]
        : [];
    }

    if (files.length === 0) {
      error(`No ${typeKey} files found. Expected: ${extensions.join(', ')}\n`);
      return;
    }

    const totalSize = files.reduce((sum, f) => sum + f.size, 0);
    info(`Found ${files.length} files (${formatBytes(totalSize)})`);

    // Load companion metadata
    let captions = null;
    if (options.captions) {
      const captionsPath = resolve(options.captions);
      if (!existsSync(captionsPath)) {
        error(`Captions file not found: ${captionsPath}\n`);
        return;
      }
      captions = loadCompanionEntries(captionsPath); // JSON map { filename: caption } or lines
      const count = Array.isArray(captions) ? captions.length : Object.keys(captions).length;
      info(`Captions loaded: ${count} entries from ${basename(captionsPath)}`);
    }

    let transcriptions = null;
    if (options.transcriptions) {
      const transPath = resolve(options.transcriptions);
      if (!existsSync(transPath)) {
        error(`Transcriptions file not found: ${transPath}\n`);
        return;
      }
      transcriptions = loadCompanionEntries(transPath);
    }

    // Validate dataset
    const spinner = villageSpinner('Validating target dataset...').start();
    let dataset;
    try {
      dataset = await getDataset(options.dataset);
      spinner.stop();
    } catch (err) {
      spinner.fail(`Dataset "${options.dataset}" not found.`);
      info(`Run ${brand.gold("'myvillage soulprint datasets list'")} to see available datasets.\n`);
      return;
    }

    if (dataset.dataType !== dataType) {
      error(`Dataset "${options.dataset}" is type ${dataset.dataType}, but --type ${typeKey} was specified.\n`);
      return;
    }
    if (dataset.status !== 'COLLECTING') {
      error(`Dataset "${options.dataset}" is ${dataset.status}. Only COLLECTING datasets accept new data.\n`);
      return;
    }

    console.log(`\n Target dataset: ${brand.gold(dataset.slug)} (${dataset.dataType}, ${dataset.status})\n`);

    // Dry run
    if (options.dryRun) {
      console.log(brand.teal(' DRY RUN \u2014 nothing will be uploaded\n'));
      console.log(` Files found: ${files.length}`);
      console.log(` Total size: ${formatBytes(totalSize)}`);
      const extCounts = {};
      for (const f of files) {
        const ext = extname(f.filename).toLowerCase();
        extCounts[ext] = (extCounts[ext] || 0) + 1;
      }
      console.log(` Extensions: ${Object.entries(extCounts).map(([e, n]) => `${e} (${n})`).join(', ')}`);
      console.log();
      return;
    }

    // Route by type: text/structured items are sent inline, everything else
    // goes through the prepare/upload/complete file flow.
    if (typeKey === 'text' || typeKey === 'structured') {
      await ingestInlineData(files, typeKey, options, dataset);
    } else {
      await ingestFileData(files, typeKey, options, dataset, captions, transcriptions);
    }
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    error(`Ingestion failed: ${message}\n`);
  }
}
502
+
503
/**
 * Parse text/structured files into items and send them in batches.
 *
 * `.jsonl` yields one item per non-blank line, `.json` yields the array
 * elements (or the single value), `.csv` yields one object per data row keyed
 * by the header row (plain comma splitting; quoted fields are not handled).
 * Files with any other extension contribute no items. A CSV without data rows
 * is skipped without affecting the remaining files.
 *
 * Items are sent 1000 at a time through `ingestText` (when `typeKey` is
 * `'text'`) or `ingestStructured`, with a progress bar on stdout.
 *
 * @param {{fullPath: string, filename: string}[]} files - Files to read.
 * @param {string} typeKey - `'text'` or `'structured'`.
 * @param {{source?: string}} options - CLI options; `source` defaults to `'CLI'`.
 * @param {{slug: string}} dataset - Target dataset.
 * @returns {Promise<void>}
 * @throws {Error} When a file contains invalid JSON (message names the file)
 *   or an ingest call rejects.
 */
async function ingestInlineData(files, typeKey, options, dataset) {
  // Read and parse items from files. Items are pushed one at a time:
  // `push(...hugeArray)` can exceed the engine's argument limit.
  const allItems = [];
  for (const file of files) {
    const raw = readFileSync(file.fullPath, 'utf-8');
    const ext = extname(file.filename).toLowerCase();

    if (ext === '.jsonl') {
      raw.split('\n').forEach((line, idx) => {
        if (!line.trim()) return;
        try {
          allItems.push(JSON.parse(line));
        } catch (err) {
          throw new Error(`${file.filename}:${idx + 1}: invalid JSON (${err.message})`, { cause: err });
        }
      });
    } else if (ext === '.json') {
      let parsed;
      try {
        parsed = JSON.parse(raw);
      } catch (err) {
        throw new Error(`${file.filename}: invalid JSON (${err.message})`, { cause: err });
      }
      for (const item of Array.isArray(parsed) ? parsed : [parsed]) {
        allItems.push(item);
      }
    } else if (ext === '.csv') {
      const lines = raw.split('\n').filter(l => l.trim());
      // Header-only or empty CSV: nothing to add, but keep processing the
      // remaining files instead of abandoning the whole ingestion.
      if (lines.length < 2) continue;
      const headers = lines[0].split(',').map(h => h.trim());
      for (let i = 1; i < lines.length; i++) {
        const values = lines[i].split(',').map(v => v.trim());
        const row = {};
        headers.forEach((h, idx) => { row[h] = values[idx]; });
        allItems.push(row);
      }
    }
  }

  console.log(` Ingesting ${allItems.length} items...\n`);

  const batchSize = 1000;
  let ingested = 0;
  for (let i = 0; i < allItems.length; i += batchSize) {
    const batch = allItems.slice(i, i + batchSize);
    const payload = {
      source: options.source || 'CLI',
      dataset: dataset.slug,
      items: batch,
    };
    if (typeKey === 'text') {
      await ingestText(payload);
    } else {
      await ingestStructured(payload);
    }
    ingested += batch.length;
    // 40-cell bar: each cell represents 2.5%.
    const pct = Math.round((ingested / allItems.length) * 100);
    process.stdout.write(`\r [${'\u2588'.repeat(Math.floor(pct / 2.5))}${'\u2591'.repeat(40 - Math.floor(pct / 2.5))}] ${pct}% (${ingested.toLocaleString()}/${allItems.length.toLocaleString()})`);
  }

  console.log();
  console.log();
  success(`${allItems.length.toLocaleString()} items ingested into ${dataset.slug}`);
  console.log(`\n View in Studio: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/datasets/${dataset.slug}`)}\n`);
}
554
+
555
/**
 * Upload binary files (image/audio/multimodal) for ingestion.
 *
 * Flow: register the ingestion with `prepareIngestion`, PUT each file to the
 * upload URL at the same index in `preparation.uploadUrls` (bounded
 * concurrency, up to 3 attempts per file with 2s/4s back-off), then call
 * `completeIngestion` with the per-file SHA-256 checksums.
 *
 * @param {{filename: string, fullPath: string, size: number, mimeType: string}[]} files
 * @param {string} typeKey - Lower-case data type; sent upper-cased as `dataType`.
 * @param {{source?: string, concurrency?: string|number}} options - CLI options;
 *   `concurrency` falls back to 5 when missing, non-numeric or not positive.
 * @param {{slug: string}} dataset - Target dataset.
 * @param {object|string[]|null} captions - Array (used as-is) or
 *   filename -> caption map (aligned to `files`, missing entries become null).
 * @param {object|string[]|null} transcriptions - Same shape as `captions`.
 * @returns {Promise<void>}
 * @throws {Error} When the API calls or an upload fail, or when the number of
 *   upload URLs does not match the number of files.
 */
async function ingestFileData(files, typeKey, options, dataset, captions, transcriptions) {
  const requested = Number.parseInt(options.concurrency, 10);
  const concurrency = requested > 0 ? requested : 5;
  const limit = pLimit(concurrency);

  // Build metadata
  const alignToFiles = (entries) =>
    (Array.isArray(entries) ? entries : files.map(f => entries[f.filename] || null));
  const metadata = {};
  if (captions) metadata.captions = alignToFiles(captions);
  if (transcriptions) metadata.transcriptions = alignToFiles(transcriptions);

  const spinner = villageSpinner('Preparing ingestion...').start();
  let preparation;
  try {
    preparation = await prepareIngestion({
      source: options.source || 'CLI',
      dataType: typeKey.toUpperCase(),
      dataset: dataset.slug,
      files: files.map(f => ({ filename: f.filename, size: f.size, mimeType: f.mimeType })),
      metadata,
    });
  } catch (err) {
    // Stop the spinner so it does not keep animating over the caller's
    // error message; the caller reports the failure.
    spinner.stop();
    throw err;
  }
  spinner.succeed(`Ingestion ${preparation.ingestionId} created`);

  // Upload files
  const totalFiles = files.length;
  const totalBytes = files.reduce((sum, f) => sum + f.size, 0);
  let uploadedFiles = 0;

  // URLs are paired with files by index, so the two lists must line up.
  // NOTE(review): assumes the API returns URLs in request order — confirm.
  const uploadUrls = preparation.uploadUrls;
  if (!Array.isArray(uploadUrls) || uploadUrls.length !== totalFiles) {
    const received = Array.isArray(uploadUrls) ? uploadUrls.length : 0;
    throw new Error(`Expected ${totalFiles} upload URLs but received ${received}`);
  }

  console.log(`\n Uploading ${totalFiles} files to S3...`);

  const checksums = [];
  const MAX_ATTEMPTS = 3;

  const uploads = uploadUrls.map((urlInfo, i) =>
    limit(async () => {
      const file = files[i];
      const checksum = await computeSHA256(file.fullPath);
      checksums.push({ filename: file.filename, checksum });

      for (let attempt = 1; ; attempt++) {
        try {
          // A fresh read stream per attempt: a consumed stream cannot be re-sent.
          await axios.put(urlInfo.uploadUrl, createReadStream(file.fullPath), {
            headers: {
              'Content-Type': file.mimeType || 'application/octet-stream',
              'Content-Length': file.size,
            },
            maxBodyLength: Infinity,
          });
          break;
        } catch (err) {
          if (attempt >= MAX_ATTEMPTS) throw err;
          // Exponential back-off: 2s after the first failure, 4s after the second.
          await new Promise(r => setTimeout(r, 1000 * Math.pow(2, attempt)));
        }
      }

      uploadedFiles++;
      const pct = Math.round((uploadedFiles / totalFiles) * 100);
      process.stdout.write(`\r [${'\u2588'.repeat(Math.floor(pct / 2.5))}${'\u2591'.repeat(40 - Math.floor(pct / 2.5))}] ${pct}% (${uploadedFiles}/${totalFiles}) ${formatBytes(totalBytes)}`);
    })
  );

  await Promise.all(uploads);
  console.log();

  // Finalize
  const finalSpinner = villageSpinner('Finalizing...').start();
  let result;
  try {
    result = await completeIngestion(preparation.ingestionId, { checksums });
  } catch (err) {
    finalSpinner.stop();
    throw err;
  }
  finalSpinner.succeed(`${result.itemCount} items added to ${dataset.slug}`);
  console.log(`\n View in Studio: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/datasets/${dataset.slug}`)}\n`);
}
635
+
636
+ // ════════════════════════════════════════════════════════
637
+ // DATASETS LIST
638
+ // ════════════════════════════════════════════════════════
639
+
640
/**
 * `soulprint datasets list` — print the datasets returned by `listDatasets`.
 *
 * Optional `options.type` / `options.status` are upper-cased and sent as the
 * `dataType` / `status` filters. With `options.json` the raw API result is
 * printed as JSON; otherwise a five-column table is rendered.
 *
 * @param {{type?: string, status?: string, json?: boolean}} options - CLI options.
 * @returns {Promise<void>} Failures are reported through the spinner.
 */
export async function soulprintDatasetListCommand(options) {
  if (!requireAuth()) return;

  const spinner = villageSpinner('Loading datasets...').start();
  try {
    const params = {
      ...(options.type ? { dataType: options.type.toUpperCase() } : {}),
      ...(options.status ? { status: options.status.toUpperCase() } : {}),
    };

    const result = await listDatasets(params);
    spinner.stop();

    if (options.json) {
      console.log(JSON.stringify(result, null, 2));
      return;
    }

    // The list may be under `datasets`, under `data`, or be the result itself.
    const datasets = result.datasets || result.data || result;
    const hasRows = Array.isArray(datasets) && datasets.length > 0;
    if (!hasRows) {
      info('\n No datasets found.\n');
      return;
    }

    header('Datasets on SoulPrint Studio');

    // Table header
    const plainColumns = [['Type', 14], ['Status', 14], ['Items', 10], ['Version', 10]];
    const headingCells = plainColumns.map(([label, width]) => padRight(label, width)).join('');
    console.log(` ${brand.teal(padRight('Name', 28))}${headingCells}`);

    const rule = [26, 12, 12, 8, 8].map((width) => '\u2500'.repeat(width)).join(' ');
    console.log(brand.darkGold(` ${rule}`));

    datasets.forEach((ds) => {
      const cells = [
        padRight(ds.dataType, 14),
        padRight(formatStatus(ds.status), 14),
        padRight(ds.itemCount?.toLocaleString() || '0', 10),
        padRight(`v${ds.currentVersion || 1}`, 10),
      ];
      const name = padRight(truncate(ds.name || ds.slug, 26), 28);
      console.log(` ${brand.gold(name)}${cells.join('')}`);
    });
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load datasets: ${message}`);
  }
}
685
+
686
+ // ════════════════════════════════════════════════════════
687
+ // DATASETS PULL
688
+ // ════════════════════════════════════════════════════════
689
+
690
/**
 * `soulprint datasets pull <slug>` — download a dataset version into the
 * local workspace.
 *
 * Fetches download info for the dataset, saves the remote manifest when one
 * is provided, downloads every listed file (5 at a time) into the local
 * dataset directory, writes a minimal `manifest.json` if none exists, and
 * prints per-split entry counts for `train/`, `validation/` and `test/`.
 *
 * Errors are reported to the console; nothing is thrown to the caller.
 *
 * @param {string} slug - Dataset slug.
 * @param {{version?: string|number, force?: boolean}} options - CLI options.
 *   When `version` is given and already downloaded, the pull is skipped
 *   unless `force` is set.
 * @returns {Promise<void>}
 */
export async function soulprintDatasetPullCommand(slug, options) {
  if (!requireAuth()) return;
  if (!requireWorkspace()) return;

  try {
    const version = options.version ? Number.parseInt(options.version, 10) : undefined;

    // Check if already downloaded
    if (version && isDatasetDownloaded(slug, version) && !options.force) {
      info(`Dataset ${slug} v${version} is already downloaded locally.`);
      info(`Use --force to re-download.\n`);
      return;
    }

    const spinner = villageSpinner(`Fetching download info for ${slug}...`).start();
    let downloadInfo;
    try {
      downloadInfo = await getDatasetDownload(slug, version);
    } finally {
      // Stop on failure too, so the spinner does not keep animating over
      // the error message printed by the outer catch.
      spinner.stop();
    }

    const ver = downloadInfo.version;
    const itemCount = downloadInfo.itemCount;
    const totalSize = downloadInfo.totalSizeBytes;
    const sizeLabel = totalSize == null ? '--' : formatBytes(totalSize);

    console.log(`\n Pulling dataset: ${brand.gold(`${slug} v${ver}`)}`);
    console.log(` Items: ${itemCount?.toLocaleString() ?? '--'} | Size: ${sizeLabel}\n`);

    const localDir = getLocalDatasetDir(slug, ver);
    mkdirSync(localDir, { recursive: true });

    // Download manifest
    if (downloadInfo.manifest?.url) {
      const manifestResp = await axios.get(downloadInfo.manifest.url, { responseType: 'arraybuffer' });
      writeFileSync(join(localDir, 'manifest.json'), manifestResp.data);
    }

    // Download files
    const urls = downloadInfo.downloadUrls || [];
    if (urls.length > 0) {
      const limit = pLimit(5);
      let downloaded = 0;

      // Item paths come from the network: refuse anything that would resolve
      // to the dataset directory itself or to a location outside it.
      const datasetRoot = resolve(localDir);
      const resolveInsideDataset = (relPath) => {
        const target = resolve(datasetRoot, relPath);
        const rel = relative(datasetRoot, target);
        const escapes = rel === '' || rel.split(/[\\/]/)[0] === '..' || resolve(rel) === rel;
        if (escapes) {
          throw new Error(`Refusing to write outside dataset directory: ${relPath}`);
        }
        return target;
      };

      console.log(` Downloading to ${brand.teal(localDir)}/`);

      const downloads = urls.map(item =>
        limit(async () => {
          const destPath = resolveInsideDataset(item.path);
          mkdirSync(resolve(destPath, '..'), { recursive: true });

          const response = await axios.get(item.url, { responseType: 'stream' });
          await pipeline(response.data, createWriteStream(destPath));

          downloaded++;
          const pct = Math.round((downloaded / urls.length) * 100);
          process.stdout.write(`\r [${'\u2588'.repeat(Math.floor(pct / 2.5))}${'\u2591'.repeat(40 - Math.floor(pct / 2.5))}] ${pct}% (${downloaded.toLocaleString()}/${urls.length.toLocaleString()})`);
        })
      );

      await Promise.all(downloads);
      console.log();
    }

    // Write manifest if not already present
    if (!existsSync(join(localDir, 'manifest.json'))) {
      writeFileSync(join(localDir, 'manifest.json'), JSON.stringify({
        slug, version: ver, itemCount, totalSizeBytes: totalSize, downloadedAt: new Date().toISOString(),
      }, null, 2));
    }

    console.log();
    success(`Dataset ready at: ${localDir}`);

    // Print split counts if available (counts directory entries per split).
    for (const split of ['train', 'validation', 'test']) {
      const splitDir = join(localDir, split);
      if (existsSync(splitDir)) {
        const count = readdirSync(splitDir).length;
        info(`${padRight(split + '/', 16)} ${count} items`);
      }
    }
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    error(`Failed to pull dataset: ${message}\n`);
  }
}
775
+
776
+ // ════════════════════════════════════════════════════════
777
+ // TRAIN
778
+ // ════════════════════════════════════════════════════════
779
+
780
+ export async function soulprintTrainCommand(options) {
781
+ if (!requireAuth()) return;
782
+ if (!requireWorkspace()) return;
783
+
784
+ try {
785
+ const typeKey = options.type?.toLowerCase();
786
+ if (!VALID_METHODS[typeKey]) {
787
+ error(`Invalid type "${options.type}". Must be: text, image, audio, structured, multimodal\n`);
788
+ return;
789
+ }
790
+
791
+ if (options.method && !VALID_METHODS[typeKey].includes(options.method)) {
792
+ error(`Invalid method "${options.method}" for type "${typeKey}".`);
793
+ info(`Valid methods: ${VALID_METHODS[typeKey].join(', ')}\n`);
794
+ return;
795
+ }
796
+
797
+ // If no method specified, prompt
798
+ let method = options.method;
799
+ if (!method) {
800
+ const { selectedMethod } = await inquirer.prompt([{
801
+ type: 'list',
802
+ name: 'selectedMethod',
803
+ message: `Training method for ${typeKey}:`,
804
+ choices: VALID_METHODS[typeKey].map(m => ({ name: m, value: m })),
805
+ }]);
806
+ method = selectedMethod;
807
+ }
808
+
809
+ // Load config YAML if provided
810
+ let config = {};
811
+ if (options.config) {
812
+ const configPath = resolve(options.config);
813
+ if (!existsSync(configPath)) {
814
+ error(`Config file not found: ${configPath}\n`);
815
+ return;
816
+ }
817
+ config = parseYaml(readFileSync(configPath, 'utf-8'));
818
+ }
819
+
820
+ // Fetch dataset info
821
+ const spinner = villageSpinner('Validating dataset...').start();
822
+ let dataset;
823
+ try {
824
+ dataset = await getDataset(options.dataset);
825
+ spinner.stop();
826
+ } catch (err) {
827
+ spinner.fail(`Dataset "${options.dataset}" not found.`);
828
+ return;
829
+ }
830
+
831
+ const datasetVersion = options.version ? parseInt(options.version) : dataset.currentVersion;
832
+
833
+ // Check local download
834
+ if (!isDatasetDownloaded(options.dataset, datasetVersion)) {
835
+ const { pullNow } = await inquirer.prompt([{
836
+ type: 'confirm',
837
+ name: 'pullNow',
838
+ message: `Dataset "${options.dataset}" v${datasetVersion} not found locally. Download it now?`,
839
+ default: true,
840
+ }]);
841
+ if (pullNow) {
842
+ await soulprintDatasetPullCommand(options.dataset, { version: String(datasetVersion) });
843
+ } else {
844
+ info('Cannot train without a local dataset copy.\n');
845
+ return;
846
+ }
847
+ }
848
+
849
+ const localDatasetDir = getLocalDatasetDir(options.dataset, datasetVersion);
850
+ const baseModel = options.base || config.baseModel || 'default';
851
+ const jobName = options.name || `${dataset.name} ${method} v${datasetVersion}`;
852
+
853
+ // Dry run
854
+ if (options.dryRun) {
855
+ header('Training Configuration (Dry Run)');
856
+ console.log(` Type: ${typeKey}`);
857
+ console.log(` Dataset: ${options.dataset} v${datasetVersion}`);
858
+ console.log(` Base model: ${baseModel}`);
859
+ console.log(` Method: ${method}`);
860
+ console.log(` Dataset dir: ${localDatasetDir}`);
861
+ if (Object.keys(config).length > 0) {
862
+ console.log(` Config: ${JSON.stringify(config, null, 2).split('\n').join('\n ')}`);
863
+ }
864
+ console.log();
865
+ info('Dry run complete. No job was created.\n');
866
+ return;
867
+ }
868
+
869
+ // Create job on Studio
870
+ const machineInfo = getMachineInfo();
871
+ const createSpinner = villageSpinner('Creating training job on SoulPrint Studio...').start();
872
+ const jobResult = await createJob({
873
+ name: jobName,
874
+ dataType: typeKey.toUpperCase(),
875
+ datasetSlug: options.dataset,
876
+ datasetVersion,
877
+ baseModel,
878
+ method,
879
+ config,
880
+ machineInfo,
881
+ cliVersion,
882
+ });
883
+ const jobId = jobResult.jobId;
884
+ createSpinner.succeed(`Job registered: ${brand.gold(jobId)}`);
885
+
886
+ // Print training summary
887
+ console.log();
888
+ console.log(` Starting ${brand.gold(method)} training:`);
889
+ console.log(` Base model: ${baseModel}`);
890
+ console.log(` Dataset: ${options.dataset} v${datasetVersion}`);
891
+ console.log(` Method: ${method}`);
892
+ if (config.epochs) console.log(` Epochs: ${config.epochs}`);
893
+ console.log(` Device: ${machineInfo.gpuType} (${machineInfo.gpu})`);
894
+ console.log();
895
+
896
+ // Resolve Python script
897
+ const scriptKey = method === 'lora' ? `lora:${typeKey}` : method;
898
+ const scriptRelPath = METHOD_SCRIPTS[scriptKey];
899
+ if (!scriptRelPath) {
900
+ error(`No training script found for method "${method}"\n`);
901
+ await failJob(jobId, { errorMessage: `No training script for method: ${method}` });
902
+ return;
903
+ }
904
+
905
+ const scriptPath = join(getScriptsDir(), scriptRelPath);
906
+ const pythonPath = getPythonPath() || 'python3';
907
+
908
+ if (!existsSync(scriptPath)) {
909
+ error(`Training script not found: ${scriptPath}`);
910
+ info(`Run ${brand.gold("'myvillage soulprint init'")} to download training scripts.\n`);
911
+ await failJob(jobId, { errorMessage: `Training script not found: ${scriptRelPath}` });
912
+ return;
913
+ }
914
+
915
+ // Update status to PREPARING
916
+ await updateJobStatus(jobId, { status: 'PREPARING' }).catch(() => {});
917
+
918
+ const outputDir = getJobOutputDir(jobId);
919
+ mkdirSync(outputDir, { recursive: true });
920
+
921
+ const scriptArgs = [
922
+ '--dataset-dir', localDatasetDir,
923
+ '--output-dir', outputDir,
924
+ '--base-model', baseModel,
925
+ '--method', method,
926
+ '--config', JSON.stringify(config),
927
+ '--job-id', jobId,
928
+ ];
929
+
930
+ // Spawn Python
931
+ await updateJobStatus(jobId, { status: 'TRAINING' }).catch(() => {});
932
+
933
+ let trainingProcess = null;
934
+ let lastReportTime = 0;
935
+ let lastReportedEpoch = 0;
936
+ const REPORT_INTERVAL_MS = 10_000;
937
+ let finalData = null;
938
+
939
+ // SIGINT handler
940
+ const sigintHandler = async () => {
941
+ if (trainingProcess) {
942
+ console.log('\n Stopping training gracefully...');
943
+ trainingProcess.kill('SIGTERM');
944
+ const deadline = Date.now() + 10000;
945
+ while (Date.now() < deadline && !trainingProcess.killed) {
946
+ await new Promise(r => setTimeout(r, 500));
947
+ }
948
+ if (!trainingProcess.killed) {
949
+ trainingProcess.kill('SIGKILL');
950
+ }
951
+ await updateJobStatus(jobId, { status: 'CANCELLED' }).catch(() => {});
952
+ console.log(' Training cancelled.');
953
+ }
954
+ process.exit(0);
955
+ };
956
+ process.on('SIGINT', sigintHandler);
957
+
958
+ try {
959
+ finalData = await new Promise((resolveP, rejectP) => {
960
+ trainingProcess = spawn(pythonPath, [scriptPath, ...scriptArgs], {
961
+ env: { ...process.env, PYTHONUNBUFFERED: '1' },
962
+ cwd: getSoulprintDir(),
963
+ });
964
+
965
+ const rl = createInterface({ input: trainingProcess.stdout });
966
+ rl.on('line', (line) => {
967
+ try {
968
+ const data = JSON.parse(line);
969
+ if (data.type === 'progress') {
970
+ // Display
971
+ const pct = data.totalEpochs ? Math.round((data.epoch / data.totalEpochs) * 100) : 0;
972
+ process.stdout.write(`\r Epoch ${data.epoch || '?'}/${data.totalEpochs || '?'} ${'━'.repeat(Math.floor(pct / 5))}${'─'.repeat(20 - Math.floor(pct / 5))} loss: ${data.loss?.toFixed(4) || '?'} lr: ${data.lr || '?'} `);
973
+
974
+ appendJobLog(jobId, { type: 'progress', ...data });
975
+
976
+ // Throttle reports to Studio
977
+ const now = Date.now();
978
+ const isEpochBoundary = data.epoch && data.epoch !== lastReportedEpoch;
979
+ if (now - lastReportTime > REPORT_INTERVAL_MS || isEpochBoundary) {
980
+ lastReportTime = now;
981
+ lastReportedEpoch = data.epoch;
982
+ updateJobStatus(jobId, {
983
+ status: 'TRAINING',
984
+ progress: data.totalEpochs ? data.epoch / data.totalEpochs : null,
985
+ currentEpoch: data.epoch,
986
+ totalEpochs: data.totalEpochs,
987
+ currentStep: data.step,
988
+ totalSteps: data.totalSteps,
989
+ metrics: { trainingLoss: data.loss, validationLoss: data.valLoss },
990
+ logs: `Epoch ${data.epoch}/${data.totalEpochs} - loss: ${data.loss}`,
991
+ }).catch(() => {});
992
+ }
993
+ } else if (data.type === 'log') {
994
+ appendJobLog(jobId, data);
995
+ } else if (data.type === 'complete') {
996
+ resolveP(data);
997
+ }
998
+ } catch {
999
+ appendJobLog(jobId, { type: 'stdout', message: line });
1000
+ }
1001
+ });
1002
+
1003
+ let stderr = '';
1004
+ trainingProcess.stderr.on('data', (chunk) => {
1005
+ stderr += chunk.toString();
1006
+ appendJobLog(jobId, { type: 'stderr', message: chunk.toString().trim() });
1007
+ });
1008
+
1009
+ trainingProcess.on('close', (code) => {
1010
+ if (code !== 0) {
1011
+ rejectP(new Error(stderr || `Training process exited with code ${code}`));
1012
+ }
1013
+ });
1014
+
1015
+ trainingProcess.on('error', (err) => {
1016
+ rejectP(new Error(`Failed to start training: ${err.message}`));
1017
+ });
1018
+ });
1019
+ } catch (err) {
1020
+ process.removeListener('SIGINT', sigintHandler);
1021
+ console.log();
1022
+ error(`Training failed: ${err.message}`);
1023
+ await failJob(jobId, { errorMessage: err.message }).catch(() => {});
1024
+ info(`Logs saved to: ${getJobLogFile(jobId)}\n`);
1025
+ return;
1026
+ }
1027
+
1028
+ process.removeListener('SIGINT', sigintHandler);
1029
+ console.log();
1030
+ console.log();
1031
+ success('Training complete!');
1032
+ console.log(` Output: ${brand.teal(outputDir)}`);
1033
+ if (finalData?.finalMetrics) {
1034
+ const m = finalData.finalMetrics;
1035
+ if (m.trainingLoss != null) console.log(` Final loss: ${m.trainingLoss}`);
1036
+ if (m.fidScore != null) console.log(` FID score: ${m.fidScore}`);
1037
+ if (m.wer != null) console.log(` WER: ${m.wer}`);
1038
+ if (m.mae != null) console.log(` MAE: ${m.mae}`);
1039
+ if (m.r2Score != null) console.log(` R\u00B2: ${m.r2Score}`);
1040
+ }
1041
+ console.log();
1042
+
1043
+ // Upload artifacts
1044
+ const uploadSpinner = villageSpinner('Uploading artifacts to SoulPrint Studio...').start();
1045
+ try {
1046
+ const artifacts = scanArtifacts(outputDir);
1047
+ const totalArtifactSize = artifacts.reduce((sum, f) => sum + f.size, 0);
1048
+
1049
+ for (const artifact of artifacts) {
1050
+ artifact.checksum = await computeSHA256(artifact.fullPath);
1051
+ }
1052
+
1053
+ const { urls } = await getUploadUrls({
1054
+ jobId,
1055
+ files: artifacts.map(f => ({ path: f.path, size: f.size, checksum: f.checksum })),
1056
+ });
1057
+
1058
+ const uploadLimit = pLimit(3);
1059
+ const artifactUploads = urls.map((urlInfo, i) =>
1060
+ uploadLimit(async () => {
1061
+ const file = artifacts[i];
1062
+ await axios.put(urlInfo.uploadUrl, createReadStream(file.fullPath), {
1063
+ headers: { 'Content-Type': 'application/octet-stream' },
1064
+ maxBodyLength: Infinity,
1065
+ });
1066
+ })
1067
+ );
1068
+ await Promise.all(artifactUploads);
1069
+
1070
+ const completeResult = await completeJob(jobId, {
1071
+ s3Artifacts: `soulprint/models/${jobId}/`,
1072
+ artifactSizeBytes: totalArtifactSize,
1073
+ artifactManifest: artifacts.map(f => ({ path: f.path, size: f.size, checksum: f.checksum })),
1074
+ finalMetrics: finalData?.finalMetrics || {},
1075
+ evalSamples: finalData?.evalSamples || [],
1076
+ });
1077
+
1078
+ uploadSpinner.succeed(`Model registered: ${brand.gold(completeResult.modelId || jobId)}`);
1079
+ console.log(`\n View in Studio: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/jobs/${jobId}`)}\n`);
1080
+ } catch (err) {
1081
+ uploadSpinner.fail(`Artifact upload failed: ${err.message}`);
1082
+ info(`Artifacts saved locally at: ${outputDir}`);
1083
+ info(`Try again with: ${brand.gold(`myvillage soulprint push ${outputDir} --job ${jobId}`)}\n`);
1084
+ }
1085
+ } catch (err) {
1086
+ if (err.isTtyError) {
1087
+ error('Prompts cannot be rendered in this environment.\n');
1088
+ return;
1089
+ }
1090
+ const message = err.response?.data?.error || err.message;
1091
+ error(`Training setup failed: ${message}\n`);
1092
+ }
1093
+ }
1094
+
1095
+ // ════════════════════════════════════════════════════════
1096
+ // JOBS
1097
+ // ════════════════════════════════════════════════════════
1098
+
1099
/**
 * Print the training jobs returned by `listJobs`, either as a table or,
 * when `options.json` is set, as the raw API result serialized to JSON.
 *
 * @param {object} options - Parsed CLI options.
 * @param {string} [options.status] - Status filter; upper-cased before it is sent.
 * @param {boolean} [options.json] - Emit the unmodified result as JSON instead of a table.
 * @returns {Promise<void>}
 */
export async function soulprintJobsCommand(options) {
  if (!requireAuth()) {
    return;
  }

  const spinner = villageSpinner('Loading training jobs...').start();
  try {
    const query = options.status ? { status: options.status.toUpperCase() } : {};
    const result = await listJobs(query);
    spinner.stop();

    if (options.json) {
      console.log(JSON.stringify(result, null, 2));
      return;
    }

    // The list may be wrapped under `jobs` or `data`, or be the result itself.
    const jobs = result.jobs || result.data || result;
    const hasJobs = Array.isArray(jobs) && jobs.length > 0;
    if (!hasJobs) {
      info('\n No training jobs found.\n');
      return;
    }

    header('Training Jobs');

    // Title row: only the first column is tinted.
    const leadTitle = brand.teal(padRight('Job ID', 16));
    const otherTitles = [
      ['Name', 28],
      ['Status', 14],
      ['Progress', 10],
      ['Started', 12],
    ]
      .map(([label, width]) => padRight(label, width))
      .join('');
    console.log(` ${leadTitle}${otherTitles}`);

    // Rule under the titles, one dash run per column.
    const rule = [14, 26, 12, 8, 10].map((width) => '\u2500'.repeat(width)).join(' ');
    console.log(brand.darkGold(` ${rule}`));

    jobs.forEach((job) => {
      const idText = padRight(truncate(job.id, 14), 16);
      // Fall back to the creation time when the job has not started yet.
      const startStamp = job.startedAt || job.createdAt;
      const remainingCells = [
        padRight(truncate(job.name, 26), 28),
        padRight(formatStatus(job.status), 14),
        padRight(formatProgress(job.progress), 10),
        padRight(startStamp ? relativeTime(startStamp) : '--', 12),
      ].join('');
      console.log(` ${chalk.cyan(idText)}${remainingCells}`);
    });
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load jobs: ${message}`);
  }
}
1142
+
1143
/**
 * Fetch a single training job with `getJob` and print its details:
 * identity, dataset, progress bar, metrics, machine info and timestamps.
 * With `options.json` the job object is printed as JSON instead.
 *
 * The reported progress fraction is clamped to the 0..1 range before the bar
 * is drawn, so an out-of-range value cannot make `String.prototype.repeat`
 * throw and abort the output half-way. Totals that are missing are shown as `?`.
 *
 * @param {string} jobId - Identifier passed to `getJob`.
 * @param {object} options - Parsed CLI options.
 * @param {boolean} [options.json] - Print the raw job object as JSON.
 * @returns {Promise<void>}
 */
export async function soulprintJobDetailCommand(jobId, options) {
  if (!requireAuth()) return;

  // Width of the progress bar in characters (each cell is 2.5%).
  const BAR_WIDTH = 40;
  // Metric keys in display order, paired with their printed labels.
  const METRIC_LABELS = [
    ['trainingLoss', 'Training loss'],
    ['validationLoss', 'Validation loss'],
    ['accuracy', 'Accuracy'],
    ['fidScore', 'FID score'],
    ['clipScore', 'CLIP score'],
    ['wer', 'WER'],
    ['cer', 'CER'],
    ['mae', 'MAE'],
    ['r2Score', 'R\u00B2'],
    ['f1Score', 'F1'],
  ];

  const spinner = villageSpinner(`Loading job ${jobId}...`).start();
  try {
    const job = await getJob(jobId);
    spinner.stop();

    if (options.json) {
      console.log(JSON.stringify(job, null, 2));
      return;
    }

    console.log();
    console.log(` ${chalk.bold(`Job: ${job.name}`)} (${chalk.cyan(job.id)})`);
    console.log(` Status: ${formatStatus(job.status)}`);
    console.log(` Dataset: ${brand.gold(job.dataset?.slug || job.datasetId)} v${job.datasetVersion}`);
    console.log(` Base: ${job.baseModel}`);
    console.log(` Method: ${job.method}`);

    if (job.progress != null) {
      // Clamp to [0, 1]; a non-numeric value is drawn as an empty bar.
      const rawFraction = Number(job.progress);
      const fraction = Number.isFinite(rawFraction) ? Math.min(1, Math.max(0, rawFraction)) : 0;
      const pct = Math.round(fraction * 100);
      const filled = Math.floor(pct / (100 / BAR_WIDTH));
      console.log();
      console.log(` Progress: ${'\u2588'.repeat(filled)}${'\u2591'.repeat(BAR_WIDTH - filled)} ${pct}%`);
      if (job.currentEpoch != null) {
        console.log(` Epoch: ${job.currentEpoch}/${job.totalEpochs ?? '?'}`);
      }
      if (job.currentStep != null) {
        console.log(` Step: ${job.currentStep.toLocaleString()}/${job.totalSteps?.toLocaleString() ?? '?'}`);
      }
    }

    if (job.metrics) {
      console.log();
      console.log(' Metrics:');
      for (const [key, label] of METRIC_LABELS) {
        const value = job.metrics[key];
        if (value != null) console.log(` ${label}: ${value}`);
      }
    }

    if (job.machineInfo) {
      const { gpu, vram, os } = job.machineInfo;
      console.log();
      console.log(` Machine: ${gpu || 'unknown'} (${vram || '?'}) \u2014 ${os || 'unknown'}`);
    }
    if (job.cliVersion) console.log(` CLI: v${job.cliVersion}`);
    if (job.startedAt) console.log(` Started: ${job.startedAt}`);
    if (job.completedAt) console.log(` Ended: ${job.completedAt}`);
    if (job.errorMessage) {
      console.log();
      console.log(chalk.red(` Error: ${job.errorMessage}`));
    }
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load job: ${message}`);
  }
}
1205
+
1206
+ // ════════════════════════════════════════════════════════
1207
+ // MODELS
1208
+ // ════════════════════════════════════════════════════════
1209
+
1210
/**
 * Print the models returned by `listModels`, either as a table or, when
 * `options.json` is set, as the raw API result serialized to JSON.
 *
 * @param {object} options - Parsed CLI options.
 * @param {string} [options.status] - Status filter; upper-cased before it is sent.
 * @param {string} [options.type] - Data-type filter; upper-cased and sent as `dataType`.
 * @param {boolean} [options.json] - Emit the unmodified result as JSON instead of a table.
 * @returns {Promise<void>}
 */
export async function soulprintModelsCommand(options) {
  if (!requireAuth()) {
    return;
  }

  const spinner = villageSpinner('Loading models...').start();
  try {
    const query = {
      ...(options.status ? { status: options.status.toUpperCase() } : {}),
      ...(options.type ? { dataType: options.type.toUpperCase() } : {}),
    };
    const result = await listModels(query);
    spinner.stop();

    if (options.json) {
      console.log(JSON.stringify(result, null, 2));
      return;
    }

    // The list may be wrapped under `models` or `data`, or be the result itself.
    const models = result.models || result.data || result;
    const hasModels = Array.isArray(models) && models.length > 0;
    if (!hasModels) {
      info('\n No models found.\n');
      return;
    }

    header('Model Registry');

    // Title row: only the first column is tinted.
    const leadTitle = brand.teal(padRight('Name', 28));
    const otherTitles = [
      ['Type', 14],
      ['Status', 14],
      ['Base Model', 24],
      ['Created', 12],
    ]
      .map(([label, width]) => padRight(label, width))
      .join('');
    console.log(` ${leadTitle}${otherTitles}`);

    // Rule under the titles, one dash run per column.
    const rule = [26, 12, 12, 22, 10].map((width) => '\u2500'.repeat(width)).join(' ');
    console.log(brand.darkGold(` ${rule}`));

    models.forEach((model) => {
      // The slug stands in when the model has no display name.
      const nameText = padRight(truncate(model.name || model.slug, 26), 28);
      const remainingCells = [
        padRight(model.dataType, 14),
        padRight(formatStatus(model.status), 14),
        padRight(truncate(model.baseModel, 22), 24),
        padRight(model.createdAt ? relativeTime(model.createdAt) : '--', 12),
      ].join('');
      console.log(` ${brand.gold(nameText)}${remainingCells}`);
    });
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load models: ${message}`);
  }
}
1254
+
1255
+ // ════════════════════════════════════════════════════════
1256
+ // PUSH
1257
+ // ════════════════════════════════════════════════════════
1258
+
1259
/**
 * Upload locally stored model artifacts and register them as a model.
 *
 * Steps: validate the inputs, scan the path with `scanArtifacts`, checksum
 * every file, request upload URLs, PUT the files (three at a time), then POST
 * the model record to `/models`.
 *
 * All inputs, including the model name, are validated before anything is
 * uploaded, and whichever spinner is running is stopped if a step throws.
 *
 * @param {string} path - Local path holding the artifacts; resolved against the CWD.
 * @param {object} options - Parsed CLI options.
 * @param {string} options.name - Model name; also used to derive the `s3Artifacts` prefix.
 * @param {string} options.type - Data type; must be a key of `TYPE_EXTENSIONS` (case-insensitive).
 * @param {string} [options.base] - Base model name (defaults to `'unknown'`).
 * @param {string} [options.method] - Training method (defaults to `'unknown'`).
 * @param {string} [options.description] - Free-form description (defaults to `''`).
 * @param {string} [options.job] - Job ID to associate with the upload and the model.
 * @returns {Promise<void>}
 */
export async function soulprintPushCommand(path, options) {
  if (!requireAuth()) return;

  // Whichever spinner is currently animating, so the catch block can stop it.
  let activeSpinner = null;

  try {
    const resolvedPath = resolve(path);
    if (!existsSync(resolvedPath)) {
      error(`Path not found: ${resolvedPath}\n`);
      return;
    }

    const typeKey = options.type?.toLowerCase();
    if (!TYPE_EXTENSIONS[typeKey]) {
      error(`Invalid type "${options.type}". Must be: text, image, audio, structured, multimodal\n`);
      return;
    }

    // The name is needed for registration; check it before uploading anything.
    if (typeof options.name !== 'string' || options.name.trim() === '') {
      error('A model name is required to register the pushed artifacts.\n');
      return;
    }

    console.log('\n Scanning artifacts...');
    const artifacts = scanArtifacts(resolvedPath);
    if (artifacts.length === 0) {
      error(`No files found to upload at: ${resolvedPath}\n`);
      return;
    }
    const totalSize = artifacts.reduce((sum, f) => sum + f.size, 0);
    info(`${artifacts.length} files, ${formatBytes(totalSize)} total`);

    // Compute checksums
    activeSpinner = villageSpinner('Computing checksums...').start();
    for (const artifact of artifacts) {
      artifact.checksum = await computeSHA256(artifact.fullPath);
    }
    activeSpinner.stop();
    activeSpinner = null;

    const manifest = artifacts.map((f) => ({ path: f.path, size: f.size, checksum: f.checksum }));

    // Get upload URLs
    const uploadData = { files: manifest };
    if (options.job) uploadData.jobId = options.job;

    activeSpinner = villageSpinner('Uploading model artifacts to SoulPrint Studio...').start();
    const { urls } = await getUploadUrls(uploadData);

    // URLs are matched to artifacts by position; at most three uploads run at once.
    const limit = pLimit(3);
    await Promise.all(
      urls.map((urlInfo, i) =>
        limit(() =>
          axios.put(urlInfo.uploadUrl, createReadStream(artifacts[i].fullPath), {
            headers: { 'Content-Type': 'application/octet-stream' },
            maxBodyLength: Infinity,
          })
        )
      )
    );
    activeSpinner.succeed(`Uploaded ${formatBytes(totalSize)}`);
    activeSpinner = null;

    // Register model
    activeSpinner = villageSpinner('Registering model...').start();
    const modelPayload = {
      name: options.name,
      dataType: typeKey.toUpperCase(),
      baseModel: options.base || 'unknown',
      method: options.method || 'unknown',
      description: options.description || '',
      s3Artifacts: `soulprint/models/${options.name.toLowerCase().replace(/\s+/g, '-')}/`,
      artifactSizeBytes: totalSize,
      artifactManifest: manifest,
    };
    if (options.job) modelPayload.jobId = options.job;

    const client = (await import('../utils/soulprint-api.js')).getSoulprintClient();
    const response = await client.post('/models', modelPayload);
    const model = response.data;
    activeSpinner.succeed(`Model registered: ${brand.gold(model.slug || model.name)}`);
    activeSpinner = null;

    console.log(` Status: ${formatStatus('DRAFT')}`);
    console.log(` View: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/models/${model.slug || ''}`)}\n`);
  } catch (err) {
    // Stop the in-flight spinner so it does not keep animating over the error.
    activeSpinner?.stop();
    const message = err.response?.data?.error || err.message;
    error(`Push failed: ${message}\n`);
  }
}
1337
+
1338
+ // ════════════════════════════════════════════════════════
1339
+ // PUBLISH
1340
+ // ════════════════════════════════════════════════════════
1341
+
1342
/**
 * Publish a model through `publishModel` after checking with `getModel`
 * that its status is `VALIDATED`.
 *
 * The spinner is stopped when a request throws, so the error message is not
 * printed underneath a spinner that is still animating. Blank entries in the
 * comma-separated `options.villages` list are dropped.
 *
 * @param {string} modelSlug - Slug passed to `getModel` and `publishModel`.
 * @param {object} options - Parsed CLI options.
 * @param {string} [options.tier] - Tier to publish under (defaults to `'FREE'`).
 * @param {boolean} [options.public] - Coerced to a boolean and sent as `isPublic`.
 * @param {string} [options.villager] - Sent as `targetVillagerId` when present.
 * @param {string} [options.villages] - Comma-separated IDs sent as `targetVillageIds`.
 * @returns {Promise<void>}
 */
export async function soulprintPublishCommand(modelSlug, options) {
  if (!requireAuth()) return;

  // Non-null only while the spinner is still animating.
  let spinner = null;

  try {
    // Verify model exists and is VALIDATED
    spinner = villageSpinner(`Publishing ${modelSlug} to MyVillage Platform...`).start();

    const model = await getModel(modelSlug);
    if (model.status !== 'VALIDATED') {
      spinner.fail(`Model "${modelSlug}" is ${model.status}. Only VALIDATED models can be published.`);
      spinner = null;
      return;
    }

    const publishData = {
      tier: options.tier || 'FREE',
      isPublic: !!options.public,
    };
    if (options.villager) publishData.targetVillagerId = options.villager;
    if (options.villages) {
      // Ignore blank segments such as a trailing comma or "a,,b".
      const villageIds = options.villages
        .split(',')
        .map((id) => id.trim())
        .filter(Boolean);
      if (villageIds.length > 0) publishData.targetVillageIds = villageIds;
    }

    const result = await publishModel(modelSlug, publishData);
    spinner.succeed('Model pushed to portal.myvillageproject.ai');
    spinner = null;

    if (result.platformModelId) {
      success(`Platform Model ID: ${result.platformModelId}`);
    }
    success('Status: DRAFT (activate in portal admin)');

    if (result.platformUrl) {
      console.log(`\n Model available at: ${brand.teal(result.platformUrl)}\n`);
    } else if (result.platformModelId) {
      console.log(`\n Model available at: ${brand.teal(`https://portal.myvillageproject.ai/admin/models/${result.platformModelId}`)}\n`);
    }
  } catch (err) {
    // Stop the in-flight spinner so it does not keep animating over the error.
    spinner?.stop();
    const message = err.response?.data?.error || err.message;
    error(`Publish failed: ${message}\n`);
  }
}