@myvillage/cli 1.3.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -199
- package/package.json +11 -6
- package/src/agent-runtime/context.js +99 -0
- package/src/agent-runtime/daemon-entry.js +66 -0
- package/src/agent-runtime/daemon.js +65 -0
- package/src/agent-runtime/loop.js +281 -0
- package/src/agent-runtime/mcp-client.js +93 -0
- package/src/agent-runtime/scheduler.js +53 -0
- package/src/commands/agent-local.js +624 -0
- package/src/commands/agent.js +274 -42
- package/src/commands/bizreqs.js +965 -0
- package/src/commands/comment.js +5 -4
- package/src/commands/community.js +13 -12
- package/src/commands/create-app.js +253 -0
- package/src/commands/create-game.js +9 -8
- package/src/commands/deploy.js +101 -23
- package/src/commands/feed.js +4 -3
- package/src/commands/login.js +164 -76
- package/src/commands/logout.js +45 -7
- package/src/commands/post.js +14 -13
- package/src/commands/profile.js +4 -3
- package/src/commands/search.js +3 -2
- package/src/commands/soulprint.js +1379 -0
- package/src/commands/status.js +64 -28
- package/src/commands/vote.js +46 -18
- package/src/index.js +244 -1
- package/src/utils/agent-scaffolder.js +165 -0
- package/src/utils/api.js +135 -14
- package/src/utils/app-templates.js +2983 -0
- package/src/utils/brand.js +107 -0
- package/src/utils/config.js +17 -1
- package/src/utils/formatters.js +351 -18
- package/src/utils/local-agent.js +168 -0
- package/src/utils/soulprint-api.js +136 -0
- package/src/utils/soulprint-workspace.js +158 -0
|
@@ -0,0 +1,1379 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import inquirer from 'inquirer';
|
|
3
|
+
import axios from 'axios';
|
|
4
|
+
import pLimit from 'p-limit';
|
|
5
|
+
import { spawn } from 'child_process';
|
|
6
|
+
import { createInterface } from 'readline';
|
|
7
|
+
import { createHash } from 'crypto';
|
|
8
|
+
import {
|
|
9
|
+
existsSync,
|
|
10
|
+
mkdirSync,
|
|
11
|
+
readFileSync,
|
|
12
|
+
writeFileSync,
|
|
13
|
+
readdirSync,
|
|
14
|
+
statSync,
|
|
15
|
+
createReadStream,
|
|
16
|
+
createWriteStream,
|
|
17
|
+
} from 'fs';
|
|
18
|
+
import { join, resolve, relative, extname, basename } from 'path';
|
|
19
|
+
import { pipeline } from 'stream/promises';
|
|
20
|
+
import { parse as parseYaml } from 'yaml';
|
|
21
|
+
import { createRequire } from 'module';
|
|
22
|
+
import { isAuthenticated } from '../utils/auth.js';
|
|
23
|
+
import { brand, villageSpinner, success, error, info, header, stripAnsi } from '../utils/brand.js';
|
|
24
|
+
import { relativeTime, truncate } from '../utils/formatters.js';
|
|
25
|
+
import {
|
|
26
|
+
listDatasets,
|
|
27
|
+
getDataset,
|
|
28
|
+
getDatasetDownload,
|
|
29
|
+
createJob,
|
|
30
|
+
listJobs,
|
|
31
|
+
getJob,
|
|
32
|
+
updateJobStatus,
|
|
33
|
+
completeJob,
|
|
34
|
+
failJob,
|
|
35
|
+
listModels,
|
|
36
|
+
getModel,
|
|
37
|
+
publishModel,
|
|
38
|
+
ingestText,
|
|
39
|
+
ingestStructured,
|
|
40
|
+
prepareIngestion,
|
|
41
|
+
completeIngestion,
|
|
42
|
+
getUploadUrls,
|
|
43
|
+
getScriptsManifest,
|
|
44
|
+
} from '../utils/soulprint-api.js';
|
|
45
|
+
import {
|
|
46
|
+
getSoulprintDir,
|
|
47
|
+
getDatasetsDir,
|
|
48
|
+
getModelsDir,
|
|
49
|
+
getConfigsDir,
|
|
50
|
+
getLogsDir,
|
|
51
|
+
getVenvDir,
|
|
52
|
+
getScriptsDir,
|
|
53
|
+
isWorkspaceInitialized,
|
|
54
|
+
readWorkspaceConfig,
|
|
55
|
+
writeWorkspaceConfig,
|
|
56
|
+
getLocalDatasetDir,
|
|
57
|
+
isDatasetDownloaded,
|
|
58
|
+
getPythonPath,
|
|
59
|
+
isPythonAvailable,
|
|
60
|
+
getPythonVersion,
|
|
61
|
+
detectGPU,
|
|
62
|
+
getMachineInfo,
|
|
63
|
+
getJobOutputDir,
|
|
64
|
+
getJobLogFile,
|
|
65
|
+
appendJobLog,
|
|
66
|
+
} from '../utils/soulprint-workspace.js';
|
|
67
|
+
|
|
68
|
+
const require = createRequire(import.meta.url);
|
|
69
|
+
const { version: cliVersion } = require('../../package.json');
|
|
70
|
+
|
|
71
|
+
// ── Helpers ────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
/**
 * Right-pad a value with spaces up to a visible width.
 *
 * The width is measured after removing ANSI escape sequences with
 * `stripAnsi`, so colored strings line up in columns. Values that are already
 * at least `len` visible characters wide are returned unpadded.
 *
 * @param {*} str - Value to pad; converted with `String()`.
 * @param {number} len - Target visible width.
 * @returns {string} The stringified value, padded on the right if needed.
 */
function padRight(str, len) {
  const text = String(str);
  const missing = len - stripAnsi(text).length;
  return missing > 0 ? text + ' '.repeat(missing) : text;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Colorize a dataset / job / model status label for terminal output.
 *
 * Known statuses map to a fixed color; anything else falls back to
 * `brand.teal`. The lookup only considers the table's own keys, so a status
 * that happens to match an inherited property name (for example
 * "constructor") is treated as unknown instead of resolving to a function on
 * `Object.prototype`.
 *
 * @param {string} status - Status label as reported by the API.
 * @returns {string} The label wrapped by the selected color function.
 */
function formatStatus(status) {
  const colorByStatus = {
    COLLECTING: chalk.cyan,
    READY: brand.green,
    TRAINING: chalk.yellow,
    ARCHIVED: brand.teal,
    CREATED: brand.teal,
    DOWNLOADING: chalk.cyan,
    PREPARING: chalk.cyan,
    EVALUATING: chalk.yellow,
    COMPLETED: brand.green,
    FAILED: chalk.red,
    CANCELLED: brand.teal,
    DRAFT: brand.teal,
    VALIDATED: brand.green,
    PUBLISHED: brand.green,
    REJECTED: chalk.red,
  };
  const isKnown = Object.prototype.hasOwnProperty.call(colorByStatus, status);
  const colorFn = (isKnown && colorByStatus[status]) || brand.teal;
  return colorFn(status);
}
|
|
100
|
+
|
|
101
|
+
/**
 * Format a byte count as a short human-readable string (B, KB, MB or GB,
 * using 1024-based units and one decimal place above bytes).
 *
 * Missing or non-numeric input (`null`, `undefined`, `NaN`, infinities)
 * yields the placeholder "--" rather than text such as "NaN GB".
 *
 * @param {number|string|null|undefined} bytes - Size in bytes.
 * @returns {string} Formatted size, or "--" when the size is unknown.
 */
function formatBytes(bytes) {
  const value = bytes == null ? Number.NaN : Number(bytes);
  if (!Number.isFinite(value)) return '--';
  if (value < 1024) return `${value} B`;
  if (value < 1048576) return `${(value / 1024).toFixed(1)} KB`;
  if (value < 1073741824) return `${(value / 1048576).toFixed(1)} MB`;
  return `${(value / 1073741824).toFixed(1)} GB`;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Render a fractional progress value as a whole-number percentage.
 *
 * @param {number|null|undefined} progress - Fraction to multiply by 100.
 * @returns {string} e.g. "42%", or a teal "--" when progress is null/undefined.
 */
function formatProgress(progress) {
  return progress == null
    ? brand.teal('--')
    : `${Math.round(progress * 100)}%`;
}
|
|
113
|
+
|
|
114
|
+
/** File extensions (lower-case, with dot) accepted for each data type. */
const TYPE_EXTENSIONS = {
  text: ['.json', '.jsonl'],
  image: ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif'],
  audio: ['.wav', '.mp3', '.flac', '.ogg', '.m4a'],
  structured: ['.json', '.jsonl', '.csv'],
  multimodal: ['.json', '.jsonl'],
};

/** MIME type recorded for a file, keyed by its lower-cased extension. */
const MIME_TYPES = {
  // images
  '.png': 'image/png',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.webp': 'image/webp',
  '.bmp': 'image/bmp',
  '.gif': 'image/gif',
  // audio
  '.wav': 'audio/wav',
  '.mp3': 'audio/mpeg',
  '.flac': 'audio/flac',
  '.ogg': 'audio/ogg',
  '.m4a': 'audio/mp4',
  // text / tabular
  '.json': 'application/json',
  '.jsonl': 'application/jsonl',
  '.csv': 'text/csv',
};

/** Training method names accepted for each data type. */
const VALID_METHODS = {
  text: ['openai-finetune', 'full-finetune', 'lora'],
  image: ['lora', 'dreambooth'],
  audio: ['whisper-finetune', 'wav2vec-finetune'],
  structured: ['llm-finetune', 'gradient-boost', 'sklearn-pipeline'],
  multimodal: ['clip-finetune'],
};

/**
 * Training script path for each method. "lora" is valid for both text and
 * image data, so it is keyed with a ":<type>" suffix here.
 */
const METHOD_SCRIPTS = {
  'openai-finetune': 'text/finetune_openai.py',
  'full-finetune': 'text/finetune_local.py',
  'lora:text': 'text/finetune_local.py',
  'lora:image': 'image/lora_sdxl.py',
  dreambooth: 'image/dreambooth.py',
  'whisper-finetune': 'audio/whisper_finetune.py',
  'wav2vec-finetune': 'audio/wav2vec_finetune.py',
  'llm-finetune': 'structured/llm_classifier.py',
  'gradient-boost': 'structured/tabular_train.py',
  'sklearn-pipeline': 'structured/tabular_train.py',
  'clip-finetune': 'multimodal/clip_finetune.py',
};
|
|
160
|
+
|
|
161
|
+
/**
 * List the files under `dir` whose extension is in `extensions`.
 *
 * Extensions are compared lower-cased. Sub-directories are only descended
 * into when `recursive` is true. Entries that are neither regular files nor
 * directories are ignored.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} extensions - Accepted extensions, lower-case with dot.
 * @param {boolean} [recursive=false] - Whether to scan sub-directories.
 * @returns {Array<{filename: string, fullPath: string, relativePath: string, size: number, mimeType: string}>}
 *   One record per matching file; `relativePath` is relative to `dir`.
 */
function collectFiles(dir, extensions, recursive = false) {
  const gather = (folder) =>
    readdirSync(folder, { withFileTypes: true }).flatMap((entry) => {
      const fullPath = join(folder, entry.name);

      if (recursive && entry.isDirectory()) return gather(fullPath);
      if (!entry.isFile()) return [];

      const ext = extname(entry.name).toLowerCase();
      if (!extensions.includes(ext)) return [];

      const { size } = statSync(fullPath);
      return [{
        filename: entry.name,
        fullPath,
        relativePath: relative(dir, fullPath),
        size,
        mimeType: MIME_TYPES[ext] || 'application/octet-stream',
      }];
    });

  return gather(dir);
}
|
|
187
|
+
|
|
188
|
+
/**
 * Compute the SHA-256 digest of a file by streaming its contents.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Digest formatted as "sha256:<hex>". The promise
 *   rejects if the file cannot be read.
 */
async function computeSHA256(filePath) {
  const digest = createHash('sha256');
  for await (const chunk of createReadStream(filePath)) {
    digest.update(chunk);
  }
  return `sha256:${digest.digest('hex')}`;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Recursively list every regular file under `dir`.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Array<{path: string, fullPath: string, size: number}>} One record
 *   per file; `path` is relative to `dir`.
 */
function scanArtifacts(dir) {
  const listTree = (folder) =>
    readdirSync(folder, { withFileTypes: true }).flatMap((entry) => {
      const fullPath = join(folder, entry.name);

      if (entry.isDirectory()) return listTree(fullPath);
      if (!entry.isFile()) return [];

      const { size } = statSync(fullPath);
      return [{ path: relative(dir, fullPath), fullPath, size }];
    });

  return listTree(dir);
}
|
|
219
|
+
|
|
220
|
+
// ── AUTH GUARD ──────────────────────────────────────────
|
|
221
|
+
|
|
222
|
+
/**
 * Guard for commands that need a logged-in user.
 *
 * @returns {boolean} `true` when `isAuthenticated()` reports a session;
 *   otherwise prints a login hint and returns `false`.
 */
function requireAuth() {
  if (isAuthenticated()) {
    return true;
  }
  const loginHint = brand.gold("'myvillage login'");
  console.log(chalk.red(` \u2717 Authentication required. Run ${loginHint} first.\n`));
  return false;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Guard for commands that need an initialized SoulPrint workspace.
 *
 * @returns {boolean} `true` when `isWorkspaceInitialized()` is truthy;
 *   otherwise prints a hint to run the init command and returns `false`.
 */
function requireWorkspace() {
  if (isWorkspaceInitialized()) {
    return true;
  }
  const initHint = brand.gold("'myvillage soulprint init'");
  console.log(chalk.red(` \u2717 SoulPrint workspace not initialized. Run ${initHint} first.\n`));
  return false;
}
|
|
237
|
+
|
|
238
|
+
// ════════════════════════════════════════════════════════
|
|
239
|
+
// INIT
|
|
240
|
+
// ════════════════════════════════════════════════════════
|
|
241
|
+
|
|
242
|
+
/**
 * `myvillage soulprint init` — create the local SoulPrint Studio workspace.
 *
 * Steps, in order: confirm re-initialization when a workspace already exists,
 * check for Python and a GPU, create the workspace directories, create a
 * Python virtual environment (unless `options.skipPython`), download the
 * training scripts listed in the server manifest (unless
 * `options.skipScripts`), and write the workspace config.
 *
 * @param {object} options - Parsed CLI flags.
 * @param {boolean} [options.skipPython] - Skip virtual-environment creation.
 * @param {boolean} [options.skipScripts] - Skip the training-script download.
 * @returns {Promise<void>} Errors raised inside the command body are caught
 *   and reported on the console.
 */
export async function soulprintInitCommand(options) {
  if (!requireAuth()) return;

  try {
    if (isWorkspaceInitialized()) {
      const { reinit } = await inquirer.prompt([{
        type: 'confirm',
        name: 'reinit',
        message: 'SoulPrint workspace already exists. Reinitialize?',
        default: false,
      }]);
      if (!reinit) {
        info('Cancelled.\n');
        return;
      }
    }

    header('Initializing SoulPrint Studio workspace');

    // Check prerequisites
    console.log(' Checking prerequisites:');
    const pythonAvailable = isPythonAvailable();
    const pythonVer = getPythonVersion();
    if (pythonAvailable) {
      success(`${pythonVer} found`);
    } else {
      error('Python 3.10+ required. Install it and try again.\n');
      return;
    }

    const gpu = detectGPU();
    if (gpu.type === 'cuda') {
      success(`CUDA detected (${gpu.name}, ${gpu.vram})`);
    } else if (gpu.type === 'mps') {
      success(`MPS available (${gpu.name}, ${gpu.vram})`);
    } else {
      info('\u2717 No GPU detected (CPU-only training available)');
    }

    console.log();

    // Create directory tree
    const spDir = getSoulprintDir();
    const dirs = [
      getDatasetsDir(),
      getModelsDir(),
      getConfigsDir(),
      getLogsDir(),
      getScriptsDir(),
    ];

    console.log(` Creating workspace at ${brand.teal(spDir)}/`);
    for (const dir of dirs) {
      mkdirSync(dir, { recursive: true });
      success(`${basename(dir)}/`);
    }

    console.log();

    // Python venv
    if (!options.skipPython) {
      const venvDir = getVenvDir();
      const spinner = villageSpinner('Creating Python virtual environment...').start();
      try {
        await new Promise((res, rej) => {
          const proc = spawn('python3', ['-m', 'venv', venvDir], { stdio: 'pipe' });
          proc.on('close', (code) => (code === 0 ? res() : rej(new Error(`venv creation failed (exit ${code})`))));
          proc.on('error', rej);
        });
        spinner.succeed('Python environment created at ' + brand.teal(venvDir));
      } catch (err) {
        spinner.fail(`Python venv creation failed: ${err.message}`);
        info('You can skip this with --skip-python and install dependencies manually.\n');
        return;
      }
    } else {
      info('Skipping Python venv setup (--skip-python)');
    }

    // Download training scripts (best effort: a failure only produces a warning)
    if (!options.skipScripts) {
      const spinner = villageSpinner('Downloading training scripts...').start();
      try {
        const manifest = await getScriptsManifest();
        const scriptsRoot = resolve(getScriptsDir());
        for (const file of manifest.files || []) {
          // The manifest comes from the network, so make sure its path cannot
          // climb out of the scripts directory (e.g. "../../.bashrc").
          const dest = join(scriptsRoot, file.path);
          const insidePath = relative(scriptsRoot, dest);
          if (insidePath === '' || insidePath.split(/[\\/]/)[0] === '..') {
            throw new Error(`Refusing to write script outside the workspace: ${file.path}`);
          }
          mkdirSync(resolve(dest, '..'), { recursive: true });
          const response = await axios.get(file.url, { responseType: 'arraybuffer' });
          writeFileSync(dest, response.data);
        }
        spinner.succeed(`Training scripts v${manifest.version || '1.0.0'} downloaded`);
      } catch (err) {
        spinner.warn(`Could not download training scripts: ${err.message}`);
        info('You can download them later or place scripts manually in ' + getScriptsDir());
      }
    } else {
      info('Skipping script download (--skip-scripts)');
    }

    // Write workspace config
    writeWorkspaceConfig({
      version: 1,
      initializedAt: new Date().toISOString(),
      cliVersion,
      pythonVersion: pythonVer,
      gpu: { type: gpu.type, name: gpu.name, vram: gpu.vram },
      venvPath: getVenvDir(),
    });

    console.log();
    success('SoulPrint Studio workspace initialized!');
    console.log();
    console.log(' Next steps:');
    console.log(` ${brand.gold('myvillage soulprint datasets list')} Browse available datasets`);
    console.log(` ${brand.gold('myvillage soulprint datasets pull <slug>')} Download a dataset`);
    console.log(` ${brand.gold('myvillage soulprint train --help')} See training options`);
    console.log();
  } catch (err) {
    // inquirer sets isTtyError when prompts cannot be shown
    if (err.isTtyError) {
      error('Prompts cannot be rendered in this environment.\n');
      return;
    }
    error(`Initialization failed: ${err.message}\n`);
  }
}
|
|
369
|
+
|
|
370
|
+
// ════════════════════════════════════════════════════════
|
|
371
|
+
// INGEST
|
|
372
|
+
// ════════════════════════════════════════════════════════
|
|
373
|
+
|
|
374
|
+
/**
 * `myvillage soulprint ingest <path>` — add local data to a dataset.
 *
 * Resolves the input files (a single file or a directory scan), loads the
 * optional captions / transcriptions companion files, validates the target
 * dataset, and then hands off to `ingestInlineData` (text, structured) or
 * `ingestFileData` (all other types). A single-file path must carry one of
 * the extensions accepted for the chosen type, just like files found by a
 * directory scan.
 *
 * @param {string} path - File or directory to ingest.
 * @param {object} options - Parsed CLI flags.
 * @param {string} options.type - text, image, audio, structured or multimodal.
 * @param {string} options.dataset - Identifier passed to `getDataset`.
 * @param {boolean} [options.recursive] - Scan sub-directories.
 * @param {string} [options.glob] - Filename filter; only the suffix left after
 *   removing the first `*` is matched.
 * @param {string} [options.captions] - Path to a captions file (JSON, or one
 *   caption per line).
 * @param {string} [options.transcriptions] - Path to a transcriptions file in
 *   the same formats.
 * @param {boolean} [options.dryRun] - Print a summary and upload nothing.
 * @returns {Promise<void>} Failures are reported on the console.
 */
export async function soulprintIngestCommand(path, options) {
  if (!requireAuth()) return;

  try {
    const dataType = options.type?.toUpperCase();
    const typeKey = options.type?.toLowerCase();
    if (!TYPE_EXTENSIONS[typeKey]) {
      error(`Invalid type "${options.type}". Must be: text, image, audio, structured, multimodal\n`);
      return;
    }

    const resolvedPath = resolve(path);
    if (!existsSync(resolvedPath)) {
      error(`Path not found: ${resolvedPath}\n`);
      return;
    }

    // Resolve files
    const extensions = TYPE_EXTENSIONS[typeKey];
    let files;
    const stat = statSync(resolvedPath);
    if (stat.isDirectory()) {
      console.log(`\n Scanning ${brand.teal(resolvedPath)} for ${typeKey} files...`);
      files = collectFiles(resolvedPath, extensions, !!options.recursive);
      if (options.glob) {
        const pattern = options.glob.toLowerCase();
        // NOTE: not a real glob; this is a suffix match on the pattern minus its first '*'.
        files = files.filter(f => f.filename.toLowerCase().endsWith(pattern.replace('*', '')));
      }
    } else {
      // Apply the same extension filter a directory scan applies, so an
      // unsupported file is rejected below instead of being sent as-is.
      const ext = extname(resolvedPath).toLowerCase();
      files = extensions.includes(ext)
        ? [{
          filename: basename(resolvedPath),
          fullPath: resolvedPath,
          relativePath: basename(resolvedPath),
          size: stat.size,
          mimeType: MIME_TYPES[ext] || 'application/octet-stream',
        }]
        : [];
    }

    if (files.length === 0) {
      error(`No ${typeKey} files found. Expected: ${extensions.join(', ')}\n`);
      return;
    }

    const totalSize = files.reduce((sum, f) => sum + f.size, 0);
    info(`Found ${files.length} files (${formatBytes(totalSize)})`);

    // Load companion metadata
    let captions = null;
    if (options.captions) {
      const captionsPath = resolve(options.captions);
      if (!existsSync(captionsPath)) {
        error(`Captions file not found: ${captionsPath}\n`);
        return;
      }
      const raw = readFileSync(captionsPath, 'utf-8');
      try {
        captions = JSON.parse(raw); // JSON map { filename: caption }
      } catch {
        captions = raw.split('\n').filter(l => l.trim()); // line-based
      }
      const count = Array.isArray(captions) ? captions.length : Object.keys(captions).length;
      info(`Captions loaded: ${count} entries from ${basename(captionsPath)}`);
    }

    let transcriptions = null;
    if (options.transcriptions) {
      const transPath = resolve(options.transcriptions);
      if (!existsSync(transPath)) {
        error(`Transcriptions file not found: ${transPath}\n`);
        return;
      }
      const raw = readFileSync(transPath, 'utf-8');
      try {
        transcriptions = JSON.parse(raw);
      } catch {
        transcriptions = raw.split('\n').filter(l => l.trim());
      }
    }

    // Validate dataset
    const spinner = villageSpinner('Validating target dataset...').start();
    let dataset;
    try {
      dataset = await getDataset(options.dataset);
      spinner.stop();
    } catch (err) {
      // Only call it "not found" when the server did not answer with some
      // other status; e.g. a 401 or 500 is reported with its own message.
      if (err.response && err.response.status !== 404) {
        const message = err.response.data?.error || err.message;
        spinner.fail(`Could not load dataset "${options.dataset}": ${message}`);
        return;
      }
      spinner.fail(`Dataset "${options.dataset}" not found.`);
      info(`Run ${brand.gold("'myvillage soulprint datasets list'")} to see available datasets.\n`);
      return;
    }

    if (dataset.dataType !== dataType) {
      error(`Dataset "${options.dataset}" is type ${dataset.dataType}, but --type ${typeKey} was specified.\n`);
      return;
    }
    if (dataset.status !== 'COLLECTING') {
      error(`Dataset "${options.dataset}" is ${dataset.status}. Only COLLECTING datasets accept new data.\n`);
      return;
    }

    console.log(`\n Target dataset: ${brand.gold(dataset.slug)} (${dataset.dataType}, ${dataset.status})\n`);

    // Dry run
    if (options.dryRun) {
      console.log(brand.teal(' DRY RUN \u2014 nothing will be uploaded\n'));
      console.log(` Files found: ${files.length}`);
      console.log(` Total size: ${formatBytes(totalSize)}`);
      const extCounts = {};
      for (const f of files) {
        const ext = extname(f.filename).toLowerCase();
        extCounts[ext] = (extCounts[ext] || 0) + 1;
      }
      console.log(` Extensions: ${Object.entries(extCounts).map(([e, n]) => `${e} (${n})`).join(', ')}`);
      console.log();
      return;
    }

    // Route by type
    if (typeKey === 'text' || typeKey === 'structured') {
      await ingestInlineData(files, typeKey, options, dataset);
    } else {
      await ingestFileData(files, typeKey, options, dataset, captions, transcriptions);
    }
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    error(`Ingestion failed: ${message}\n`);
  }
}
|
|
502
|
+
|
|
503
|
+
/**
 * Ingest text / structured records by reading the files locally and sending
 * their items to the API in batches of 1000.
 *
 * Supported inputs: `.jsonl` (one JSON value per non-blank line), `.json`
 * (an array of items, or a single item) and `.csv` (first non-blank line is
 * the header). Files with any other extension contribute nothing. A CSV file
 * without data rows is skipped; it does not stop the other files from being
 * ingested.
 *
 * @param {Array<{filename: string, fullPath: string}>} files - Files to read.
 * @param {string} typeKey - "text" sends batches through `ingestText`; any
 *   other value uses `ingestStructured`.
 * @param {{source?: string}} options - CLI flags; `source` defaults to "CLI".
 * @param {{slug: string}} dataset - Target dataset.
 * @returns {Promise<void>}
 * @throws {SyntaxError} When a JSON / JSONL file contains invalid JSON.
 */
async function ingestInlineData(files, typeKey, options, dataset) {
  // Read and parse items from files. Items are pushed one at a time: spreading
  // a very large array into push() can exceed the engine's argument limit.
  const allItems = [];
  for (const file of files) {
    const raw = readFileSync(file.fullPath, 'utf-8');
    const ext = extname(file.filename).toLowerCase();
    if (ext === '.jsonl') {
      for (const line of raw.split('\n')) {
        if (line.trim()) allItems.push(JSON.parse(line));
      }
    } else if (ext === '.json') {
      const parsed = JSON.parse(raw);
      if (Array.isArray(parsed)) {
        for (const item of parsed) allItems.push(item);
      } else {
        allItems.push(parsed);
      }
    } else if (ext === '.csv') {
      // NOTE(review): plain comma split; quoted fields containing commas or
      // newlines are not supported.
      const lines = raw.split('\n').filter(l => l.trim());
      if (lines.length < 2) continue; // header only or empty: nothing to add
      const headers = lines[0].split(',').map(h => h.trim());
      for (let i = 1; i < lines.length; i++) {
        const values = lines[i].split(',').map(v => v.trim());
        const row = {};
        headers.forEach((h, idx) => { row[h] = values[idx]; });
        allItems.push(row);
      }
    }
  }

  if (allItems.length === 0) {
    info('No items found to ingest.\n');
    return;
  }

  console.log(` Ingesting ${allItems.length} items...\n`);

  const batchSize = 1000;
  let ingested = 0;
  for (let i = 0; i < allItems.length; i += batchSize) {
    const batch = allItems.slice(i, i + batchSize);
    const payload = {
      source: options.source || 'CLI',
      dataset: dataset.slug,
      items: batch,
    };
    if (typeKey === 'text') {
      await ingestText(payload);
    } else {
      await ingestStructured(payload);
    }
    ingested += batch.length;
    const pct = Math.round((ingested / allItems.length) * 100);
    const filled = Math.floor(pct / 2.5); // 40-cell progress bar
    process.stdout.write(`\r [${'\u2588'.repeat(filled)}${'\u2591'.repeat(40 - filled)}] ${pct}% (${ingested.toLocaleString()}/${allItems.length.toLocaleString()})`);
  }

  console.log();
  console.log();
  success(`${allItems.length.toLocaleString()} items ingested into ${dataset.slug}`);
  console.log(`\n View in Studio: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/datasets/${dataset.slug}`)}\n`);
}
|
|
554
|
+
|
|
555
|
+
/**
 * Ingest binary files (image, audio, ...): register the ingestion, upload
 * each file to its pre-signed URL with bounded concurrency, then finalize
 * with the per-file SHA-256 checksums.
 *
 * Each upload is attempted up to three times with exponential backoff. Both
 * spinners are stopped before an error propagates to the caller.
 *
 * @param {Array<{filename: string, fullPath: string, size: number, mimeType: string}>} files
 *   Files to upload.
 * @param {string} typeKey - Data type key; sent upper-cased as `dataType`.
 * @param {{source?: string, concurrency?: string|number}} options - CLI flags.
 *   `concurrency` must be a positive integer, otherwise 5 is used.
 * @param {{slug: string}} dataset - Target dataset.
 * @param {string[]|Object<string, string>|null} captions - Caption list, or a
 *   map keyed by filename.
 * @param {string[]|Object<string, string>|null} transcriptions - Same shapes
 *   as `captions`.
 * @returns {Promise<void>}
 */
async function ingestFileData(files, typeKey, options, dataset, captions, transcriptions) {
  const requested = Number.parseInt(options.concurrency, 10);
  const concurrency = Number.isInteger(requested) && requested > 0 ? requested : 5;
  const limit = pLimit(concurrency);

  // Build metadata: arrays are passed through, maps are aligned with `files`
  const metadata = {};
  if (captions) {
    if (Array.isArray(captions)) {
      metadata.captions = captions;
    } else {
      metadata.captions = files.map(f => captions[f.filename] || null);
    }
  }
  if (transcriptions) {
    if (Array.isArray(transcriptions)) {
      metadata.transcriptions = transcriptions;
    } else {
      metadata.transcriptions = files.map(f => transcriptions[f.filename] || null);
    }
  }

  const spinner = villageSpinner('Preparing ingestion...').start();
  let preparation;
  try {
    preparation = await prepareIngestion({
      source: options.source || 'CLI',
      dataType: typeKey.toUpperCase(),
      dataset: dataset.slug,
      files: files.map(f => ({ filename: f.filename, size: f.size, mimeType: f.mimeType })),
      metadata,
    });
  } catch (err) {
    spinner.stop(); // don't leave the spinner animating under the error output
    throw err;
  }
  spinner.succeed(`Ingestion ${preparation.ingestionId} created`);

  // Upload files
  const totalFiles = files.length;
  const totalBytes = files.reduce((sum, f) => sum + f.size, 0);
  let uploadedFiles = 0;

  console.log(`\n Uploading ${totalFiles} files to S3...`);

  const checksums = [];
  const uploadUrls = preparation.uploadUrls;

  // NOTE(review): assumes uploadUrls[i] corresponds to files[i] — confirm against the API.
  const uploads = uploadUrls.map((urlInfo, i) =>
    limit(async () => {
      const file = files[i];
      const checksum = await computeSHA256(file.fullPath);
      checksums.push({ filename: file.filename, checksum });

      let retries = 0;
      while (retries < 3) {
        try {
          // A fresh read stream per attempt: a consumed stream cannot be replayed
          await axios.put(urlInfo.uploadUrl, createReadStream(file.fullPath), {
            headers: {
              'Content-Type': file.mimeType || 'application/octet-stream',
              'Content-Length': file.size,
            },
            maxBodyLength: Infinity,
          });
          break;
        } catch (err) {
          retries++;
          if (retries >= 3) throw err;
          await new Promise(r => setTimeout(r, 1000 * Math.pow(2, retries)));
        }
      }

      uploadedFiles++;
      const pct = Math.round((uploadedFiles / totalFiles) * 100);
      const filled = Math.floor(pct / 2.5); // 40-cell progress bar
      process.stdout.write(`\r [${'\u2588'.repeat(filled)}${'\u2591'.repeat(40 - filled)}] ${pct}% (${uploadedFiles}/${totalFiles}) ${formatBytes(totalBytes)}`);
    })
  );

  await Promise.all(uploads);
  console.log();

  // Finalize
  const finalSpinner = villageSpinner('Finalizing...').start();
  let result;
  try {
    result = await completeIngestion(preparation.ingestionId, { checksums });
  } catch (err) {
    finalSpinner.stop();
    throw err;
  }
  finalSpinner.succeed(`${result.itemCount} items added to ${dataset.slug}`);
  console.log(`\n View in Studio: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/datasets/${dataset.slug}`)}\n`);
}
|
|
635
|
+
|
|
636
|
+
// ════════════════════════════════════════════════════════
|
|
637
|
+
// DATASETS LIST
|
|
638
|
+
// ════════════════════════════════════════════════════════
|
|
639
|
+
|
|
640
|
+
/**
 * `myvillage soulprint datasets list` — print the datasets returned by the API.
 *
 * @param {object} options - Parsed CLI flags.
 * @param {string} [options.type] - Filter, sent upper-cased as `dataType`.
 * @param {string} [options.status] - Filter, sent upper-cased as `status`.
 * @param {boolean} [options.json] - Print the raw API result as JSON instead
 *   of a table.
 * @returns {Promise<void>} Request failures are reported through the spinner.
 */
export async function soulprintDatasetListCommand(options) {
  if (!requireAuth()) return;

  const spinner = villageSpinner('Loading datasets...').start();
  try {
    const params = {
      ...(options.type && { dataType: options.type.toUpperCase() }),
      ...(options.status && { status: options.status.toUpperCase() }),
    };

    const result = await listDatasets(params);
    spinner.stop();

    if (options.json) {
      console.log(JSON.stringify(result, null, 2));
      return;
    }

    // The list may arrive under `datasets`, under `data`, or as the result itself
    const datasets = result.datasets || result.data || result;
    const hasRows = Array.isArray(datasets) && datasets.length > 0;
    if (!hasRows) {
      info('\n No datasets found.\n');
      return;
    }

    header('Datasets on SoulPrint Studio');

    // Table header and rule
    const headerCells = [
      brand.teal(padRight('Name', 28)),
      padRight('Type', 14),
      padRight('Status', 14),
      padRight('Items', 10),
      padRight('Version', 10),
    ];
    console.log(` ${headerCells.join('')}`);
    const rule = [26, 12, 12, 8, 8].map((width) => '\u2500'.repeat(width)).join(' ');
    console.log(brand.darkGold(` ${rule}`));

    datasets.forEach((ds) => {
      const cells = [
        brand.gold(padRight(truncate(ds.name || ds.slug, 26), 28)),
        padRight(ds.dataType, 14),
        padRight(formatStatus(ds.status), 14),
        padRight(ds.itemCount?.toLocaleString() || '0', 10),
        padRight(`v${ds.currentVersion || 1}`, 10),
      ];
      console.log(` ${cells.join('')}`);
    });
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load datasets: ${message}`);
  }
}
|
|
685
|
+
|
|
686
|
+
// ════════════════════════════════════════════════════════
|
|
687
|
+
// DATASETS PULL
|
|
688
|
+
// ════════════════════════════════════════════════════════
|
|
689
|
+
|
|
690
|
+
/**
 * `myvillage soulprint datasets pull <slug>` — download a dataset version
 * into the local workspace.
 *
 * Fetches the download descriptor, saves the manifest when one is provided,
 * downloads every listed file (five at a time) and prints per-split counts.
 * File paths come from the server and are checked so that nothing is written
 * outside the dataset's local directory.
 *
 * @param {string} slug - Dataset slug.
 * @param {object} options - Parsed CLI flags.
 * @param {string|number} [options.version] - Version to pull; must be a
 *   positive integer. When omitted, `undefined` is passed to the API.
 * @param {boolean} [options.force] - Re-download even if that version is
 *   already present locally.
 * @returns {Promise<void>} Failures are reported on the console.
 */
export async function soulprintDatasetPullCommand(slug, options) {
  if (!requireAuth()) return;
  if (!requireWorkspace()) return;

  try {
    let version;
    if (options.version) {
      version = Number.parseInt(options.version, 10);
      if (!Number.isInteger(version) || version < 1) {
        error(`Invalid version "${options.version}". Expected a positive integer.\n`);
        return;
      }
    }

    // Check if already downloaded
    if (version && isDatasetDownloaded(slug, version) && !options.force) {
      info(`Dataset ${slug} v${version} is already downloaded locally.`);
      info(`Use --force to re-download.\n`);
      return;
    }

    const spinner = villageSpinner(`Fetching download info for ${slug}...`).start();
    let downloadInfo;
    try {
      downloadInfo = await getDatasetDownload(slug, version);
    } finally {
      spinner.stop(); // also on failure, so the error output is not animated over
    }

    const ver = downloadInfo.version;
    const itemCount = downloadInfo.itemCount;
    const totalSize = downloadInfo.totalSizeBytes;

    console.log(`\n Pulling dataset: ${brand.gold(`${slug} v${ver}`)}`);
    console.log(` Items: ${itemCount?.toLocaleString()} | Size: ${formatBytes(totalSize)}\n`);

    const localDir = getLocalDatasetDir(slug, ver);
    mkdirSync(localDir, { recursive: true });

    // Download manifest
    if (downloadInfo.manifest?.url) {
      const manifestResp = await axios.get(downloadInfo.manifest.url, { responseType: 'arraybuffer' });
      writeFileSync(join(localDir, 'manifest.json'), manifestResp.data);
    }

    // Download files
    const urls = downloadInfo.downloadUrls || [];
    if (urls.length > 0) {
      const limit = pLimit(5);
      const localRoot = resolve(localDir);
      let downloaded = 0;

      console.log(` Downloading to ${brand.teal(localDir)}/`);

      const downloads = urls.map(item =>
        limit(async () => {
          // Reject server-supplied paths that would land outside localDir
          const destPath = join(localRoot, item.path);
          const insidePath = relative(localRoot, destPath);
          if (insidePath === '' || insidePath.split(/[\\/]/)[0] === '..') {
            throw new Error(`Refusing to write outside the dataset directory: ${item.path}`);
          }
          mkdirSync(resolve(destPath, '..'), { recursive: true });

          const response = await axios.get(item.url, { responseType: 'stream' });
          await pipeline(response.data, createWriteStream(destPath));

          downloaded++;
          const pct = Math.round((downloaded / urls.length) * 100);
          const filled = Math.floor(pct / 2.5); // 40-cell progress bar
          process.stdout.write(`\r [${'\u2588'.repeat(filled)}${'\u2591'.repeat(40 - filled)}] ${pct}% (${downloaded.toLocaleString()}/${urls.length.toLocaleString()})`);
        })
      );

      await Promise.all(downloads);
      console.log();
    }

    // Write manifest if not already present
    if (!existsSync(join(localDir, 'manifest.json'))) {
      writeFileSync(join(localDir, 'manifest.json'), JSON.stringify({
        slug, version: ver, itemCount, totalSizeBytes: totalSize, downloadedAt: new Date().toISOString(),
      }, null, 2));
    }

    console.log();
    success(`Dataset ready at: ${localDir}`);

    // Print split counts if available
    for (const split of ['train', 'validation', 'test']) {
      const splitDir = join(localDir, split);
      if (existsSync(splitDir)) {
        const count = readdirSync(splitDir).length;
        info(`${padRight(split + '/', 16)} ${count} items`);
      }
    }
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    error(`Failed to pull dataset: ${message}\n`);
  }
}
|
|
775
|
+
|
|
776
|
+
// ════════════════════════════════════════════════════════
|
|
777
|
+
// TRAIN
|
|
778
|
+
// ════════════════════════════════════════════════════════
|
|
779
|
+
|
|
780
|
+
/**
 * Run a training job locally and mirror its lifecycle to SoulPrint Studio.
 *
 * Steps, in order: validate `type`/`method` (prompting for the method when it
 * is omitted), optionally load a YAML config, look up the dataset, make sure a
 * local copy exists (offering to pull it), then either print a dry-run summary
 * or register a job, spawn the Python training script, stream its JSON-lines
 * stdout into the job log / Studio status updates, and finally upload the
 * produced artifacts.
 *
 * @param {object} options - Parsed CLI options. Reads `type`, `method`,
 *   `config`, `dataset`, `version`, `base`, `name` and `dryRun`.
 * @returns {Promise<void>} Failures are reported on the console; nothing is
 *   thrown to the caller.
 */
export async function soulprintTrainCommand(options) {
  if (!requireAuth()) return;
  if (!requireWorkspace()) return;

  try {
    const typeKey = options.type?.toLowerCase();
    if (!VALID_METHODS[typeKey]) {
      error(`Invalid type "${options.type}". Must be: text, image, audio, structured, multimodal\n`);
      return;
    }

    if (options.method && !VALID_METHODS[typeKey].includes(options.method)) {
      error(`Invalid method "${options.method}" for type "${typeKey}".`);
      info(`Valid methods: ${VALID_METHODS[typeKey].join(', ')}\n`);
      return;
    }

    // If no method specified, prompt
    let method = options.method;
    if (!method) {
      const { selectedMethod } = await inquirer.prompt([{
        type: 'list',
        name: 'selectedMethod',
        message: `Training method for ${typeKey}:`,
        choices: VALID_METHODS[typeKey].map(m => ({ name: m, value: m })),
      }]);
      method = selectedMethod;
    }

    // Load config YAML if provided
    let config = {};
    if (options.config) {
      const configPath = resolve(options.config);
      if (!existsSync(configPath)) {
        error(`Config file not found: ${configPath}\n`);
        return;
      }
      // Fall back to {} so a file that parses to null/undefined (presumably an
      // empty document — confirm against parseYaml) does not break the
      // `config.baseModel` / `config.epochs` reads below.
      config = parseYaml(readFileSync(configPath, 'utf-8')) ?? {};
    }

    // Fetch dataset info
    const spinner = villageSpinner('Validating dataset...').start();
    let dataset;
    try {
      dataset = await getDataset(options.dataset);
      spinner.stop();
    } catch {
      spinner.fail(`Dataset "${options.dataset}" not found.`);
      return;
    }

    // An explicit --version must be a base-10 integer; otherwise use the
    // dataset's current version.
    let datasetVersion = dataset.currentVersion;
    if (options.version) {
      datasetVersion = Number.parseInt(options.version, 10);
      if (Number.isNaN(datasetVersion)) {
        error(`Invalid version "${options.version}". Must be an integer.\n`);
        return;
      }
    }

    // Check local download
    if (!isDatasetDownloaded(options.dataset, datasetVersion)) {
      const { pullNow } = await inquirer.prompt([{
        type: 'confirm',
        name: 'pullNow',
        message: `Dataset "${options.dataset}" v${datasetVersion} not found locally. Download it now?`,
        default: true,
      }]);
      if (pullNow) {
        await soulprintDatasetPullCommand(options.dataset, { version: String(datasetVersion) });
        // The pull command reports its own errors instead of throwing, so the
        // only way to know it worked is to look again.
        if (!isDatasetDownloaded(options.dataset, datasetVersion)) {
          error(`Dataset "${options.dataset}" v${datasetVersion} is still not available locally. Aborting.\n`);
          return;
        }
      } else {
        info('Cannot train without a local dataset copy.\n');
        return;
      }
    }

    const localDatasetDir = getLocalDatasetDir(options.dataset, datasetVersion);
    const baseModel = options.base || config.baseModel || 'default';
    const jobName = options.name || `${dataset.name} ${method} v${datasetVersion}`;

    // Dry run
    if (options.dryRun) {
      header('Training Configuration (Dry Run)');
      console.log(` Type: ${typeKey}`);
      console.log(` Dataset: ${options.dataset} v${datasetVersion}`);
      console.log(` Base model: ${baseModel}`);
      console.log(` Method: ${method}`);
      console.log(` Dataset dir: ${localDatasetDir}`);
      if (Object.keys(config).length > 0) {
        console.log(` Config: ${JSON.stringify(config, null, 2).split('\n').join('\n ')}`);
      }
      console.log();
      info('Dry run complete. No job was created.\n');
      return;
    }

    // Create job on Studio
    const machineInfo = getMachineInfo();
    const createSpinner = villageSpinner('Creating training job on SoulPrint Studio...').start();
    let jobResult;
    try {
      jobResult = await createJob({
        name: jobName,
        dataType: typeKey.toUpperCase(),
        datasetSlug: options.dataset,
        datasetVersion,
        baseModel,
        method,
        config,
        machineInfo,
        cliVersion,
      });
    } catch (err) {
      // Stop the spinner before the outer handler prints the error.
      createSpinner.stop();
      throw err;
    }
    const jobId = jobResult.jobId;
    createSpinner.succeed(`Job registered: ${brand.gold(jobId)}`);

    // Print training summary
    console.log();
    console.log(` Starting ${brand.gold(method)} training:`);
    console.log(` Base model: ${baseModel}`);
    console.log(` Dataset: ${options.dataset} v${datasetVersion}`);
    console.log(` Method: ${method}`);
    if (config.epochs) console.log(` Epochs: ${config.epochs}`);
    console.log(` Device: ${machineInfo.gpuType} (${machineInfo.gpu})`);
    console.log();

    // Resolve Python script ('lora' scripts are keyed per data type)
    const scriptKey = method === 'lora' ? `lora:${typeKey}` : method;
    const scriptRelPath = METHOD_SCRIPTS[scriptKey];
    if (!scriptRelPath) {
      error(`No training script found for method "${method}"\n`);
      // Best-effort, like the other Studio status calls in this function.
      await failJob(jobId, { errorMessage: `No training script for method: ${method}` }).catch(() => {});
      return;
    }

    const scriptPath = join(getScriptsDir(), scriptRelPath);
    const pythonPath = getPythonPath() || 'python3';

    if (!existsSync(scriptPath)) {
      error(`Training script not found: ${scriptPath}`);
      info(`Run ${brand.gold("'myvillage soulprint init'")} to download training scripts.\n`);
      await failJob(jobId, { errorMessage: `Training script not found: ${scriptRelPath}` }).catch(() => {});
      return;
    }

    // Update status to PREPARING
    await updateJobStatus(jobId, { status: 'PREPARING' }).catch(() => {});

    const outputDir = getJobOutputDir(jobId);
    mkdirSync(outputDir, { recursive: true });

    const scriptArgs = [
      '--dataset-dir', localDatasetDir,
      '--output-dir', outputDir,
      '--base-model', baseModel,
      '--method', method,
      '--config', JSON.stringify(config),
      '--job-id', jobId,
    ];

    // Spawn Python
    await updateJobStatus(jobId, { status: 'TRAINING' }).catch(() => {});

    let trainingProcess = null;
    let lastReportTime = 0;
    let lastReportedEpoch = 0;
    const REPORT_INTERVAL_MS = 10_000;
    let finalData = null;
    // Set once the user interrupts, so the failure path below stays quiet and
    // does not overwrite the CANCELLED status with FAILED.
    let cancelRequested = false;

    // `child.killed` only says a signal was sent; exit is signalled by
    // exitCode/signalCode becoming non-null.
    const hasExited = (child) => child.exitCode !== null || child.signalCode !== null;

    // SIGINT handler: SIGTERM, wait up to 10s for exit, then SIGKILL.
    const sigintHandler = async () => {
      if (trainingProcess) {
        cancelRequested = true;
        console.log('\n Stopping training gracefully...');
        trainingProcess.kill('SIGTERM');
        const deadline = Date.now() + 10000;
        while (Date.now() < deadline && !hasExited(trainingProcess)) {
          await new Promise(r => setTimeout(r, 500));
        }
        if (!hasExited(trainingProcess)) {
          trainingProcess.kill('SIGKILL');
        }
        await updateJobStatus(jobId, { status: 'CANCELLED' }).catch(() => {});
        console.log(' Training cancelled.');
      }
      process.exit(0);
    };
    process.on('SIGINT', sigintHandler);

    try {
      finalData = await new Promise((resolveP, rejectP) => {
        trainingProcess = spawn(pythonPath, [scriptPath, ...scriptArgs], {
          env: { ...process.env, PYTHONUNBUFFERED: '1' },
          cwd: getSoulprintDir(),
        });

        // Each stdout line is expected to be a JSON message with a `type`;
        // anything that fails to parse is stored as raw stdout.
        const rl = createInterface({ input: trainingProcess.stdout });
        rl.on('line', (line) => {
          try {
            const data = JSON.parse(line);
            if (data.type === 'progress') {
              // Display. Clamp so an epoch beyond totalEpochs cannot hand
              // String#repeat a negative count.
              const rawPct = data.totalEpochs ? Math.round((data.epoch / data.totalEpochs) * 100) : 0;
              const pct = Math.min(100, Math.max(0, rawPct));
              process.stdout.write(`\r Epoch ${data.epoch || '?'}/${data.totalEpochs || '?'} ${'━'.repeat(Math.floor(pct / 5))}${'─'.repeat(20 - Math.floor(pct / 5))} loss: ${data.loss?.toFixed(4) || '?'} lr: ${data.lr || '?'} `);

              appendJobLog(jobId, { type: 'progress', ...data });

              // Throttle reports to Studio: at most one per interval, plus
              // one whenever the epoch number changes.
              const now = Date.now();
              const isEpochBoundary = data.epoch && data.epoch !== lastReportedEpoch;
              if (now - lastReportTime > REPORT_INTERVAL_MS || isEpochBoundary) {
                lastReportTime = now;
                lastReportedEpoch = data.epoch;
                updateJobStatus(jobId, {
                  status: 'TRAINING',
                  progress: data.totalEpochs ? data.epoch / data.totalEpochs : null,
                  currentEpoch: data.epoch,
                  totalEpochs: data.totalEpochs,
                  currentStep: data.step,
                  totalSteps: data.totalSteps,
                  metrics: { trainingLoss: data.loss, validationLoss: data.valLoss },
                  logs: `Epoch ${data.epoch}/${data.totalEpochs} - loss: ${data.loss}`,
                }).catch(() => {});
              }
            } else if (data.type === 'log') {
              appendJobLog(jobId, data);
            } else if (data.type === 'complete') {
              resolveP(data);
            }
          } catch {
            appendJobLog(jobId, { type: 'stdout', message: line });
          }
        });

        let stderr = '';
        trainingProcess.stderr.on('data', (chunk) => {
          stderr += chunk.toString();
          appendJobLog(jobId, { type: 'stderr', message: chunk.toString().trim() });
        });

        trainingProcess.on('close', (code) => {
          if (code !== 0) {
            rejectP(new Error(stderr || `Training process exited with code ${code}`));
          } else {
            // Clean exit without a 'complete' message: settle anyway so the
            // command cannot hang. No-op if 'complete' already resolved.
            resolveP(null);
          }
        });

        trainingProcess.on('error', (err) => {
          rejectP(new Error(`Failed to start training: ${err.message}`));
        });
      });
    } catch (err) {
      process.removeListener('SIGINT', sigintHandler);
      // The child dying from a user interrupt is not a failure; the SIGINT
      // handler reports CANCELLED and exits the process.
      if (cancelRequested) return;
      console.log();
      error(`Training failed: ${err.message}`);
      await failJob(jobId, { errorMessage: err.message }).catch(() => {});
      info(`Logs saved to: ${getJobLogFile(jobId)}\n`);
      return;
    }

    process.removeListener('SIGINT', sigintHandler);
    console.log();
    console.log();
    success('Training complete!');
    console.log(` Output: ${brand.teal(outputDir)}`);
    if (finalData?.finalMetrics) {
      const m = finalData.finalMetrics;
      if (m.trainingLoss != null) console.log(` Final loss: ${m.trainingLoss}`);
      if (m.fidScore != null) console.log(` FID score: ${m.fidScore}`);
      if (m.wer != null) console.log(` WER: ${m.wer}`);
      if (m.mae != null) console.log(` MAE: ${m.mae}`);
      if (m.r2Score != null) console.log(` R\u00B2: ${m.r2Score}`);
    }
    console.log();

    // Upload artifacts
    const uploadSpinner = villageSpinner('Uploading artifacts to SoulPrint Studio...').start();
    try {
      const artifacts = scanArtifacts(outputDir);
      const totalArtifactSize = artifacts.reduce((sum, f) => sum + f.size, 0);

      for (const artifact of artifacts) {
        artifact.checksum = await computeSHA256(artifact.fullPath);
      }

      const { urls } = await getUploadUrls({
        jobId,
        files: artifacts.map(f => ({ path: f.path, size: f.size, checksum: f.checksum })),
      });

      // NOTE(review): pairs urls[i] with artifacts[i]; assumes the API returns
      // URLs in request order — confirm.
      const uploadLimit = pLimit(3);
      const artifactUploads = urls.map((urlInfo, i) =>
        uploadLimit(async () => {
          const file = artifacts[i];
          await axios.put(urlInfo.uploadUrl, createReadStream(file.fullPath), {
            headers: { 'Content-Type': 'application/octet-stream' },
            maxBodyLength: Infinity,
          });
        })
      );
      await Promise.all(artifactUploads);

      const completeResult = await completeJob(jobId, {
        s3Artifacts: `soulprint/models/${jobId}/`,
        artifactSizeBytes: totalArtifactSize,
        artifactManifest: artifacts.map(f => ({ path: f.path, size: f.size, checksum: f.checksum })),
        finalMetrics: finalData?.finalMetrics || {},
        evalSamples: finalData?.evalSamples || [],
      });

      uploadSpinner.succeed(`Model registered: ${brand.gold(completeResult.modelId || jobId)}`);
      console.log(`\n View in Studio: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/jobs/${jobId}`)}\n`);
    } catch (err) {
      uploadSpinner.fail(`Artifact upload failed: ${err.message}`);
      info(`Artifacts saved locally at: ${outputDir}`);
      info(`Try again with: ${brand.gold(`myvillage soulprint push ${outputDir} --job ${jobId}`)}\n`);
    }
  } catch (err) {
    if (err.isTtyError) {
      error('Prompts cannot be rendered in this environment.\n');
      return;
    }
    const message = err.response?.data?.error || err.message;
    error(`Training setup failed: ${message}\n`);
  }
}
|
|
1094
|
+
|
|
1095
|
+
// ════════════════════════════════════════════════════════
|
|
1096
|
+
// JOBS
|
|
1097
|
+
// ════════════════════════════════════════════════════════
|
|
1098
|
+
|
|
1099
|
+
/**
 * List training jobs, either as raw JSON or as a formatted table.
 *
 * @param {object} options - Parsed CLI options. `status` (optional) is
 *   upper-cased and sent as a filter; `json` switches to raw JSON output.
 * @returns {Promise<void>} Errors are reported through the spinner.
 */
export async function soulprintJobsCommand(options) {
  if (!requireAuth()) return;

  const spinner = villageSpinner('Loading training jobs...').start();
  try {
    const params = options.status ? { status: options.status.toUpperCase() } : {};
    const result = await listJobs(params);
    spinner.stop();

    // Raw mode: dump the response untouched.
    if (options.json) {
      console.log(JSON.stringify(result, null, 2));
      return;
    }

    // The list may sit under `jobs`, under `data`, or be the response itself.
    const jobs = result.jobs || result.data || result;
    const hasJobs = Array.isArray(jobs) && jobs.length > 0;
    if (!hasJobs) {
      info('\n No training jobs found.\n');
      return;
    }

    header('Training Jobs');

    const columnTitles = ` ${brand.teal(padRight('Job ID', 16))}${padRight('Name', 28)}${padRight('Status', 14)}${padRight('Progress', 10)}${padRight('Started', 12)}`;
    console.log(columnTitles);

    const divider = ` ${'\u2500'.repeat(14)} ${'\u2500'.repeat(26)} ${'\u2500'.repeat(12)} ${'\u2500'.repeat(8)} ${'\u2500'.repeat(10)}`;
    console.log(brand.darkGold(divider));

    jobs.forEach((job) => {
      const idCell = padRight(truncate(job.id, 14), 16);
      const nameCell = padRight(truncate(job.name, 26), 28);
      const statusCell = padRight(formatStatus(job.status), 14);
      const progressCell = padRight(formatProgress(job.progress), 10);
      // Prefer the start time, fall back to creation time, else a placeholder.
      const timestamp = job.startedAt || job.createdAt;
      const startedCell = padRight(timestamp ? relativeTime(timestamp) : '--', 12);
      console.log(` ${chalk.cyan(idCell)}${nameCell}${statusCell}${progressCell}${startedCell}`);
    });
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load jobs: ${message}`);
  }
}
|
|
1142
|
+
|
|
1143
|
+
/**
 * Show the details of a single training job.
 *
 * Prints identity, dataset, base model and method, then — when present —
 * progress, metrics, machine info, CLI version, timestamps and error message.
 *
 * @param {string} jobId - Identifier passed to `getJob`.
 * @param {object} options - Parsed CLI options; `json` prints the raw job.
 * @returns {Promise<void>} Errors are reported through the spinner.
 */
export async function soulprintJobDetailCommand(jobId, options) {
  if (!requireAuth()) return;

  const spinner = villageSpinner(`Loading job ${jobId}...`).start();
  try {
    const job = await getJob(jobId);
    spinner.stop();

    if (options.json) {
      console.log(JSON.stringify(job, null, 2));
      return;
    }

    console.log();
    console.log(` ${chalk.bold(`Job: ${job.name}`)} (${chalk.cyan(job.id)})`);
    console.log(` Status: ${formatStatus(job.status)}`);
    console.log(` Dataset: ${brand.gold(job.dataset?.slug || job.datasetId)} v${job.datasetVersion}`);
    console.log(` Base: ${job.baseModel}`);
    console.log(` Method: ${job.method}`);

    if (job.progress != null) {
      const pct = Math.round(job.progress * 100);
      // 40-cell bar. Clamp the fill so a progress value outside 0..1 cannot
      // pass a negative count to String#repeat (which throws RangeError).
      const filled = Math.min(40, Math.max(0, Math.floor(pct / 2.5)));
      console.log();
      console.log(` Progress: ${'\u2588'.repeat(filled)}${'\u2591'.repeat(40 - filled)} ${pct}%`);
      if (job.currentEpoch != null) console.log(` Epoch: ${job.currentEpoch}/${job.totalEpochs}`);
      if (job.currentStep != null) console.log(` Step: ${job.currentStep?.toLocaleString()}/${job.totalSteps?.toLocaleString()}`);
    }

    if (job.metrics) {
      console.log();
      console.log(' Metrics:');
      const m = job.metrics;
      // Only metrics that are actually set (non-null) are printed.
      if (m.trainingLoss != null) console.log(` Training loss: ${m.trainingLoss}`);
      if (m.validationLoss != null) console.log(` Validation loss: ${m.validationLoss}`);
      if (m.accuracy != null) console.log(` Accuracy: ${m.accuracy}`);
      if (m.fidScore != null) console.log(` FID score: ${m.fidScore}`);
      if (m.clipScore != null) console.log(` CLIP score: ${m.clipScore}`);
      if (m.wer != null) console.log(` WER: ${m.wer}`);
      if (m.cer != null) console.log(` CER: ${m.cer}`);
      if (m.mae != null) console.log(` MAE: ${m.mae}`);
      if (m.r2Score != null) console.log(` R\u00B2: ${m.r2Score}`);
      if (m.f1Score != null) console.log(` F1: ${m.f1Score}`);
    }

    if (job.machineInfo) {
      console.log();
      console.log(` Machine: ${job.machineInfo.gpu || 'unknown'} (${job.machineInfo.vram || '?'}) \u2014 ${job.machineInfo.os || 'unknown'}`);
    }
    if (job.cliVersion) console.log(` CLI: v${job.cliVersion}`);
    if (job.startedAt) console.log(` Started: ${job.startedAt}`);
    if (job.completedAt) console.log(` Ended: ${job.completedAt}`);
    if (job.errorMessage) {
      console.log();
      console.log(chalk.red(` Error: ${job.errorMessage}`));
    }
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load job: ${message}`);
  }
}
|
|
1205
|
+
|
|
1206
|
+
// ════════════════════════════════════════════════════════
|
|
1207
|
+
// MODELS
|
|
1208
|
+
// ════════════════════════════════════════════════════════
|
|
1209
|
+
|
|
1210
|
+
/**
 * List registered models, either as raw JSON or as a formatted table.
 *
 * @param {object} options - Parsed CLI options. `status` and `type` are
 *   optional filters (upper-cased before sending; `type` is sent as
 *   `dataType`); `json` switches to raw JSON output.
 * @returns {Promise<void>} Errors are reported through the spinner.
 */
export async function soulprintModelsCommand(options) {
  if (!requireAuth()) return;

  const spinner = villageSpinner('Loading models...').start();
  try {
    const params = {};
    if (options.status) {
      params.status = options.status.toUpperCase();
    }
    if (options.type) {
      params.dataType = options.type.toUpperCase();
    }

    const result = await listModels(params);
    spinner.stop();

    // Raw mode: dump the response untouched.
    if (options.json) {
      console.log(JSON.stringify(result, null, 2));
      return;
    }

    // The list may sit under `models`, under `data`, or be the response itself.
    const models = result.models || result.data || result;
    const hasModels = Array.isArray(models) && models.length > 0;
    if (!hasModels) {
      info('\n No models found.\n');
      return;
    }

    header('Model Registry');

    const columnTitles = ` ${brand.teal(padRight('Name', 28))}${padRight('Type', 14)}${padRight('Status', 14)}${padRight('Base Model', 24)}${padRight('Created', 12)}`;
    console.log(columnTitles);

    const divider = ` ${'\u2500'.repeat(26)} ${'\u2500'.repeat(12)} ${'\u2500'.repeat(12)} ${'\u2500'.repeat(22)} ${'\u2500'.repeat(10)}`;
    console.log(brand.darkGold(divider));

    models.forEach((model) => {
      const nameCell = padRight(truncate(model.name || model.slug, 26), 28);
      const typeCell = padRight(model.dataType, 14);
      const statusCell = padRight(formatStatus(model.status), 14);
      const baseCell = padRight(truncate(model.baseModel, 22), 24);
      let createdText = '--';
      if (model.createdAt) {
        createdText = relativeTime(model.createdAt);
      }
      const createdCell = padRight(createdText, 12);
      console.log(` ${brand.gold(nameCell)}${typeCell}${statusCell}${baseCell}${createdCell}`);
    });
    console.log();
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Failed to load models: ${message}`);
  }
}
|
|
1254
|
+
|
|
1255
|
+
// ════════════════════════════════════════════════════════
|
|
1256
|
+
// PUSH
|
|
1257
|
+
// ════════════════════════════════════════════════════════
|
|
1258
|
+
|
|
1259
|
+
/**
 * Upload local model artifacts to SoulPrint Studio and register a model.
 *
 * Scans `path` for artifacts, checksums each file, requests upload URLs,
 * uploads with a concurrency limit of 3, then POSTs the model record.
 *
 * @param {string} path - File-system path to scan for artifacts.
 * @param {object} options - Parsed CLI options. Reads `type`, `name`, `base`,
 *   `method`, `description` and `job`.
 * @returns {Promise<void>} Failures are reported on the console; nothing is
 *   thrown to the caller.
 */
export async function soulprintPushCommand(path, options) {
  if (!requireAuth()) return;

  // Tracks whichever spinner is currently running so the catch block can
  // terminate it instead of leaving it spinning next to the error message.
  let activeSpinner = null;

  try {
    const resolvedPath = resolve(path);
    if (!existsSync(resolvedPath)) {
      error(`Path not found: ${resolvedPath}\n`);
      return;
    }

    const typeKey = options.type?.toLowerCase();
    if (!TYPE_EXTENSIONS[typeKey]) {
      error(`Invalid type "${options.type}". Must be: text, image, audio, structured, multimodal\n`);
      return;
    }

    // The name is needed to build the artifact prefix below; check it before
    // uploading so a missing name does not fail only after the transfer.
    if (!options.name) {
      error('A model name is required.\n');
      return;
    }

    console.log('\n Scanning artifacts...');
    const artifacts = scanArtifacts(resolvedPath);
    const totalSize = artifacts.reduce((sum, f) => sum + f.size, 0);
    info(`${artifacts.length} files, ${formatBytes(totalSize)} total`);

    // Compute checksums
    activeSpinner = villageSpinner('Computing checksums...').start();
    for (const artifact of artifacts) {
      artifact.checksum = await computeSHA256(artifact.fullPath);
    }
    activeSpinner.stop();
    activeSpinner = null;

    // Get upload URLs
    const uploadData = { files: artifacts.map(f => ({ path: f.path, size: f.size, checksum: f.checksum })) };
    if (options.job) uploadData.jobId = options.job;

    activeSpinner = villageSpinner('Uploading model artifacts to SoulPrint Studio...').start();
    const { urls } = await getUploadUrls(uploadData);

    // NOTE(review): pairs urls[i] with artifacts[i]; assumes the API returns
    // URLs in request order — confirm.
    const limit = pLimit(3);
    const uploads = urls.map((urlInfo, i) =>
      limit(async () => {
        const file = artifacts[i];
        await axios.put(urlInfo.uploadUrl, createReadStream(file.fullPath), {
          headers: { 'Content-Type': 'application/octet-stream' },
          maxBodyLength: Infinity,
        });
      })
    );

    await Promise.all(uploads);
    activeSpinner.succeed(`Uploaded ${formatBytes(totalSize)}`);
    activeSpinner = null;

    // Register model
    activeSpinner = villageSpinner('Registering model...').start();
    const modelPayload = {
      name: options.name,
      dataType: typeKey.toUpperCase(),
      baseModel: options.base || 'unknown',
      method: options.method || 'unknown',
      description: options.description || '',
      s3Artifacts: `soulprint/models/${options.name.toLowerCase().replace(/\s+/g, '-')}/`,
      artifactSizeBytes: totalSize,
      artifactManifest: artifacts.map(f => ({ path: f.path, size: f.size, checksum: f.checksum })),
    };
    if (options.job) modelPayload.jobId = options.job;

    const client = (await import('../utils/soulprint-api.js')).getSoulprintClient();
    const response = await client.post('/models', modelPayload);
    const model = response.data;
    activeSpinner.succeed(`Model registered: ${brand.gold(model.slug || model.name)}`);
    activeSpinner = null;

    console.log(` Status: ${formatStatus('DRAFT')}`);
    console.log(` View: ${brand.teal(`https://soulprint-studio.myvillageproject.ai/models/${model.slug || ''}`)}\n`);
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    if (activeSpinner) {
      activeSpinner.fail(`Push failed: ${message}`);
    } else {
      error(`Push failed: ${message}\n`);
    }
  }
}
|
|
1337
|
+
|
|
1338
|
+
// ════════════════════════════════════════════════════════
|
|
1339
|
+
// PUBLISH
|
|
1340
|
+
// ════════════════════════════════════════════════════════
|
|
1341
|
+
|
|
1342
|
+
/**
 * Publish a validated model to the MyVillage platform.
 *
 * Fetches the model, refuses to continue unless its status is `VALIDATED`,
 * then calls `publishModel` with the tier/visibility/targets from `options`.
 *
 * @param {string} modelSlug - Slug passed to `getModel` / `publishModel`.
 * @param {object} options - Parsed CLI options. Reads `tier` (defaults to
 *   'FREE'), `public`, `villager` and `villages` (comma-separated list).
 * @returns {Promise<void>} Errors are reported through the spinner.
 */
export async function soulprintPublishCommand(modelSlug, options) {
  if (!requireAuth()) return;

  // Created outside the try block (as in the jobs/models commands) so the
  // catch handler can stop it; otherwise it keeps running after an error.
  const spinner = villageSpinner(`Publishing ${modelSlug} to MyVillage Platform...`).start();
  try {
    // Verify model exists and is VALIDATED
    const model = await getModel(modelSlug);
    if (model.status !== 'VALIDATED') {
      spinner.fail(`Model "${modelSlug}" is ${model.status}. Only VALIDATED models can be published.`);
      return;
    }

    const publishData = {
      tier: options.tier || 'FREE',
      isPublic: !!options.public,
    };
    if (options.villager) publishData.targetVillagerId = options.villager;
    if (options.villages) {
      // Trim each id and drop empties left by stray or trailing commas.
      publishData.targetVillageIds = options.villages
        .split(',')
        .map(s => s.trim())
        .filter(Boolean);
    }

    const result = await publishModel(modelSlug, publishData);
    spinner.succeed('Model pushed to portal.myvillageproject.ai');

    if (result.platformModelId) {
      success(`Platform Model ID: ${result.platformModelId}`);
    }
    success('Status: DRAFT (activate in portal admin)');

    // Prefer the URL returned by the API; otherwise build the admin URL from
    // the platform model id when one was returned.
    if (result.platformUrl) {
      console.log(`\n Model available at: ${brand.teal(result.platformUrl)}\n`);
    } else if (result.platformModelId) {
      console.log(`\n Model available at: ${brand.teal(`https://portal.myvillageproject.ai/admin/models/${result.platformModelId}`)}\n`);
    }
  } catch (err) {
    const message = err.response?.data?.error || err.message;
    spinner.fail(`Publish failed: ${message}`);
  }
}
|