@sogni-ai/sogni-creative-agent-skill 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4866 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * sogni-agent - Generate images and videos using Sogni AI
4
+ * Usage: sogni-agent [options] "prompt"
5
+ */
6
+
7
+ import { SogniClientWrapper, ClientEvent, getMaxContextImages } from '@sogni-ai/sogni-client-wrapper';
8
+ import JSON5 from 'json5';
9
+ import { createHash, randomBytes } from 'crypto';
10
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, mkdtempSync, statSync, readdirSync, realpathSync, lstatSync, unlinkSync, rmdirSync } from 'fs';
11
+ import { join, dirname, basename, extname, sep } from 'path';
12
+ import { homedir, tmpdir } from 'os';
13
+ import sharp from 'sharp';
14
+ import { getEnv, hasEnv } from './env.mjs';
15
+ import { PACKAGE_VERSION } from './version.mjs';
16
+ import { assertSafeUrl } from './ssrf-guard.mjs';
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Path sanitization — defense-in-depth for any value that becomes a file path
20
+ // or process argument. execaSync runs argument arrays without shell expansion,
21
+ // so classic shell injection is not possible. These checks guard against:
22
+ // • null-byte injection (can truncate paths at the C level)
23
+ // • control-character injection
24
+ // • FFMPEG_PATH pointing to a non-ffmpeg binary
25
+ // ---------------------------------------------------------------------------
26
+
27
/**
 * Validate a value that is about to be used as a filesystem path.
 *
 * @param {*} p - Candidate path.
 * @param {string} [label] - Name used in error messages; falls back to "Path".
 * @returns {string} `p` itself, unmodified, when every check passes.
 * @throws {Error} With `code === 'INVALID_PATH'` when `p` is not a string,
 *   contains a null byte, or contains a disallowed ASCII control character.
 */
function sanitizePath(p, label) {
  const subject = label || 'Path';
  const reject = (problem) => {
    const failure = new Error(`${subject} ${problem}`);
    failure.code = 'INVALID_PATH';
    throw failure;
  };

  if (typeof p !== 'string') reject('must be a string.');
  if (p.includes('\0')) reject('contains a null byte.');
  // Tab (\x09), newline (\x0a) and carriage return (\x0d) are tolerated on purpose.
  if (/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/.test(p)) reject('contains invalid control characters.');
  return p;
}
50
+
51
// Default on-disk locations, all rooted in the current user's home directory.
const DEFAULT_CREDENTIALS_PATH = join(homedir(), '.config', 'sogni', 'credentials');
const DEFAULT_LAST_RENDER_PATH = join(homedir(), '.config', 'sogni', 'last-render.json');
const DEFAULT_OPENCLAW_CONFIG_PATH = join(homedir(), '.openclaw', 'openclaw.json');
const DEFAULT_MEDIA_INBOUND_DIR = join(homedir(), '.clawdbot', 'media', 'inbound');
const DEFAULT_MEMORIES_PATH = join(homedir(), '.config', 'sogni', 'memories.json');
const DEFAULT_PERSONALITY_PATH = join(homedir(), '.config', 'sogni', 'personality.txt');
const DEFAULT_PERSONAS_DIR = join(homedir(), '.config', 'sogni', 'personas');
const DEFAULT_PERSONAS_INDEX_PATH = join(DEFAULT_PERSONAS_DIR, 'index.json');

// The OPENCLAW_CONFIG_PATH environment value wins over the default when it is set and non-empty.
const OPENCLAW_CONFIG_PATH = getEnv('OPENCLAW_CONFIG_PATH') || DEFAULT_OPENCLAW_CONFIG_PATH;
// True whenever OPENCLAW_PLUGIN_CONFIG resolves to a truthy value.
const IS_OPENCLAW_INVOCATION = !!getEnv('OPENCLAW_PLUGIN_CONFIG');

// Command-line arguments without the node executable and script path.
const [, , ...RAW_ARGS] = process.argv;
const CLI_WANTS_JSON = RAW_ARGS.some((arg) => arg === '--json');
// Errors are emitted as JSON when --json was passed or when running under OpenClaw.
const JSON_ERROR_MODE = CLI_WANTS_JSON || IS_OPENCLAW_INVOCATION;
64
/**
 * LTX-2.3 model id for each video workflow it is mapped to here.
 * Every id has the shape `ltx23-22b-fp8_<workflow>_distilled`.
 */
const LTX23_WORKFLOW_MODELS = Object.fromEntries(
  ['t2v', 'i2v', 'ia2v', 'a2v', 'v2v'].map((workflow) => [
    workflow,
    `ltx23-22b-fp8_${workflow}_distilled`,
  ])
);
71
+
72
/**
 * Seedance 2.0 model ids, keyed by workflow.
 * `t2vFast` is an extra key that points at the "fast" text-to-video model id.
 */
const SEEDANCE_WORKFLOW_MODELS = {
  't2v': 'seedance-2-0_t2v',
  't2vFast': 'seedance-2-0-fast_t2v',
  'ia2v': 'seedance-2-0_ia2v',
  'v2v': 'seedance-2-0_v2v',
};
78
+
79
/**
 * Static per-model settings for the built-in video models, keyed by model id.
 *
 * Entries of one family share most values, so each entry is assembled from a
 * family preset plus its own overrides. `workflow` is always the first key and
 * an override of an existing key keeps that key's position, so the resulting
 * objects have the same keys, values and key order as a hand-written table.
 */
const VIDEO_MODEL_REGISTRY = (() => {
  const ltx23Preset = {
    family: 'ltx23',
    defaultWidth: 1920,
    defaultHeight: 1088,
    minDimension: 640,
    maxDimension: 3840,
    dimensionMultiple: 64,
    steps: 8,
    guidance: 1.0,
    fps: 24,
    frameStep: 8,
    minFrames: 25,
    maxFrames: 505,
    sampler: 'euler_ancestral',
    scheduler: 'simple',
  };

  const wan22Preset = {
    family: 'wan22',
    defaultWidth: 832,
    defaultHeight: 480,
    minDimension: 480,
    maxDimension: 1536,
    dimensionMultiple: 16,
    steps: 4,
    guidance: 1.0,
    fps: 32,
    internalFps: 16,
    frameStep: 1,
    minFrames: 17,
    maxFrames: 321,
    sampler: 'euler',
    scheduler: 'simple',
    shift: 8.0,
  };

  const seedancePreset = {
    family: 'seedance2',
    defaultWidth: 1920,
    defaultHeight: 1088,
    minDimension: 1,
    maxDimension: 99999,
    dimensionMultiple: 1,
    fps: 24,
    frameStep: 1,
    minFrames: 97,
    maxFrames: 361,
  };

  // Every call returns a fresh object, so no two registry entries share identity.
  const entry = (workflow, preset, overrides = {}) => ({ workflow, ...preset, ...overrides });

  return {
    // LTX-2.3
    [LTX23_WORKFLOW_MODELS.t2v]: entry('t2v', ltx23Preset, { supportsNativeAudio: true }),
    [LTX23_WORKFLOW_MODELS.i2v]: entry('i2v', ltx23Preset, { supportsNativeAudio: true }),
    [LTX23_WORKFLOW_MODELS.ia2v]: entry('ia2v', ltx23Preset),
    [LTX23_WORKFLOW_MODELS.a2v]: entry('a2v', ltx23Preset),
    [LTX23_WORKFLOW_MODELS.v2v]: entry('v2v', ltx23Preset, { fps: 25 }),

    // WAN 2.2
    'wan_v2.2-14b-fp8_t2v_lightx2v': entry('t2v', wan22Preset, {
      defaultWidth: 640,
      defaultHeight: 640,
      maxFrames: 161,
      shift: 5.0,
    }),
    'wan_v2.2-14b-fp8_i2v_lightx2v': entry('i2v', wan22Preset),
    'wan_v2.2-14b-fp8_s2v_lightx2v': entry('s2v', wan22Preset, { sampler: 'uni_pc' }),
    'wan_v2.2-14b-fp8_animate-move_lightx2v': entry('animate-move', wan22Preset),
    'wan_v2.2-14b-fp8_animate-replace_lightx2v': entry('animate-replace', wan22Preset),

    // Seedance 2.0 (no steps/guidance/sampler/scheduler keys)
    [SEEDANCE_WORKFLOW_MODELS.t2v]: entry('t2v', seedancePreset, { supportsNativeAudio: true }),
    [SEEDANCE_WORKFLOW_MODELS.t2vFast]: entry('t2v', seedancePreset, {
      defaultWidth: 1280,
      defaultHeight: 720,
      maxDimension: 1280,
      supportsNativeAudio: true,
    }),
    [SEEDANCE_WORKFLOW_MODELS.ia2v]: entry('ia2v', seedancePreset),
    [SEEDANCE_WORKFLOW_MODELS.v2v]: entry('v2v', seedancePreset, { minFrames: 96, maxFrames: 360 }),
  };
})();
317
+
318
// Register the LTX-2 (`ltx2-19b-fp8_*`) models by cloning the matching LTX-2.3
// entry and overriding family, dimensions, steps and native-audio support.
// The non-distilled id gets the same settings with 20 steps instead of 8.
['t2v', 'i2v', 'ia2v', 'a2v', 'v2v'].forEach((workflowName) => {
  const template = VIDEO_MODEL_REGISTRY[LTX23_WORKFLOW_MODELS[workflowName]];
  if (!template) return;

  const distilledEntry = {
    ...template,
    family: 'ltx2',
    defaultWidth: 768,
    defaultHeight: 768,
    minDimension: 480,
    maxDimension: 1536,
    steps: 8,
    supportsNativeAudio: workflowName === 't2v' || workflowName === 'i2v',
  };

  VIDEO_MODEL_REGISTRY[`ltx2-19b-fp8_${workflowName}_distilled`] = distilledEntry;
  VIDEO_MODEL_REGISTRY[`ltx2-19b-fp8_${workflowName}`] = { ...distilledEntry, steps: 20 };
});
338
+
339
/**
 * Model id used for a video workflow when nothing more specific applies.
 * Text-, image+audio-, audio- and video-driven workflows map to LTX-2.3 ids;
 * the remaining workflows map to WAN 2.2 ids.
 */
const VIDEO_WORKFLOW_DEFAULT_MODELS = {
  t2v: LTX23_WORKFLOW_MODELS.t2v,
  i2v: 'wan_v2.2-14b-fp8_i2v_lightx2v',
  s2v: 'wan_v2.2-14b-fp8_s2v_lightx2v',
  ia2v: LTX23_WORKFLOW_MODELS.ia2v,
  a2v: LTX23_WORKFLOW_MODELS.a2v,
  'animate-move': 'wan_v2.2-14b-fp8_animate-move_lightx2v',
  'animate-replace': 'wan_v2.2-14b-fp8_animate-replace_lightx2v',
  v2v: LTX23_WORKFLOW_MODELS.v2v,
};
349
+
350
/**
 * True when the model id starts with `ltx2-` or `ltx23-`.
 * A null or undefined id yields false.
 */
function isLtx2Model(modelId) {
  return ['ltx2-', 'ltx23-'].some((prefix) => modelId?.startsWith(prefix));
}
351
/**
 * True when the model id starts with `wan_`.
 * A null or undefined id yields false.
 */
function isWanModel(modelId) {
  const hasWanPrefix = modelId?.startsWith('wan_');
  return hasWanPrefix ? hasWanPrefix : false;
}
352
/**
 * True when the model id starts with `seedance-2-0`.
 * A null or undefined id yields false.
 */
function isSeedanceModel(modelId) {
  if (modelId === null || modelId === undefined) return false;
  return modelId.startsWith('seedance-2-0') || false;
}
353
+
354
/**
 * Pick the ControlNet strength for a video job.
 *
 * @param {string} name - ControlNet name; only `'detailer'` changes the default.
 * @param {number|null|undefined} explicitStrength - Caller-supplied strength.
 * @returns {number} `explicitStrength` when it is neither null nor undefined
 *   (0 is honoured), otherwise 1.0 for `'detailer'` and 0.85 for anything else.
 */
function resolveVideoControlNetStrength(name, explicitStrength) {
  const fallback = name === 'detailer' ? 1.0 : 0.85;
  return explicitStrength ?? fallback;
}
358
+
359
/**
 * Short, lower-case aliases accepted in place of full video model ids.
 * A bare family name (`ltx23`, `wan22`, `seedance2`) maps to that family's
 * text-to-video model.
 */
const VIDEO_MODEL_ALIASES = {
  // LTX-2.3
  'ltx23': LTX23_WORKFLOW_MODELS.t2v,
  'ltx23-t2v': LTX23_WORKFLOW_MODELS.t2v,
  'ltx23-i2v': LTX23_WORKFLOW_MODELS.i2v,
  'ltx23-ia2v': LTX23_WORKFLOW_MODELS.ia2v,
  'ltx23-a2v': LTX23_WORKFLOW_MODELS.a2v,
  'ltx23-v2v': LTX23_WORKFLOW_MODELS.v2v,

  // WAN 2.2
  'wan22': 'wan_v2.2-14b-fp8_t2v_lightx2v',
  'wan22-t2v': 'wan_v2.2-14b-fp8_t2v_lightx2v',
  'wan22-i2v': 'wan_v2.2-14b-fp8_i2v_lightx2v',
  'wan22-s2v': 'wan_v2.2-14b-fp8_s2v_lightx2v',
  'wan22-animate-move': 'wan_v2.2-14b-fp8_animate-move_lightx2v',
  'wan22-animate-replace': 'wan_v2.2-14b-fp8_animate-replace_lightx2v',

  // Seedance 2.0
  'seedance2': SEEDANCE_WORKFLOW_MODELS.t2v,
  'seedance2-t2v': SEEDANCE_WORKFLOW_MODELS.t2v,
  'seedance2-fast': SEEDANCE_WORKFLOW_MODELS.t2vFast,
  'seedance2-fast-t2v': SEEDANCE_WORKFLOW_MODELS.t2vFast,
  'seedance2-ia2v': SEEDANCE_WORKFLOW_MODELS.ia2v,
  'seedance2-v2v': SEEDANCE_WORKFLOW_MODELS.v2v,
};
379
+
380
/**
 * Translate a model alias into a concrete model id.
 *
 * The alias is matched after `String(...).trim().toLowerCase()`. When a
 * `workflow` is supplied, the bare family aliases `ltx23`, `wan22` and
 * `seedance2` resolve to that family's model for the workflow where one is
 * known. Anything that is not an alias is returned exactly as passed in.
 *
 * Fixes relative to the previous implementation:
 *  - `wan22` combined with a workflow now prefers the WAN alias for that
 *    workflow (`wan22-<workflow>`). Previously it went straight to
 *    VIDEO_WORKFLOW_DEFAULT_MODELS, whose `t2v` entry is an LTX-2.3 model, so
 *    asking for WAN text-to-video silently selected LTX-2.3. Workflows with no
 *    WAN alias keep the old fallback chain.
 *  - Table lookups only consider own properties, so ids such as
 *    "constructor" or "toString" no longer resolve to inherited
 *    Object.prototype members.
 *
 * @param {*} modelId - Alias or model id; falsy values are returned unchanged.
 * @param {string} [workflow] - Normalized workflow key (e.g. 't2v', 'i2v').
 * @returns {*} The resolved model id, or `modelId` itself when no alias matches.
 */
function resolveVideoModelAlias(modelId, workflow) {
  if (!modelId) return modelId;

  const lookup = (table, name) =>
    (Object.prototype.hasOwnProperty.call(table, name) ? table[name] : undefined);
  const key = String(modelId).trim().toLowerCase();

  if (workflow) {
    if (key === 'ltx23') {
      const ltxModel = lookup(LTX23_WORKFLOW_MODELS, workflow);
      if (ltxModel) return ltxModel;
    }
    if (key === 'wan22') {
      return (
        lookup(VIDEO_MODEL_ALIASES, `wan22-${workflow}`) ||
        lookup(VIDEO_WORKFLOW_DEFAULT_MODELS, workflow) ||
        VIDEO_MODEL_ALIASES.wan22
      );
    }
    if (key === 'seedance2') {
      const seedanceModel = lookup(SEEDANCE_WORKFLOW_MODELS, workflow);
      if (seedanceModel) return seedanceModel;
    }
  }

  return lookup(VIDEO_MODEL_ALIASES, key) || modelId;
}
394
+
395
/**
 * Look up, or synthesize, the built-in settings for a video model.
 *
 * Resolution order:
 *  1. the registry entry for the alias-resolved id;
 *  2. for other `ltx23-` ids, the registry entry of the LTX-2.3 model that
 *     serves the inferred workflow;
 *  3. for other `ltx2-` ids, a generated LTX-2 config;
 *  4. for other WAN ids, a generated WAN 2.2 config.
 *
 * @param {string} modelId - Model id or alias.
 * @returns {object|null} The settings object, or null when `modelId` is falsy,
 *   no workflow can be inferred from the id, or the family is not recognised.
 */
function getBuiltinVideoModelConfig(modelId) {
  if (!modelId) return null;

  const resolvedId = resolveVideoModelAlias(modelId);
  const registered = VIDEO_MODEL_REGISTRY[resolvedId];
  if (registered) return registered;

  const workflow = inferVideoWorkflowFromModel(resolvedId);
  if (!workflow) return null;

  if (resolvedId.startsWith('ltx23-') && LTX23_WORKFLOW_MODELS[workflow]) {
    return VIDEO_MODEL_REGISTRY[LTX23_WORKFLOW_MODELS[workflow]] || null;
  }

  if (resolvedId.startsWith('ltx2-')) {
    const isDistilled = resolvedId.includes('distilled');
    return {
      workflow,
      family: 'ltx2',
      defaultWidth: 768,
      defaultHeight: 768,
      minDimension: 480,
      maxDimension: 1536,
      dimensionMultiple: 64,
      steps: isDistilled ? 8 : 20,
      guidance: 1.0,
      fps: workflow === 'v2v' ? 25 : 24,
      frameStep: 8,
      minFrames: 25,
      maxFrames: 321,
      sampler: 'euler_ancestral',
      scheduler: 'simple',
    };
  }

  if (!isWanModel(resolvedId)) return null;

  // WAN text-to-video uses a square default, fewer frames and a lower shift.
  const isTextToVideo = workflow === 't2v';
  return {
    workflow,
    family: 'wan22',
    defaultWidth: isTextToVideo ? 640 : 832,
    defaultHeight: isTextToVideo ? 640 : 480,
    minDimension: 480,
    maxDimension: 1536,
    dimensionMultiple: 16,
    steps: resolvedId.includes('lightx2v') ? 4 : 20,
    guidance: 1.0,
    fps: 32,
    internalFps: 16,
    frameStep: 1,
    minFrames: 17,
    maxFrames: isTextToVideo ? 161 : 321,
    sampler: workflow === 's2v' ? 'uni_pc' : 'euler',
    scheduler: 'simple',
    shift: isTextToVideo ? 5.0 : 8.0,
  };
}
446
+
447
/**
 * Expand a leading `~` into the current user's home directory.
 *
 * Only a bare `~` and the prefixes `~/` and `~\` are expanded; `~user/...`
 * and non-string values are returned unchanged.
 *
 * @param {*} rawPath - Path as supplied by the user or configuration.
 * @returns {*} The expanded path, or `rawPath` itself when nothing applies.
 */
function expandHomePath(rawPath) {
  if (typeof rawPath !== 'string') return rawPath;
  if (rawPath === '~') return homedir();

  const hasHomePrefix = ['~/', '~\\'].some((prefix) => rawPath.startsWith(prefix));
  return hasHomePrefix ? join(homedir(), rawPath.slice(2)) : rawPath;
}
455
+
456
/**
 * Resolve a configurable path: expand `~`, fall back when the result is
 * falsy, then validate the winner with sanitizePath.
 *
 * @param {*} rawPath - Configured value, possibly missing.
 * @param {string} fallbackPath - Used when `rawPath` expands to a falsy value.
 * @param {string} [label] - Name forwarded to sanitizePath for error messages.
 * @returns {string} The validated path.
 */
function resolveConfiguredPath(rawPath, fallbackPath, label) {
  const expanded = expandHomePath(rawPath);
  const chosen = expanded ? expanded : fallbackPath;
  return sanitizePath(chosen, label);
}
460
+
461
/**
 * Check whether `targetPath` is `basePath` itself or lies beneath it.
 *
 * This is a purely textual comparison on whole path segments (so `/a` does
 * not contain `/ab`); it does not normalize or resolve either path.
 *
 * A base that already ends with the platform separator (the filesystem root
 * `/`, a drive root, or a directory written as `/dir/`) is handled too.
 * Previously the separator was appended unconditionally, producing a prefix
 * like `//` that no child path could ever match.
 *
 * @param {string} basePath - Directory that should contain the target.
 * @param {string} targetPath - Path to test.
 * @returns {boolean} True when the target equals the base or is inside it.
 */
function isPathWithinBase(basePath, targetPath) {
  if (targetPath === basePath) return true;

  const basePrefix = basePath.endsWith(sep) ? basePath : `${basePath}${sep}`;
  // The second clause treats "/dir" as equal to a base written as "/dir/".
  return targetPath.startsWith(basePrefix) || `${targetPath}${sep}` === basePrefix;
}
464
+
465
/**
 * Assemble the structured error object reported by the CLI.
 *
 * Key order is: success, error, prompt, then errorCode / errorDetails / hint
 * (each only when truthy), timestamp, node, cwd and finally `openclaw` when
 * running under OpenClaw.
 *
 * @param {object} fields
 * @param {string} [fields.message] - Error text; defaults to 'Unknown error'.
 * @param {string} [fields.code] - Stored as `errorCode`.
 * @param {*} [fields.details] - Stored as `errorDetails`.
 * @param {string} [fields.hint] - Stored as `hint`.
 * @param {*} [fields.prompt] - Stored as `prompt`; null/undefined become null.
 * @returns {object} The payload.
 */
function buildCliErrorPayload({ message, code, details, hint, prompt }) {
  return {
    success: false,
    error: message || 'Unknown error',
    prompt: prompt ?? null,
    ...(code ? { errorCode: code } : {}),
    ...(details ? { errorDetails: details } : {}),
    ...(hint ? { hint } : {}),
    timestamp: new Date().toISOString(),
    node: process.versions.node,
    cwd: process.cwd(),
    ...(IS_OPENCLAW_INVOCATION ? { openclaw: true } : {}),
  };
}
480
+
481
/**
 * Report a fatal CLI error and terminate the process with exit code 1.
 *
 * In JSON error mode the payload is printed to stdout as a single JSON line.
 * The human-readable "Error:" / "Hint:" lines go to stderr unless `--json`
 * was passed explicitly.
 *
 * @param {string} message - Error text.
 * @param {object} [opts]
 * @param {string} [opts.code] - Machine-readable error code.
 * @param {*} [opts.details] - Extra structured details.
 * @param {string} [opts.hint] - Suggestion shown to the user.
 * @param {*} [opts.prompt] - Prompt to report; when undefined, the prompt from
 *   the module-level `options` object is used if it is available.
 */
function fatalCliError(message, opts = {}) {
  const resolvePrompt = () => {
    if (opts.prompt !== undefined) return opts.prompt;
    try {
      // If parsing already populated options, include prompt for better downstream reporting.
      return options?.prompt ?? null;
    } catch {
      // Reading `options` can throw (presumably when this runs before it is
      // initialised); report no prompt in that case.
      return null;
    }
  };

  const payload = buildCliErrorPayload({
    message,
    code: opts.code,
    details: opts.details,
    hint: opts.hint,
    prompt: resolvePrompt(),
  });

  if (JSON_ERROR_MODE) {
    console.log(JSON.stringify(payload));
  }

  // OpenClaw expects JSON, but humans still benefit from stderr.
  const alsoWriteStderr = !JSON_ERROR_MODE || !CLI_WANTS_JSON;
  if (alsoWriteStderr) {
    console.error(`Error: ${payload.error}`);
    if (payload.hint) console.error(`Hint: ${payload.hint}`);
  }

  process.exit(1);
}
512
+
513
/**
 * Map a user-supplied workflow name or synonym to its canonical key.
 *
 * Matching ignores case and surrounding whitespace, and non-string values are
 * stringified first. This mirrors how resolveVideoModelAlias treats model
 * ids. Previously a non-string truthy value threw a TypeError and a padded
 * value such as " t2v " was rejected.
 *
 * @param {*} value - Workflow name, e.g. 'i2v' or 'image-to-video'.
 * @returns {string|null} One of 't2v', 'i2v', 's2v', 'ia2v', 'a2v',
 *   'animate-move', 'animate-replace', 'v2v'; null when empty or unrecognised.
 */
function normalizeVideoWorkflow(value) {
  if (!value) return null;

  switch (String(value).trim().toLowerCase()) {
    case 't2v':
    case 'text-to-video':
      return 't2v';
    case 'i2v':
    case 'image-to-video':
      return 'i2v';
    case 's2v':
    case 'sound-to-video':
      return 's2v';
    case 'ia2v':
    case 'image-audio-to-video':
    case 'image+audio-to-video':
      return 'ia2v';
    case 'a2v':
    case 'audio-to-video':
      return 'a2v';
    case 'animate-move':
    case 'animate_move':
      return 'animate-move';
    case 'animate-replace':
    case 'animate_replace':
      return 'animate-replace';
    case 'v2v':
    case 'video-to-video':
      return 'v2v';
    default:
      return null;
  }
}
526
+
527
/**
 * Guess the video workflow from markers inside a model id.
 *
 * The id is alias-resolved and lower-cased, then tested against the rules
 * below in order; the first rule with a matching marker wins.
 *
 * @param {string} modelId - Model id or alias.
 * @returns {string|null} The workflow key, or null when `modelId` is falsy or
 *   no marker is found.
 */
function inferVideoWorkflowFromModel(modelId) {
  if (!modelId) return null;

  const id = resolveVideoModelAlias(modelId).toLowerCase();
  const rules = [
    ['animate-move', ['animate-move']],
    ['animate-replace', ['animate-replace']],
    ['v2v', ['_v2v']],
    ['ia2v', ['_ia2v']],
    ['a2v', ['_a2v']],
    ['t2v', ['_t2v', '-t2v']],
    ['i2v', ['_i2v', '-i2v']],
    ['s2v', ['_s2v', '-s2v']],
  ];

  const hit = rules.find(([, markers]) => markers.some((marker) => id.includes(marker)));
  return hit ? hit[0] : null;
}
540
+
541
/**
 * True when the prompt contains a phrase that rules out spoken audio, such as
 * "no dialogue", "without speech", "silent" or "no voiceover".
 * Matching is case-insensitive and on whole words; a missing prompt is false.
 */
function promptExplicitlyDisablesSpeech(prompt) {
  const silencePhrases = /\b(no dialogue|no speech|without dialogue|without speech|silent|no voiceover|no voice-over)\b/i;
  const text = prompt || '';
  return silencePhrases.test(text);
}
544
+
545
/**
 * True when the prompt holds at least one double-quoted span of 1 to 400
 * characters. A missing prompt is treated as empty.
 */
function containsQuotedDialogue(prompt) {
  const quotedSpan = /"[^"]{1,400}"/;
  const text = prompt || '';
  return quotedSpan.test(text);
}
548
+
549
/**
 * True when the prompt uses a speech-related word (dialogue, says, whispers,
 * narration, interview, ...) and does not explicitly disable speech.
 *
 * @param {string} prompt
 * @returns {boolean} False for an empty prompt or one that disables speech.
 */
function promptMentionsSpeech(prompt) {
  const speechAllowed = Boolean(prompt) && !promptExplicitlyDisablesSpeech(prompt);
  if (!speechAllowed) return false;

  const speechWords = /\b(dialogue|speaks?|speaking|says?|said|asks?|asked|whispers?|shouts?|yells?|narrates?|narration|voiceover|voice-over|conversation|monologue|interview|talking|tells? (?:a )?story)\b/i;
  return speechWords.test(prompt);
}
553
+
554
/**
 * True when the prompt uses an audio-related word (audio, sound, music, song,
 * voice, speech, narration, foley, ...). Case-insensitive, whole words only.
 */
function promptMentionsAudio(prompt) {
  const audioWords = /\b(audio|sound|sounds|ambient sound|music|song|singing|sings|voice|voices|dialogue|speech|voiceover|voice-over|narration|foley)\b/i;
  return prompt ? audioWords.test(prompt) : false;
}
558
+
559
/**
 * True when the prompt uses a storytelling word such as story, screenplay,
 * scene, episode, "short film", commercial, storyboard, chapter or narrative.
 */
function promptLooksLikeLongFormStory(prompt) {
  const storyWords = /\b(story|screenplay|script|scene|episode|short film|commercial|storyboard|chapter|narrative)\b/i;
  const text = prompt || '';
  return storyWords.test(text);
}
562
+
563
/**
 * True when the prompt asks for lip-sync style output: "lip sync", "talking
 * head", "mouth movement", "sync the lips", "face speaks", "sing along", ...
 */
function promptLooksLikeLipSync(prompt) {
  const lipSyncPhrases = /\b(lip[- ]?sync|lipsync|talking head|mouth movement|sync(?:hronize)? (?:the )?(?:lips|mouth|speech)|face speaks|sing along)\b/i;
  const text = prompt || '';
  return lipSyncPhrases.test(text);
}
566
+
567
/**
 * Decide whether a prompt calls for a model with native audio.
 *
 * @param {string} prompt
 * @returns {boolean} False when the prompt disables speech; otherwise true if
 *   it has quoted dialogue, mentions speech or audio, or reads like a story.
 */
function promptNeedsLtxNativeAudio(prompt) {
  if (promptExplicitlyDisablesSpeech(prompt)) return false;

  const detectors = [
    containsQuotedDialogue,
    promptMentionsSpeech,
    promptMentionsAudio,
    promptLooksLikeLongFormStory,
  ];
  return detectors.some((detect) => detect(prompt));
}
575
+
576
/**
 * Convert single-quoted dialogue into double-quoted dialogue.
 *
 * Two passes run in order: first lines of the form `NAME: 'words'`, then
 * single-quoted spans that follow whitespace or "(" and are followed by
 * whitespace, closing punctuation or end of input. A quote preceded by a
 * letter (as in "don't") is left alone.
 *
 * @param {string} prompt
 * @returns {string} The rewritten prompt; falsy input is returned unchanged.
 */
function normalizeScreenplayDialogueQuotes(prompt) {
  if (!prompt) return prompt;

  const speakerLine = /^(\s*[A-Za-z][A-Za-z0-9 _.-]{0,48}:\s*)'([^'\n]{1,300})'/gm;
  const inlineQuote = /([\s(])'([^'\n]{1,180})'(?=[\s).,!?:;]|$)/g;

  const afterSpeakerLines = prompt.replace(speakerLine, '$1"$2"');
  return afterSpeakerLines.replace(inlineQuote, '$1"$2"');
}
582
+
583
/**
 * Collect the text of every double-quoted span (1 to 800 characters) in the
 * prompt, in order of appearance and without the surrounding quotes.
 *
 * @param {string} prompt - A missing prompt is treated as empty.
 * @returns {string[]} The quoted segments; empty when there are none.
 */
function extractQuotedDialogueSegments(prompt) {
  const quotedSpan = /"([^"]{1,800})"/g;
  const text = String(prompt || '');
  return Array.from(text.matchAll(quotedSpan), (hit) => hit[1]);
}
592
+
593
/**
 * Count the words in a piece of text. A word is a run of word characters,
 * apostrophes and hyphens delimited by word boundaries, so "don't" and
 * "stop-me" each count once.
 *
 * @param {*} text - Stringified before counting; falsy values count as empty.
 * @returns {number} Number of words, 0 when there are none.
 */
function countWords(text) {
  const cleaned = String(text || '').trim();
  return (cleaned.match(/\b[\w'-]+\b/g) ?? []).length;
}
597
+
598
/**
 * Total number of words across all double-quoted segments of the prompt.
 *
 * @param {string} prompt
 * @returns {number} 0 when the prompt has no quoted dialogue.
 */
function quotedDialogueWordCount(prompt) {
  let total = 0;
  for (const segment of extractQuotedDialogueSegments(prompt)) {
    total += countWords(segment);
  }
  return total;
}
601
+
602
/**
 * Suggest a clip duration long enough for the prompt's quoted dialogue.
 *
 * The estimate is 2.5 words per second plus 2 seconds, capped at 20 seconds.
 * The result is never shorter than `currentDuration`.
 *
 * @param {string} prompt
 * @param {number} currentDuration - Duration currently configured, in seconds.
 * @returns {number} `currentDuration` when there is no quoted dialogue,
 *   otherwise the larger of `currentDuration` and the capped estimate.
 */
function suggestedDurationForDialogue(prompt, currentDuration) {
  const spokenWords = quotedDialogueWordCount(prompt);
  if (spokenWords <= 0) return currentDuration;

  const estimatedSeconds = Math.ceil(spokenWords / 2.5) + 2;
  const cappedSeconds = Math.min(20, estimatedSeconds);
  return Math.max(currentDuration, cappedSeconds);
}
608
+
609
/**
 * Wrap a prompt in the [VISUAL] / [SPEECH] / [SOUNDS] section layout.
 *
 * Each quoted segment of the prompt becomes a `<speaker>: "<line>"` entry in
 * the [SPEECH] section. The speaker is `voiceName` when given, otherwise
 * `SPEAKER_<n>` numbered from 1.
 *
 * @param {string} prompt
 * @param {string} [voiceName] - Speaker label applied to every line.
 * @returns {string} The sectioned prompt. Falsy prompts, and prompts that
 *   already contain one of the section tags, are returned unchanged.
 */
function formatAudioIdPrompt(prompt, voiceName) {
  const alreadySectioned = /\[VISUAL\]|\[SPEECH\]|\[SOUNDS\]/i;
  if (!prompt || alreadySectioned.test(prompt)) return prompt;

  const spokenLines = extractQuotedDialogueSegments(prompt);
  let speechSection = 'No spoken dialogue unless exact quoted words are present in the visual prompt.';
  if (spokenLines.length > 0) {
    speechSection = spokenLines
      .map((line, position) => {
        const speaker = voiceName || `SPEAKER_${position + 1}`;
        return `${speaker}: "${line}"`;
      })
      .join('\n');
  }

  const soundsSection = 'Use natural ambient sound that matches the scene unless the prompt specifies silence.';
  return `[VISUAL]\n${prompt.trim()}\n\n[SPEECH]\n${speechSection}\n\n[SOUNDS]\n${soundsSection}`;
}
626
+
627
/**
 * Adjust the module-level `options` for video jobs before submission.
 *
 * Steps, in order:
 *  1. rewrite single-quoted dialogue in the prompt to double quotes;
 *  2. warn when the prompt mentions speech but has no quoted words;
 *  3. when neither `options.frames` nor an explicit CLI duration is set,
 *     lengthen `options.duration` to fit the quoted dialogue; otherwise warn
 *     if the dialogue exceeds 3.75 words per second of clip length;
 *  4. when `options.referenceAudioIdentity` is set, wrap the prompt in the
 *     sectioned audio-identity layout.
 *
 * Does nothing unless both `options.video` and `options.prompt` are truthy.
 * All messages go to stderr and are suppressed by `options.quiet`.
 */
function applyVideoPromptGuardrails() {
  if (!options.video || !options.prompt) return;

  const warn = (text) => {
    if (!options.quiet) console.error(text);
  };

  const requoted = normalizeScreenplayDialogueQuotes(options.prompt);
  if (requoted !== options.prompt) {
    options.prompt = requoted;
    warn('Normalized screenplay dialogue to double quotes for video prompting.');
  }

  if (promptMentionsSpeech(options.prompt) && !containsQuotedDialogue(options.prompt)) {
    warn(
      'Warning: video prompt mentions speech/dialogue but has no exact spoken words in double quotes. ' +
      'LTX native audio works best with concrete quoted dialogue.'
    );
  }

  const lengthIsPinned = Boolean(options.frames) || Boolean(cliSet.duration);
  if (!lengthIsPinned) {
    const suggested = suggestedDurationForDialogue(options.prompt, options.duration);
    if (suggested > options.duration) {
      warn(`Auto-extended video duration from ${options.duration}s to ${suggested}s to fit quoted dialogue.`);
      options.duration = suggested;
    }
  } else {
    const dialogueWords = quotedDialogueWordCount(options.prompt);
    const clipSeconds = options.frames ? options.frames / options.fps : options.duration;
    const hardBudget = Math.floor(clipSeconds * 3.75);
    if (dialogueWords > hardBudget) {
      const clipLength = options.frames ? `${options.frames} frames` : `${options.duration}s`;
      warn(`Warning: quoted dialogue has about ${dialogueWords} words, which may not fit in ${clipLength}.`);
    }
  }

  if (options.referenceAudioIdentity) {
    const speaker = options._voicePersonaResolvedName || options.voicePersonaName || 'SPEAKER';
    options.prompt = formatAudioIdPrompt(options.prompt, speaker);
  }
}
668
+
669
/**
 * Infer the video workflow from which reference assets were supplied.
 *
 *  - reference video: 'v2v' with a video ControlNet name, else 'animate-move'
 *  - reference audio + start image: 's2v' when the prompt looks like a
 *    lip-sync request, else 'ia2v'
 *  - reference audio + end image only: 's2v'
 *  - reference audio alone: 'a2v'
 *  - start or end image alone: 'i2v'
 *
 * @param {object} opts - Parsed options (refVideo, videoControlNetName,
 *   refAudio, refImage, refImageEnd, prompt).
 * @returns {string|null} The workflow key, or null when no asset is present.
 */
function inferVideoWorkflowFromAssets(opts) {
  if (opts.refVideo) {
    return opts.videoControlNetName ? 'v2v' : 'animate-move';
  }

  if (opts.refAudio) {
    if (opts.refImage) {
      return promptLooksLikeLipSync(opts.prompt) ? 's2v' : 'ia2v';
    }
    return opts.refImageEnd ? 's2v' : 'a2v';
  }

  return opts.refImage || opts.refImageEnd ? 'i2v' : null;
}
678
+
679
/**
 * True for the workflows that need a reference image: 'i2v', 's2v', 'ia2v',
 * 'animate-move' and 'animate-replace'.
 */
function workflowRequiresImage(workflow) {
  const imageDrivenWorkflows = ['i2v', 's2v', 'ia2v', 'animate-move', 'animate-replace'];
  return imageDrivenWorkflows.includes(workflow);
}
682
+
683
/**
 * Map a user-supplied seed strategy to its canonical name.
 *
 * Matching ignores case and surrounding whitespace, and non-string values are
 * stringified first. This mirrors how resolveVideoModelAlias treats model
 * ids. Previously a non-string truthy value threw a TypeError and a padded
 * value such as " random " was rejected.
 *
 * @param {*} value - 'random', 'prompt-hash' or 'prompt_hash' in any case.
 * @returns {string|null} 'random', 'prompt-hash', or null when the value is
 *   empty or unrecognised.
 */
function normalizeSeedStrategy(value) {
  if (!value) return null;

  switch (String(value).trim().toLowerCase()) {
    case 'random':
      return 'random';
    case 'prompt-hash':
    case 'prompt_hash':
      return 'prompt-hash';
    default:
      return null;
  }
}
690
+
691
/**
 * Produce a random seed from four bytes of `crypto.randomBytes`.
 *
 * @returns {number} An unsigned 32-bit integer (0 to 4294967295).
 */
function generateRandomSeed() {
  const entropy = randomBytes(4);
  return entropy.readUInt32BE(0);
}
694
+
695
// ---------------------------------------------------------------------------
// Dynamic prompt variations: `{option1|option2|option3}` syntax.
// When more than one image is requested, image N uses option N of each group,
// wrapping around when a group has fewer options than images.
// ---------------------------------------------------------------------------

// Matches one `{...}` group and captures the text between the braces.
// Global flag: intended for use with String.prototype.replace.
const VARIATION_PATTERN = /\{([^}]+)\}/g;
700
+
701
/**
 * True when the prompt contains at least one non-empty `{...}` group.
 */
function hasPromptVariations(prompt) {
  const variationGroup = /\{[^}]+\}/;
  return variationGroup.test(prompt);
}
704
+
705
/**
 * Replace every `{a|b|c}` group in the prompt with one of its options.
 *
 * @param {string} prompt - Prompt containing zero or more variation groups.
 * @param {number} index - Zero-based image index; each group picks option
 *   `index % optionCount`, so shorter groups wrap around.
 * @returns {string} The prompt with every group replaced by a trimmed option.
 */
function expandPromptVariation(prompt, index) {
  return prompt.replace(VARIATION_PATTERN, (_whole, body) => {
    const choices = body.split('|').map((choice) => choice.trim());
    return choices[index % choices.length];
  });
}
711
+
712
// ---------------------------------------------------------------------------
// Prompt sanitization for batch generation.
// Phrases that tend to make a model draw a grid or collage inside a single
// image are stripped when more than one image is requested.
// ---------------------------------------------------------------------------
const BATCH_SANITIZE_PATTERNS = [
  // Counted requests: "4 different", "3 versions", "2 variations", "5 separate".
  /\b\d+\s+different\b/gi,
  /\b\d+\s+versions?\b/gi,
  /\b\d+\s+variations?\b/gi,
  /\b\d+\s+separate\b/gi,

  // Uncounted requests for variety.
  /\bdifferent\s+(?:expressions?|poses?|angles?|versions?|styles?)\b/gi,
  /\bvarious\s+(?:expressions?|poses?|angles?)\b/gi,
  /\bmultiple\s+(?:versions?|images?|photos?)\b/gi,

  // Layout words.
  /\b(?:grid|collage|montage|triptych|side-by-side|split-screen|mood\s*board)\b/gi,

  // Phrasing that implies several items in one frame.
  /\beach\s+(?:with|one|showing)\b/gi,
  /\b(?:switch|mix)\s+up\b/gi,
  /\bput\s+them\s+(?:all\s+)?together\b/gi,
  /\ball\s+together\b/gi,
  /\bin\s+one\s+image\b/gi,
  /\brepeated?\s*(?:\d+\s*times?)?\b/gi,
];
732
+
733
/**
 * Remove every BATCH_SANITIZE_PATTERNS match from the prompt, then collapse
 * runs of whitespace left behind and trim the ends.
 *
 * @param {string} prompt
 * @returns {string} The cleaned prompt.
 */
function sanitizeBatchPrompt(prompt) {
  const stripped = BATCH_SANITIZE_PATTERNS.reduce(
    (text, pattern) => text.replace(pattern, ''),
    prompt
  );
  // Clean up extra whitespace
  return stripped.replace(/\s{2,}/g, ' ').trim();
}
741
+
742
/**
 * Derive a deterministic seed from the generation options.
 *
 * The listed options are serialized with JSON.stringify in a fixed key order
 * and hashed with SHA-256; the first four bytes of the digest are read as a
 * big-endian unsigned 32-bit integer.
 *
 * @param {object} opts - Parsed generation options.
 * @returns {number} Seed in the range 0 to 4294967295.
 */
function computePromptHashSeed(opts) {
  const orEmpty = (value) => value || '';
  const orNull = (value) => value ?? null;
  const orList = (value) => value || [];

  // Key order is part of the hash input; do not reorder.
  const fingerprint = JSON.stringify({
    prompt: orEmpty(opts.prompt),
    model: orEmpty(opts.model),
    workflow: opts.video ? opts.videoWorkflow : 'image',
    width: opts.width,
    height: opts.height,
    azimuth: orEmpty(opts.azimuth),
    elevation: orEmpty(opts.elevation),
    distance: orEmpty(opts.distance),
    angleDescription: orEmpty(opts.angleDescription),
    outputFormat: orEmpty(opts.outputFormat),
    sampler: orEmpty(opts.sampler),
    scheduler: orEmpty(opts.scheduler),
    targetResolution: orNull(opts.targetResolution),
    loras: orList(opts.loras),
    loraStrengths: orList(opts.loraStrengths),
    refImage: orEmpty(opts.refImage),
    refImageEnd: orEmpty(opts.refImageEnd),
    refAudio: orEmpty(opts.refAudio),
    audioStart: orNull(opts.audioStart),
    audioDuration: orNull(opts.audioDuration),
    referenceAudioIdentity: orEmpty(opts.referenceAudioIdentity),
    refVideo: orEmpty(opts.refVideo),
    videoStart: orNull(opts.videoStart),
    contextImages: orList(opts.contextImages),
    autoResizeVideoAssets: opts.autoResizeVideoAssets,
    tokenType: orEmpty(opts.tokenType),
    steps: orNull(opts.steps),
    guidance: orNull(opts.guidance),
  });

  return createHash('sha256').update(fingerprint).digest().readUInt32BE(0);
}
776
+
777
/**
 * Split a comma-separated string into trimmed, non-empty entries.
 *
 * @param {string} value
 * @returns {string[]} The entries; empty for a falsy `value`.
 */
function parseCsv(value) {
  if (!value) return [];

  const entries = [];
  for (const piece of value.split(',')) {
    const trimmed = piece.trim();
    if (trimmed) entries.push(trimmed);
  }
  return entries;
}
781
+
782
/**
 * Parse a flag value as a finite number, aborting the CLI when it is not one.
 *
 * Empty and whitespace-only strings are rejected. `Number('')` is 0, so a
 * blank value was previously accepted silently as zero.
 *
 * @param {*} raw - Raw flag value.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number} The parsed number.
 */
function parseNumberValue(raw, flagName) {
  const isBlankString = typeof raw === 'string' && raw.trim() === '';
  const num = isBlankString ? Number.NaN : Number(raw);
  if (!Number.isFinite(num)) {
    fatalCliError(`${flagName} must be a number.`, {
      code: 'INVALID_ARGUMENT',
      details: { flag: flagName, value: raw }
    });
  }
  return num;
}
792
+
793
/**
 * Parse a flag value as a number that must be zero or greater, aborting the
 * CLI otherwise.
 *
 * @param {*} raw - Raw flag value.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number} The parsed number.
 */
function parseNonNegativeNumberValue(raw, flagName) {
  const parsed = parseNumberValue(raw, flagName);
  const isNegative = parsed < 0;
  if (isNegative) {
    const details = { flag: flagName, value: raw, min: 0 };
    fatalCliError(`${flagName} must be >= 0.`, { code: 'INVALID_ARGUMENT', details });
  }
  return parsed;
}
803
+
804
/**
 * Parse a comma-separated list of numbers; each entry is validated with
 * parseNumberValue.
 *
 * @param {string} raw - Comma-separated values.
 * @param {string} flagName - Flag name used in error messages.
 * @returns {number[]} Parsed numbers, in input order.
 */
function parseNumberList(raw, flagName) {
  const numbers = [];
  for (const entry of parseCsv(raw)) {
    numbers.push(parseNumberValue(entry, flagName));
  }
  return numbers;
}
808
+
809
/**
 * Return the argument that follows a flag, aborting the CLI when the flag is
 * the last argument.
 *
 * @param {string[]} argv - Argument list.
 * @param {number} index - Position of the flag within `argv`.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {string} The value at `index + 1`.
 */
function requireFlagValue(argv, index, flagName) {
  const nextToken = argv[index + 1];
  if (nextToken !== undefined) return nextToken;

  fatalCliError(`${flagName} requires a value.`, {
    code: 'INVALID_ARGUMENT',
    details: { flag: flagName },
  });
  return nextToken;
}
819
+
820
/**
 * Parse a flag value as an integer, aborting the CLI when it is not one.
 *
 * Empty and whitespace-only strings are rejected. `Number('')` is 0, so a
 * blank value was previously accepted silently as zero.
 *
 * @param {*} raw - Raw flag value.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number} The parsed integer.
 */
function parseIntegerValue(raw, flagName) {
  const isBlankString = typeof raw === 'string' && raw.trim() === '';
  const num = isBlankString ? Number.NaN : Number(raw);
  if (!Number.isInteger(num)) {
    fatalCliError(`${flagName} must be an integer.`, {
      code: 'INVALID_ARGUMENT',
      details: { flag: flagName, value: raw }
    });
  }
  return num;
}
830
+
831
/**
 * Parse a flag value as an integer no smaller than `min`, aborting the CLI
 * otherwise.
 *
 * @param {*} raw - Raw flag value.
 * @param {string} flagName - Flag name used in the error message.
 * @param {number} [min=1] - Smallest accepted value.
 * @returns {number} The parsed integer.
 */
function parsePositiveIntegerValue(raw, flagName, min = 1) {
  const parsed = parseIntegerValue(raw, flagName);
  const isTooSmall = parsed < min;
  if (isTooSmall) {
    const details = { flag: flagName, value: raw, min };
    fatalCliError(`${flagName} must be >= ${min}.`, { code: 'INVALID_ARGUMENT', details });
  }
  return parsed;
}
841
+
842
/**
 * Parse a flag value as a seed: an integer from 0 to 4294967295 inclusive.
 * Aborts the CLI when the value is outside that range.
 *
 * @param {*} raw - Raw flag value.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number} The parsed seed.
 */
function parseSeedValue(raw, flagName) {
  const parsed = parseIntegerValue(raw, flagName);
  const isOutOfRange = parsed < 0 || parsed > 0xFFFFFFFF;
  if (isOutOfRange) {
    const details = { flag: flagName, value: raw };
    fatalCliError(`${flagName} must be between 0 and 4294967295.`, { code: 'INVALID_ARGUMENT', details });
  }
  return parsed;
}
852
+
853
/**
 * Combine the built-in settings for a model with overrides from user config.
 *
 * Overrides are read from `config.modelDefaults`, looked up first by the
 * alias-resolved id and then by the id as given.
 *
 * @param {string} modelId - Model id or alias.
 * @param {object} [config] - User configuration.
 * @returns {object|null} Null for a falsy `modelId`; the built-in settings
 *   (possibly null) when there is no object-valued override; otherwise a new
 *   object with the override's keys layered over the built-in ones.
 */
function getModelDefaults(modelId, config) {
  if (!modelId) return null;

  const canonicalId = resolveVideoModelAlias(modelId);
  const builtin = getBuiltinVideoModelConfig(canonicalId);

  const overrideTable = config?.modelDefaults;
  const overrides = overrideTable?.[canonicalId] || overrideTable?.[modelId];
  const hasOverrides = Boolean(overrides) && typeof overrides === 'object';

  return hasOverrides ? Object.assign({}, builtin, overrides) : builtin;
}
861
+
862
/**
 * Choose the model for a video workflow when the user did not name one.
 *
 * Order of precedence:
 *  1. `config.videoModels[workflow]`, alias-resolved for the workflow;
 *  2. the LTX-2.3 model for 'ia2v', 'a2v', 'v2v' and 't2v';
 *  3. for 'i2v', the LTX-2.3 model when a reference audio identity is set,
 *     the prompt calls for native audio, or quality is 'hq' or 'pro';
 *  4. the entry in VIDEO_WORKFLOW_DEFAULT_MODELS.
 *
 * @param {string} workflow - Normalized workflow key.
 * @param {object} opts - Parsed options (referenceAudioIdentity, prompt, quality).
 * @param {object} [config] - User configuration.
 * @returns {string|null} Model id, or null when the workflow is falsy or unknown.
 */
function selectDefaultVideoModel(workflow, opts, config) {
  if (!workflow) return null;

  const configured = config?.videoModels?.[workflow];
  if (configured) return resolveVideoModelAlias(configured, workflow);

  const alwaysLtx = ['ia2v', 'a2v', 'v2v', 't2v'];
  if (alwaysLtx.includes(workflow)) return LTX23_WORKFLOW_MODELS[workflow];

  if (workflow === 'i2v') {
    const prefersLtx =
      opts.referenceAudioIdentity ||
      promptNeedsLtxNativeAudio(opts.prompt) ||
      opts.quality === 'hq' ||
      opts.quality === 'pro';
    if (prefersLtx) return LTX23_WORKFLOW_MODELS.i2v;
  }

  return VIDEO_WORKFLOW_DEFAULT_MODELS[workflow] || null;
}
875
+
876
/**
 * Scale a width/height pair so that its shorter side equals `shortSide`,
 * keeping the aspect ratio and rounding both results to whole numbers.
 *
 * @param {*} width - Current width; numeric strings are accepted.
 * @param {*} height - Current height; numeric strings are accepted.
 * @param {*} shortSide - Desired length of the shorter side.
 * @returns {{width: *, height: *}} The scaled size, or the original `width`
 *   and `height` untouched when any input is not a positive finite number.
 */
function dimensionsWithShortSide(width, height, shortSide) {
  const [w, h, target] = [width, height, shortSide].map(Number);
  const allUsable = [w, h, target].every((value) => Number.isFinite(value) && value > 0);
  if (!allUsable) return { width, height };

  const scale = target / Math.min(w, h);
  return { width: Math.round(w * scale), height: Math.round(h * scale) };
}
890
+
891
/**
 * Format a token amount with two decimals for display.
 *
 * @param {*} value - Amount to format.
 * @returns {string} `value.toFixed(2)`, or 'unknown' when `value` is not a
 *   finite number (numeric strings are not coerced).
 */
function formatTokenValue(value) {
  return Number.isFinite(value) ? value.toFixed(2) : 'unknown';
}
895
+
896
/**
 * Guess a step count from a video model id when nothing else specifies one.
 *
 * Checks run in order: Seedance ids yield undefined, LTX2 ids containing
 * 'distilled' yield 8, ids containing 'lightx2v', 'lightning', 'turbo' or
 * 'lcm' yield 4, and everything else yields 20.
 *
 * @param {string} [modelId] - Model id; matched case-insensitively.
 * @returns {number|undefined} Inferred step count.
 */
function inferDefaultVideoSteps(modelId) {
  const normalizedId = (modelId || '').toLowerCase();

  if (isSeedanceModel(normalizedId)) {
    return undefined;
  }
  if (isLtx2Model(normalizedId) && normalizedId.includes('distilled')) {
    return 8;
  }

  const fewStepMarkers = ['lightx2v', 'lightning', 'turbo', 'lcm'];
  if (fewStepMarkers.some((marker) => normalizedId.includes(marker))) {
    return 4;
  }

  // Non-distilled LTX2 and the generic fallback currently share the same value.
  if (isLtx2Model(normalizedId)) {
    return 20;
  }
  return 20;
}
905
+
906
/**
 * Pick the step count for a video render.
 *
 * @param {string} modelId - Used only for the inferred fallback.
 * @param {object} [modelDefaults] - Model defaults that may carry `steps`.
 * @param {*} explicitSteps - Caller-supplied step count.
 * @returns {number|undefined} The first finite value among `explicitSteps` and
 *   `modelDefaults.steps`, otherwise inferDefaultVideoSteps(modelId).
 */
function resolveVideoSteps(modelId, modelDefaults, explicitSteps) {
  const candidates = [explicitSteps, modelDefaults?.steps];
  const firstFinite = candidates.find((candidate) => Number.isFinite(candidate));
  if (firstFinite !== undefined) {
    return firstFinite;
  }
  return inferDefaultVideoSteps(modelId);
}
911
+
912
/**
 * Extract a numeric cost from an estimate object.
 *
 * Reads `estimate.sogni` when `tokenType` is 'sogni' and `estimate.spark` for
 * any other token type; a null/undefined field falls back to `estimate.token`.
 *
 * @param {object} [estimate] - Estimate payload.
 * @param {string} [tokenType] - Token type being charged.
 * @returns {number|null} Parsed amount, or null when missing or not finite.
 */
function parseCostEstimate(estimate, tokenType) {
  if (!estimate) {
    return null;
  }

  const preferredField = tokenType === 'sogni' ? 'sogni' : 'spark';
  const rawAmount = estimate[preferredField] ?? estimate.token;
  const amount = Number.parseFloat(rawAmount);

  if (!Number.isFinite(amount)) {
    return null;
  }
  return amount;
}
920
+
921
/**
 * Create (without throwing) an Error tagged as an insufficient-balance failure.
 *
 * @param {string} message - Human-readable message.
 * @param {*} [details] - Extra context; falsy values are stored as null.
 * @returns {Error} Error with `code` 'INSUFFICIENT_BALANCE' and `details`.
 */
function buildBalanceError(message, details) {
  return Object.assign(new Error(message), {
    code: 'INSUFFICIENT_BALANCE',
    details: details || null
  });
}
927
+
928
/**
 * Greatest common divisor of two numbers after truncating each toward zero
 * and taking the absolute value.
 *
 * @param {number} a
 * @param {number} b
 * @returns {number} The GCD, or 1 when both operands truncate to 0 or when
 *   either operand is not a finite number.
 */
function gcdInt(a, b) {
  let x = Math.abs(Math.trunc(a));
  let y = Math.abs(Math.trunc(b));

  // NaN/Infinity would turn `x % y` into NaN, and `NaN !== 0` never becomes
  // false, so the Euclidean loop below would spin forever. Use the same
  // degenerate result as gcd(0, 0).
  if (!Number.isFinite(x) || !Number.isFinite(y)) {
    return 1;
  }

  while (y !== 0) {
    [x, y] = [y, x % y];
  }
  return x || 1;
}
938
+
939
/**
 * Tell whether a value is a string beginning with `http://` or `https://`
 * (case-sensitive prefix match; no further URL validation).
 *
 * @param {*} value
 * @returns {boolean}
 */
function isHttpUrl(value) {
  if (typeof value !== 'string') {
    return false;
  }
  return ['http://', 'https://'].some((prefix) => value.startsWith(prefix));
}
942
+
943
/**
 * Read the pixel dimensions from the header of a PNG file.
 *
 * The buffer must start with the 8-byte PNG signature and its first chunk must
 * be IHDR; width and height are then the big-endian 32-bit values at offsets
 * 16 and 20. Files whose first chunk is something else (for example the
 * `CgBI` chunk written by Apple's PNG optimizer, or a corrupted header) are
 * rejected instead of having unrelated bytes reported as dimensions.
 *
 * @param {Buffer} buffer - Start of the file (at least 24 bytes).
 * @returns {{width: number, height: number, type: 'png'}|null} Dimensions, or
 *   null when the buffer is not a readable PNG header or a dimension is zero.
 */
function getPngDimensions(buffer) {
  if (!buffer || buffer.length < 24) return null;

  // PNG signature: 89 50 4E 47 0D 0A 1A 0A
  const PNG_SIGNATURE = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
  if (!PNG_SIGNATURE.every((byte, offset) => buffer[offset] === byte)) {
    return null;
  }

  // Bytes 8-11 hold the first chunk's length and 12-15 its type, which must be
  // "IHDR" for offsets 16/20 to actually be width/height.
  const IHDR_TYPE = [0x49, 0x48, 0x44, 0x52];
  if (!IHDR_TYPE.every((byte, offset) => buffer[12 + offset] === byte)) {
    return null;
  }

  try {
    const width = buffer.readUInt32BE(16);
    const height = buffer.readUInt32BE(20);
    if (!width || !height) return null;
    return { width, height, type: 'png' };
  } catch {
    // e.g. a byte array without Buffer's read helpers
    return null;
  }
}
961
+
962
/**
 * Read the pixel dimensions from a JPEG by scanning its header segments.
 *
 * After the SOI marker (FF D8) the segments are walked until a Start Of Frame
 * marker is found (C0-CF except C4, C8 and CC); its payload holds the sample
 * precision followed by 16-bit big-endian height and width. Scanning stops at
 * EOI/SOS, on a segment length below 2, or when too few bytes remain.
 *
 * @param {Buffer} buffer - Start of the file.
 * @returns {{width: number, height: number, type: 'jpg'}|null} Dimensions, or
 *   null when no usable frame header is found.
 */
function getJpegDimensions(buffer) {
  const startsWithSoi =
    Boolean(buffer) && buffer.length >= 4 && buffer[0] === 0xFF && buffer[1] === 0xD8;
  if (!startsWithSoi) {
    return null;
  }

  const size = buffer.length;
  // Markers inside the C0-CF range that are not frame headers.
  const nonFrameMarkers = new Set([0xC4, 0xC8, 0xCC]);
  let pos = 2;

  while (pos + 9 < size) {
    // Advance to the next 0xFF marker prefix.
    if (buffer[pos] !== 0xFF) {
      pos += 1;
      continue;
    }
    // Consume the prefix together with any 0xFF fill bytes.
    while (pos < size && buffer[pos] === 0xFF) {
      pos += 1;
    }
    if (pos >= size) {
      break;
    }

    const marker = buffer[pos];
    pos += 1;

    // EOI / SOS: no frame header can follow, and neither has a length to skip.
    const endsHeaderArea = marker === 0xD9 || marker === 0xDA;
    if (endsHeaderArea) {
      break;
    }
    // RSTn markers are standalone (no length field).
    const isRestartMarker = marker >= 0xD0 && marker <= 0xD7;
    if (isRestartMarker) {
      continue;
    }

    if (pos + 1 >= size) {
      break;
    }
    const declaredLength = buffer.readUInt16BE(pos);
    if (declaredLength < 2) {
      break;
    }
    const payloadStart = pos + 2;

    const isFrameHeader = marker >= 0xC0 && marker <= 0xCF && !nonFrameMarkers.has(marker);
    if (!isFrameHeader) {
      // The declared length includes its own two bytes.
      pos = payloadStart + (declaredLength - 2);
      continue;
    }

    if (payloadStart + 7 >= size) {
      break;
    }
    try {
      const height = buffer.readUInt16BE(payloadStart + 1);
      const width = buffer.readUInt16BE(payloadStart + 3);
      return width && height ? { width, height, type: 'jpg' } : null;
    } catch {
      return null;
    }
  }

  return null;
}
1014
+
1015
/**
 * Detect image dimensions from raw bytes, trying PNG first and then JPEG.
 *
 * @param {Buffer} buffer - Start of the image file.
 * @returns {{width: number, height: number, type: string}|null} Result of the
 *   first parser that recognizes the data, or null when neither does.
 */
function getImageDimensionsFromBuffer(buffer) {
  const pngDimensions = getPngDimensions(buffer);
  if (pngDimensions) {
    return pngDimensions;
  }
  return getJpegDimensions(buffer);
}
1018
+
1019
// Dimension constraints applied to video assets when a model does not supply
// its own: allowed side-length range and the multiple each side must be.
const DEFAULT_VIDEO_DIMENSION_RULES = { minDimension: 480, maxDimension: 1536, dimensionMultiple: 16 };

// Shorthand for the default side-length multiple.
const { dimensionMultiple: VIDEO_DIMENSION_MULTIPLE } = DEFAULT_VIDEO_DIMENSION_RULES;
1025
+
1026
/**
 * Build a complete dimension-rules object from a model's defaults.
 *
 * @param {object} [modelDefaults] - May carry `minDimension`, `maxDimension`
 *   and `dimensionMultiple`.
 * @returns {{minDimension: number, maxDimension: number, dimensionMultiple: number}}
 *   Each field taken from `modelDefaults` when truthy, otherwise from
 *   DEFAULT_VIDEO_DIMENSION_RULES.
 */
function videoDimensionRulesFromDefaults(modelDefaults) {
  const { minDimension, maxDimension, dimensionMultiple } = modelDefaults ?? {};
  return {
    minDimension: minDimension || DEFAULT_VIDEO_DIMENSION_RULES.minDimension,
    maxDimension: maxDimension || DEFAULT_VIDEO_DIMENSION_RULES.maxDimension,
    dimensionMultiple: dimensionMultiple || DEFAULT_VIDEO_DIMENSION_RULES.dimensionMultiple
  };
}
1033
+
1034
/**
 * Resize an image buffer to model-compatible dimensions while keeping its
 * aspect ratio.
 *
 * Each original side is rounded to the rule's multiple and clamped to the
 * min/max range to form a bounding box; the image is fitted inside that box
 * with sharp (`fit: 'inside'`, enlargement allowed). If the fitted result is
 * not on the multiple grid, a final `fit: 'cover'` resize snaps it there.
 *
 * Any rule field missing from `rules` falls back to
 * DEFAULT_VIDEO_DIMENSION_RULES, so a partial rules object no longer produces
 * NaN resize targets.
 *
 * @param {Buffer} buffer - Encoded source image.
 * @param {number} originalWidth - Source width in pixels.
 * @param {number} originalHeight - Source height in pixels.
 * @param {object} [rules] - `{ minDimension, maxDimension, dimensionMultiple }`.
 * @returns {Promise<Buffer>} The resized image as produced by sharp.
 */
async function resizeImageBufferForVideo(buffer, originalWidth, originalHeight, rules = DEFAULT_VIDEO_DIMENSION_RULES) {
  const multiple = rules.dimensionMultiple || VIDEO_DIMENSION_MULTIPLE;
  const minDimension = rules.minDimension || DEFAULT_VIDEO_DIMENSION_RULES.minDimension;
  const maxDimension = rules.maxDimension || DEFAULT_VIDEO_DIMENSION_RULES.maxDimension;

  const roundToMultiple = (n) => Math.max(multiple, Math.round(n / multiple) * multiple);
  const clampSide = (side) => Math.max(minDimension, Math.min(maxDimension, roundToMultiple(side)));

  const targetWidth = clampSide(originalWidth);
  const targetHeight = clampSide(originalHeight);

  // Resize using sharp with fit:inside (maintains aspect ratio)
  const resizedBuffer = await sharp(buffer)
    .resize(targetWidth, targetHeight, { fit: 'inside', withoutEnlargement: false })
    .toBuffer();

  // fit:inside only pins the limiting side, so read back what was produced.
  const metadata = await sharp(resizedBuffer).metadata();
  const actualWidth = roundToMultiple(metadata.width);
  const actualHeight = roundToMultiple(metadata.height);

  // If dimensions aren't exactly model-compatible, do a final resize/crop.
  if (metadata.width !== actualWidth || metadata.height !== actualHeight) {
    return await sharp(resizedBuffer)
      .resize(actualWidth, actualHeight, { fit: 'cover' })
      .toBuffer();
  }

  return resizedBuffer;
}
1063
+
1064
/**
 * Normalize requested video dimensions against a model's dimension rules.
 *
 * Steps, in order: scale down proportionally when a side exceeds the maximum;
 * scale up proportionally when a side is below the minimum (scaling down again
 * if that overshoots the maximum); floor both sides to the required multiple;
 * finally raise any side still below the minimum to the smallest multiple that
 * is not below it.
 *
 * @param {number|string} width
 * @param {number|string} height
 * @param {object} [rules] - `{ minDimension, maxDimension, dimensionMultiple }`;
 *   falsy fields fall back to DEFAULT_VIDEO_DIMENSION_RULES.
 * @returns {{width: number, height: number, adjusted: boolean}} Normalized
 *   size; non-finite input is returned as converted with `adjusted: false`.
 */
function normalizeVideoDimensionsLikeWrapper(width, height, rules = DEFAULT_VIDEO_DIMENSION_RULES) {
  const minSide = rules.minDimension || DEFAULT_VIDEO_DIMENSION_RULES.minDimension;
  const maxSide = rules.maxDimension || DEFAULT_VIDEO_DIMENSION_RULES.maxDimension;
  const step = rules.dimensionMultiple || DEFAULT_VIDEO_DIMENSION_RULES.dimensionMultiple;

  let w = Number(width);
  let h = Number(height);
  if (!Number.isFinite(w) || !Number.isFinite(h)) {
    return { width: w, height: h, adjusted: false };
  }

  let adjusted = false;
  const exceedsMax = () => w > maxSide || h > maxSide;
  const shrinkIntoMax = () => {
    const factor = Math.min(maxSide / w, maxSide / h);
    w = Math.floor(w * factor);
    h = Math.floor(h * factor);
  };

  if (exceedsMax()) {
    shrinkIntoMax();
    adjusted = true;
  }

  if (w < minSide || h < minSide) {
    const factor = Math.max(minSide / w, minSide / h);
    w = Math.floor(w * factor);
    h = Math.floor(h * factor);
    adjusted = true;
    // Lifting the short side can push the long side past the maximum.
    if (exceedsMax()) {
      shrinkIntoMax();
    }
  }

  const snappedWidth = Math.floor(w / step) * step;
  const snappedHeight = Math.floor(h / step) * step;
  if (snappedWidth !== w || snappedHeight !== h) {
    adjusted = true;
  }
  w = snappedWidth;
  h = snappedHeight;

  // Flooring (or the second shrink) may have dropped a side below the minimum.
  const smallestAllowed = Math.ceil(minSide / step) * step;
  if (w < minSide) {
    w = smallestAllowed;
    adjusted = true;
  }
  if (h < minSide) {
    h = smallestAllowed;
    adjusted = true;
  }

  return { width: w, height: h, adjusted };
}
1115
+
1116
/**
 * Predict the output size of fitting a reference image inside a target box
 * while keeping its aspect ratio.
 *
 * Intended to mirror sharp (vips) `fit: 'inside'` as used by
 * SogniClientWrapper.resizeImageBuffer: the limiting side takes the target
 * value exactly and the other side is computed with Math.round().
 *
 * @param {number|string} refWidth
 * @param {number|string} refHeight
 * @param {number|string} targetWidth
 * @param {number|string} targetHeight
 * @returns {{width: number, height: number}|null} Predicted size, or null when
 *   any argument is not a positive finite number.
 */
function predictSharpInsideResizeDims(refWidth, refHeight, targetWidth, targetHeight) {
  const values = [refWidth, refHeight, targetWidth, targetHeight].map((value) => Number(value));
  if (!values.every((n) => Number.isFinite(n) && n > 0)) {
    return null;
  }
  const [srcW, srcH, boxW, boxH] = values;

  // The side with the smaller scale factor is the limiting one; ties go to width.
  if (boxW / srcW <= boxH / srcH) {
    return { width: boxW, height: Math.round(srcH * boxW / srcW) };
  }
  return { width: Math.round(srcW * boxH / srcH), height: boxH };
}
1135
+
1136
/**
 * Search for the bounding box whose `fit: 'inside'` resize of the reference
 * image yields dimensions the model accepts.
 *
 * Every width/height pair on the rule grid is normalized, the resulting output
 * size is predicted, and candidates whose output falls below the minimum are
 * discarded. Candidates are scored to prefer, in order: a box close to the
 * desired size, an output whose long side is close to the (clamped) desired
 * long side, and a larger output area. Outputs that are exact multiples of the
 * rule's divisor are "perfect"; others are only tracked when `allowImperfect`
 * is set, ranked first by how close they are to the divisor grid.
 *
 * @param {number} refWidth - Reference image width.
 * @param {number} refHeight - Reference image height.
 * @param {number} desiredWidth - Requested width (512 when not numeric).
 * @param {number} desiredHeight - Requested height (512 when not numeric).
 * @param {object} [options]
 * @param {boolean} [options.allowImperfect=false] - Allow off-grid outputs.
 * @param {object} [options.rules] - Dimension rules; falsy fields use defaults.
 * @returns {object|null} Best candidate `{ width, height, output, score,
 *   perfect }` (imperfect ones also carry `adjustedOutput`), or null.
 */
function pickCompatibleI2vBoundingBox(refWidth, refHeight, desiredWidth, desiredHeight, { allowImperfect = false, rules = DEFAULT_VIDEO_DIMENSION_RULES } = {}) {
  const minSide = rules.minDimension || DEFAULT_VIDEO_DIMENSION_RULES.minDimension;
  const maxSide = rules.maxDimension || DEFAULT_VIDEO_DIMENSION_RULES.maxDimension;
  const step = rules.dimensionMultiple || DEFAULT_VIDEO_DIMENSION_RULES.dimensionMultiple;

  const numberOr512 = (value) => (Number.isFinite(Number(value)) ? Number(value) : 512);
  const desiredW = numberOr512(desiredWidth);
  const desiredH = numberOr512(desiredHeight);
  const desiredMax = Math.max(minSide, Math.min(maxSide, Math.max(desiredW, desiredH)));

  // Distance from a length to the nearest multiple of `step`.
  const gridGap = (length) => {
    const remainder = length % step;
    return Math.min(remainder, step - remainder);
  };
  const snapToGrid = (length) => Math.round(length / step) * step;

  let bestPerfect = null;
  let bestImperfect = null;

  for (let w = minSide; w <= maxSide; w += step) {
    for (let h = minSide; h <= maxSide; h += step) {
      const box = normalizeVideoDimensionsLikeWrapper(w, h, rules);
      if (!Number.isFinite(box.width) || !Number.isFinite(box.height)) continue;

      const out = predictSharpInsideResizeDims(refWidth, refHeight, box.width, box.height);
      if (!out) continue;
      // Require both output dimensions >= model minimum for API compatibility.
      if (out.width < minSide || out.height < minSide) continue;

      const outMax = Math.max(out.width, out.height);
      const distance = Math.abs(box.width - desiredW) + Math.abs(box.height - desiredH);
      // Prefer a bounding box close to what the user asked for, then output close to requested max, then maximize output area.
      const score = -distance * 1e9 - Math.abs(outMax - desiredMax) * 1e8 + out.width * out.height * 1e3 - (box.width * box.height);

      const onGrid = out.width % step === 0 && out.height % step === 0;
      if (onGrid) {
        if (!bestPerfect || score > bestPerfect.score) {
          bestPerfect = { width: box.width, height: box.height, output: out, score, perfect: true };
        }
        continue;
      }
      if (!allowImperfect) continue;

      // Imperfect candidates: closeness to the divisor grid dominates the score.
      const divisorDistance = gridGap(out.width) + gridGap(out.height);
      const imperfectScore = -divisorDistance * 1e10 + score;
      if (!bestImperfect || imperfectScore > bestImperfect.score) {
        bestImperfect = {
          width: box.width,
          height: box.height,
          output: out,
          adjustedOutput: { width: snapToGrid(out.width), height: snapToGrid(out.height) },
          score: imperfectScore,
          perfect: false
        };
      }
    }
  }

  if (bestPerfect) {
    return bestPerfect;
  }
  return allowImperfect ? bestImperfect : null;
}
1191
+
1192
// Camera-angle vocabularies for multi-angle generation. Each entry pairs the
// canonical option key with the phrase inserted into the prompt.

// Horizontal position around the subject.
const MULTI_ANGLE_AZIMUTHS = [
  ['front', 'front view'],
  ['front-right', 'front-right quarter view'],
  ['right', 'right side view'],
  ['back-right', 'back-right quarter view'],
  ['back', 'back view'],
  ['back-left', 'back-left quarter view'],
  ['left', 'left side view'],
  ['front-left', 'front-left quarter view']
].map(([key, prompt]) => ({ key, prompt }));

// Vertical camera angle.
const MULTI_ANGLE_ELEVATIONS = [
  ['low-angle', 'low-angle shot'],
  ['eye-level', 'eye-level shot'],
  ['elevated', 'elevated shot'],
  ['high-angle', 'high-angle shot']
].map(([key, prompt]) => ({ key, prompt }));

// Framing distance.
const MULTI_ANGLE_DISTANCES = [
  ['close-up', 'close-up'],
  ['medium', 'medium shot'],
  ['wide', 'wide shot']
].map(([key, prompt]) => ({ key, prompt }));
1215
+
1216
// Alternate spellings accepted for multi-angle values, mapped to canonical keys.
const MULTI_ANGLE_AZIMUTH_ALIASES = new Map(Object.entries({
  'front-right quarter': 'front-right',
  'front right quarter': 'front-right',
  'back-right quarter': 'back-right',
  'back right quarter': 'back-right',
  'back-left quarter': 'back-left',
  'back left quarter': 'back-left',
  'front-left quarter': 'front-left',
  'front left quarter': 'front-left'
}));

const MULTI_ANGLE_ELEVATION_ALIASES = new Map(Object.entries({
  'low angle': 'low-angle',
  'eye level': 'eye-level',
  'high angle': 'high-angle'
}));

const MULTI_ANGLE_DISTANCE_ALIASES = new Map(Object.entries({
  'close up': 'close-up',
  'medium shot': 'medium',
  'wide shot': 'wide'
}));
1238
+
1239
/**
 * Canonicalize a user-supplied multi-angle value.
 *
 * The value is lower-cased, underscores become hyphens, whitespace runs
 * collapse to single spaces and the ends are trimmed; the result is then
 * translated through `aliases`. A value outside `allowedKeys` is reported via
 * fatalCliError() with code INVALID_ARGUMENT.
 *
 * @param {string} value - Raw value; falsy yields null.
 * @param {Map<string, string>} aliases - Alternate spelling to canonical key.
 * @param {string[]} allowedKeys - Accepted canonical keys.
 * @param {string} label - Field name used in the error message.
 * @returns {string|null} The canonical key.
 */
function normalizeMultiAngleValue(value, aliases, allowedKeys, label) {
  if (!value) {
    return null;
  }

  const cleaned = value
    .toLowerCase()
    .replace(/_/g, '-')
    .replace(/\s+/g, ' ')
    .trim();
  const canonical = aliases.get(cleaned) || cleaned;

  if (allowedKeys.includes(canonical)) {
    return canonical;
  }

  fatalCliError(`Invalid ${label} "${value}".`, {
    code: 'INVALID_ARGUMENT',
    details: { field: label, value, allowed: allowedKeys }
  });
  return canonical;
}
1251
+
1252
/**
 * Compose the prompt for a multi-angle render.
 *
 * The prompt starts with the `<sks>` token, followed by the phrases registered
 * for the given azimuth, elevation and distance keys (unknown keys are left
 * out) and, when provided, the free-form description; parts are joined with
 * single spaces.
 *
 * @param {object} params
 * @param {string} params.azimuth - Key from MULTI_ANGLE_AZIMUTHS.
 * @param {string} params.elevation - Key from MULTI_ANGLE_ELEVATIONS.
 * @param {string} params.distance - Key from MULTI_ANGLE_DISTANCES.
 * @param {string} [params.description] - Extra text appended at the end.
 * @returns {string} The assembled prompt.
 */
function buildMultiAnglePrompt({ azimuth, elevation, distance, description }) {
  const phraseFor = (table, wantedKey) => table.find((entry) => entry.key === wantedKey)?.prompt;

  const segments = [
    '<sks>',
    phraseFor(MULTI_ANGLE_AZIMUTHS, azimuth),
    phraseFor(MULTI_ANGLE_ELEVATIONS, elevation),
    phraseFor(MULTI_ANGLE_DISTANCES, distance)
  ].filter(Boolean);

  const withDescription = description ? [...segments, description] : segments;
  return withDescription.join(' ');
}
1260
+
1261
/**
 * Load this plugin's OpenClaw configuration (best effort).
 *
 * When the OPENCLAW_PLUGIN_CONFIG environment value is set it is parsed as
 * JSON5 and the parsed value is returned as-is. Otherwise the file at
 * OPENCLAW_CONFIG_PATH is parsed and the
 * `plugins.entries['sogni-creative-agent-skill'].config` node is returned.
 * Missing files and parse/read failures all yield null rather than throwing.
 *
 * @returns {*} The configuration, or null when none could be loaded.
 */
function loadOpenClawPluginConfig() {
  const inlineConfig = getEnv('OPENCLAW_PLUGIN_CONFIG');
  if (inlineConfig) {
    try {
      return JSON5.parse(inlineConfig);
    } catch {
      // Deliberately silent: an unparsable value counts as "no config".
      return null;
    }
  }

  if (!existsSync(OPENCLAW_CONFIG_PATH)) {
    return null;
  }

  try {
    const fileConfig = JSON5.parse(readFileSync(OPENCLAW_CONFIG_PATH, 'utf8'));
    const pluginEntry = fileConfig?.plugins?.entries?.['sogni-creative-agent-skill'];
    return pluginEntry?.config || null;
  } catch {
    // Unreadable or malformed config file: same silent fallback.
    return null;
  }
}
1279
+
1280
const openclawConfig = loadOpenClawPluginConfig();

// Resolve the three configurable filesystem locations in a fixed order. For
// each one the environment value wins over the OpenClaw plugin config, and
// resolveConfiguredPath() receives the built-in default plus a label.
const [CREDENTIALS_PATH, LAST_RENDER_PATH, MEDIA_INBOUND_DIR] = [
  ['SOGNI_CREDENTIALS_PATH', openclawConfig?.credentialsPath, DEFAULT_CREDENTIALS_PATH, 'SOGNI credentials path'],
  ['SOGNI_LAST_RENDER_PATH', openclawConfig?.lastRenderPath, DEFAULT_LAST_RENDER_PATH, 'SOGNI last render path'],
  ['SOGNI_MEDIA_INBOUND_DIR', openclawConfig?.mediaInboundDir, DEFAULT_MEDIA_INBOUND_DIR, 'SOGNI media inbound path']
].map(([envName, pluginValue, defaultPath, label]) =>
  resolveConfiguredPath(getEnv(envName) || pluginValue, defaultPath, label)
);
1296
+
1297
// Raw CLI arguments (everything after the node binary and the script path).
const args = process.argv.slice(2);

// Default value for every option; the CLI parser overwrites these in place.
const options = {
  // --- Core generation ---
  prompt: null,
  output: null,
  model: null, // chosen later, depending on the generation type
  width: 512,
  height: 512,
  count: 1,
  json: false,
  quiet: false,
  timeout: 30 * 1000,
  strictSize: false,
  quality: null, // tier fast|hq|pro: auto-selects model, steps, dimensions
  tokenType: null,
  steps: null,
  guidance: null,
  outputFormat: null,
  sampler: null,
  scheduler: null,
  loras: [],
  loraStrengths: [],

  // --- Multi-angle ---
  multiAngle: false,
  angles360: false,
  azimuth: 'front',
  elevation: 'eye-level',
  distance: 'medium',
  angleStrength: null,
  angleDescription: '',

  // --- Seed ---
  seed: null,
  lastSeed: false,
  seedStrategy: null,

  // --- Video ---
  video: false,
  videoWorkflow: null,
  fps: 16,
  duration: 5,
  frames: null,
  targetResolution: null, // short-side target for video; aspect ratio is preserved
  autoResizeVideoAssets: null,
  estimateVideoCost: false,
  showBalance: false,
  showVersion: false,
  angles360Video: null,

  // --- Reference media ---
  refImage: null, // start frame for video
  refImageEnd: null, // end frame for video interpolation
  refAudio: null, // uploaded/generated audio for ia2v/a2v, or s2v lip-sync
  audioStart: null, // optional start offset into the reference audio
  audioDuration: null, // optional duration slice of the reference audio
  referenceAudioIdentity: null, // voice identity reference for LTX native audio
  voicePersonaName: null,
  refVideo: null, // reference video for animate workflows
  videoStart: null, // optional start offset into the reference video
  contextImages: [], // context images for image editing
  looping: false, // looping video (i2v only): generate A→B then B→A and concatenate
  photobooth: false, // photobooth mode (InstantID face transfer)

  // --- ControlNet / keyframes ---
  cnStrength: null, // ControlNet strength override
  cnGuidanceEnd: null, // ControlNet guidance end override
  videoControlNetName: null, // v2v ControlNet name: canny|pose|depth|detailer
  videoControlNetStrength: null, // v2v ControlNet strength (0.0-1.0)
  sam2Coordinates: null, // SAM2 coordinates for animate-replace [{x,y}]
  trimEndFrame: false, // trim the last frame for seamless stitching
  firstFrameStrength: null, // keyframe interpolation (0.0-1.0)
  lastFrameStrength: null, // keyframe interpolation (0.0-1.0)

  // --- Utility commands ---
  extractLastFrame: null, // --extract-last-frame <video> <image>
  extractLastFrameOutput: null,
  concatVideos: null, // --concat-videos <out> <clip1> <clip2> [...]
  concatVideosClips: null,
  concatAudio: null, // optional audio file to mux over the concatenated clips
  concatAudioStart: null,
  listMedia: null, // --list-media [images|audio|all]

  // --- Memory, personality, persona commands ---
  memoryAction: null, // set|get|list|remove
  memoryKey: null,
  memoryValue: null,
  memoryCategory: null,
  personalityAction: null, // set|get|clear
  personalityText: null,
  personaAction: null, // add|list|remove|resolve
  personaName: null,
  personaRelationship: null,
  personaDescription: null,
  personaTags: null,
  personaVoice: null,
  personaVoiceClip: null,
  personaPhoto: null, // alias for --ref when used with --persona-add

  noFilter: false // disable the NSFW content filter
};
1384
// One boolean per option that can be set explicitly on the command line. Every
// flag starts out false and the CLI parser flips it to true when the
// corresponding argument is seen.
const cliSet = Object.fromEntries([
  'output',
  'model',
  'width',
  'height',
  'count',
  'timeout',
  'strictSize',
  'quality',
  'tokenType',
  'steps',
  'guidance',
  'outputFormat',
  'sampler',
  'scheduler',
  'loras',
  'loraStrengths',
  'multiAngle',
  'azimuth',
  'elevation',
  'distance',
  'angleStrength',
  'angleDescription',
  'seed',
  'seedStrategy',
  'video',
  'workflow',
  'fps',
  'duration',
  'frames',
  'targetResolution',
  'autoResizeVideoAssets',
  'angles360Video',
  'videoModel',
  'refImage',
  'refImageEnd',
  'refAudio',
  'audioStart',
  'audioDuration',
  'referenceAudioIdentity',
  'voicePersonaName',
  'refVideo',
  'videoStart',
  'context',
  'looping',
  'photobooth',
  'cnStrength',
  'cnGuidanceEnd',
  'videoControlNetName',
  'videoControlNetStrength',
  'sam2Coordinates',
  'trimEndFrame',
  'firstFrameStrength',
  'lastFrameStrength'
].map((flagName) => [flagName, false]));
1439
+
1440
+ // Parse CLI args
1441
+ for (let i = 0; i < args.length; i++) {
1442
+ const arg = args[i];
1443
+ if (arg === '-o' || arg === '--output') {
1444
+ const raw = requireFlagValue(args, i, arg);
1445
+ i++;
1446
+ options.output = raw;
1447
+ cliSet.output = true;
1448
+ } else if (arg === '-m' || arg === '--model') {
1449
+ const raw = requireFlagValue(args, i, arg);
1450
+ i++;
1451
+ options.model = raw;
1452
+ cliSet.model = true;
1453
+ } else if (arg === '-w' || arg === '--width') {
1454
+ const raw = requireFlagValue(args, i, arg);
1455
+ i++;
1456
+ options.width = parsePositiveIntegerValue(raw, arg);
1457
+ cliSet.width = true;
1458
+ } else if (arg === '-h' || arg === '--height') {
1459
+ const raw = requireFlagValue(args, i, arg);
1460
+ i++;
1461
+ options.height = parsePositiveIntegerValue(raw, arg);
1462
+ cliSet.height = true;
1463
+ } else if (arg === '-n' || arg === '--count') {
1464
+ const raw = requireFlagValue(args, i, arg);
1465
+ i++;
1466
+ options.count = parsePositiveIntegerValue(raw, arg);
1467
+ cliSet.count = true;
1468
+ } else if (arg === '-t' || arg === '--timeout') {
1469
+ const raw = requireFlagValue(args, i, arg);
1470
+ i++;
1471
+ options.timeout = parsePositiveIntegerValue(raw, arg) * 1000;
1472
+ cliSet.timeout = true;
1473
+ } else if (arg === '--quality' || arg === '-Q') {
1474
+ const raw = requireFlagValue(args, i, arg);
1475
+ i++;
1476
+ options.quality = raw.toLowerCase();
1477
+ cliSet.quality = true;
1478
+ } else if (arg === '--token-type' || arg === '--token') {
1479
+ const raw = requireFlagValue(args, i, arg);
1480
+ i++;
1481
+ options.tokenType = raw;
1482
+ cliSet.tokenType = true;
1483
+ } else if (arg === '--steps') {
1484
+ const raw = requireFlagValue(args, i, arg);
1485
+ i++;
1486
+ options.steps = parsePositiveIntegerValue(raw, arg);
1487
+ cliSet.steps = true;
1488
+ } else if (arg === '--guidance') {
1489
+ const raw = requireFlagValue(args, i, arg);
1490
+ i++;
1491
+ options.guidance = parseNumberValue(raw, arg);
1492
+ cliSet.guidance = true;
1493
+ } else if (arg === '--output-format' || arg === '--format') {
1494
+ const raw = requireFlagValue(args, i, arg);
1495
+ i++;
1496
+ options.outputFormat = raw;
1497
+ cliSet.outputFormat = true;
1498
+ } else if (arg === '--sampler') {
1499
+ const raw = requireFlagValue(args, i, arg);
1500
+ i++;
1501
+ options.sampler = raw;
1502
+ cliSet.sampler = true;
1503
+ } else if (arg === '--scheduler') {
1504
+ const raw = requireFlagValue(args, i, arg);
1505
+ i++;
1506
+ options.scheduler = raw;
1507
+ cliSet.scheduler = true;
1508
+ } else if (arg === '--multi-angle' || arg === '--multiple-angles') {
1509
+ options.multiAngle = true;
1510
+ cliSet.multiAngle = true;
1511
+ } else if (arg === '--angles-360') {
1512
+ options.angles360 = true;
1513
+ options.multiAngle = true;
1514
+ cliSet.multiAngle = true;
1515
+ } else if (arg === '--angles-360-video') {
1516
+ options.angles360Video = true;
1517
+ cliSet.angles360Video = true;
1518
+ if (args[i + 1] && !args[i + 1].startsWith('-')) {
1519
+ options.angles360Video = args[++i];
1520
+ }
1521
+ } else if (arg === '--video-model' || arg === '--i2v-model') {
1522
+ const raw = requireFlagValue(args, i, arg);
1523
+ i++;
1524
+ options.videoModel = raw;
1525
+ cliSet.videoModel = true;
1526
+ } else if (arg === '--azimuth') {
1527
+ const raw = requireFlagValue(args, i, arg);
1528
+ i++;
1529
+ options.azimuth = raw;
1530
+ cliSet.azimuth = true;
1531
+ } else if (arg === '--elevation') {
1532
+ const raw = requireFlagValue(args, i, arg);
1533
+ i++;
1534
+ options.elevation = raw;
1535
+ cliSet.elevation = true;
1536
+ } else if (arg === '--distance') {
1537
+ const raw = requireFlagValue(args, i, arg);
1538
+ i++;
1539
+ options.distance = raw;
1540
+ cliSet.distance = true;
1541
+ } else if (arg === '--angle-strength' || arg === '--strength') {
1542
+ const raw = requireFlagValue(args, i, arg);
1543
+ i++;
1544
+ options.angleStrength = parseNumberValue(raw, arg);
1545
+ cliSet.angleStrength = true;
1546
+ } else if (arg === '--angle-description' || arg === '--angle-anchor' || arg === '--description' || arg === '--anchor') {
1547
+ const raw = requireFlagValue(args, i, arg);
1548
+ i++;
1549
+ options.angleDescription = raw;
1550
+ cliSet.angleDescription = true;
1551
+ } else if (arg === '--lora' || arg === '--lora-model') {
1552
+ const raw = requireFlagValue(args, i, arg);
1553
+ i++;
1554
+ options.loras.push(raw);
1555
+ cliSet.loras = true;
1556
+ } else if (arg === '--loras') {
1557
+ const raw = requireFlagValue(args, i, arg);
1558
+ i++;
1559
+ options.loras.push(...parseCsv(raw));
1560
+ cliSet.loras = true;
1561
+ } else if (arg === '--lora-strength') {
1562
+ const raw = requireFlagValue(args, i, arg);
1563
+ i++;
1564
+ options.loraStrengths.push(parseNumberValue(raw, arg));
1565
+ cliSet.loraStrengths = true;
1566
+ } else if (arg === '--lora-strengths') {
1567
+ const raw = requireFlagValue(args, i, arg);
1568
+ i++;
1569
+ options.loraStrengths.push(...parseNumberList(raw, arg));
1570
+ cliSet.loraStrengths = true;
1571
+ } else if (arg === '-s' || arg === '--seed') {
1572
+ const raw = requireFlagValue(args, i, arg);
1573
+ i++;
1574
+ options.seed = parseSeedValue(raw, arg);
1575
+ cliSet.seed = true;
1576
+ } else if (arg === '--seed-strategy') {
1577
+ const raw = requireFlagValue(args, i, arg);
1578
+ i++;
1579
+ options.seedStrategy = raw;
1580
+ cliSet.seedStrategy = true;
1581
+ } else if (arg === '--last-seed' || arg === '--reseed') {
1582
+ options.lastSeed = true;
1583
+ } else if (arg === '--video' || arg === '-v') {
1584
+ options.video = true;
1585
+ cliSet.video = true;
1586
+ } else if (arg === '--workflow') {
1587
+ const raw = requireFlagValue(args, i, arg);
1588
+ i++;
1589
+ options.videoWorkflow = raw;
1590
+ cliSet.workflow = true;
1591
+ } else if (arg === '--fps') {
1592
+ const raw = requireFlagValue(args, i, arg);
1593
+ i++;
1594
+ options.fps = parsePositiveIntegerValue(raw, arg);
1595
+ cliSet.fps = true;
1596
+ } else if (arg === '--duration') {
1597
+ const raw = requireFlagValue(args, i, arg);
1598
+ i++;
1599
+ options.duration = parsePositiveIntegerValue(raw, arg);
1600
+ cliSet.duration = true;
1601
+ } else if (arg === '--frames') {
1602
+ const raw = requireFlagValue(args, i, arg);
1603
+ i++;
1604
+ options.frames = parsePositiveIntegerValue(raw, arg);
1605
+ cliSet.frames = true;
1606
+ } else if (arg === '--target-resolution' || arg === '--short-side') {
1607
+ const raw = requireFlagValue(args, i, arg);
1608
+ i++;
1609
+ options.targetResolution = parsePositiveIntegerValue(raw, arg);
1610
+ cliSet.targetResolution = true;
1611
+ } else if (arg === '--auto-resize-assets') {
1612
+ options.autoResizeVideoAssets = true;
1613
+ cliSet.autoResizeVideoAssets = true;
1614
+ } else if (arg === '--no-auto-resize-assets') {
1615
+ options.autoResizeVideoAssets = false;
1616
+ cliSet.autoResizeVideoAssets = true;
1617
+ } else if (arg === '--ref' || arg === '--reference') {
1618
+ const raw = requireFlagValue(args, i, arg);
1619
+ i++;
1620
+ options.refImage = raw;
1621
+ cliSet.refImage = true;
1622
+ } else if (arg === '--ref-end' || arg === '--end') {
1623
+ const raw = requireFlagValue(args, i, arg);
1624
+ i++;
1625
+ options.refImageEnd = raw;
1626
+ cliSet.refImageEnd = true;
1627
+ } else if (arg === '--ref-audio' || arg === '--audio') {
1628
+ const raw = requireFlagValue(args, i, arg);
1629
+ i++;
1630
+ options.refAudio = raw;
1631
+ cliSet.refAudio = true;
1632
+ } else if (arg === '--audio-start') {
1633
+ const raw = requireFlagValue(args, i, arg);
1634
+ i++;
1635
+ options.audioStart = parseNonNegativeNumberValue(raw, arg);
1636
+ cliSet.audioStart = true;
1637
+ } else if (arg === '--audio-duration') {
1638
+ const raw = requireFlagValue(args, i, arg);
1639
+ i++;
1640
+ options.audioDuration = parseNonNegativeNumberValue(raw, arg);
1641
+ cliSet.audioDuration = true;
1642
+ } else if (arg === '--reference-audio-identity' || arg === '--voice-identity') {
1643
+ const raw = requireFlagValue(args, i, arg);
1644
+ i++;
1645
+ options.referenceAudioIdentity = raw;
1646
+ cliSet.referenceAudioIdentity = true;
1647
+ } else if (arg === '--voice-persona') {
1648
+ const raw = requireFlagValue(args, i, arg);
1649
+ i++;
1650
+ options.voicePersonaName = raw;
1651
+ cliSet.voicePersonaName = true;
1652
+ } else if (arg === '--ref-video') {
1653
+ const raw = requireFlagValue(args, i, arg);
1654
+ i++;
1655
+ options.refVideo = raw;
1656
+ cliSet.refVideo = true;
1657
+ } else if (arg === '--video-start' || arg === '--video-start-offset') {
1658
+ const raw = requireFlagValue(args, i, arg);
1659
+ i++;
1660
+ options.videoStart = parseNonNegativeNumberValue(raw, arg);
1661
+ cliSet.videoStart = true;
1662
+ } else if (arg === '--looping' || arg === '--loop') {
1663
+ options.looping = true;
1664
+ cliSet.looping = true;
1665
+ } else if (arg === '-c' || arg === '--context') {
1666
+ const raw = requireFlagValue(args, i, arg);
1667
+ i++;
1668
+ options.contextImages.push(raw);
1669
+ cliSet.context = true;
1670
+ } else if (arg === '--photobooth') {
1671
+ options.photobooth = true;
1672
+ cliSet.photobooth = true;
1673
+ } else if (arg === '--cn-strength') {
1674
+ const raw = requireFlagValue(args, i, arg);
1675
+ i++;
1676
+ options.cnStrength = parseNumberValue(raw, arg);
1677
+ cliSet.cnStrength = true;
1678
+ } else if (arg === '--cn-guidance-end') {
1679
+ const raw = requireFlagValue(args, i, arg);
1680
+ i++;
1681
+ options.cnGuidanceEnd = parseNumberValue(raw, arg);
1682
+ cliSet.cnGuidanceEnd = true;
1683
+ } else if (arg === '--controlnet-name') {
1684
+ const raw = requireFlagValue(args, i, arg);
1685
+ i++;
1686
+ options.videoControlNetName = raw;
1687
+ cliSet.videoControlNetName = true;
1688
+ } else if (arg === '--controlnet-strength') {
1689
+ const raw = requireFlagValue(args, i, arg);
1690
+ i++;
1691
+ options.videoControlNetStrength = parseNumberValue(raw, arg);
1692
+ cliSet.videoControlNetStrength = true;
1693
+ } else if (arg === '--sam2-coordinates') {
1694
+ const raw = requireFlagValue(args, i, arg);
1695
+ i++;
1696
+ // Parse "x,y" or "x1,y1;x2,y2" format
1697
+ options.sam2Coordinates = raw.split(';').map(pair => {
1698
+ const [x, y] = pair.split(',').map(Number);
1699
+ if (!Number.isFinite(x) || !Number.isFinite(y)) {
1700
+ fatalCliError(`Invalid --sam2-coordinates format "${raw}". Use x,y or x1,y1;x2,y2.`, {
1701
+ code: 'INVALID_ARGUMENT',
1702
+ details: { flag: '--sam2-coordinates', value: raw }
1703
+ });
1704
+ }
1705
+ return { x, y };
1706
+ });
1707
+ cliSet.sam2Coordinates = true;
1708
+ } else if (arg === '--trim-end-frame') {
1709
+ options.trimEndFrame = true;
1710
+ cliSet.trimEndFrame = true;
1711
+ } else if (arg === '--first-frame-strength') {
1712
+ const raw = requireFlagValue(args, i, arg);
1713
+ i++;
1714
+ options.firstFrameStrength = parseNumberValue(raw, arg);
1715
+ cliSet.firstFrameStrength = true;
1716
+ } else if (arg === '--last-frame-strength') {
1717
+ const raw = requireFlagValue(args, i, arg);
1718
+ i++;
1719
+ options.lastFrameStrength = parseNumberValue(raw, arg);
1720
+ cliSet.lastFrameStrength = true;
1721
+ } else if (arg === '--extract-last-frame') {
1722
+ const videoArg = requireFlagValue(args, i, arg);
1723
+ i++;
1724
+ const imageArg = requireFlagValue(args, i, arg + ' (output image)');
1725
+ i++;
1726
+ options.extractLastFrame = videoArg;
1727
+ options.extractLastFrameOutput = imageArg;
1728
+ } else if (arg === '--concat-videos') {
1729
+ // Consume remaining positional args: <output> <clip1> <clip2> [clip3...]
1730
+ const outArg = requireFlagValue(args, i, arg + ' (output path)');
1731
+ i++;
1732
+ const clips = [];
1733
+ while (i + 1 < args.length && !args[i + 1].startsWith('-')) {
1734
+ i++;
1735
+ clips.push(args[i]);
1736
+ }
1737
+ if (clips.length < 2) {
1738
+ fatalCliError('--concat-videos requires at least 2 clip paths after the output path.', {
1739
+ code: 'INVALID_ARGUMENT',
1740
+ details: { flag: '--concat-videos', clipsProvided: clips.length }
1741
+ });
1742
+ }
1743
+ options.concatVideos = outArg;
1744
+ options.concatVideosClips = clips;
1745
+ } else if (arg === '--concat-audio') {
1746
+ const raw = requireFlagValue(args, i, arg);
1747
+ i++;
1748
+ options.concatAudio = raw;
1749
+ } else if (arg === '--concat-audio-start') {
1750
+ const raw = requireFlagValue(args, i, arg);
1751
+ i++;
1752
+ options.concatAudioStart = parseNonNegativeNumberValue(raw, arg);
1753
+ } else if (arg === '--list-media') {
1754
+ // Optional type argument (images|audio|all), default: images
1755
+ const next = args[i + 1];
1756
+ if (next && !next.startsWith('-') && ['images', 'audio', 'all'].includes(next)) {
1757
+ i++;
1758
+ options.listMedia = next;
1759
+ } else {
1760
+ options.listMedia = 'images';
1761
+ }
1762
+ // --- Memory commands ---
1763
+ } else if (arg === '--memory-set') {
1764
+ options.memoryAction = 'set';
1765
+ options.memoryKey = requireFlagValue(args, i, arg); i++;
1766
+ options.memoryValue = requireFlagValue(args, i, arg + ' (value)'); i++;
1767
+ } else if (arg === '--memory-get') {
1768
+ options.memoryAction = 'get';
1769
+ options.memoryKey = requireFlagValue(args, i, arg); i++;
1770
+ } else if (arg === '--memory-list') {
1771
+ options.memoryAction = 'list';
1772
+ } else if (arg === '--memory-remove' || arg === '--memory-delete') {
1773
+ options.memoryAction = 'remove';
1774
+ options.memoryKey = requireFlagValue(args, i, arg); i++;
1775
+ } else if (arg === '--memory-category') {
1776
+ options.memoryCategory = requireFlagValue(args, i, arg); i++;
1777
+ // --- Personality commands ---
1778
+ } else if (arg === '--personality-set') {
1779
+ options.personalityAction = 'set';
1780
+ options.personalityText = requireFlagValue(args, i, arg); i++;
1781
+ } else if (arg === '--personality-get') {
1782
+ options.personalityAction = 'get';
1783
+ } else if (arg === '--personality-clear') {
1784
+ options.personalityAction = 'clear';
1785
+ // --- Persona commands ---
1786
+ } else if (arg === '--persona-add') {
1787
+ options.personaAction = 'add';
1788
+ options.personaName = requireFlagValue(args, i, arg); i++;
1789
+ } else if (arg === '--persona-list') {
1790
+ options.personaAction = 'list';
1791
+ } else if (arg === '--persona-remove' || arg === '--persona-delete') {
1792
+ options.personaAction = 'remove';
1793
+ options.personaName = requireFlagValue(args, i, arg); i++;
1794
+ } else if (arg === '--persona-resolve') {
1795
+ options.personaAction = 'resolve';
1796
+ options.personaName = requireFlagValue(args, i, arg); i++;
1797
+ } else if (arg === '--persona') {
1798
+ // Shorthand: resolve persona + generate with context
1799
+ options.personaAction = 'generate';
1800
+ options.personaName = requireFlagValue(args, i, arg); i++;
1801
+ } else if (arg === '--relationship') {
1802
+ options.personaRelationship = requireFlagValue(args, i, arg); i++;
1803
+ } else if (arg === '--description') {
1804
+ options.personaDescription = requireFlagValue(args, i, arg); i++;
1805
+ } else if (arg === '--tags') {
1806
+ options.personaTags = requireFlagValue(args, i, arg).split(',').map(s => s.trim()); i++;
1807
+ } else if (arg === '--voice') {
1808
+ options.personaVoice = requireFlagValue(args, i, arg); i++;
1809
+ } else if (arg === '--voice-clip') {
1810
+ options.personaVoiceClip = requireFlagValue(args, i, arg); i++;
1811
+ // --- Content filter ---
1812
+ } else if (arg === '--no-filter') {
1813
+ options.noFilter = true;
1814
+ } else if (arg === '--last-image') {
1815
+ // Use image from last render as reference/context
1816
+ if (existsSync(LAST_RENDER_PATH)) {
1817
+ const lastRender = JSON.parse(readFileSync(LAST_RENDER_PATH, 'utf8'));
1818
+ let lastImagePath = null;
1819
+ if (lastRender.localPath && existsSync(lastRender.localPath)) {
1820
+ lastImagePath = lastRender.localPath;
1821
+ } else if (lastRender.urls?.[0]) {
1822
+ lastImagePath = lastRender.urls[0];
1823
+ }
1824
+ if (lastImagePath) {
1825
+ // Will be resolved later: video uses refImage, image editing uses contextImages
1826
+ options._lastImagePath = lastImagePath;
1827
+ }
1828
+ }
1829
+ } else if (arg === '--last') {
1830
+ // Show last render info
1831
+ if (existsSync(LAST_RENDER_PATH)) {
1832
+ console.log(readFileSync(LAST_RENDER_PATH, 'utf8'));
1833
+ } else {
1834
+ console.error('No previous render found.');
1835
+ }
1836
+ process.exit(0);
1837
+ } else if (arg === '--json') {
1838
+ options.json = true;
1839
+ } else if (arg === '--strict-size') {
1840
+ options.strictSize = true;
1841
+ cliSet.strictSize = true;
1842
+ } else if (arg === '-q' || arg === '--quiet') {
1843
+ options.quiet = true;
1844
+ } else if (arg === '--estimate-video-cost') {
1845
+ options.estimateVideoCost = true;
1846
+ } else if (arg === '--balance' || arg === '--balances') {
1847
+ options.showBalance = true;
1848
+ } else if (arg === '--version' || arg === '-V') {
1849
+ options.showVersion = true;
1850
+ } else if (arg === '--help') {
1851
+ console.log(`
1852
+ sogni-agent - Generate images and videos using Sogni AI
1853
+
1854
+ Usage: sogni-agent [options] "prompt"
1855
+
1856
+ Image Options:
1857
+ -o, --output <path> Save to file (otherwise prints URL)
1858
+ -Q, --quality <tier> Quality preset: fast|hq|pro (auto-selects model/steps/size)
1859
+ -m, --model <id> Model (default: z_image_turbo_bf16, overrides --quality)
1860
+ -w, --width <px> Width (default: 512)
1861
+ -h, --height <px> Height (default: 512)
1862
+ -n, --count <num> Number of images (default: 1)
1863
+ -s, --seed <num> Use specific seed
1864
+ --last-seed Reuse seed from previous render
1865
+ --seed-strategy <s> Seed strategy: random|prompt-hash
1866
+ --multi-angle Multiple angles LoRA mode (Qwen Image Edit)
1867
+ --angles-360 Generate 8 azimuths (front -> front-left)
1868
+ --angles-360-video [path] Assemble a looping 360 mp4 using i2v between angles (requires ffmpeg)
1869
+ --video-model <id> Override i2v model for 360 video (e.g. wan_v2.2-14b-fp8_i2v for higher quality)
1870
+ --azimuth <key> front|front-right|right|back-right|back|back-left|left|front-left
1871
+ --elevation <key> low-angle|eye-level|elevated|high-angle
1872
+ --distance <key> close-up|medium|wide
1873
+ --angle-strength <n> LoRA strength for multiple_angles (default: 0.9)
1874
+ --angle-description <text> Optional subject description
1875
+ --output-format <f> Image output format: png|jpg
1876
+ --sampler <name> Sampler (model-dependent)
1877
+ --scheduler <name> Scheduler (model-dependent)
1878
+ --lora <id> LoRA id (repeatable, edit only)
1879
+ --loras <ids> Comma-separated LoRA ids
1880
+ --lora-strength <n> LoRA strength (repeatable)
1881
+ --lora-strengths <n> Comma-separated LoRA strengths
1882
+ -c, --context <path> Context image for editing (can use multiple)
1883
+ --last-image Use last generated image as context
1884
+
1885
+ Photobooth (Face Transfer):
1886
+ --photobooth Face transfer mode (InstantID + SDXL Turbo)
1887
+ --ref <path|url> Face image (required with --photobooth)
1888
+ --cn-strength <n> ControlNet strength (default: 0.8)
1889
+ --cn-guidance-end <n> ControlNet guidance end point (default: 0.3)
1890
+
1891
+ Video Options:
1892
+ --video, -v Generate video instead of image
1893
+ --workflow <type> Video workflow: t2v|i2v|s2v|ia2v|a2v|v2v|animate-move|animate-replace
1894
+ --fps <num> Frames per second (model default unless set)
1895
+ --duration <sec> Duration in seconds (default: 5)
1896
+ --frames <num> Override total frames (optional)
1897
+ --target-resolution <px> Short-side target that preserves aspect ratio
1898
+ --auto-resize-assets Auto-resize video reference assets (default)
1899
+ --no-auto-resize-assets Disable auto-resize for video assets
1900
+ --estimate-video-cost Estimate video cost and exit (requires --steps)
1901
+ --ref <path|url> Reference image for video (start frame)
1902
+ --ref-end <path|url> End frame for interpolation/morphing
1903
+ --ref-audio <path> Uploaded/generated audio for ia2v/a2v, or s2v lip-sync
1904
+ --audio-start <sec> Start offset into --ref-audio for audio-driven clips
1905
+ --audio-duration <sec> Duration slice from --ref-audio
1906
+ --reference-audio-identity <path> Voice identity clip for LTX native audio
1907
+ --voice-persona <name> Use saved persona voice clip as LTX voice identity
1908
+ --ref-video <path> Reference video for animate/v2v workflows
1909
+ --video-start <sec> Start offset into --ref-video for segmented V2V/animate
1910
+ --controlnet-name <n> ControlNet type for v2v: canny|pose|depth|detailer
1911
+ --controlnet-strength <n> ControlNet strength for v2v (0.0-1.0, default: 0.8)
1912
+ --sam2-coordinates <coords> SAM2 click coords for animate-replace (x,y or x1,y1;x2,y2)
1913
+ --trim-end-frame Trim last frame for seamless video stitching
1914
+ --first-frame-strength <n> Keyframe strength for start frame (0.0-1.0)
1915
+ --last-frame-strength <n> Keyframe strength for end frame (0.0-1.0)
1916
+ --looping, --loop Create seamless loop (i2v only): A→B→A
1917
+ --last-image Use last generated image as reference
1918
+
1919
+ General:
1920
+ -t, --timeout <sec> Timeout in seconds (default: 30, video: 300)
1921
+ --steps <num> Override steps (model-dependent)
1922
+ --guidance <num> Override guidance (model-dependent)
1923
+ --token-type <type> Token type: spark|sogni|auto (default: spark, auto retries with alternate)
1924
+ --balance, --balances Show SPARK/SOGNI balances and exit
1925
+ --version, -V Show sogni-agent version and exit
1926
+ --extract-last-frame <video> <image> Extract last frame from a video (safe ffmpeg wrapper)
1927
+ --concat-videos <out> <clips...> Concatenate video clips (safe ffmpeg wrapper, min 2 clips)
1928
+ --concat-audio <path> Optional audio track to mux over --concat-videos output
1929
+ --concat-audio-start <sec> Start offset into --concat-audio
1930
+ --list-media [type] List recent inbound media files (images|audio|all, default: images)
1931
+ --no-filter Disable NSFW content filter
1932
+ --last Show last render info (JSON)
1933
+ --json Output JSON with all details
1934
+ --strict-size Do not auto-adjust video size to satisfy i2v reference resizing constraints
1935
+ -q, --quiet Suppress progress output
1936
+
1937
+ Memory (persistent user preferences):
1938
+ --memory-set <key> <value> Save a preference (e.g. --memory-set preferred_style "watercolor")
1939
+ --memory-get <key> Get a specific memory
1940
+ --memory-list List all saved memories
1941
+ --memory-remove <key> Delete a memory
1942
+ --memory-category <cat> Category for --memory-set: preference|fact|context (default: preference)
1943
+
1944
+ Personality (custom agent instructions):
1945
+ --personality-set <text> Set personality (e.g. --personality-set "Be concise, use cinematic lighting")
1946
+ --personality-get Show current personality
1947
+ --personality-clear Reset to default personality
1948
+
1949
+ Personas (named people with reference photos):
1950
+ --persona-add <name> Add a persona (combine with --ref, --relationship, --description, --voice-clip)
1951
+ --persona-list List all saved personas
1952
+ --persona-remove <name> Remove a persona and its files
1953
+ --persona-resolve <name> Show persona details and file paths
1954
+ --persona <name> Generate using a persona's reference photo (image context, video ref frame)
1955
+ --relationship <type> Persona relationship: self|partner|child|friend|pet (default: friend)
1956
+ --description <text> Persona appearance description
1957
+ --tags <names> Comma-separated nicknames/aliases
1958
+ --voice <text> Voice description (accent, tone, pitch)
1959
+ --voice-clip <path> Voice clip audio file for LTX 2.3 voice cloning
1960
+
1961
+ Image Models:
1962
+ z_image_turbo_bf16 Fast, general purpose (default)
1963
+ flux1-schnell-fp8 Very fast
1964
+ flux2_dev_fp8 High quality (slow)
1965
+ qwen_image_edit_2511_fp8 Image editing with context (up to 3 images)
1966
+ qwen_image_edit_2511_fp8_lightning Fast image editing
1967
+
1968
+ Recommended LTX 2.3 Video Models:
1969
+ ltx23-22b-fp8_t2v_distilled Text-to-video with native dialogue/audio
1970
+ ltx23-22b-fp8_i2v_distilled Image-to-video with native dialogue/audio
1971
+ ltx23-22b-fp8_ia2v_distilled Image+audio-to-video
1972
+ ltx23-22b-fp8_a2v_distilled Audio-to-video
1973
+ ltx23-22b-fp8_v2v_distilled Video-to-video with ControlNet
1974
+
1975
+ Seedance 2.0 Video Aliases:
1976
+ seedance2 Text-to-video, 4-15s, native audio
1977
+ seedance2-fast Fast 720p-capped text-to-video
1978
+ seedance2-ia2v Image+audio-to-video
1979
+ seedance2-v2v Video-to-video without ControlNet
1980
+
1981
+ WAN 2.2 Video Models:
1982
+ wan_v2.2-14b-fp8_t2v_lightx2v Text-to-video (fast)
1983
+ wan_v2.2-14b-fp8_i2v_lightx2v Fast simple image-to-video
1984
+ wan_v2.2-14b-fp8_i2v Higher quality
1985
+ wan_v2.2-14b-fp8_s2v_lightx2v Face lip-sync with uploaded audio (fast)
1986
+ wan_v2.2-14b-fp8_s2v Sound-to-video (quality)
1987
+ wan_v2.2-14b-fp8_animate-move_lightx2v Animate-move (fast)
1988
+ wan_v2.2-14b-fp8_animate-replace_lightx2v Animate-replace (fast)
1989
+
1990
+ LTX-2 / LTX-2.3 Video Models:
1991
+ ltx2-19b-fp8_t2v_distilled Text-to-video, fast 8-step
1992
+ ltx2-19b-fp8_t2v Text-to-video, quality 20-step
1993
+ ltx2-19b-fp8_i2v_distilled Image-to-video, fast 8-step
1994
+ ltx2-19b-fp8_i2v Image-to-video, quality 20-step
1995
+ ltx2-19b-fp8_ia2v_distilled Image+audio-to-video, fast 8-step
1996
+ ltx2-19b-fp8_a2v_distilled Audio-to-video, fast 8-step
1997
+ ltx2-19b-fp8_v2v_distilled Video-to-video with ControlNet (fast)
1998
+ ltx2-19b-fp8_v2v Video-to-video with ControlNet (quality)
1999
+
2000
+ Examples:
2001
+ sogni-agent "a cat wearing a hat"
2002
+ sogni-agent -o cat.jpg "a cat"
2003
+ sogni-agent --multi-angle -c subject.jpg --azimuth front-right --elevation eye-level --distance medium "studio portrait"
2004
+ sogni-agent --angles-360 -c subject.jpg "studio portrait"
2005
+ sogni-agent --video --ref cat.jpg -o cat.mp4 "cat walks around"
2006
+ sogni-agent --video "A narrator says \"welcome to the story\" as ocean waves crash"
2007
+ sogni-agent --video --ref cat.jpg --ref-audio speech.m4a -m wan_v2.2-14b-fp8_s2v_lightx2v "lip sync"
2008
+ sogni-agent --video --ref cover.jpg --ref-audio song.mp3 "music video"
2009
+ sogni-agent --video --ref-audio song.mp3 "abstract music visualizer"
2010
+ sogni-agent --video --reference-audio-identity voice.webm "NARRATOR: \"This is my voice.\""
2011
+ sogni-agent --video -m ltx23-22b-fp8_t2v_distilled --duration 20 "A wide cinematic aerial shot opens over steep tropical cliffs at golden hour, warm sunlight grazing the rock faces while sea mist drifts above the water below. Palm trees bend gently along the ridge as waves roll against the shoreline, leaving bright bands of foam across the dark stone. The camera glides forward in one continuous pass, revealing more of the coastline as sunlight flickers across wet surfaces and distant birds wheel through the haze. The scene holds a calm, upscale travel-film mood with smooth stabilized motion and crisp environmental detail."
2012
+ sogni-agent --video --ref subject.jpg --ref-video motion.mp4 --workflow animate-move "transfer motion"
2013
+ sogni-agent --video --last-image "gentle camera pan"
2014
+ sogni-agent -c photo.jpg "make the background a beach" -m qwen_image_edit_2511_fp8
2015
+ sogni-agent -c subject.jpg -c style.jpg "apply the style to the subject"
2016
+ sogni-agent --photobooth --ref face.jpg "80s fashion portrait"
2017
+ sogni-agent --photobooth --ref face.jpg -n 4 "LinkedIn professional headshot"
2018
+ sogni-agent -Q pro "a beautiful mountain landscape at sunset"
2019
+ sogni-agent -n 3 "a {red|blue|green} sports car on a highway"
2020
+ `);
2021
+ process.exit(0);
2022
+ } else if (arg === '--') {
2023
+ if (!options.prompt && args[i + 1] !== undefined) {
2024
+ options.prompt = args[i + 1];
2025
+ }
2026
+ break;
2027
+ } else if (arg.startsWith('-')) {
2028
+ fatalCliError(`Unknown option: ${arg}`, {
2029
+ code: 'INVALID_ARGUMENT',
2030
+ hint: 'Use --help to see supported options.'
2031
+ });
2032
+ } else if (!options.prompt) {
2033
+ options.prompt = arg;
2034
+ }
2035
+ }
2036
+
2037
// Provenance flags: set to true when the corresponding option was filled in
// from openclawConfig instead of the command line. Code after this section
// reads them when deciding whether model/type defaults may still override.
let timeoutFromConfig = false;
let widthFromConfig = false;
let heightFromConfig = false;
let fpsFromConfig = false;
let configuredDefaultVideoWorkflow = null;
if (openclawConfig) {
  const cfg = openclawConfig;

  // Numeric defaults apply only when the matching CLI flag was not given and
  // the configured value is a finite number.
  if (!cliSet.width && Number.isFinite(cfg.defaultWidth)) {
    widthFromConfig = true;
    options.width = cfg.defaultWidth;
  }
  if (!cliSet.height && Number.isFinite(cfg.defaultHeight)) {
    heightFromConfig = true;
    options.height = cfg.defaultHeight;
  }
  if (!cliSet.count && Number.isFinite(cfg.defaultCount)) {
    options.count = cfg.defaultCount;
  }

  // String-like defaults: any truthy configured value is taken as-is and is
  // validated by the checks that follow this section.
  if (!cliSet.tokenType && cfg.defaultTokenType) {
    options.tokenType = cfg.defaultTokenType;
  }
  if (!cliSet.seedStrategy && cfg.seedStrategy) {
    options.seedStrategy = cfg.seedStrategy;
  }

  if (options.video) {
    // The configured workflow is only remembered here; it is used later as a
    // fallback after model- and asset-based inference.
    if (!cliSet.workflow && cfg.defaultVideoWorkflow) {
      configuredDefaultVideoWorkflow = cfg.defaultVideoWorkflow;
    }
    if (!cliSet.fps && Number.isFinite(cfg.defaultFps)) {
      fpsFromConfig = true;
      options.fps = cfg.defaultFps;
    }
    // An explicit --frames or --duration both suppress the configured duration.
    if (!cliSet.frames && !cliSet.duration && Number.isFinite(cfg.defaultDurationSec)) {
      options.duration = cfg.defaultDurationSec;
    }
  }

  // Timeouts are configured in seconds but stored in milliseconds; video and
  // image runs read different config keys.
  const configuredTimeoutSec = options.video
    ? cfg.defaultVideoTimeoutSec
    : cfg.defaultImageTimeoutSec;
  if (!cliSet.timeout && Number.isFinite(configuredTimeoutSec)) {
    options.timeout = configuredTimeoutSec * 1000;
    timeoutFromConfig = true;
  }
}
2081
+
2082
// Validate and canonicalize the token type. The value may originate from the
// CLI or from openclawConfig.defaultTokenType, so it is not guaranteed to be a
// string: a truthy non-string (e.g. `true` or a number in the config file)
// previously crashed on `.toLowerCase()` with a raw TypeError. It is now
// reported through the same INVALID_ARGUMENT path as any other bad value.
if (options.tokenType) {
  const token = typeof options.tokenType === 'string'
    ? options.tokenType.toLowerCase()
    : null;
  if (token !== 'spark' && token !== 'sogni' && token !== 'auto') {
    fatalCliError('--token-type must be "spark", "sogni", or "auto".', {
      code: 'INVALID_ARGUMENT',
      details: { flag: '--token-type', value: options.tokenType }
    });
  }
  // Store the lower-cased form so later comparisons are case-insensitive.
  options.tokenType = token;
}
2092
+
2093
// ---------------------------------------------------------------------------
// Quality tier presets — auto-select model, steps, and dimensions
// ---------------------------------------------------------------------------
// Top-level fields are the image presets; `video` holds the video presets.
// A null `steps` or `shortSide` means the tier leaves that setting untouched.
const QUALITY_TIERS = {
  fast: {
    model: 'z_image_turbo_bf16',
    steps: 8,
    shortSide: null,
    video: { steps: 8, shortSide: null }
  },
  hq: {
    model: 'z_image_turbo_bf16',
    steps: null,
    shortSide: 768,
    video: { steps: 8, shortSide: 1088 }
  },
  pro: {
    model: 'flux2_dev_fp8',
    steps: 40,
    shortSide: 1024,
    video: { steps: 20, shortSide: 1920 }
  }
};

if (options.quality) {
  // Use an own-property check rather than a truthiness lookup: a plain object
  // inherits keys such as "constructor", "toString" and "__proto__", so
  // `QUALITY_TIERS[options.quality]` is truthy for those values and the old
  // check let them through (then assigned `undefined` as the model).
  const isKnownTier = Object.prototype.hasOwnProperty.call(QUALITY_TIERS, options.quality);
  if (!isKnownTier) {
    fatalCliError('--quality must be "fast", "hq", or "pro".', {
      code: 'INVALID_ARGUMENT',
      details: { flag: '--quality', value: options.quality }
    });
  }
  // NOTE(review): the code below assumes fatalCliError does not return.
  const tier = QUALITY_TIERS[options.quality];
  // Only the image presets are applied here; this block does not touch video runs.
  if (!options.video) {
    // Only apply model if user didn't explicitly set one.
    if (!cliSet.model) {
      options.model = tier.model;
    }
    // Only apply steps if user didn't explicitly set them.
    if (!cliSet.steps && tier.steps) {
      options.steps = tier.steps;
    }
    // Auto-target a square short-side dimension if user didn't set width/height.
    if (tier.shortSide && !cliSet.width && !cliSet.height) {
      options.width = tier.shortSide;
      options.height = tier.shortSide;
    }
  }
}
2141
+
2142
// Canonicalize the seed strategy via normalizeSeedStrategy; a falsy result
// from that helper is treated as an unrecognized value.
if (options.seedStrategy) {
  const requestedStrategy = options.seedStrategy;
  const canonicalStrategy = normalizeSeedStrategy(requestedStrategy);
  if (!canonicalStrategy) {
    fatalCliError('--seed-strategy must be "random" or "prompt-hash".', {
      code: 'INVALID_ARGUMENT',
      details: { flag: '--seed-strategy', value: requestedStrategy }
    });
  }
  options.seedStrategy = canonicalStrategy;
}
2152
+
2153
// Reject non-finite values for numeric overrides that were given on the
// command line. Values that did not come from the CLI are not checked here.
for (const { key, flag, message } of [
  { key: 'steps', flag: '--steps', message: '--steps must be a number.' },
  { key: 'guidance', flag: '--guidance', message: '--guidance must be a number.' }
]) {
  if (cliSet[key] && !Number.isFinite(options[key])) {
    fatalCliError(message, {
      code: 'INVALID_ARGUMENT',
      details: { flag, value: options[key] }
    });
  }
}
2166
+
2167
// Multi-angle image editing: validate flag combinations, normalize the camera
// parameters, fill in model/sampler defaults, and make sure the
// `multiple_angles` LoRA is present with a matching strength entry.
if (options.multiAngle) {
  if (options.video) {
    fatalCliError('--multi-angle is only for image editing.', { code: 'INVALID_ARGUMENT' });
  }
  if (options.angles360Video) {
    if (!options.angles360) {
      fatalCliError('--angles-360-video requires --angles-360.', { code: 'INVALID_ARGUMENT' });
    }
    if (options.count !== 1) {
      fatalCliError('--angles-360-video requires --count 1.', {
        code: 'INVALID_ARGUMENT',
        details: { count: options.count }
      });
    }
  }

  // --last-image supplies the reference only when no --context image was given.
  if (options._lastImagePath && options.contextImages.length === 0) {
    options.contextImages.push(options._lastImagePath);
    delete options._lastImagePath;
  }
  if (options.contextImages.length === 0) {
    fatalCliError('--multi-angle requires a reference image (--context or --last-image).', {
      code: 'INVALID_ARGUMENT'
    });
  }

  const [azimuthChoices, elevationChoices, distanceChoices] = [
    MULTI_ANGLE_AZIMUTHS,
    MULTI_ANGLE_ELEVATIONS,
    MULTI_ANGLE_DISTANCES
  ].map((entries) => entries.map(({ key }) => key));

  if (options.angles360) {
    // In 360 mode the azimuth option is not normalized; warn if it was passed.
    if (cliSet.azimuth && !options.quiet) {
      console.error('Warning: --azimuth ignored for --angles-360.');
    }
  } else {
    options.azimuth = normalizeMultiAngleValue(options.azimuth, MULTI_ANGLE_AZIMUTH_ALIASES, azimuthChoices, 'azimuth');
  }
  options.elevation = normalizeMultiAngleValue(options.elevation, MULTI_ANGLE_ELEVATION_ALIASES, elevationChoices, 'elevation');
  options.distance = normalizeMultiAngleValue(options.distance, MULTI_ANGLE_DISTANCE_ALIASES, distanceChoices, 'distance');

  // An explicit model must be from the Qwen Image Edit 2511 family; with no
  // model, fall back to the lightning variant.
  if (options.model) {
    if (!options.model.includes('qwen_image_edit_2511')) {
      fatalCliError('--multi-angle requires a Qwen Image Edit 2511 model.', {
        code: 'INVALID_ARGUMENT',
        details: { model: options.model }
      });
    }
  } else {
    options.model = 'qwen_image_edit_2511_fp8_lightning';
  }

  // Remaining defaults only fill options that are still unset (falsy).
  for (const [key, fallback] of [
    ['outputFormat', 'jpg'],
    ['sampler', 'euler'],
    ['scheduler', 'simple']
  ]) {
    if (!options[key]) {
      options[key] = fallback;
    }
  }
  if (!options.angleDescription && options.prompt) {
    options.angleDescription = options.prompt;
  }

  // A single LoRA strength given without any LoRA id is taken as the strength
  // of the implicit multiple_angles LoRA (unless an angle strength is already set).
  if (options.loras.length === 0 && options.loraStrengths.length > 0) {
    if (options.loraStrengths.length > 1) {
      fatalCliError('--lora-strengths requires explicit --loras when using --multi-angle.', {
        code: 'INVALID_ARGUMENT'
      });
    }
    if (options.angleStrength == null) {
      options.angleStrength = options.loraStrengths[0];
    }
    options.loraStrengths = [];
  }

  if (!cliSet.guidance && options.guidance == null) {
    options.guidance = options.model.includes('lightning') ? 1.0 : 4.0;
  }
  if (options.angleStrength == null) {
    options.angleStrength = 0.9;
  }

  // Ensure the multiple_angles LoRA is listed, keeping the strengths array
  // aligned when one is already being maintained.
  const angleLoraStrength = options.angleStrength;
  let angleLoraIndex = options.loras.indexOf('multiple_angles');
  if (angleLoraIndex < 0) {
    angleLoraIndex = options.loras.push('multiple_angles') - 1;
    if (options.loraStrengths.length > 0) {
      options.loraStrengths.push(angleLoraStrength);
    }
  }

  const { loras, loraStrengths } = options;
  if (loraStrengths.length === 0 && loras.length > 0) {
    // No strengths at all: synthesize one per LoRA, 1.0 for everything but ours.
    options.loraStrengths = loras.map((id) => (id === 'multiple_angles' ? angleLoraStrength : 1.0));
  } else if (loraStrengths.length === loras.length && loraStrengths[angleLoraIndex] == null) {
    loraStrengths[angleLoraIndex] = angleLoraStrength;
  }
}
2258
+
2259
// Lower-case the output format, fold "jpeg" into "jpg", then check it against
// what the current mode accepts (mp4 for video; png or jpg for images).
if (options.outputFormat) {
  let format = options.outputFormat.toLowerCase();
  if (format === 'jpeg') {
    format = 'jpg';
  }
  options.outputFormat = format;

  if (options.video) {
    if (format !== 'mp4') {
      fatalCliError('Video output format must be "mp4".', {
        code: 'INVALID_ARGUMENT',
        details: { outputFormat: options.outputFormat }
      });
    }
  } else if (format !== 'png' && format !== 'jpg') {
    fatalCliError('Image output format must be "png" or "jpg".', {
      code: 'INVALID_ARGUMENT',
      details: { outputFormat: options.outputFormat }
    });
  }
}
2276
+
2277
// Cross-option consistency checks. Statement order matters:
// applyPersonaAndVoiceReferences() runs between the LoRA/sampler checks and
// the video-only flag checks, and `options.video` is re-read after it.
if (options.loraStrengths.length > 0) {
  if (options.loras.length === 0) {
    fatalCliError('--lora-strength requires at least one --lora.', { code: 'INVALID_ARGUMENT' });
  } else if (options.loraStrengths.length !== options.loras.length) {
    fatalCliError('--lora-strengths count must match --loras count.', {
      code: 'INVALID_ARGUMENT',
      details: { loras: options.loras.length, loraStrengths: options.loraStrengths.length }
    });
  }
}

// LoRA, sampler and scheduler settings are rejected for video runs.
if (options.video) {
  if (options.loras.length > 0) {
    fatalCliError('--lora options are image-only.', { code: 'INVALID_ARGUMENT' });
  }
  if (options.sampler || options.scheduler) {
    fatalCliError('--sampler/--scheduler are image-only options.', { code: 'INVALID_ARGUMENT' });
  }
}

applyPersonaAndVoiceReferences();

// Flags that only make sense together with --video.
if (!options.video) {
  // null means neither auto-resize flag was given.
  if (options.autoResizeVideoAssets !== null) {
    fatalCliError('--auto-resize-assets is only valid with --video.', { code: 'INVALID_ARGUMENT' });
  }
  if (options.estimateVideoCost) {
    fatalCliError('--estimate-video-cost requires --video.', { code: 'INVALID_ARGUMENT' });
  }
}

if (options.angles360Video && !options.angles360) {
  fatalCliError('--angles-360-video requires --angles-360.', { code: 'INVALID_ARGUMENT' });
}
2310
+
2311
// Normalize/validate video workflow before applying defaults.
// Resolution order when no workflow was requested: workflow implied by the
// model, then workflow implied by the supplied assets, then the configured
// default, then 't2v'.
if (options.video) {
  if (options.videoWorkflow) {
    const canonicalWorkflow = normalizeVideoWorkflow(options.videoWorkflow);
    if (!canonicalWorkflow) {
      fatalCliError(`Unknown workflow "${options.videoWorkflow}". Use t2v|i2v|s2v|ia2v|a2v|v2v|animate-move|animate-replace.`, {
        code: 'INVALID_ARGUMENT',
        details: { workflow: options.videoWorkflow }
      });
    }
    options.videoWorkflow = canonicalWorkflow;
  }

  // Resolve a possible model alias first so the workflow can be inferred from
  // the resolved model id.
  const aliasResolvedModel = resolveVideoModelAlias(options.model, options.videoWorkflow);
  const modelWorkflow = inferVideoWorkflowFromModel(aliasResolvedModel);

  if (options.videoWorkflow) {
    // An explicit workflow must agree with whatever the model implies.
    if (modelWorkflow && modelWorkflow !== options.videoWorkflow) {
      fatalCliError(`Workflow "${options.videoWorkflow}" does not match model "${options.model}".`, {
        code: 'INVALID_ARGUMENT',
        details: { workflow: options.videoWorkflow, model: options.model }
      });
    }
  } else {
    options.videoWorkflow = modelWorkflow
      || inferVideoWorkflowFromAssets(options)
      || configuredDefaultVideoWorkflow
      || 't2v';
  }

  // Re-resolve the alias now that the final workflow is known.
  if (options.model) {
    options.model = resolveVideoModelAlias(options.model, options.videoWorkflow);
  }
}
2338
+
2339
// Resolve --last-image after workflow is known.
// Video workflows that need an image and photobooth mode use it as the
// reference image (without overriding an explicit one); plain image runs add
// it to the context images; other video workflows ignore it with a warning.
if (options._lastImagePath) {
  const lastImage = options._lastImagePath;
  const usesReferenceImage = options.video
    ? workflowRequiresImage(options.videoWorkflow)
    : Boolean(options.photobooth);

  if (usesReferenceImage) {
    if (!options.refImage) {
      options.refImage = lastImage;
    }
  } else if (options.video) {
    if (!options.quiet) {
      console.error(`Warning: --last-image ignored for ${options.videoWorkflow || 'current'} workflow.`);
    }
  } else {
    options.contextImages.push(lastImage);
  }

  // The temporary field is always removed once it has been consumed or ignored.
  delete options._lastImagePath;
}
2354
+
2355
// Set defaults based on type and context.
// Exactly one branch runs: video, photobooth, image edit (context images present), or plain image.
// Values given on the command line (cliSet.*) are never overridden; in the video branch,
// values that came from the config file (*FromConfig) are preserved as well.
if (options.video) {
  // Pick a model for the workflow, then canonicalize any alias.
  options.model = options.model || selectDefaultVideoModel(options.videoWorkflow, options, openclawConfig) || 'wan_v2.2-14b-fp8_i2v_lightx2v';
  options.model = resolveVideoModelAlias(options.model, options.videoWorkflow);
  const videoModelDefaults = getModelDefaults(options.model, openclawConfig);
  const isSeedanceVideo = isSeedanceModel(options.model);
  // Model-specific width/height/fps apply only when neither CLI nor config set them.
  if (!cliSet.width && !widthFromConfig && Number.isFinite(videoModelDefaults?.defaultWidth)) {
    options.width = videoModelDefaults.defaultWidth;
  }
  if (!cliSet.height && !heightFromConfig && Number.isFinite(videoModelDefaults?.defaultHeight)) {
    options.height = videoModelDefaults.defaultHeight;
  }
  if (!cliSet.fps && !fpsFromConfig && Number.isFinite(videoModelDefaults?.fps)) {
    options.fps = videoModelDefaults.fps;
  }
  // Quality tier: steps are applied for non-Seedance models only.
  const videoQuality = options.quality ? QUALITY_TIERS[options.quality]?.video : null;
  if (videoQuality) {
    if (!isSeedanceVideo && !cliSet.steps && Number.isFinite(videoQuality.steps)) {
      options.steps = videoQuality.steps;
    }
  }
  // An explicit --target-resolution wins; otherwise the tier's short side is used (non-Seedance only).
  const videoShortSide = cliSet.targetResolution
    ? options.targetResolution
    : (!isSeedanceVideo ? videoQuality?.shortSide : null);
  if (videoShortSide && !cliSet.width && !cliSet.height && !widthFromConfig && !heightFromConfig) {
    const dims = dimensionsWithShortSide(options.width, options.height, videoShortSide);
    options.width = dims.width;
    options.height = dims.height;
  }
  // Raise the timeout only when it is still at the 30000 ms value and was not set by CLI/config.
  if (!cliSet.timeout && !timeoutFromConfig && options.timeout === 30000) {
    options.timeout = 300000; // 5 min for video
  }
} else if (options.photobooth) {
  // Photobooth uses SDXL Turbo + InstantID ControlNet
  options.model = options.model || openclawConfig?.defaultPhotoboothModel || 'coreml-sogniXLturbo_alpha1_ad';
  // NOTE(review): unlike the video branch, config-provided width/height are not consulted here — confirm intended.
  if (!cliSet.width) options.width = 1024;
  if (!cliSet.height) options.height = 1024;
  if (!cliSet.timeout && !timeoutFromConfig && options.timeout === 30000) {
    options.timeout = 60000;
  }
} else if (options.contextImages.length > 0) {
  // Use qwen edit model when context images provided (unless model explicitly set)
  options.model = options.model || openclawConfig?.defaultEditModel || 'qwen_image_edit_2511_fp8_lightning';
  if (!cliSet.timeout && !timeoutFromConfig && options.timeout === 30000) {
    options.timeout = 60000; // 1 min for editing
  }
} else {
  // Plain text-to-image generation.
  options.model = options.model || openclawConfig?.defaultImageModel || 'z_image_turbo_bf16';
}
2404
+
2405
// A prompt is mandatory unless the invocation is one of the listed actions
// (cost estimate, multi-angle, balance, version, frame extraction, concat, media listing,
// memory/personality management, or a persona action other than 'generate').
if (!options.prompt && !options.estimateVideoCost && !options.multiAngle && !options.showBalance && !options.showVersion && !options.extractLastFrame && !options.concatVideos && !options.listMedia && !options.memoryAction && !options.personalityAction && !(options.personaAction && options.personaAction !== 'generate')) {
  fatalCliError('No prompt provided. Use --help for usage.', { code: 'INVALID_ARGUMENT' });
}

// Video-only inputs are rejected outside --video mode.
// The audio/video offsets are compared against null because null is their "unset" value here.
if (!options.video && (options.refAudio || options.refVideo || options.referenceAudioIdentity || options.voicePersonaName || options.videoWorkflow || options.frames || options.targetResolution || options.audioStart !== null || options.audioDuration !== null || options.videoStart !== null)) {
  fatalCliError('Video-only options (--workflow/--frames/--target-resolution/--ref-audio/--ref-video/--reference-audio-identity/--voice-persona) require --video.', {
    code: 'INVALID_ARGUMENT'
  });
}

// Photobooth needs a face reference and excludes video mode and context images.
if (options.photobooth) {
  if (!options.refImage) {
    fatalCliError('--photobooth requires --ref <face-image>.', { code: 'INVALID_ARGUMENT' });
  }
  if (options.video) {
    fatalCliError('--photobooth cannot be combined with --video.', { code: 'INVALID_ARGUMENT' });
  }
  if (options.contextImages.length > 0) {
    fatalCliError('--photobooth cannot be combined with -c/--context.', { code: 'INVALID_ARGUMENT' });
  }
}
2426
+
2427
// Video-mode validation: checks which reference inputs each workflow requires or forbids,
// then validates the workflow-dependent flags. Every violation is reported via fatalCliError
// with code INVALID_ARGUMENT.
if (options.video) {
  if (options.videoWorkflow === 't2v') {
    // t2v: no reference media of any kind.
    if (options.refImage || options.refImageEnd || options.refAudio || options.refVideo) {
      fatalCliError('t2v does not accept reference image/audio/video.', {
        code: 'INVALID_ARGUMENT'
      });
    }
  } else if (options.videoWorkflow === 'i2v') {
    // i2v: at least one of start/end image; no audio or video.
    if (!options.refImage && !options.refImageEnd) {
      fatalCliError('i2v requires --ref and/or --ref-end.', { code: 'INVALID_ARGUMENT' });
    }
    if (options.refAudio || options.refVideo) {
      fatalCliError('i2v does not accept reference audio/video.', { code: 'INVALID_ARGUMENT' });
    }
  } else if (options.videoWorkflow === 's2v') {
    // s2v: image + audio; no video.
    if (!options.refImage || !options.refAudio) {
      fatalCliError('s2v requires both --ref and --ref-audio.', { code: 'INVALID_ARGUMENT' });
    }
    if (options.refVideo) {
      fatalCliError('s2v does not accept reference video.', { code: 'INVALID_ARGUMENT' });
    }
  } else if (options.videoWorkflow === 'ia2v') {
    // ia2v: image + audio; no end frame, no video.
    if (!options.refImage || !options.refAudio) {
      fatalCliError('ia2v requires both --ref and --ref-audio.', { code: 'INVALID_ARGUMENT' });
    }
    if (options.refImageEnd || options.refVideo) {
      fatalCliError('ia2v does not accept --ref-end or --ref-video.', { code: 'INVALID_ARGUMENT' });
    }
  } else if (options.videoWorkflow === 'a2v') {
    // a2v: audio only.
    if (!options.refAudio) {
      fatalCliError('a2v requires --ref-audio.', { code: 'INVALID_ARGUMENT' });
    }
    if (options.refImage || options.refImageEnd || options.refVideo) {
      fatalCliError('a2v does not accept reference image/video.', { code: 'INVALID_ARGUMENT' });
    }
  } else if (options.videoWorkflow === 'v2v') {
    // v2v: reference video; a ControlNet name is required unless the model is a Seedance model.
    if (!options.refVideo) {
      fatalCliError('v2v requires --ref-video.', { code: 'INVALID_ARGUMENT' });
    }
    if (!options.videoControlNetName && !isSeedanceModel(options.model)) {
      fatalCliError('v2v requires --controlnet-name (canny|pose|depth|detailer).', { code: 'INVALID_ARGUMENT' });
    }
    if (options.refAudio) {
      fatalCliError('v2v does not accept reference audio.', { code: 'INVALID_ARGUMENT' });
    }
  } else if (options.videoWorkflow === 'animate-move' || options.videoWorkflow === 'animate-replace') {
    // animate-*: image + video; no audio.
    if (!options.refImage || !options.refVideo) {
      fatalCliError('animate workflows require both --ref and --ref-video.', { code: 'INVALID_ARGUMENT' });
    }
    if (options.refAudio) {
      fatalCliError('animate workflows do not accept reference audio.', { code: 'INVALID_ARGUMENT' });
    }
  }

  // Offsets only make sense together with the media they trim (null means "not provided").
  if ((options.audioStart !== null || options.audioDuration !== null) && !options.refAudio) {
    fatalCliError('--audio-start/--audio-duration require --ref-audio.', { code: 'INVALID_ARGUMENT' });
  }
  if (options.videoStart !== null && !options.refVideo) {
    fatalCliError('--video-start requires --ref-video.', { code: 'INVALID_ARGUMENT' });
  }

  // Voice identity references: t2v/i2v workflows only, and only with a model accepted by isLtx2Model.
  if (options.referenceAudioIdentity && !['t2v', 'i2v'].includes(options.videoWorkflow)) {
    fatalCliError('--reference-audio-identity/--voice-persona is only supported for LTX native-audio t2v/i2v workflows.', {
      code: 'INVALID_ARGUMENT'
    });
  }
  if (options.referenceAudioIdentity && !isLtx2Model(options.model)) {
    fatalCliError('--reference-audio-identity/--voice-persona requires an LTX video model.', {
      code: 'INVALID_ARGUMENT',
      // Suggest the model mapped to the current workflow, falling back to the t2v entry.
      hint: `Use -m ${LTX23_WORKFLOW_MODELS[options.videoWorkflow] || LTX23_WORKFLOW_MODELS.t2v}`
    });
  }

  // Validate controlnet-name values
  if (options.videoControlNetName) {
    const validControlNets = ['canny', 'pose', 'depth', 'detailer'];
    if (!validControlNets.includes(options.videoControlNetName)) {
      fatalCliError(`Unknown --controlnet-name "${options.videoControlNetName}". Use: ${validControlNets.join('|')}`, {
        code: 'INVALID_ARGUMENT',
        details: { flag: '--controlnet-name', value: options.videoControlNetName, allowed: validControlNets }
      });
    }
  }

  // Validate SAM2 coordinates (only for animate-replace)
  if (options.sam2Coordinates && options.videoWorkflow !== 'animate-replace') {
    fatalCliError('--sam2-coordinates is only supported with animate-replace workflow.', { code: 'INVALID_ARGUMENT' });
  }

  // Validate looping flag
  if (options.looping) {
    // NOTE(review): this check sits inside `if (options.video)`, so it cannot fire from here.
    if (!options.video) {
      fatalCliError('--looping requires --video.', { code: 'INVALID_ARGUMENT' });
    }
    if (options.videoWorkflow !== 'i2v') {
      fatalCliError('--looping is only supported with i2v workflow.', { code: 'INVALID_ARGUMENT' });
    }
    if (!options.refImage) {
      fatalCliError('--looping requires --ref (reference image).', { code: 'INVALID_ARGUMENT' });
    }
    if (options.refImageEnd) {
      fatalCliError('--looping cannot be used with --ref-end (end frame is auto-generated).', { code: 'INVALID_ARGUMENT' });
    }
  }
}
2532
+
2533
applyVideoPromptGuardrails();

// Seedance models only accept durations of 4-15 seconds. When no explicit frame count
// was requested, pull the duration into that range and report the change.
if (options.video && isSeedanceModel(options.model) && !options.frames) {
  const requestedDuration = options.duration;
  const supportedDuration = Math.min(15, Math.max(4, requestedDuration));
  if (supportedDuration !== requestedDuration) {
    if (!options.quiet) {
      console.error(`Adjusted Seedance video duration from ${requestedDuration}s to ${supportedDuration}s (supported range: 4-15s).`);
    }
    options.duration = supportedDuration;
  }
}
2544
+
2545
// Video dimensions:
// - Sogni video pipelines have model-specific min/max dimensions and divisors.
// - When using i2v (or any ref-based workflow), the Sogni client wrapper will *resize the reference image*
//   with sharp `fit: inside` and then override the project width/height with the resized reference dims.
//   That means a "valid" requested size can still fail if the resized ref lands off the model divisor.
//
// This block:
//   1. rejects non-positive / non-finite sizes,
//   2. normalizes the requested size against the model rules,
//   3. for i2v with local reference files, predicts the post-resize size and either adjusts
//      options.width/height, fails (explicit size + --strict-size), or flags the reference
//      for pre-resizing (options._needsRefResize / options._needsRefEndResize).
// It also records options._adjustedVideoDims and options._effectiveVideoDims.
if (options.video) {
  const videoDimensionRules = videoDimensionRulesFromDefaults(getModelDefaults(options.model, openclawConfig));
  if (!Number.isFinite(options.width) || options.width <= 0 || !Number.isFinite(options.height) || options.height <= 0) {
    fatalCliError('Video width/height must be positive numbers.', {
      code: 'INVALID_ARGUMENT',
      details: { width: options.width, height: options.height }
    });
  }

  // Keep the pre-normalization size so the adjustment message can show both values.
  const originalVideoWidth = options.width;
  const originalVideoHeight = options.height;
  const normalizedVideoDims = normalizeVideoDimensionsLikeWrapper(options.width, options.height, videoDimensionRules);
  options.width = normalizedVideoDims.width;
  options.height = normalizedVideoDims.height;
  if (normalizedVideoDims.adjusted && !options.quiet) {
    console.error(
      `Auto-adjusted video dimensions from ${originalVideoWidth}x${originalVideoHeight} ` +
      `to ${options.width}x${options.height} to meet video requirements.`
    );
  }

  if (options.videoWorkflow === 'i2v' && (options.refImage || options.refImageEnd)) {
    // Start and end references are checked in this order; `resizeFlag` names the
    // options property set when that reference must be pre-resized.
    const references = [
      {
        key: 'refImage',
        path: options.refImage,
        label: 'Reference image',
        resizeFlag: '_needsRefResize'
      },
      {
        key: 'refImageEnd',
        path: options.refImageEnd,
        label: 'End reference image',
        resizeFlag: '_needsRefEndResize'
      }
    ];
    // Dimensions of the references that could be read locally, keyed by reference key.
    const localRefDims = new Map();

    // A predicted size is incompatible when either side is off the model divisor or below the minimum.
    // A missing prediction counts as compatible.
    const isIncompatible = (predicted) => Boolean(predicted) && (
      predicted.width % videoDimensionRules.dimensionMultiple !== 0 ||
      predicted.height % videoDimensionRules.dimensionMultiple !== 0 ||
      predicted.width < videoDimensionRules.minDimension ||
      predicted.height < videoDimensionRules.minDimension
    );

    for (const ref of references) {
      // Only existing local files are inspected; URLs and missing paths are skipped here.
      if (!ref.path || isHttpUrl(ref.path) || !existsSync(ref.path)) continue;
      const buffer = readFileSync(ref.path);
      const dims = getImageDimensionsFromBuffer(buffer);
      if (!dims?.width || !dims?.height) continue;
      localRefDims.set(ref.key, dims);

      const predicted = predictSharpInsideResizeDims(dims.width, dims.height, options.width, options.height);
      if (!isIncompatible(predicted)) continue;

      // Look for a bounding box whose resized reference satisfies the model rules.
      const candidate = pickCompatibleI2vBoundingBox(dims.width, dims.height, options.width, options.height, { allowImperfect: true, rules: videoDimensionRules });
      if (!candidate) {
        // No usable bounding box: flag the reference for pre-resizing instead.
        options[ref.resizeFlag] = true;
        if (!options.quiet) {
          console.error(
            `${ref.label} ${dims.width}x${dims.height} will be pre-resized to model-compatible dimensions ` +
            'because no compatible bounding box exists for i2v workflow.'
          );
        }
        continue;
      }

      // With an explicit size and --strict-size, fail rather than silently changing the size.
      if ((cliSet.width || cliSet.height) && options.strictSize) {
        fatalCliError(
          `${ref.label} ${dims.width}x${dims.height} would resize to ${predicted.width}x${predicted.height}, ` +
          `but both dimensions must be divisible by ${videoDimensionRules.dimensionMultiple}.`,
          {
            code: 'INVALID_VIDEO_SIZE',
            details: {
              referenceType: ref.key,
              referencePath: ref.path,
              reference: { width: dims.width, height: dims.height },
              requested: { width: options.width, height: options.height },
              resized: predicted
            },
            hint: `Try: --width ${candidate.width} --height ${candidate.height} (or omit --strict-size)`
          }
        );
      }

      // Adopt the candidate size. Later references in this loop are evaluated against the new size.
      const beforeW = options.width;
      const beforeH = options.height;
      options.width = candidate.width;
      options.height = candidate.height;

      const predictedAfter = predictSharpInsideResizeDims(dims.width, dims.height, options.width, options.height);
      // Record of the adjustment; overwritten if a later reference triggers another one.
      options._adjustedVideoDims = {
        reason: 'i2v-ref-model-divisor',
        referenceType: ref.key,
        requested: { width: beforeW, height: beforeH },
        adjusted: { width: options.width, height: options.height },
        resizedFrom: predicted,
        resizedTo: predictedAfter || null
      };
      if (!options.quiet) {
        const mode = cliSet.width || cliSet.height ? 'Warning: Adjusted' : 'Auto-adjusted';
        console.error(
          `${mode} i2v video size from ${beforeW}x${beforeH} to ${options.width}x${options.height} ` +
          `because resized reference would be ${predicted.width}x${predicted.height}.`
        );
      }
    }

    // Second pass against the final size: an adjustment made for one reference may have
    // left the other one incompatible, in which case it is flagged for pre-resizing.
    for (const ref of references) {
      const dims = localRefDims.get(ref.key);
      if (!dims) continue;
      const predicted = predictSharpInsideResizeDims(dims.width, dims.height, options.width, options.height);
      if (isIncompatible(predicted)) {
        options[ref.resizeFlag] = true;
      }
    }

    // Record the predicted resized size, preferring the start reference over the end reference.
    const effectiveDimsSource = localRefDims.get('refImage') || localRefDims.get('refImageEnd') || null;
    if (effectiveDimsSource) {
      const predicted = predictSharpInsideResizeDims(
        effectiveDimsSource.width,
        effectiveDimsSource.height,
        options.width,
        options.height
      );
      if (predicted) {
        options._effectiveVideoDims = {
          width: predicted.width,
          height: predicted.height,
          refWidth: effectiveDimsSource.width,
          refHeight: effectiveDimsSource.height,
          requestedWidth: options.width,
          requestedHeight: options.height
        };
      }
    }

    if ((options._needsRefResize || options._needsRefEndResize) && !options.quiet) {
      console.error('One or more i2v references require pre-resize to ensure model-compatible dimensions.');
    }
  }
}
2692
+
2693
// Enforce the selected model's context-image capacity (image generation/editing only).
if (options.contextImages.length > 0 && !options.video) {
  const providedCount = options.contextImages.length;
  const maxImages = getMaxContextImages(options.model);

  // A limit of zero means the model cannot take context images at all.
  if (maxImages === 0) {
    fatalCliError(`Model ${options.model} does not support context images.`, {
      code: 'INVALID_ARGUMENT',
      details: { model: options.model },
      hint: 'Try: qwen_image_edit_2511_fp8 or qwen_image_edit_2511_fp8_lightning'
    });
  }

  if (providedCount > maxImages) {
    fatalCliError(`Model ${options.model} supports max ${maxImages} context images, got ${providedCount}.`, {
      code: 'INVALID_ARGUMENT',
      details: { model: options.model, maxImages, provided: providedCount }
    });
  }
}
2710
+
2711
// Load last render seed if requested (--last-seed).
// Reads LAST_RENDER_PATH and reuses its `seed`. When the file is missing, unreadable or has
// no seed, options.seed is left untouched so the seed-generation step below picks one.
if (options.lastSeed) {
  // Diagnostics go to stderr and honor --quiet like the other messages in this section.
  const report = (message) => {
    if (!options.quiet) console.error(message);
  };

  if (existsSync(LAST_RENDER_PATH)) {
    try {
      const lastRender = JSON.parse(readFileSync(LAST_RENDER_PATH, 'utf8'));
      const savedSeed = lastRender.seed;
      // A numeric seed of 0 is valid and must not be discarded by a truthiness test;
      // non-numeric values keep the previous "any truthy value" rule.
      const hasSavedSeed = typeof savedSeed === 'number'
        ? Number.isFinite(savedSeed)
        : Boolean(savedSeed);
      if (hasSavedSeed) {
        options.seed = savedSeed;
        report(`Using seed from last render: ${options.seed}`);
      }
    } catch (e) {
      // Corrupt or unreadable file: best-effort feature, fall through to seed generation.
      report('Warning: Could not load last render seed');
    }
  } else {
    report('Warning: No previous render found, generating seed');
  }
}
2727
+
2728
// Choose a seed when none has been set and the invocation is not one of the actions
// excluded below. The strategy comes from the CLI, then the config, then defaults
// to 'prompt-hash'.
if (
  !(options.estimateVideoCost || options.showVersion || options.extractLastFrame || options.concatVideos || options.listMedia) &&
  (options.seed === null || options.seed === undefined)
) {
  const requestedStrategy = options.seedStrategy || openclawConfig?.seedStrategy || 'prompt-hash';
  const strategyName = normalizeSeedStrategy(requestedStrategy) || 'prompt-hash';

  // The normalized strategy is stored before hashing because the hash receives `options`.
  options.seedStrategy = strategyName;
  if (strategyName === 'random') {
    options.seed = generateRandomSeed();
  } else {
    options.seed = computePromptHashSeed(options);
  }

  if (!options.quiet) {
    console.error(`Using ${strategyName} seed: ${options.seed}`);
  }
}
2737
+
2738
/**
 * Load Sogni credentials.
 *
 * Lookup order:
 *   1. the KEY=VALUE file at CREDENTIALS_PATH (API key preferred over username/password),
 *   2. the SOGNI_API_KEY environment variable,
 *   3. the SOGNI_USERNAME + SOGNI_PASSWORD environment variables.
 *
 * @returns {object} `{ SOGNI_API_KEY }`, or an object containing SOGNI_USERNAME and
 *   SOGNI_PASSWORD (when read from the file, every parsed key is included).
 * @throws {Error} with `code === 'MISSING_CREDENTIALS'` when nothing usable is found.
 */
function loadCredentials() {
  if (existsSync(CREDENTIALS_PATH)) {
    const creds = {};
    for (const rawLine of readFileSync(CREDENTIALS_PATH, 'utf8').split(/\r?\n/)) {
      const line = rawLine.trim();
      if (!line || line.startsWith('#')) continue;

      // Split on the FIRST '=' only: secrets (base64-padded API keys, passwords)
      // may legitimately contain '=' and must not be truncated.
      const separator = line.indexOf('=');
      if (separator <= 0) continue;

      const key = line.slice(0, separator).trim();
      const val = line.slice(separator + 1).trim();
      if (key && val) creds[key] = val;
    }

    if (creds.SOGNI_API_KEY) {
      return {
        SOGNI_API_KEY: creds.SOGNI_API_KEY
      };
    }
    if (creds.SOGNI_USERNAME && creds.SOGNI_PASSWORD) {
      return creds;
    }
  }

  if (hasEnv('SOGNI_API_KEY')) {
    return {
      SOGNI_API_KEY: getEnv('SOGNI_API_KEY')
    };
  }

  if (hasEnv('SOGNI_USERNAME') && hasEnv('SOGNI_PASSWORD')) {
    return {
      SOGNI_USERNAME: getEnv('SOGNI_USERNAME'),
      SOGNI_PASSWORD: getEnv('SOGNI_PASSWORD')
    };
  }

  const err = new Error('No Sogni credentials found.');
  err.code = 'MISSING_CREDENTIALS';
  err.hint = 'Set SOGNI_API_KEY or SOGNI_USERNAME/SOGNI_PASSWORD, or configure SOGNI_CREDENTIALS_PATH.';
  err.details = {
    triedEnv: ['SOGNI_API_KEY', 'SOGNI_USERNAME', 'SOGNI_PASSWORD'],
    triedFile: CREDENTIALS_PATH
  };
  throw err;
}
2779
+
2780
/**
 * Persist metadata about the most recent render to LAST_RENDER_PATH as pretty-printed JSON.
 * Best-effort: any failure (directory creation, serialization, write) is ignored.
 *
 * @param {*} info - JSON-serializable render description.
 */
function saveLastRender(info) {
  try {
    const targetDir = dirname(LAST_RENDER_PATH);
    if (!existsSync(targetDir)) {
      mkdirSync(targetDir, { recursive: true });
    }
    const payload = JSON.stringify(info, null, 2);
    writeFileSync(LAST_RENDER_PATH, payload);
  } catch (e) {
    // Ignore save errors
  }
}
2790
+
2791
+ // ---------------------------------------------------------------------------
2792
+ // Memory system — persistent user preferences on disk
2793
+ // ---------------------------------------------------------------------------
2794
+ const MEMORIES_PATH = getEnv('SOGNI_MEMORIES_PATH') || DEFAULT_MEMORIES_PATH;
2795
+
2796
/**
 * Read the persisted memory entries from MEMORIES_PATH.
 *
 * Callers index the result with array methods and read `.key` on every entry, so the
 * parsed JSON is validated: anything that is not an array yields `[]`, and entries that
 * are not objects are dropped.
 *
 * @returns {object[]} Memory entries; empty when the file is missing, unreadable or malformed.
 */
function loadMemories() {
  try {
    if (existsSync(MEMORIES_PATH)) {
      const parsed = JSON.parse(readFileSync(MEMORIES_PATH, 'utf8'));
      if (Array.isArray(parsed)) {
        return parsed.filter((entry) => entry !== null && typeof entry === 'object');
      }
    }
  } catch {
    // Deliberate best-effort: an unreadable or corrupt store behaves like an empty one.
  }
  return [];
}
2802
+
2803
/**
 * Write the full memory list to MEMORIES_PATH as pretty-printed JSON,
 * creating the parent directory first when it does not exist.
 *
 * @param {object[]} memories - Entries to persist (replaces the file content).
 */
function saveMemories(memories) {
  const parentDir = dirname(MEMORIES_PATH);
  if (!existsSync(parentDir)) {
    mkdirSync(parentDir, { recursive: true });
  }
  const serialized = JSON.stringify(memories, null, 2);
  writeFileSync(MEMORIES_PATH, serialized);
}
2808
+
2809
/**
 * Create or update the memory entry identified by `key` and persist the store.
 *
 * An existing entry keeps its other fields (such as `id` and `createdAt`) and has the
 * supplied fields plus `updatedAt` overwritten; a new entry gets a random hex `id`.
 *
 * @param {string} key
 * @param {*} value
 * @param {string} [category='preference']
 * @param {string} [source='user']
 * @returns {'updated'|'created'} Which of the two operations took place.
 */
function memorySet(key, value, category = 'preference', source = 'user') {
  const store = loadMemories();
  const position = store.findIndex((item) => item.key === key);
  const alreadyStored = position >= 0;
  const fields = { key, value, category, source, updatedAt: Date.now() };

  if (alreadyStored) {
    store[position] = { ...store[position], ...fields };
  } else {
    const id = randomBytes(8).toString('hex');
    store.push({ id, ...fields, createdAt: Date.now() });
  }

  saveMemories(store);
  return alreadyStored ? 'updated' : 'created';
}
2818
+
2819
/**
 * Delete every memory entry whose `key` equals the given key.
 * The store is rewritten only when something was actually removed.
 *
 * @param {string} key
 * @returns {boolean} true when at least one entry was removed, false otherwise.
 */
function memoryRemove(key) {
  const before = loadMemories();
  const after = before.filter((item) => item.key !== key);
  const removedAny = after.length !== before.length;
  if (removedAny) {
    saveMemories(after);
  }
  return removedAny;
}
2826
+
2827
+ // ---------------------------------------------------------------------------
2828
+ // Personality system — custom instructions for agent behavior
2829
+ // ---------------------------------------------------------------------------
2830
+ const PERSONALITY_PATH = getEnv('SOGNI_PERSONALITY_PATH') || DEFAULT_PERSONALITY_PATH;
2831
+
2832
/**
 * Read the custom personality instructions stored at PERSONALITY_PATH.
 *
 * @returns {string|null} Trimmed file content (possibly an empty string), or null when
 *   the file does not exist or cannot be read.
 */
function loadPersonality() {
  let instructions = null;
  try {
    if (existsSync(PERSONALITY_PATH)) {
      instructions = readFileSync(PERSONALITY_PATH, 'utf8').trim();
    }
  } catch {
    // Unreadable file is treated the same as no personality.
  }
  return instructions;
}
2838
+
2839
/**
 * Store the personality instructions at PERSONALITY_PATH, creating the parent
 * directory when needed. The text is written exactly as given.
 *
 * @param {string} text - Instructions to persist.
 */
function savePersonality(text) {
  const parentDir = dirname(PERSONALITY_PATH);
  const parentMissing = !existsSync(parentDir);
  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true });
  }
  writeFileSync(PERSONALITY_PATH, text);
}
2844
+
2845
/**
 * Remove the stored personality file, if any. Best-effort: failures are ignored.
 */
function clearPersonality() {
  try {
    if (!existsSync(PERSONALITY_PATH)) return;
    unlinkSync(PERSONALITY_PATH);
  } catch {
    // Nothing to do: a file that cannot be removed is left in place.
  }
}
2848
+
2849
+ // ---------------------------------------------------------------------------
2850
+ // Persona system — named people with reference photos and voice clips
2851
+ // ---------------------------------------------------------------------------
2852
+ const PERSONAS_DIR = getEnv('SOGNI_PERSONAS_DIR') || DEFAULT_PERSONAS_DIR;
2853
+ const PERSONAS_INDEX_PATH = join(PERSONAS_DIR, 'index.json');
2854
+
2855
/**
 * Read the persona index from PERSONAS_INDEX_PATH.
 *
 * Callers use array methods on the result and read properties of every entry, so the
 * parsed JSON is validated: a non-array document yields `[]` and entries that are not
 * objects are dropped.
 *
 * @returns {object[]} Persona records; empty when the index is missing, unreadable or malformed.
 */
function loadPersonas() {
  try {
    if (existsSync(PERSONAS_INDEX_PATH)) {
      const parsed = JSON.parse(readFileSync(PERSONAS_INDEX_PATH, 'utf8'));
      if (Array.isArray(parsed)) {
        return parsed.filter((entry) => entry !== null && typeof entry === 'object');
      }
    }
  } catch {
    // Deliberate best-effort: a corrupt or unreadable index behaves like an empty one.
  }
  return [];
}
2861
+
2862
/**
 * Write the persona index to PERSONAS_INDEX_PATH as pretty-printed JSON,
 * creating PERSONAS_DIR first when it does not exist.
 *
 * @param {object[]} personas - Complete list of persona records.
 */
function savePersonasIndex(personas) {
  const dirMissing = !existsSync(PERSONAS_DIR);
  if (dirMissing) {
    mkdirSync(PERSONAS_DIR, { recursive: true });
  }
  const serialized = JSON.stringify(personas, null, 2);
  writeFileSync(PERSONAS_INDEX_PATH, serialized);
}
2866
+
2867
/**
 * Derive a slug from a persona name: lower-case it and join its runs of
 * `a-z` / `0-9` characters with single hyphens. All other characters act as separators.
 *
 * @param {string} name
 * @returns {string} The slug; empty when the name has no `a-z`/`0-9` characters.
 */
function personaSlug(name) {
  const alphanumericRuns = name
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((run) => run !== '');
  return alphanumericRuns.join('-');
}
2870
+
2871
/**
 * Register a new persona and copy its optional photo / voice clip into
 * `<PERSONAS_DIR>/<slug>/`.
 *
 * All validation happens before anything is written, so a rejected call leaves no
 * directory or partial copy behind.
 *
 * @param {object} params
 * @param {string} params.name - Display name; must be unique (case-insensitive) and must
 *   produce a non-empty slug that no other persona already uses.
 * @param {string} [params.relationship='friend']
 * @param {string} [params.description='']
 * @param {string[]} [params.tags=[]]
 * @param {*} [params.voice=null]
 * @param {string} [params.photoPath] - Source image to copy (passed through expandHomePath).
 * @param {string} [params.voiceClipPath] - Source audio clip to copy (passed through expandHomePath).
 * @returns {object} The persona record that was appended to the index.
 * @throws {Error} When the name is invalid or already taken, the slug is empty or in use,
 *   or a source file does not exist.
 */
function addPersona({ name, relationship, description, tags, voice, photoPath, voiceClipPath }) {
  if (typeof name !== 'string' || name.trim() === '') {
    throw new Error('Persona name must be a non-empty string.');
  }

  const personas = loadPersonas();
  const wantedName = name.toLowerCase();
  if (personas.some((p) => typeof p?.name === 'string' && p.name.toLowerCase() === wantedName)) {
    throw new Error(`Persona "${name}" already exists. Remove it first or use a different name.`);
  }

  const slug = personaSlug(name);
  // An empty slug would make the persona folder the personas root itself, mixing its
  // files with index.json and exposing the whole root to removePersona's cleanup.
  if (!slug) {
    throw new Error(`Persona name "${name}" must contain at least one letter (a-z) or digit.`);
  }
  // Different names can share a slug ("John Doe" / "john-doe"); sharing a folder would
  // overwrite the other persona's files and delete them on removal.
  const slugOwner = personas.find((p) => p?.slug === slug);
  if (slugOwner) {
    throw new Error(`Persona "${name}" conflicts with existing persona "${slugOwner.name}" (same storage folder "${slug}"). Use a different name.`);
  }

  // Resolve and verify the source files before creating anything on disk.
  const resolveSource = (sourcePath, label) => {
    if (!sourcePath) return null;
    const resolved = expandHomePath(sourcePath);
    if (!existsSync(resolved)) throw new Error(`${label} not found: ${resolved}`);
    return resolved;
  };
  const resolvedPhoto = resolveSource(photoPath, 'Photo');
  const resolvedVoice = resolveSource(voiceClipPath, 'Voice clip');

  const personaDir = join(PERSONAS_DIR, slug);
  if (!existsSync(personaDir)) mkdirSync(personaDir, { recursive: true });

  // Copy a source file into the persona folder under a fixed stem, keeping its
  // (lower-cased) extension or falling back to a default one.
  const copyInto = (resolved, stem, fallbackExt) => {
    if (!resolved) return null;
    const ext = extname(resolved).toLowerCase() || fallbackExt;
    const target = join(personaDir, `${stem}${ext}`);
    writeFileSync(target, readFileSync(resolved));
    return target;
  };
  const savedPhotoPath = copyInto(resolvedPhoto, 'photo', '.jpg');
  const savedVoicePath = copyInto(resolvedVoice, 'voice-clip', '.webm');

  const persona = {
    id: randomBytes(8).toString('hex'),
    name,
    slug,
    relationship: relationship || 'friend',
    description: description || '',
    tags: tags || [],
    voice: voice || null,
    photoPath: savedPhotoPath,
    voiceClipPath: savedVoicePath,
    createdAt: Date.now(),
    updatedAt: Date.now()
  };
  personas.push(persona);
  savePersonasIndex(personas);
  return persona;
}
2917
+
2918
/**
 * Remove a persona (matched by name, case-insensitive) from the index and delete the
 * files in its folder.
 *
 * The folder is derived from the `slug` stored in the index file, so the slug is
 * validated before anything is deleted: only slugs of the shape produced by personaSlug
 * (`a-z0-9` groups joined by single hyphens) are accepted. An empty, missing or
 * path-like slug ("", "..", "a/b") would otherwise point at the personas root or outside
 * it. With an invalid slug the index entry is still removed, but no files are touched.
 *
 * @param {string} name - Persona name.
 * @returns {boolean} true when a persona was removed, false when no persona has that name.
 */
function removePersona(name) {
  const personas = loadPersonas();
  const wantedName = name.toLowerCase();
  const idx = personas.findIndex((p) => typeof p?.name === 'string' && p.name.toLowerCase() === wantedName);
  if (idx < 0) return false;

  const { slug } = personas[idx];
  const isSafeSlug = typeof slug === 'string' && /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(slug);
  if (isSafeSlug) {
    const personaDir = join(PERSONAS_DIR, slug);
    try {
      if (existsSync(personaDir)) {
        for (const entry of readdirSync(personaDir)) {
          const entryPath = join(personaDir, entry);
          if (statSync(entryPath).isFile()) unlinkSync(entryPath);
        }
        // Fails (and is ignored) when sub-directories remain.
        rmdirSync(personaDir);
      }
    } catch {
      // Best-effort cleanup: leftover files must not prevent removing the index entry.
    }
  }

  personas.splice(idx, 1);
  savePersonasIndex(personas);
  return true;
}
2939
+
2940
/**
 * Find a persona from a user-supplied reference.
 *
 * Resolution order:
 *   1. exact name (case-insensitive),
 *   2. exact tag (case-insensitive),
 *   3. implicit references by relationship:
 *        "me" / "myself" / "i"                (whole input)   -> relationship 'self'
 *        wife / husband / partner             (as a word)     -> relationship 'partner'
 *        son / daughter / kid / child         (as a word)     -> relationship 'child'
 *        dog / cat / pet                      (as a word)     -> relationship 'pet'
 *
 * Relationship keywords are matched as whole words, not substrings, so unknown names such
 * as "Jason", "Catherine" or "Peter" are not silently mapped to a child or pet persona.
 *
 * @param {string} name - Name, tag or implicit reference.
 * @returns {object|null} The matching persona record, or null when nothing matches.
 */
function resolvePersonaByName(name) {
  if (typeof name !== 'string') return null;
  const wanted = name.trim().toLowerCase();
  if (wanted === '') return null;

  const loaded = loadPersonas();
  const personas = Array.isArray(loaded)
    ? loaded.filter((p) => p !== null && typeof p === 'object')
    : [];

  // Match by name (case-insensitive)
  const byName = personas.find((p) => typeof p.name === 'string' && p.name.toLowerCase() === wanted);
  if (byName) return byName;

  // Match by tag
  const byTag = personas.find((p) => Array.isArray(p.tags) &&
    p.tags.some((tag) => typeof tag === 'string' && tag.toLowerCase() === wanted));
  if (byTag) return byTag;

  // Match implicit references
  let relationship = null;
  if (wanted === 'me' || wanted === 'myself' || wanted === 'i') {
    relationship = 'self';
  } else {
    const words = new Set(wanted.split(/[^a-z0-9]+/).filter(Boolean));
    const hasAnyWord = (...candidates) => candidates.some((word) => words.has(word));
    if (hasAnyWord('wife', 'husband', 'partner')) {
      relationship = 'partner';
    } else if (hasAnyWord('son', 'daughter', 'kid', 'child')) {
      relationship = 'child';
    } else if (hasAnyWord('dog', 'cat', 'pet')) {
      relationship = 'pet';
    }
  }
  if (relationship === null) return null;
  return personas.find((p) => p.relationship === relationship) || null;
}
2961
+
2962
/**
 * Translate the persona-related options into concrete reference inputs on `options`.
 *
 * Voice persona (options.voicePersonaName): the persona must exist and own a voice clip
 * on disk; the clip becomes options.referenceAudioIdentity unless one is already set.
 *
 * Persona generation (options.personaAction === 'generate' with options.personaName):
 * the persona photo becomes the video reference image (when none is set) or is appended
 * to the context images; in video mode its voice clip is used as the audio identity when
 * none is set yet.
 *
 * Failures are reported through fatalCliError (PERSONA_NOT_FOUND / PERSONA_VOICE_NOT_FOUND).
 */
function applyPersonaAndVoiceReferences() {
  const fileExists = (candidate) => Boolean(candidate) && existsSync(candidate);

  if (options.voicePersonaName) {
    const voicePersona = resolvePersonaByName(options.voicePersonaName);
    if (!voicePersona) {
      fatalCliError(`Voice persona "${options.voicePersonaName}" not found. Use --persona-list to see available personas.`, {
        code: 'PERSONA_NOT_FOUND'
      });
    }
    if (!fileExists(voicePersona.voiceClipPath)) {
      fatalCliError(`Voice persona "${voicePersona.name}" does not have a saved voice clip.`, {
        code: 'PERSONA_VOICE_NOT_FOUND'
      });
    }
    if (!options.referenceAudioIdentity) {
      options.referenceAudioIdentity = voicePersona.voiceClipPath;
      // Marked as if it had been given on the command line.
      cliSet.referenceAudioIdentity = true;
    }
    options._voicePersonaResolvedName = voicePersona.name;
  }

  const isPersonaGeneration = options.personaAction === 'generate' && Boolean(options.personaName);
  if (!isPersonaGeneration) return;

  const persona = resolvePersonaByName(options.personaName);
  if (!persona) {
    fatalCliError(`Persona "${options.personaName}" not found. Use --persona-list to see available personas.`, {
      code: 'PERSONA_NOT_FOUND'
    });
  }

  options._resolvedPersona = persona;

  if (fileExists(persona.photoPath)) {
    if (!options.video) {
      options.contextImages.push(persona.photoPath);
    } else if (!options.refImage) {
      options.refImage = persona.photoPath;
    }
  }

  const canUsePersonaVoice = options.video &&
    fileExists(persona.voiceClipPath) &&
    !options.referenceAudioIdentity;
  if (canUsePersonaVoice) {
    options.referenceAudioIdentity = persona.voiceClipPath;
    options.voicePersonaName = options.voicePersonaName || persona.name;
    options._voicePersonaResolvedName = persona.name;
  }
}
3009
+
3010
/**
 * Load media (image, audio or video) into a Buffer from a local path or an http(s) URL.
 *
 * URLs are checked with assertSafeUrl before they are requested. Redirects are followed
 * manually so that EVERY hop is checked, not just the URL the user typed: with automatic
 * redirect handling a harmless-looking URL could redirect to an address the guard
 * would have rejected.
 *
 * @param {string} pathOrUrl - Local file path or http:// / https:// URL.
 * @returns {Promise<Buffer>} The media bytes.
 * @throws {Error} `code === 'FETCH_FAILED'` for non-2xx responses, redirect loops or
 *   redirects to a non-http(s) target; `code === 'MISSING_FILE'` when the local file
 *   cannot be read; whatever assertSafeUrl throws for a rejected URL.
 */
async function fetchMediaBuffer(pathOrUrl) {
  if (pathOrUrl.startsWith('http://') || pathOrUrl.startsWith('https://')) {
    const MAX_REDIRECTS = 5;
    let currentUrl = pathOrUrl;

    for (let hop = 0; ; hop += 1) {
      await assertSafeUrl(currentUrl);
      const response = await fetch(currentUrl, { redirect: 'manual' });

      const location = response.headers.get('location');
      const isRedirect = response.status >= 300 && response.status < 400 && Boolean(location);
      if (isRedirect) {
        // Release the connection; the redirect body is not needed.
        await response.body?.cancel();

        const nextUrl = new URL(location, currentUrl);
        const allowedScheme = nextUrl.protocol === 'http:' || nextUrl.protocol === 'https:';
        if (hop >= MAX_REDIRECTS || !allowedScheme) {
          const err = new Error(
            allowedScheme
              ? `Failed to fetch media (too many redirects, limit ${MAX_REDIRECTS})`
              : `Failed to fetch media (redirect to unsupported URL scheme "${nextUrl.protocol}")`
          );
          err.code = 'FETCH_FAILED';
          err.details = { url: pathOrUrl, redirectedTo: nextUrl.toString(), status: response.status };
          throw err;
        }
        currentUrl = nextUrl.toString();
        continue;
      }

      if (!response.ok) {
        const err = new Error(`Failed to fetch media (${response.status} ${response.statusText})`);
        err.code = 'FETCH_FAILED';
        err.details = { url: pathOrUrl, status: response.status, statusText: response.statusText };
        throw err;
      }
      return Buffer.from(await response.arrayBuffer());
    }
  }

  try {
    return readFileSync(pathOrUrl);
  } catch (e) {
    const err = new Error(`Failed to read media file: ${pathOrUrl}`);
    err.code = 'MISSING_FILE';
    err.hint = 'Check the path or use a URL.';
    err.details = { path: pathOrUrl, cause: e?.message || String(e) };
    throw err;
  }
}
3033
+
3034
/**
 * Split the multi-angle output target into directory, file-name prefix and extension.
 *
 * A path without an extension is taken as a directory; otherwise its directory, base name
 * and extension are used. The extension falls back to `outputFormat`, then to 'jpg'.
 *
 * @param {string} outputPath - Output directory or file path.
 * @param {string} [outputFormat] - Preferred extension, with or without a leading dot.
 * @returns {{dir: string, prefix: string, ext: string}|null} null when no output path was given.
 */
function resolveMultiAngleOutputConfig(outputPath, outputFormat) {
  if (!outputPath) return null;

  // Removes the first '.' only, which is the leading dot of an extension.
  const withoutDot = (value) => value.replace('.', '');

  const pathExt = extname(outputPath);
  const fallbackExt = withoutDot(outputFormat || 'jpg');

  if (pathExt === '') {
    return { dir: outputPath, prefix: '', ext: fallbackExt };
  }
  return {
    dir: dirname(outputPath),
    prefix: basename(outputPath, pathExt),
    ext: withoutDot(pathExt) || fallbackExt
  };
}
3045
+
3046
/**
 * Download a URL and write the response body to a file.
 *
 * @param {string} url - Resource to download.
 * @param {string} filePath - Destination file (overwritten when it exists).
 * @returns {Promise<void>}
 * @throws {Error} When the response status is not OK.
 */
async function downloadUrlToFile(url, filePath) {
  const response = await fetch(url);
  if (response.ok) {
    const bytes = Buffer.from(await response.arrayBuffer());
    writeFileSync(filePath, bytes);
    return;
  }
  throw new Error(`Failed to download image: ${response.statusText}`);
}
3054
+
3055
/**
 * Detach an event handler from a client, using `off()` when the client
 * provides it and `removeListener()` otherwise.
 *
 * @param {object} client - Emitter-like object.
 * @param {string} event - Event name.
 * @param {Function} handler - The handler that was registered.
 */
function removeClientListener(client, event, handler) {
  const supportsOff = typeof client.off === 'function';
  if (!supportsOff) {
    client.removeListener(event, handler);
    return;
  }
  client.off(event, handler);
}
3062
+
3063
// Cached promise of the dynamically imported `execa` module (null until first use).
let execaPromise = null;

/**
 * Import `execa` on first use and hand out the same import promise afterwards.
 *
 * @returns {Promise<object>} The `execa` module namespace.
 */
async function loadExeca() {
  execaPromise = execaPromise || import('execa');
  return execaPromise;
}
3070
+
3071
/**
 * Locate a working ffmpeg binary.
 *
 * Uses FFMPEG_PATH when set (otherwise `ffmpeg`), runs it with `-version`, and checks
 * that the output mentions "ffmpeg" so that an arbitrary executable is not accepted.
 *
 * @returns {Promise<string>} The command or path to invoke.
 * @throws {Error} `code === 'MISSING_FFMPEG'` when the probe fails,
 *   `code === 'INVALID_FFMPEG'` when the probe output does not look like ffmpeg.
 */
async function ensureFfmpegAvailable() {
  const ffmpegPath = getEnv('FFMPEG_PATH') || 'ffmpeg';
  sanitizePath(ffmpegPath, 'FFMPEG_PATH');

  const ffmpegError = (message, code, hint) =>
    Object.assign(new Error(message), { code, hint, details: { ffmpegPath } });

  const probe = await runCommand(ffmpegPath, ['-version'], { captureOutput: true });
  const probeFailed = Boolean(probe.error) || probe.status !== 0;
  if (probeFailed) {
    throw ffmpegError(
      'ffmpeg is required to assemble the 360 video.',
      'MISSING_FFMPEG',
      'Install ffmpeg or set FFMPEG_PATH to a working ffmpeg binary.'
    );
  }

  // Verify the binary actually is ffmpeg (not an arbitrary executable)
  const versionText = (probe.stdout || '').toLowerCase();
  if (!versionText.includes('ffmpeg')) {
    throw ffmpegError(
      'FFMPEG_PATH does not point to an ffmpeg binary.',
      'INVALID_FFMPEG',
      'Ensure FFMPEG_PATH points to a real ffmpeg installation.'
    );
  }
  return ffmpegPath;
}
3093
+
3094
/**
 * Write a list file of `file '<path>'` / `duration <seconds>` lines, one pair per frame.
 *
 * Single quotes in paths are escaped as `'\''`. The last frame is listed once more
 * without a duration.
 *
 * @param {string} filePath - Where to write the list.
 * @param {string[]} frames - Frame image paths, in playback order.
 * @param {number} frameDuration - Seconds each frame is shown.
 */
function writeConcatList(filePath, frames, frameDuration) {
  const fileDirective = (framePath) => `file '${framePath.replace(/'/g, "'\\''")}'`;

  const lines = frames.flatMap((framePath) => [
    fileDirective(framePath),
    `duration ${frameDuration}`
  ]);
  if (frames.length > 0) {
    lines.push(fileDirective(frames[frames.length - 1]));
  }

  writeFileSync(filePath, lines.join('\n'));
}
3106
+
3107
/**
 * Tell whether a path refers to an existing regular file with at least one byte.
 * Any error while checking is reported as `false`.
 *
 * @param {string} filePath
 * @returns {boolean}
 */
function isNonEmptyFile(filePath) {
  try {
    if (existsSync(filePath)) {
      const info = statSync(filePath);
      return info.isFile() && info.size > 0;
    }
    return false;
  } catch {
    return false;
  }
}
3116
+
3117
/**
 * Run an external command through execa without a shell and without throwing.
 *
 * @param {string} command - Executable name or path.
 * @param {string[]} args - Argument vector passed as-is.
 * @param {object} [settings]
 * @param {boolean} [settings.captureOutput=false] - When true, stdout/stderr
 *   are piped and returned; otherwise they are inherited from this process.
 * @returns {Promise<{status: (number|null), error: (Error|null), stdout: string, stderr: string}>}
 *   `status` is the exit code, or null when the process produced none.
 *   `error` is set when execa threw, or when the run failed without an exit
 *   code; a plain non-zero exit keeps `error` null and is reported via `status`.
 */
async function runCommand(command, args, { captureOutput = false } = {}) {
  const stream = captureOutput ? 'pipe' : 'inherit';
  const options = { reject: false, stdout: stream, stderr: stream };

  try {
    const { execa } = await loadExeca();
    const result = await execa(command, args, options);
    const hasExitCode = Number.isInteger(result.exitCode);
    return {
      status: hasExitCode ? result.exitCode : null,
      // With `reject: false` execa resolves with the failure object instead of
      // throwing, so a run that never produced an exit code would otherwise
      // look like "no error" to callers.
      error: result.failed && !hasExitCode ? result : null,
      stdout: result.stdout || '',
      stderr: result.stderr || ''
    };
  } catch (error) {
    return {
      status: Number.isInteger(error?.exitCode) ? error.exitCode : null,
      error,
      stdout: error?.stdout || '',
      stderr: error?.stderr || ''
    };
  }
}
3145
+
3146
/**
 * Assemble still frames into a video with ffmpeg's concat demuxer.
 *
 * A temporary list file is written next to the output (`<output>.concat.txt`)
 * and removed again once ffmpeg has finished, whether or not it succeeded.
 *
 * @param {string} outputPath - Destination video path.
 * @param {string[]} frames - Frame image paths in playback order.
 * @param {number} fps - Output frame rate; each frame is shown for 1/fps seconds.
 * @returns {Promise<void>}
 * @throws {Error} code `FFMPEG_FAILED` when ffmpeg fails and no non-empty
 *   output file exists. Errors from sanitizePath / ensureFfmpegAvailable
 *   propagate unchanged.
 */
async function buildAngles360Video(outputPath, frames, fps) {
  sanitizePath(outputPath, '--angles-360-video output path');
  frames.forEach((f, i) => sanitizePath(f, `frame[${i}]`));
  const ffmpegPath = await ensureFfmpegAvailable();

  // The concat demuxer resolves relative `file` entries against the directory
  // of the list file, not the working directory, so relative frame paths must
  // be made absolute before they are written to a list stored elsewhere.
  const { resolve } = await import('path');
  const absoluteFrames = frames.map((frame) => resolve(frame));

  const tempListPath = outputPath.replace(/\.mp4$/i, '') + '.concat.txt';
  const frameDuration = 1 / fps;
  const args = [
    '-y',
    '-f', 'concat',
    '-safe', '0',
    '-i', tempListPath,
    '-r', String(fps),
    '-pix_fmt', 'yuv420p',
    outputPath
  ];

  let result;
  try {
    writeConcatList(tempListPath, absoluteFrames, frameDuration);
    result = await runCommand(ffmpegPath, args);
  } finally {
    // Best-effort removal of the temporary list; never mask the real outcome.
    try {
      unlinkSync(tempListPath);
    } catch {
      // The list may not have been created; nothing to clean up.
    }
  }

  if (result.error || result.status !== 0) {
    // ffmpeg sometimes exits non-zero even when the output file is usable.
    // Treat it as success if the output exists and is non-empty.
    if (isNonEmptyFile(outputPath)) {
      console.warn('Warning: ffmpeg exited non-zero, but output video exists and is non-empty. Continuing.');
      return;
    }
    const err = new Error('ffmpeg failed to build 360 video.');
    err.code = 'FFMPEG_FAILED';
    err.details = { outputPath };
    throw err;
  }
}
3177
+
3178
/**
 * Save the final frame of a video as an image using ffmpeg.
 *
 * ffmpeg reads the whole video while repeatedly overwriting the same output
 * image (`-update 1`), so the file left on disk is the last frame.
 *
 * @param {string} videoPath - Source video.
 * @param {string} outputImagePath - Where the extracted frame is written.
 * @returns {Promise<void>}
 * @throws {Error} code `FFMPEG_EXTRACT_FAILED` (with paths, captured output and
 *   exit status in `details`) when ffmpeg fails or the output is missing/empty.
 */
async function extractLastFrameFromVideo(videoPath, outputImagePath) {
  sanitizePath(videoPath, 'video path');
  sanitizePath(outputImagePath, 'output image path');
  const ffmpegPath = await ensureFfmpegAvailable();

  const ffmpegArgs = [
    '-i', videoPath,
    '-vf', 'select=gte(n\\,0)', // pass every frame through
    '-vsync', '0',
    '-update', '1', // keep rewriting one output file, leaving only the last frame
    '-q:v', '1', // highest quality setting
    '-y',
    outputImagePath
  ];

  const run = await runCommand(ffmpegPath, ffmpegArgs, { captureOutput: true });
  const succeeded = !run.error && run.status === 0 && isNonEmptyFile(outputImagePath);
  if (succeeded) {
    return;
  }

  // Dump everything that could help diagnose the failure before throwing.
  const stderr = run.stderr || '';
  const stdout = run.stdout || '';
  console.error('FFmpeg extraction failed:');
  console.error(' Video path:', videoPath);
  console.error(' Output path:', outputImagePath);
  console.error(' Exit code:', run.status);
  console.error(' Error:', run.error?.message || 'none');
  if (stderr) console.error(' Stderr:', stderr);
  if (stdout) console.error(' Stdout:', stdout);
  console.error(' Output file exists:', existsSync(outputImagePath));
  if (existsSync(outputImagePath)) {
    console.error(' Output file size:', statSync(outputImagePath).size);
  }

  const failure = new Error('Failed to extract last frame from video.');
  failure.code = 'FFMPEG_EXTRACT_FAILED';
  failure.details = { videoPath, outputImagePath, stderr, stdout, status: run.status };
  throw failure;
}
3218
+
3219
/**
 * Concatenate video clips into one H.264 video with ffmpeg, optionally
 * replacing the audio with a separate audio file.
 *
 * A temporary list file is written next to the output (`<output>.concat.txt`)
 * and removed again once ffmpeg has finished, whether or not it succeeded.
 *
 * @param {string} outputPath - Destination video path.
 * @param {string[]} clips - Clip paths in playback order.
 * @param {object} [settings]
 * @param {string|null} [settings.audioPath=null] - Audio file mapped as the
 *   output's audio track; the output is then cut to the shorter input.
 * @param {number|null} [settings.audioStart=null] - Offset passed as `-ss`
 *   for the audio input; only applied when finite and greater than zero.
 * @returns {Promise<void>}
 * @throws {Error} code `FFMPEG_FAILED` when ffmpeg fails and no non-empty
 *   output file exists. Errors from sanitizePath / ensureFfmpegAvailable
 *   propagate unchanged.
 */
async function buildConcatVideoFromClips(outputPath, clips, { audioPath = null, audioStart = null } = {}) {
  sanitizePath(outputPath, '--output path');
  clips.forEach((c, i) => sanitizePath(c, `clip[${i}]`));
  if (audioPath) sanitizePath(audioPath, '--concat-audio');
  const ffmpegPath = await ensureFfmpegAvailable();

  // The concat demuxer resolves relative `file` entries against the directory
  // of the list file, not the working directory. The list lives next to the
  // output, so relative clip paths must be made absolute first.
  const { resolve } = await import('path');
  const tempListPath = outputPath.replace(/\.mp4$/i, '') + '.concat.txt';
  const lines = clips.map((clip) => `file '${resolve(clip).replace(/'/g, "'\\''")}'`);

  const args = [
    '-y',
    '-f', 'concat',
    '-safe', '0',
    '-i', tempListPath,
  ];
  if (audioPath) {
    // `-ss` must precede the `-i` it applies to, so it seeks the audio input.
    if (Number.isFinite(audioStart) && audioStart > 0) {
      args.push('-ss', String(audioStart));
    }
    args.push('-i', audioPath, '-map', '0:v:0', '-map', '1:a:0');
  }
  args.push(
    '-c:v', 'libx264',
    '-pix_fmt', 'yuv420p',
    '-c:a', 'aac',
    '-b:a', '192k',
    '-movflags', '+faststart'
  );
  if (audioPath) args.push('-shortest');
  args.push(outputPath);

  let result;
  try {
    writeFileSync(tempListPath, lines.join('\n'));
    result = await runCommand(ffmpegPath, args);
  } finally {
    // Best-effort removal of the temporary list; never mask the real outcome.
    try {
      unlinkSync(tempListPath);
    } catch {
      // The list may not have been created; nothing to clean up.
    }
  }

  if (result.error || result.status !== 0) {
    // ffmpeg sometimes exits non-zero even when the output file is usable.
    if (isNonEmptyFile(outputPath)) {
      console.warn('Warning: ffmpeg exited non-zero, but output video exists and is non-empty. Continuing.');
      return;
    }
    const err = new Error('ffmpeg failed to concatenate 360 video clips.');
    err.code = 'FFMPEG_FAILED';
    err.details = { outputPath, clips: clips?.length ?? null };
    throw err;
  }
}
3262
+
3263
/**
 * Start an image-edit project and wait, via client events, until the expected
 * number of jobs has completed.
 *
 * Listeners for JOB_COMPLETED / JOB_FAILED are registered before the project
 * is created. Until the project id is known, the first event seen supplies it;
 * afterwards events carrying a different project id are ignored.
 *
 * @param {object} client - Event-emitting client exposing `on`, an unsubscribe
 *   method, and `createImageEditProject`.
 * @param {object} editConfig - Passed unchanged to `createImageEditProject`.
 * @param {number} expectedCount - Number of completed jobs to wait for.
 * @param {Function} log - Called with a progress message per completed job.
 * @param {number} timeoutMs - Overall time limit in milliseconds.
 * @param {string} [label] - Optional tag appended to progress messages.
 * @returns {Promise<{results: object[], projectId: string}>} Collected
 *   `{ imageUrl, seed, jobIndex, projectId }` entries in completion order.
 * @throws {Error} When project creation fails or reports an error, when a job
 *   fails, or when the timeout elapses.
 */
async function runImageEditProjectWithEvents(client, editConfig, expectedCount, log, timeoutMs, label) {
  const results = [];
  let completed = 0;
  let projectId = null;

  let resolvePromise;
  let rejectPromise;
  const completionPromise = new Promise((resolve, reject) => {
    resolvePromise = resolve;
    rejectPromise = reject;
  });
  // A job failure or the timeout can reject this promise while project
  // creation is still pending, i.e. before any caller is awaiting it. Mark the
  // rejection as observed so it does not surface as an unhandled rejection;
  // the promise returned to the caller still rejects with the same error.
  completionPromise.catch(() => {});

  const onCompleted = (data) => {
    if (projectId && data.projectId !== projectId) return;
    if (!projectId) projectId = data.projectId;
    const jobData = data.job?.data || {};
    results.push({
      imageUrl: data.imageUrl,
      seed: jobData.seed,
      jobIndex: data.jobIndex,
      projectId: data.projectId
    });
    completed++;
    log(`Image ${completed}/${expectedCount}${label ? ` (${label})` : ''} completed`);
    if (completed >= expectedCount) {
      cleanup();
      resolvePromise({ results, projectId });
    }
  };

  const onFailed = (data) => {
    if (projectId && data.projectId !== projectId) return;
    if (!projectId) projectId = data.projectId;
    cleanup();
    rejectPromise(new Error(data.error || 'Job failed'));
  };

  // Stops the timer and detaches both listeners; safe to call more than once.
  const cleanup = () => {
    clearTimeout(timeout);
    removeClientListener(client, ClientEvent.JOB_COMPLETED, onCompleted);
    removeClientListener(client, ClientEvent.JOB_FAILED, onFailed);
  };

  const timeout = setTimeout(() => {
    cleanup();
    rejectPromise(new Error(`Timeout after ${timeoutMs / 1000}s`));
  }, timeoutMs);

  client.on(ClientEvent.JOB_COMPLETED, onCompleted);
  client.on(ClientEvent.JOB_FAILED, onFailed);

  try {
    const projectResult = await client.createImageEditProject(editConfig);
    projectId = projectResult?.project?.id || projectId;

    // Check for errors in the response (e.g., insufficient tokens)
    if (projectResult?.error || projectResult?.message) {
      throw new Error(projectResult.error || projectResult.message);
    }
    if (!projectId) {
      throw new Error('Failed to create project: no project ID returned');
    }
  } catch (error) {
    // Single cleanup point for every creation failure.
    cleanup();
    throw error;
  }

  return completionPromise;
}
3334
+
3335
/**
 * Render one or more camera angles of the first context image and, when
 * requested, stitch a looping 360 video from them.
 *
 * Steps, in order:
 *   1. Run one image-edit project per azimuth (all azimuths for a 360 run,
 *      otherwise just `options.azimuth`) and download results when an output
 *      location is available.
 *   2. When a 360 video is requested, generate one image-to-video clip between
 *      each pair of neighbouring frames (wrapping from the last frame back to
 *      the first), then concatenate the clips.
 *   3. Persist the render summary and print either JSON or plain text.
 *
 * Reads its settings from the module-level `options` / `openclawConfig`.
 *
 * @param {object} client - Client used for image-edit projects and
 *   `createVideoProject`.
 * @param {Function} log - Progress logger forwarded to the image-edit runner.
 * @returns {Promise<void>}
 * @throws {Error} code `MISSING_FRAMES` when a video is requested but no frame
 *   was saved locally; also rethrows project, download and ffmpeg errors.
 */
async function runMultiAngleFlow(client, log) {
  const contextBuffer = await fetchMediaBuffer(options.contextImages[0]);
  const azimuths = options.angles360
    ? MULTI_ANGLE_AZIMUTHS.map((a) => a.key)
    : [options.azimuth];
  const modelDefaults = getModelDefaults(options.model, openclawConfig);
  const steps = options.steps ?? modelDefaults?.steps ?? (options.model.includes('lightning') ? 4 : 20);
  const guidance = options.guidance ?? modelDefaults?.guidance ?? (options.model.includes('lightning') ? 1.0 : 4.0);

  let outputConfig = resolveMultiAngleOutputConfig(options.output, options.outputFormat);
  if (options.output && !outputConfig && !options.quiet) {
    console.error('Warning: Could not resolve output path for multi-angle output.');
  }
  if (options.angles360Video && !outputConfig) {
    // The video needs local frames even when no output location was given.
    outputConfig = {
      dir: mkdtempSync(join(tmpdir(), 'sogni-angles-')),
      prefix: 'angles-360',
      ext: (options.outputFormat || 'jpg').replace('.', '')
    };
  }
  let videoOutputPath = null;
  if (options.angles360Video) {
    if (typeof options.angles360Video === 'string') {
      videoOutputPath = options.angles360Video;
    } else if (options.output && outputConfig && outputConfig.ext === 'mp4') {
      videoOutputPath = options.output;
    } else if (outputConfig) {
      const baseName = outputConfig.prefix ? outputConfig.prefix : 'angles-360';
      videoOutputPath = join(outputConfig.dir, `${baseName}.mp4`);
    } else {
      videoOutputPath = join(process.cwd(), 'angles-360.mp4');
    }
    if (!videoOutputPath.toLowerCase().endsWith('.mp4')) {
      videoOutputPath += '.mp4';
    }
  }
  if (outputConfig) {
    // An .mp4 output names the video; the still frames need an image extension.
    if (outputConfig.ext === 'mp4') {
      outputConfig.ext = (options.outputFormat || 'jpg').replace('.', '');
    }
    if (!existsSync(outputConfig.dir)) {
      mkdirSync(outputConfig.dir, { recursive: true });
    }
  }

  const angleResults = [];
  const videoFrames = [];
  for (const azimuth of azimuths) {
    const prompt = buildMultiAnglePrompt({
      azimuth,
      elevation: options.elevation,
      distance: options.distance,
      description: options.angleDescription
    });
    const editConfig = {
      modelId: options.model,
      positivePrompt: prompt,
      contextImages: [contextBuffer],
      numberOfMedia: options.count,
      width: options.width,
      height: options.height,
      steps,
      guidance,
      tokenType: options.tokenType || 'spark',
      waitForCompletion: false,
      disableNSFWFilter: options.noFilter === true
    };
    if (options.outputFormat) {
      editConfig.outputFormat = options.outputFormat;
    }
    if (options.sampler) {
      editConfig.sampler = options.sampler;
    }
    if (options.scheduler) {
      editConfig.scheduler = options.scheduler;
    }
    if (options.loras.length > 0) {
      editConfig.loras = options.loras;
    }
    if (options.loraStrengths.length > 0) {
      editConfig.loraStrengths = options.loraStrengths;
    }
    if (options.seed !== null && options.seed !== undefined) {
      editConfig.seed = options.seed;
    }

    const { results } = await runImageEditProjectWithEvents(
      client,
      editConfig,
      options.count,
      log,
      options.timeout,
      azimuth
    );
    const urls = results.map((r) => r.imageUrl).filter(Boolean);
    const seeds = results.map((r) => r.seed ?? options.seed);

    if (outputConfig) {
      const safeAzimuth = azimuth.replace(/[^a-z0-9-]/gi, '-');
      for (let i = 0; i < urls.length; i++) {
        const suffix = urls.length > 1 ? `-${i + 1}` : '';
        const prefix = outputConfig.prefix ? `${outputConfig.prefix}-` : '';
        const filename = `${prefix}${safeAzimuth}${suffix}.${outputConfig.ext}`;
        const filePath = join(outputConfig.dir, filename);
        await downloadUrlToFile(urls[i], filePath);
        // Only the first image of each angle becomes a video keyframe.
        if (options.angles360Video && i === 0) {
          videoFrames.push(filePath);
        }
      }
    }

    angleResults.push({
      azimuth,
      elevation: options.elevation,
      distance: options.distance,
      prompt,
      urls,
      seeds
    });
  }

  const renderInfo = {
    timestamp: new Date().toISOString(),
    type: options.angles360 ? 'multi-angle-360' : 'multi-angle',
    model: options.model,
    width: options.width,
    height: options.height,
    count: options.count,
    tokenType: options.tokenType || 'spark',
    seed: options.seed,
    seedStrategy: options.seedStrategy || null,
    outputFormat: options.outputFormat || null,
    sampler: options.sampler || null,
    scheduler: options.scheduler || null,
    loras: options.loras.length > 0 ? options.loras : null,
    loraStrengths: options.loraStrengths.length > 0 ? options.loraStrengths : null,
    angles: angleResults,
    localPath: options.output || null
  };

  let videoModelId = null;
  if (videoOutputPath) {
    if (videoFrames.length === 0) {
      const err = new Error('No local frames available to assemble 360 video.');
      err.code = 'MISSING_FRAMES';
      err.hint = 'Ensure the frames were downloaded locally (provide --output dir or check permissions).';
      throw err;
    }
    const clipDir = mkdtempSync(join(tmpdir(), 'sogni-angles-clips-'));
    try {
      videoModelId = resolveVideoModelAlias(options.videoModel || openclawConfig?.videoModels?.i2v || VIDEO_WORKFLOW_DEFAULT_MODELS.i2v, 'i2v');
      const videoDefaults = getModelDefaults(videoModelId, openclawConfig);
      const videoDimensionRules = videoDimensionRulesFromDefaults(videoDefaults);
      const videoSteps = options.steps ?? videoDefaults?.steps;
      const videoGuidance = options.guidance ?? videoDefaults?.guidance;
      const segmentCount = videoFrames.length;
      // The requested length covers the whole loop, so split it per segment.
      let segmentDuration = options.duration;
      let segmentFrames = null;
      if (options.frames) {
        segmentFrames = Math.max(17, Math.round(options.frames / segmentCount));
      } else {
        segmentDuration = Math.max(1, Math.round(options.duration / segmentCount));
      }
      const videoPrompt = options.angleDescription || options.prompt || 'smooth camera rotation';
      const clipPaths = [];

      for (let i = 0; i < videoFrames.length; i++) {
        const startPath = videoFrames[i];
        // Wrap around so the final clip returns to the first frame.
        const endPath = videoFrames[(i + 1) % videoFrames.length];

        // Validate i2v reference resizing constraints for this clip
        let startBuffer = readFileSync(startPath);
        let endBuffer = readFileSync(endPath);
        const startDims = getImageDimensionsFromBuffer(startBuffer);
        let clipWidth = options.width;
        let clipHeight = options.height;
        let needsResize = false;

        if (startDims?.width && startDims?.height) {
          const predicted = predictSharpInsideResizeDims(startDims.width, startDims.height, clipWidth, clipHeight);
          if (predicted && (predicted.width % videoDimensionRules.dimensionMultiple !== 0 || predicted.height % videoDimensionRules.dimensionMultiple !== 0)) {
            // The resized reference will miss the model divisor, so adjust.
            const candidate = pickCompatibleI2vBoundingBox(startDims.width, startDims.height, clipWidth, clipHeight, { rules: videoDimensionRules });
            if (!candidate) {
              // No perfect match - will pre-resize the reference frames
              needsResize = true;
              if (i === 0 && !options.quiet) {
                console.error(
                  `360 video reference frames will be pre-resized to model-compatible dimensions ` +
                  `because no compatible bounding box exists.`
                );
              }
            } else {
              // Auto-adjust to compatible size
              if (!cliSet.width && !cliSet.height && !options.strictSize) {
                clipWidth = candidate.width;
                clipHeight = candidate.height;
                if (i === 0 && !options.quiet) {
                  console.error(
                    `Auto-adjusted 360 video clip size from ${options.width}x${options.height} ` +
                    `to ${clipWidth}x${clipHeight} so resized reference is divisible by ${videoDimensionRules.dimensionMultiple} ` +
                    `(would have been ${predicted.width}x${predicted.height}).`
                  );
                }
              } else if (options.strictSize) {
                fatalCliError(
                  `Reference frame ${startDims.width}x${startDims.height} would resize to ${predicted.width}x${predicted.height}, ` +
                  `but both dimensions must be divisible by ${videoDimensionRules.dimensionMultiple}.`,
                  {
                    code: 'INVALID_VIDEO_SIZE',
                    details: {
                      clipIndex: i + 1,
                      reference: { width: startDims.width, height: startDims.height },
                      requested: { width: clipWidth, height: clipHeight },
                      resized: predicted
                    },
                    hint: `Try: --width ${candidate.width} --height ${candidate.height} (or omit --strict-size)`
                  }
                );
              } else {
                // User specified explicit dimensions but not --strict-size, auto-adjust anyway
                clipWidth = candidate.width;
                clipHeight = candidate.height;
                if (i === 0 && !options.quiet) {
                  console.error(
                    `Warning: Adjusted 360 video clip size from ${options.width}x${options.height} ` +
                    `to ${clipWidth}x${clipHeight} because resized reference would be ${predicted.width}x${predicted.height} ` +
                    `(not divisible by ${videoDimensionRules.dimensionMultiple}). Use --strict-size to fail instead.`
                  );
                }
              }
            }
          }
        }

        // Pre-resize reference frames if needed
        if (needsResize && startDims?.width && startDims?.height) {
          startBuffer = await resizeImageBufferForVideo(startBuffer, startDims.width, startDims.height, videoDimensionRules);
          const endDims = getImageDimensionsFromBuffer(endBuffer);
          if (endDims?.width && endDims?.height) {
            endBuffer = await resizeImageBufferForVideo(endBuffer, endDims.width, endDims.height, videoDimensionRules);
          }
          if (i === 0 && !options.quiet) {
            // The dimension probe can come back empty (it is null-guarded
            // everywhere else), so never let this log line crash the render.
            const resizedDims = getImageDimensionsFromBuffer(startBuffer);
            const resizedLabel = resizedDims?.width && resizedDims?.height
              ? `${resizedDims.width}x${resizedDims.height}`
              : 'model-compatible dimensions';
            console.error(
              `Pre-resized 360 video frames from ${startDims.width}x${startDims.height} to ${resizedLabel} ` +
              `(divisible by ${videoDimensionRules.dimensionMultiple}) to ensure i2v compatibility.`
            );
          }
        }

        const clipConfig = {
          modelId: videoModelId,
          positivePrompt: videoPrompt,
          negativePrompt: '',
          stylePrompt: '',
          numberOfMedia: 1,
          referenceImage: startBuffer,
          referenceImageEnd: endBuffer,
          fps: options.fps,
          width: clipWidth,
          height: clipHeight,
          tokenType: options.tokenType || 'spark',
          waitForCompletion: true,
          disableNSFWFilter: options.noFilter === true
        };
        if (segmentFrames) {
          clipConfig.frames = segmentFrames;
        } else {
          clipConfig.duration = segmentDuration;
        }
        if (videoSteps) {
          clipConfig.steps = videoSteps;
        }
        if (videoGuidance !== null && videoGuidance !== undefined) {
          clipConfig.guidance = videoGuidance;
        }
        if (options.autoResizeVideoAssets !== null) {
          clipConfig.autoResizeVideoAssets = options.autoResizeVideoAssets;
        }
        const clipResult = await client.createVideoProject(clipConfig);

        // Check for errors in the response (e.g., insufficient tokens)
        if (clipResult?.error || clipResult?.message) {
          throw new Error(clipResult.error || clipResult.message);
        }

        const clipUrl = clipResult?.videoUrls?.[0];
        if (!clipUrl) {
          throw new Error('No video URL returned for 360 segment.');
        }
        const clipPath = join(clipDir, `segment-${i + 1}.mp4`);
        await downloadUrlToFile(clipUrl, clipPath);
        clipPaths.push(clipPath);
      }

      await buildConcatVideoFromClips(videoOutputPath, clipPaths);
    } finally {
      // The per-segment clips live in a private temp directory that is never
      // reported to the user; remove them (best effort) once they are no
      // longer needed, on success and on failure alike.
      try {
        for (const entry of readdirSync(clipDir)) {
          unlinkSync(join(clipDir, entry));
        }
        rmdirSync(clipDir);
      } catch {
        // Cleanup problems must not hide the render result or its error.
      }
    }
    if (!options.quiet) {
      console.error(`Saved 360 video: ${videoOutputPath}`);
    }
  }
  if (videoOutputPath) {
    renderInfo.videoPath = videoOutputPath;
    renderInfo.videoModel = videoModelId;
  }
  saveLastRender(renderInfo);

  if (options.json) {
    console.log(JSON.stringify({
      success: true,
      type: renderInfo.type,
      model: renderInfo.model,
      width: renderInfo.width,
      height: renderInfo.height,
      count: renderInfo.count,
      tokenType: renderInfo.tokenType,
      seed: renderInfo.seed,
      seedStrategy: renderInfo.seedStrategy,
      outputFormat: renderInfo.outputFormat,
      sampler: renderInfo.sampler,
      scheduler: renderInfo.scheduler,
      loras: renderInfo.loras,
      loraStrengths: renderInfo.loraStrengths,
      videoPath: renderInfo.videoPath || null,
      videoModel: renderInfo.videoModel || null,
      angles: angleResults
    }));
  } else {
    if (videoOutputPath) {
      console.log(`video: ${videoOutputPath}`);
    }
    angleResults.forEach((angle) => {
      angle.urls.forEach((url, index) => {
        const suffix = angle.urls.length > 1 ? `#${index + 1}` : '';
        console.log(`${angle.azimuth}${suffix}: ${url}`);
      });
    });
  }
}
3675
+
3676
/**
 * Pre-flight check that the account can afford the requested video render.
 *
 * Only runs for video renders that are not cost-estimate-only. The check is
 * best effort: if the balance or the cost estimate cannot be obtained, or the
 * values are not finite numbers, it returns without blocking the render.
 *
 * @param {object} client - Client exposing `getBalance` and `estimateVideoCost`.
 * @param {Function} log - Receives warning messages (skipped in quiet mode).
 * @returns {Promise<void>}
 * @throws {Error} The error built by buildBalanceError when the balance is
 *   zero/negative or lower than the estimated cost.
 */
async function ensureSufficientVideoBalance(client, log) {
  const checkApplies = options.video && !options.estimateVideoCost;
  if (!checkApplies) return;

  const tokenType = options.tokenType || 'spark';
  const tokenLabel = tokenType.toUpperCase();

  let balance;
  try {
    balance = await client.getBalance();
  } catch (balanceError) {
    if (!options.quiet) {
      log(`Warning: Could not fetch balance (${balanceError?.message || 'error'})`);
    }
    return;
  }

  const balanceKey = tokenType === 'sogni' ? 'sogni' : 'spark';
  const available = balance[balanceKey];
  if (!Number.isFinite(available)) return;
  if (available <= 0) {
    throw buildBalanceError(
      `Insufficient ${tokenLabel} balance (have ${formatTokenValue(available)}).`,
      { tokenType, available }
    );
  }

  const modelDefaults = getModelDefaults(options.model, openclawConfig);
  const steps = resolveVideoSteps(options.model, modelDefaults, options.steps);
  const stepsUsable = Number.isFinite(steps) && steps > 0;
  if (!stepsUsable) return;

  // Length is expressed either as a frame count or as a duration, never both.
  const estimateRequest = {
    modelId: options.model,
    width: options.width,
    height: options.height,
    fps: options.fps,
    steps,
    numberOfMedia: options.count,
    tokenType
  };
  if (options.frames) {
    estimateRequest.frames = options.frames;
  } else {
    estimateRequest.duration = options.duration;
  }

  let estimate;
  try {
    estimate = await client.estimateVideoCost(estimateRequest);
  } catch (estimateError) {
    if (!options.quiet) {
      log(`Warning: Could not estimate video cost (${estimateError?.message || 'error'})`);
    }
    return;
  }

  const required = parseCostEstimate(estimate, tokenType);
  const shortfall = Number.isFinite(required) && available < required;
  if (shortfall) {
    throw buildBalanceError(
      `Insufficient ${tokenLabel} balance for video render (need ~${formatTokenValue(required)}, ` +
      `have ${formatTokenValue(available)}).`,
      { tokenType, available, required }
    );
  }
}
3729
+
3730
// ---------------------------------------------------------------------------
// Token auto-fallback: a requested token type of 'auto' starts out as 'spark'.
// `_isAutoToken` records that 'auto' was requested so that an insufficient
// balance error can be retried with 'sogni' (retry logic lives outside this
// section).
// ---------------------------------------------------------------------------
const _isAutoToken = options.tokenType === 'auto';
if (_isAutoToken) options.tokenType = 'spark';
3738
+
3739
+ async function main() {
3740
+ let exitCode = 0;
3741
+ const log = options.quiet ? () => {} : console.error.bind(console);
3742
+ let client = null;
3743
+
3744
+ try {
3745
+ if (options.showVersion) {
3746
+ if (options.json) {
3747
+ console.log(JSON.stringify({
3748
+ success: true,
3749
+ type: 'version',
3750
+ name: 'sogni-creative-agent-skill',
3751
+ version: PACKAGE_VERSION,
3752
+ timestamp: new Date().toISOString()
3753
+ }));
3754
+ } else {
3755
+ console.log(PACKAGE_VERSION);
3756
+ }
3757
+ return;
3758
+ }
3759
+
3760
+ // --- Utility commands (no Sogni auth required) ---
3761
+
3762
+ // Memory commands
3763
+ if (options.memoryAction) {
3764
+ const jsonOut = options.json || JSON_ERROR_MODE;
3765
+ if (options.memoryAction === 'list') {
3766
+ const memories = loadMemories();
3767
+ if (jsonOut) {
3768
+ console.log(JSON.stringify({ success: true, type: 'memory-list', memories, timestamp: new Date().toISOString() }));
3769
+ } else {
3770
+ if (memories.length === 0) { console.log('No memories saved.'); }
3771
+ else { memories.forEach(m => console.log(` ${m.key}: ${m.value} [${m.category || 'preference'}]`)); }
3772
+ }
3773
+ } else if (options.memoryAction === 'get') {
3774
+ const memories = loadMemories();
3775
+ const found = memories.find(m => m.key === options.memoryKey);
3776
+ if (jsonOut) {
3777
+ console.log(JSON.stringify({ success: true, type: 'memory-get', key: options.memoryKey, found: !!found, memory: found || null, timestamp: new Date().toISOString() }));
3778
+ } else {
3779
+ console.log(found ? `${found.key}: ${found.value}` : `Memory "${options.memoryKey}" not found.`);
3780
+ }
3781
+ } else if (options.memoryAction === 'set') {
3782
+ const action = memorySet(options.memoryKey, options.memoryValue, options.memoryCategory || 'preference');
3783
+ if (jsonOut) {
3784
+ console.log(JSON.stringify({ success: true, type: 'memory-set', action, key: options.memoryKey, value: options.memoryValue, timestamp: new Date().toISOString() }));
3785
+ } else {
3786
+ console.log(`Memory "${options.memoryKey}" ${action}.`);
3787
+ }
3788
+ } else if (options.memoryAction === 'remove') {
3789
+ const removed = memoryRemove(options.memoryKey);
3790
+ if (jsonOut) {
3791
+ console.log(JSON.stringify({ success: true, type: 'memory-remove', removed, key: options.memoryKey, timestamp: new Date().toISOString() }));
3792
+ } else {
3793
+ console.log(removed ? `Memory "${options.memoryKey}" removed.` : `Memory "${options.memoryKey}" not found.`);
3794
+ }
3795
+ }
3796
+ return;
3797
+ }
3798
+
3799
+ // Personality commands
3800
+ if (options.personalityAction) {
3801
+ const jsonOut = options.json || JSON_ERROR_MODE;
3802
+ if (options.personalityAction === 'get') {
3803
+ const text = loadPersonality();
3804
+ if (jsonOut) {
3805
+ console.log(JSON.stringify({ success: true, type: 'personality-get', personality: text, timestamp: new Date().toISOString() }));
3806
+ } else {
3807
+ console.log(text || '(no personality set — using default)');
3808
+ }
3809
+ } else if (options.personalityAction === 'set') {
3810
+ savePersonality(options.personalityText);
3811
+ if (jsonOut) {
3812
+ console.log(JSON.stringify({ success: true, type: 'personality-set', personality: options.personalityText, timestamp: new Date().toISOString() }));
3813
+ } else {
3814
+ console.log('Personality saved.');
3815
+ }
3816
+ } else if (options.personalityAction === 'clear') {
3817
+ clearPersonality();
3818
+ if (jsonOut) {
3819
+ console.log(JSON.stringify({ success: true, type: 'personality-clear', timestamp: new Date().toISOString() }));
3820
+ } else {
3821
+ console.log('Personality cleared.');
3822
+ }
3823
+ }
3824
+ return;
3825
+ }
3826
+
3827
+ // Persona commands (non-generate)
3828
+ if (options.personaAction && options.personaAction !== 'generate') {
3829
+ const jsonOut = options.json || JSON_ERROR_MODE;
3830
+ if (options.personaAction === 'list') {
3831
+ const personas = loadPersonas();
3832
+ if (jsonOut) {
3833
+ console.log(JSON.stringify({ success: true, type: 'persona-list', personas, timestamp: new Date().toISOString() }));
3834
+ } else {
3835
+ if (personas.length === 0) { console.log('No personas saved.'); }
3836
+ else { personas.forEach(p => console.log(` ${p.name} (${p.relationship}) — ${p.description || 'no description'}${p.voiceClipPath ? ' [has voice]' : ''}`)); }
3837
+ }
3838
+ } else if (options.personaAction === 'add') {
3839
+ const photoPath = options.personaPhoto || options.refImage;
3840
+ if (!photoPath) {
3841
+ fatalCliError('--persona-add requires a reference photo (--ref <path>).', { code: 'INVALID_ARGUMENT' });
3842
+ }
3843
+ const persona = addPersona({
3844
+ name: options.personaName,
3845
+ relationship: options.personaRelationship,
3846
+ description: options.personaDescription,
3847
+ tags: options.personaTags,
3848
+ voice: options.personaVoice,
3849
+ photoPath,
3850
+ voiceClipPath: options.personaVoiceClip
3851
+ });
3852
+ if (jsonOut) {
3853
+ console.log(JSON.stringify({ success: true, type: 'persona-add', persona, timestamp: new Date().toISOString() }));
3854
+ } else {
3855
+ console.log(`Persona "${persona.name}" saved (${persona.relationship}).`);
3856
+ if (persona.photoPath) console.log(` Photo: ${persona.photoPath}`);
3857
+ if (persona.voiceClipPath) console.log(` Voice: ${persona.voiceClipPath}`);
3858
+ }
3859
+ } else if (options.personaAction === 'remove') {
3860
+ const removed = removePersona(options.personaName);
3861
+ if (jsonOut) {
3862
+ console.log(JSON.stringify({ success: true, type: 'persona-remove', removed, name: options.personaName, timestamp: new Date().toISOString() }));
3863
+ } else {
3864
+ console.log(removed ? `Persona "${options.personaName}" removed.` : `Persona "${options.personaName}" not found.`);
3865
+ }
3866
+ } else if (options.personaAction === 'resolve') {
3867
+ const persona = resolvePersonaByName(options.personaName);
3868
+ if (jsonOut) {
3869
+ console.log(JSON.stringify({ success: true, type: 'persona-resolve', found: !!persona, persona: persona || null, timestamp: new Date().toISOString() }));
3870
+ } else {
3871
+ if (!persona) { console.log(`Persona "${options.personaName}" not found.`); }
3872
+ else {
3873
+ console.log(` Name: ${persona.name}`);
3874
+ console.log(` Relationship: ${persona.relationship}`);
3875
+ if (persona.description) console.log(` Description: ${persona.description}`);
3876
+ if (persona.tags?.length) console.log(` Tags: ${persona.tags.join(', ')}`);
3877
+ if (persona.voice) console.log(` Voice: ${persona.voice}`);
3878
+ if (persona.photoPath) console.log(` Photo: ${persona.photoPath}`);
3879
+ if (persona.voiceClipPath) console.log(` Voice clip: ${persona.voiceClipPath}`);
3880
+ }
3881
+ }
3882
+ }
3883
+ return;
3884
+ }
3885
+
3886
+ if (options._resolvedPersona) {
3887
+ const persona = options._resolvedPersona;
3888
+ if (persona.photoPath && existsSync(persona.photoPath)) {
3889
+ log(`Using persona "${persona.name}" (${persona.relationship}) ${options.video ? 'photo as reference frame' : 'photo as context'}`);
3890
+ }
3891
+ if (options.video && options.referenceAudioIdentity) {
3892
+ log(`Using persona "${options._voicePersonaResolvedName || persona.name}" voice identity`);
3893
+ }
3894
+ }
3895
+
3896
+ if (options.extractLastFrame) {
3897
+ const videoPath = sanitizePath(options.extractLastFrame, '--extract-last-frame video');
3898
+ const outputPath = sanitizePath(options.extractLastFrameOutput, '--extract-last-frame output');
3899
+ if (!existsSync(videoPath)) {
3900
+ const err = new Error(`Video file not found: ${videoPath}`);
3901
+ err.code = 'FILE_NOT_FOUND';
3902
+ throw err;
3903
+ }
3904
+ await extractLastFrameFromVideo(videoPath, outputPath);
3905
+ if (options.json || JSON_ERROR_MODE) {
3906
+ console.log(JSON.stringify({
3907
+ success: true,
3908
+ type: 'extract-last-frame',
3909
+ outputPath,
3910
+ timestamp: new Date().toISOString()
3911
+ }));
3912
+ } else {
3913
+ console.log(`Extracted last frame to: ${outputPath}`);
3914
+ }
3915
+ return;
3916
+ }
3917
+
3918
+ if (options.concatVideos) {
3919
+ const outputPath = sanitizePath(options.concatVideos, '--concat-videos output');
3920
+ const clips = options.concatVideosClips.map((c, i) => sanitizePath(c, `clip[${i}]`));
3921
+ const concatAudio = options.concatAudio ? sanitizePath(options.concatAudio, '--concat-audio') : null;
3922
+ for (const clip of clips) {
3923
+ if (!existsSync(clip)) {
3924
+ const err = new Error(`Clip file not found: ${clip}`);
3925
+ err.code = 'FILE_NOT_FOUND';
3926
+ throw err;
3927
+ }
3928
+ }
3929
+ if (concatAudio && !existsSync(concatAudio)) {
3930
+ const err = new Error(`Audio file not found: ${concatAudio}`);
3931
+ err.code = 'FILE_NOT_FOUND';
3932
+ throw err;
3933
+ }
3934
+ await buildConcatVideoFromClips(outputPath, clips, {
3935
+ audioPath: concatAudio,
3936
+ audioStart: options.concatAudioStart
3937
+ });
3938
+ if (options.json || JSON_ERROR_MODE) {
3939
+ console.log(JSON.stringify({
3940
+ success: true,
3941
+ type: 'concat-videos',
3942
+ outputPath,
3943
+ clipCount: clips.length,
3944
+ audioPath: concatAudio || null,
3945
+ audioStart: options.concatAudioStart ?? null,
3946
+ timestamp: new Date().toISOString()
3947
+ }));
3948
+ } else {
3949
+ console.log(`Concatenated ${clips.length} clips to: ${outputPath}${concatAudio ? ` with audio ${concatAudio}` : ''}`);
3950
+ }
3951
+ return;
3952
+ }
3953
+
3954
+ if (options.listMedia) {
3955
+ const mediaType = options.listMedia;
3956
+ const baseDir = MEDIA_INBOUND_DIR;
3957
+
3958
+ const IMAGE_EXTS = new Set(['.jpg', '.jpeg', '.png', '.webp', '.gif']);
3959
+ const AUDIO_EXTS = new Set(['.m4a', '.mp3', '.wav', '.ogg']);
3960
+
3961
+ let allowedExts;
3962
+ if (mediaType === 'images') allowedExts = IMAGE_EXTS;
3963
+ else if (mediaType === 'audio') allowedExts = AUDIO_EXTS;
3964
+ else allowedExts = new Set([...IMAGE_EXTS, ...AUDIO_EXTS]);
3965
+
3966
+ const files = [];
3967
+ if (existsSync(baseDir)) {
3968
+ // Validate the base directory itself isn't a symlink pointing outside its expected parent.
3969
+ const allowedRoot = realpathSync(dirname(baseDir));
3970
+ const resolvedBase = realpathSync(baseDir);
3971
+ if (!isPathWithinBase(allowedRoot, resolvedBase)) {
3972
+ const err = new Error('Media directory resolves outside of its expected root.');
3973
+ err.code = 'INVALID_PATH';
3974
+ throw err;
3975
+ }
3976
+
3977
+ const entries = readdirSync(baseDir);
3978
+ for (const entry of entries) {
3979
+ const ext = extname(entry).toLowerCase();
3980
+ if (!allowedExts.has(ext)) continue;
3981
+ const fullPath = join(baseDir, entry);
3982
+ // Skip symlinks
3983
+ const lstats = lstatSync(fullPath);
3984
+ if (lstats.isSymbolicLink()) continue;
3985
+ if (!lstats.isFile()) continue;
3986
+ files.push({
3987
+ path: fullPath,
3988
+ name: entry,
3989
+ size: lstats.size,
3990
+ modified: lstats.mtime.toISOString()
3991
+ });
3992
+ }
3993
+ // Sort by mtime descending, return top 5
3994
+ files.sort((a, b) => b.modified.localeCompare(a.modified));
3995
+ files.splice(5);
3996
+ }
3997
+
3998
+ if (options.json || JSON_ERROR_MODE) {
3999
+ console.log(JSON.stringify({
4000
+ success: true,
4001
+ type: 'list-media',
4002
+ mediaType,
4003
+ files,
4004
+ timestamp: new Date().toISOString()
4005
+ }));
4006
+ } else {
4007
+ if (files.length === 0) {
4008
+ console.log(`No ${mediaType} files found in ${baseDir}`);
4009
+ } else {
4010
+ console.log(`Recent ${mediaType} (${files.length}):`);
4011
+ for (const f of files) {
4012
+ console.log(` ${f.name} (${f.size} bytes, ${f.modified})`);
4013
+ }
4014
+ }
4015
+ }
4016
+ return;
4017
+ }
4018
+
4019
+ const creds = loadCredentials();
4020
+ log('Connecting to Sogni...');
4021
+ client = new SogniClientWrapper({
4022
+ network: openclawConfig?.defaultNetwork || 'fast',
4023
+ autoConnect: false,
4024
+ ...(creds.SOGNI_API_KEY
4025
+ ? { apiKey: creds.SOGNI_API_KEY, authType: 'apiKey' }
4026
+ : {
4027
+ username: creds.SOGNI_USERNAME,
4028
+ password: creds.SOGNI_PASSWORD,
4029
+ authType: 'token'
4030
+ })
4031
+ });
4032
+
4033
+ await client.connect();
4034
+ log('Connected.');
4035
+
4036
+ if (options.showBalance) {
4037
+ const balance = await client.getBalance();
4038
+ const spark = Number.parseFloat(balance?.spark);
4039
+ const sogni = Number.parseFloat(balance?.sogni);
4040
+ if (options.json) {
4041
+ console.log(JSON.stringify({
4042
+ success: true,
4043
+ type: 'balance',
4044
+ spark: Number.isFinite(spark) ? spark : null,
4045
+ sogni: Number.isFinite(sogni) ? sogni : null,
4046
+ tokenType: options.tokenType || 'spark',
4047
+ timestamp: new Date().toISOString()
4048
+ }));
4049
+ } else {
4050
+ console.log(`SPARK: ${formatTokenValue(spark)}`);
4051
+ console.log(`SOGNI: ${formatTokenValue(sogni)}`);
4052
+ }
4053
+ return;
4054
+ }
4055
+
4056
+ await ensureSufficientVideoBalance(client, log);
4057
+
4058
+ if (options.estimateVideoCost) {
4059
+ const modelDefaults = getModelDefaults(options.model, openclawConfig);
4060
+ const steps = resolveVideoSteps(options.model, modelDefaults, options.steps);
4061
+ if (!Number.isFinite(steps) || steps <= 0) {
4062
+ const err = new Error('--estimate-video-cost requires --steps (or modelDefaults for this model).');
4063
+ err.code = 'MISSING_STEPS';
4064
+ err.hint = 'Pass --steps explicitly (e.g. --steps 4 for lightx2v models).';
4065
+ throw err;
4066
+ }
4067
+ const estimateParams = {
4068
+ modelId: options.model,
4069
+ width: options.width,
4070
+ height: options.height,
4071
+ fps: options.fps,
4072
+ steps,
4073
+ numberOfMedia: options.count,
4074
+ tokenType: options.tokenType || 'spark'
4075
+ };
4076
+ if (options.frames) {
4077
+ estimateParams.frames = options.frames;
4078
+ } else {
4079
+ estimateParams.duration = options.duration;
4080
+ }
4081
+ const estimate = await client.estimateVideoCost(estimateParams);
4082
+ if (options.json) {
4083
+ const duration = options.frames ? Math.max(1, Math.round((options.frames - 1) / options.fps)) : options.duration;
4084
+ console.log(JSON.stringify({
4085
+ success: true,
4086
+ type: 'video-cost',
4087
+ model: options.model,
4088
+ width: options.width,
4089
+ height: options.height,
4090
+ fps: options.fps,
4091
+ frames: options.frames ?? null,
4092
+ duration,
4093
+ steps,
4094
+ tokenType: options.tokenType || 'spark',
4095
+ count: options.count,
4096
+ estimate
4097
+ }));
4098
+ } else {
4099
+ console.log(`Estimated cost: ${JSON.stringify(estimate)}`);
4100
+ }
4101
+ return;
4102
+ }
4103
+
4104
+ if (options.multiAngle) {
4105
+ if (options.contextImages.length > 1 && !options.quiet) {
4106
+ console.error('Warning: --multi-angle uses the first context image only.');
4107
+ }
4108
+ await runMultiAngleFlow(client, log);
4109
+ return;
4110
+ }
4111
+
4112
+ const results = [];
4113
+ let completedJobs = 0;
4114
+ let loopingStartImageBuffer;
4115
+
4116
/**
 * Settles when the generation run is finished.
 *
 * Resolves once `options.count` JOB_COMPLETED events have been received; each
 * event appends one entry to `results`. Rejects on the first job or project
 * failure event, on an `error`-typed job/project event, or when
 * `options.timeout` milliseconds pass without completion.
 */
const completionPromise = new Promise((resolve, reject) => {
  const timeout = setTimeout(() => {
    reject(new Error(`Timeout after ${options.timeout / 1000}s`));
  }, options.timeout);

  // Shared failure path: stop the timer and reject with the given message.
  const fail = (message) => {
    clearTimeout(timeout);
    reject(new Error(message));
  };

  client.on(ClientEvent.JOB_COMPLETED, (data) => {
    const jobData = data.job?.data || {};
    results.push({
      imageUrl: data.imageUrl,
      videoUrl: data.videoUrl,
      seed: jobData.seed,
      jobIndex: data.jobIndex,
      projectId: data.projectId
    });
    completedJobs++;
    log(`${options.video ? 'Video' : 'Image'} ${completedJobs}/${options.count} completed`);

    if (completedJobs >= options.count) {
      clearTimeout(timeout);
      resolve();
    }
  });

  // A missing payload must not throw inside the event handler, so `data` is read defensively.
  client.on(ClientEvent.JOB_FAILED, (data) => {
    fail(data?.error || 'Job failed');
  });

  client.on(ClientEvent.PROJECT_FAILED, (data) => {
    fail(data?.message || data?.error || 'Project failed');
  });

  client.on(ClientEvent.PROJECT_EVENT, (event) => {
    if (event?.type !== 'error') return;
    fail(event?.error?.message || event?.error?.error || 'Project failed');
  });

  client.on(ClientEvent.JOB_EVENT, (event) => {
    if (event?.type !== 'error') return;
    fail(event?.error?.message || event?.error?.error || 'Job failed');
  });

  // Progress for video
  if (options.video) {
    client.on(ClientEvent.PROJECT_PROGRESS, (data) => {
      if (data.percentage && data.percentage > 0) {
        log(`Progress: ${Math.round(data.percentage)}%`);
      }
    });
  }
});
// A failure event or the timeout can reject this promise while project creation
// is still being awaited, i.e. before `await completionPromise` attaches a handler.
// Marking the rejection as observed keeps Node from treating it as an unhandled
// rejection; the later `await` still receives the same error.
completionPromise.catch(() => {});
4173
+
4174
+ if (options.video) {
4175
+ // Video generation
4176
+ log(`Generating video (${options.videoWorkflow}) with ${options.model}...`);
4177
+ if (options.refImage) log(`Reference image: ${options.refImage}`);
4178
+ if (options.refImageEnd) log(`End frame: ${options.refImageEnd}`);
4179
+ if (options.refAudio) log(`Reference audio: ${options.refAudio}`);
4180
+ if (options.referenceAudioIdentity) log(`Voice identity: ${options._voicePersonaResolvedName || options.referenceAudioIdentity}`);
4181
+ if (options.refVideo) log(`Reference video: ${options.refVideo}`);
4182
+
4183
+ let imageBuffer = options.refImage ? await fetchMediaBuffer(options.refImage) : undefined;
4184
+ let endImageBuffer = options.refImageEnd ? await fetchMediaBuffer(options.refImageEnd) : undefined;
4185
+ const audioBuffer = options.refAudio ? await fetchMediaBuffer(options.refAudio) : undefined;
4186
+ const videoBuffer = options.refVideo ? await fetchMediaBuffer(options.refVideo) : undefined;
4187
+ const audioIdentityBuffer = options.referenceAudioIdentity ? await fetchMediaBuffer(options.referenceAudioIdentity) : undefined;
4188
+ const modelDefaults = getModelDefaults(options.model, openclawConfig);
4189
+ const videoDimensionRules = videoDimensionRulesFromDefaults(modelDefaults);
4190
+
4191
+ // Pre-resize reference images to model-compatible dimensions if needed for i2v workflow.
4192
+ if (options.videoWorkflow === 'i2v' && imageBuffer && options._needsRefResize) {
4193
+ const dims = getImageDimensionsFromBuffer(imageBuffer);
4194
+ if (dims?.width && dims?.height) {
4195
+ const resizedBuffer = await resizeImageBufferForVideo(imageBuffer, dims.width, dims.height, videoDimensionRules);
4196
+ const resizedDims = getImageDimensionsFromBuffer(resizedBuffer);
4197
+ if (!options.quiet) {
4198
+ console.error(
4199
+ `Pre-resized reference image from ${dims.width}x${dims.height} to ${resizedDims.width}x${resizedDims.height} ` +
4200
+ `(divisible by ${videoDimensionRules.dimensionMultiple}) to ensure i2v compatibility.`
4201
+ );
4202
+ }
4203
+ imageBuffer = resizedBuffer;
4204
+ }
4205
+ }
4206
+ if (options.videoWorkflow === 'i2v' && endImageBuffer && options._needsRefEndResize) {
4207
+ const dims = getImageDimensionsFromBuffer(endImageBuffer);
4208
+ if (dims?.width && dims?.height) {
4209
+ const resizedBuffer = await resizeImageBufferForVideo(endImageBuffer, dims.width, dims.height, videoDimensionRules);
4210
+ const resizedDims = getImageDimensionsFromBuffer(resizedBuffer);
4211
+ if (!options.quiet) {
4212
+ console.error(
4213
+ `Pre-resized end reference image from ${dims.width}x${dims.height} to ${resizedDims.width}x${resizedDims.height} ` +
4214
+ `(divisible by ${videoDimensionRules.dimensionMultiple}) to ensure i2v compatibility.`
4215
+ );
4216
+ }
4217
+ endImageBuffer = resizedBuffer;
4218
+ }
4219
+ }
4220
+ // Preserve the prepared start-frame buffer so looping (A->B->A) can reuse it later.
4221
+ loopingStartImageBuffer = imageBuffer;
4222
+
4223
+ const steps = resolveVideoSteps(options.model, modelDefaults, options.steps);
4224
+ const guidance = options.guidance ?? modelDefaults?.guidance;
4225
+
4226
+ const projectConfig = {
4227
+ modelId: options.model,
4228
+ positivePrompt: options.prompt,
4229
+ negativePrompt: '',
4230
+ stylePrompt: '',
4231
+ numberOfMedia: options.count,
4232
+ referenceImage: imageBuffer,
4233
+ fps: options.fps,
4234
+ width: options.width,
4235
+ height: options.height,
4236
+ tokenType: options.tokenType || 'spark',
4237
+ waitForCompletion: false,
4238
+ disableNSFWFilter: options.noFilter === true
4239
+ };
4240
+
4241
+ if (options.outputFormat) {
4242
+ projectConfig.outputFormat = options.outputFormat;
4243
+ }
4244
+ if (options.autoResizeVideoAssets !== null) {
4245
+ projectConfig.autoResizeVideoAssets = options.autoResizeVideoAssets;
4246
+ }
4247
+
4248
+ if (options.frames) {
4249
+ projectConfig.frames = options.frames;
4250
+ } else {
4251
+ projectConfig.duration = options.duration;
4252
+ }
4253
+
4254
+ // Add end frame for interpolation if provided
4255
+ if (endImageBuffer) {
4256
+ projectConfig.referenceImageEnd = endImageBuffer;
4257
+ }
4258
+ if (audioBuffer) {
4259
+ projectConfig.referenceAudio = audioBuffer;
4260
+ }
4261
+ if (options.audioStart !== null) {
4262
+ projectConfig.audioStart = options.audioStart;
4263
+ }
4264
+ if (options.audioDuration !== null) {
4265
+ projectConfig.audioDuration = options.audioDuration;
4266
+ }
4267
+ if (audioIdentityBuffer) {
4268
+ projectConfig.referenceAudioIdentity = audioIdentityBuffer;
4269
+ }
4270
+ if (videoBuffer) {
4271
+ projectConfig.referenceVideo = videoBuffer;
4272
+ }
4273
+ if (options.videoStart !== null) {
4274
+ projectConfig.videoStart = options.videoStart;
4275
+ }
4276
+ if (options.seed !== null && options.seed !== undefined) {
4277
+ projectConfig.seed = options.seed;
4278
+ }
4279
+ if (Number.isFinite(steps)) {
4280
+ projectConfig.steps = steps;
4281
+ }
4282
+ if (guidance !== null && guidance !== undefined) {
4283
+ projectConfig.guidance = guidance;
4284
+ }
4285
+ if (modelDefaults?.sampler) {
4286
+ projectConfig.sampler = modelDefaults.sampler;
4287
+ }
4288
+ if (modelDefaults?.scheduler) {
4289
+ projectConfig.scheduler = modelDefaults.scheduler;
4290
+ }
4291
+ if (modelDefaults?.shift !== null && modelDefaults?.shift !== undefined) {
4292
+ projectConfig.shift = modelDefaults.shift;
4293
+ }
4294
+ if (options.videoControlNetName && !isSeedanceModel(options.model)) {
4295
+ const controlNetStrength = resolveVideoControlNetStrength(options.videoControlNetName, options.videoControlNetStrength);
4296
+ projectConfig.controlNet = {
4297
+ name: options.videoControlNetName,
4298
+ strength: controlNetStrength
4299
+ };
4300
+ if (options.videoControlNetName !== 'detailer') {
4301
+ projectConfig.detailerStrength = 0.6;
4302
+ }
4303
+ } else if (options.videoControlNetName && isSeedanceModel(options.model) && !options.quiet) {
4304
+ console.error('Warning: --controlnet-name ignored for Seedance V2V models.');
4305
+ }
4306
+ if (options.sam2Coordinates) {
4307
+ projectConfig.sam2Coordinates = options.sam2Coordinates;
4308
+ }
4309
+ if (options.trimEndFrame) {
4310
+ projectConfig.trimEndFrame = true;
4311
+ }
4312
+ if (options.firstFrameStrength != null) {
4313
+ projectConfig.firstFrameStrength = options.firstFrameStrength;
4314
+ }
4315
+ if (options.lastFrameStrength != null) {
4316
+ projectConfig.lastFrameStrength = options.lastFrameStrength;
4317
+ }
4318
+
4319
+ const videoResult = await client.createVideoProject(projectConfig);
4320
+
4321
+ // Check for errors in the response (e.g., insufficient tokens)
4322
+ if (videoResult?.error || videoResult?.message) {
4323
+ throw new Error(videoResult.error || videoResult.message);
4324
+ }
4325
+ } else if (options.contextImages.length > 0) {
4326
+ // Image editing with context images
4327
+ log(`Editing with ${options.model}...`);
4328
+ log(`Context images: ${options.contextImages.length}`);
4329
+ if (options.seed !== null && options.seed !== undefined) log(`Using seed: ${options.seed}`);
4330
+
4331
+ // Load all context images as buffers
4332
+ const contextBuffers = await Promise.all(
4333
+ options.contextImages.map(img => fetchMediaBuffer(img))
4334
+ );
4335
+ const modelDefaults = getModelDefaults(options.model, openclawConfig);
4336
+ const steps = options.steps ?? modelDefaults?.steps ?? (options.model.includes('lightning') ? 4 : 20);
4337
+ const guidance = options.guidance ?? modelDefaults?.guidance ?? (options.model.includes('lightning') ? 3.5 : 7.5);
4338
+
4339
+ const editConfig = {
4340
+ modelId: options.model,
4341
+ positivePrompt: options.prompt,
4342
+ contextImages: contextBuffers,
4343
+ numberOfMedia: options.count,
4344
+ width: options.width,
4345
+ height: options.height,
4346
+ steps,
4347
+ guidance,
4348
+ tokenType: options.tokenType || 'spark',
4349
+ disableNSFWFilter: options.noFilter === true
4350
+ };
4351
+
4352
+ if (options.outputFormat) {
4353
+ editConfig.outputFormat = options.outputFormat;
4354
+ }
4355
+ if (options.sampler) {
4356
+ editConfig.sampler = options.sampler;
4357
+ }
4358
+ if (options.scheduler) {
4359
+ editConfig.scheduler = options.scheduler;
4360
+ }
4361
+ if (options.loras.length > 0) {
4362
+ editConfig.loras = options.loras;
4363
+ }
4364
+ if (options.loraStrengths.length > 0) {
4365
+ editConfig.loraStrengths = options.loraStrengths;
4366
+ }
4367
+
4368
+ if (options.seed !== null && options.seed !== undefined) {
4369
+ editConfig.seed = options.seed;
4370
+ }
4371
+
4372
+ await client.createImageEditProject(editConfig);
4373
+ } else if (options.photobooth) {
4374
+ // Photobooth: face transfer with InstantID ControlNet
4375
+ log(`Photobooth with ${options.model}...`);
4376
+ if (options.seed !== null && options.seed !== undefined) log(`Using seed: ${options.seed}`);
4377
+
4378
+ const faceBuffer = await fetchMediaBuffer(options.refImage);
4379
+ const modelDefaults = getModelDefaults(options.model, openclawConfig);
4380
+ const steps = options.steps ?? modelDefaults?.steps ?? 7;
4381
+ const guidance = options.guidance ?? modelDefaults?.guidance ?? 2;
4382
+
4383
+ const projectConfig = {
4384
+ modelId: options.model,
4385
+ positivePrompt: options.prompt,
4386
+ negativePrompt: '',
4387
+ stylePrompt: '',
4388
+ numberOfMedia: options.count,
4389
+ tokenType: options.tokenType || 'spark',
4390
+ waitForCompletion: false,
4391
+ sizePreset: 'custom',
4392
+ width: options.width,
4393
+ height: options.height,
4394
+ steps,
4395
+ guidance,
4396
+ disableNSFWFilter: options.noFilter === true,
4397
+ sampler: options.sampler || 'dpmpp_sde',
4398
+ scheduler: options.scheduler || 'karras',
4399
+ controlNet: {
4400
+ name: 'instantid',
4401
+ image: faceBuffer,
4402
+ strength: options.cnStrength ?? 0.7,
4403
+ mode: 'balanced',
4404
+ guidanceStart: 0,
4405
+ guidanceEnd: options.cnGuidanceEnd ?? 0.6,
4406
+ }
4407
+ };
4408
+
4409
+ if (options.outputFormat) projectConfig.outputFormat = options.outputFormat;
4410
+ if (options.seed !== null && options.seed !== undefined) projectConfig.seed = options.seed;
4411
+ if (options.loras.length > 0) projectConfig.loras = options.loras;
4412
+ if (options.loraStrengths.length > 0) projectConfig.loraStrengths = options.loraStrengths;
4413
+
4414
+ const projectResult = await client.createImageProject(projectConfig);
4415
+
4416
+ // Check for errors in the response (e.g., insufficient tokens)
4417
+ if (projectResult?.error || projectResult?.message) {
4418
+ throw new Error(projectResult.error || projectResult.message);
4419
+ }
4420
+ } else {
4421
+ // Standard image generation
4422
+ log(`Generating with ${options.model}...`);
4423
+ if (options.seed !== null && options.seed !== undefined) log(`Using seed: ${options.seed}`);
4424
+ const modelDefaults = getModelDefaults(options.model, openclawConfig);
4425
+ const guidance = options.guidance ?? modelDefaults?.guidance ?? 1.0;
4426
+ const steps = options.steps ?? modelDefaults?.steps;
4427
+
4428
+ const useVariations = options.count > 1 && hasPromptVariations(options.prompt);
4429
+ const variationCount = useVariations ? options.count : 1;
4430
+ const imagesPerCall = useVariations ? 1 : options.count;
4431
+
4432
// Standard image generation: one project per prompt variation when the prompt
// contains variations and several images were requested, otherwise a single
// project producing `options.count` images.
// NOTE(review): unlike the edit and photobooth branches, options.loras /
// options.loraStrengths are not forwarded here — confirm that is intentional.
for (let vi = 0; vi < variationCount; vi++) {
  let expandedPrompt = useVariations
    ? expandPromptVariation(options.prompt, vi)
    : options.prompt;
  // Sanitize batch prompts to prevent grid/collage artifacts
  if (imagesPerCall > 1) expandedPrompt = sanitizeBatchPrompt(expandedPrompt);
  if (useVariations) {
    log(`Variation ${vi + 1}/${variationCount}: "${expandedPrompt}"`);
  }

  const projectConfig = {
    modelId: options.model,
    positivePrompt: expandedPrompt,
    negativePrompt: '',
    stylePrompt: '',
    numberOfMedia: imagesPerCall,
    tokenType: options.tokenType || 'spark',
    waitForCompletion: false,
    sizePreset: 'custom',
    width: options.width,
    height: options.height,
    guidance,
    disableNSFWFilter: options.noFilter === true
  };
  if (options.outputFormat) {
    projectConfig.outputFormat = options.outputFormat;
  }
  if (options.sampler) {
    projectConfig.sampler = options.sampler;
  }
  if (options.scheduler) {
    projectConfig.scheduler = options.scheduler;
  }
  // Steps are only sent when a value is available from the CLI or model defaults.
  if (steps) {
    projectConfig.steps = steps;
  }

  if (options.seed !== null && options.seed !== undefined) {
    projectConfig.seed = options.seed;
  }

  const projectResult = await client.createImageProject(projectConfig);

  // Check for errors in the response (e.g., insufficient tokens), as the
  // photobooth branch does for the same API; otherwise a rejected project
  // would only surface as a timeout.
  if (projectResult?.error || projectResult?.message) {
    throw new Error(projectResult.error || projectResult.message);
  }
}
4475
+ }
4476
+
4477
+ // Wait for completion via events
4478
+ await completionPromise;
4479
+
4480
+ if (results.length > 0) {
4481
+ const urls = results.map(r => options.video ? r.videoUrl : r.imageUrl).filter(Boolean);
4482
+ const firstResult = results[0];
4483
+
4484
+ // Save last render info
4485
+ const seeds = results.map(r => r.seed ?? options.seed);
4486
+ const renderInfo = {
4487
+ timestamp: new Date().toISOString(),
4488
+ type: options.video ? 'video' : 'image',
4489
+ prompt: options.prompt,
4490
+ model: options.model,
4491
+ width: options.width,
4492
+ height: options.height,
4493
+ seed: firstResult.seed ?? options.seed,
4494
+ seedStrategy: options.seedStrategy || null,
4495
+ seeds,
4496
+ projectId: firstResult.projectId,
4497
+ urls: urls,
4498
+ localPath: options.output || null,
4499
+ tokenType: options.tokenType || 'spark',
4500
+ quality: options.quality || null
4501
+ };
4502
+ if (options.outputFormat) {
4503
+ renderInfo.outputFormat = options.outputFormat;
4504
+ }
4505
+ if (options.sampler) {
4506
+ renderInfo.sampler = options.sampler;
4507
+ }
4508
+ if (options.scheduler) {
4509
+ renderInfo.scheduler = options.scheduler;
4510
+ }
4511
+ if (options.loras.length > 0) {
4512
+ renderInfo.loras = options.loras;
4513
+ }
4514
+ if (options.loraStrengths.length > 0) {
4515
+ renderInfo.loraStrengths = options.loraStrengths;
4516
+ }
4517
+ if (options.video) {
4518
+ renderInfo.workflow = options.videoWorkflow;
4519
+ renderInfo.fps = options.fps;
4520
+ renderInfo.duration = options.frames ? options.frames / options.fps : options.duration;
4521
+ if (options.frames) renderInfo.frames = options.frames;
4522
+ if (options.targetResolution) renderInfo.targetResolution = options.targetResolution;
4523
+ if (options.autoResizeVideoAssets !== null) {
4524
+ renderInfo.autoResizeVideoAssets = options.autoResizeVideoAssets;
4525
+ }
4526
+ renderInfo.refImage = options.refImage;
4527
+ renderInfo.refImageEnd = options.refImageEnd;
4528
+ if (options.refAudio) {
4529
+ renderInfo.refAudio = options.refAudio;
4530
+ if (options.audioStart !== null) renderInfo.audioStart = options.audioStart;
4531
+ if (options.audioDuration !== null) renderInfo.audioDuration = options.audioDuration;
4532
+ }
4533
+ if (options.referenceAudioIdentity) {
4534
+ renderInfo.referenceAudioIdentity = options.referenceAudioIdentity;
4535
+ if (options._voicePersonaResolvedName || options.voicePersonaName) {
4536
+ renderInfo.voicePersonaName = options._voicePersonaResolvedName || options.voicePersonaName;
4537
+ }
4538
+ }
4539
+ if (options.refVideo) {
4540
+ renderInfo.refVideo = options.refVideo;
4541
+ if (options.videoStart !== null) renderInfo.videoStart = options.videoStart;
4542
+ }
4543
+ if (options.videoControlNetName && !isSeedanceModel(options.model)) {
4544
+ renderInfo.controlNet = {
4545
+ name: options.videoControlNetName,
4546
+ strength: resolveVideoControlNetStrength(options.videoControlNetName, options.videoControlNetStrength)
4547
+ };
4548
+ }
4549
+ if (options.sam2Coordinates) renderInfo.sam2Coordinates = options.sam2Coordinates;
4550
+ if (options.trimEndFrame) renderInfo.trimEndFrame = true;
4551
+ if (options.firstFrameStrength != null) renderInfo.firstFrameStrength = options.firstFrameStrength;
4552
+ if (options.lastFrameStrength != null) renderInfo.lastFrameStrength = options.lastFrameStrength;
4553
+ }
4554
+ if (options.contextImages.length > 0) {
4555
+ renderInfo.contextImages = options.contextImages;
4556
+ }
4557
+ if (options.photobooth) {
4558
+ renderInfo.photobooth = true;
4559
+ renderInfo.refImage = options.refImage;
4560
+ }
4561
+ saveLastRender(renderInfo);
4562
+
4563
+ // Save to file if requested
4564
// Save to file if requested. Only the first result URL is downloaded.
// For looping i2v renders a second "return" clip (last frame -> original image)
// is generated and both clips are concatenated into options.output.
if (options.output && urls[0]) {
  const response = await fetch(urls[0]);
  // fetch() resolves on HTTP error statuses too; without this check an error
  // body would be written to disk as if it were the rendered media.
  if (!response.ok) {
    const err = new Error(`Failed to download result: HTTP ${response.status} ${response.statusText}`.trim());
    err.code = 'DOWNLOAD_FAILED';
    throw err;
  }
  const buffer = Buffer.from(await response.arrayBuffer());

  const dir = dirname(options.output);
  if (dir && dir !== '.' && !existsSync(dir)) mkdirSync(dir, { recursive: true });

  // Handle looping for i2v workflow
  if (options.looping && options.videoWorkflow === 'i2v' && options.refImage) {
    log('Creating looping video (A→B→A)...');

    // Intermediate clips live in a private temp directory that is removed afterwards.
    const tempDir = mkdtempSync(join(tmpdir(), 'sogni-loop-'));
    const clip1Path = join(tempDir, 'clip1.mp4');
    const lastFramePath = join(tempDir, 'last-frame.png');
    const clip2Path = join(tempDir, 'clip2.mp4');
    let client2 = null;
    let clip2Timer = null;

    try {
      writeFileSync(clip1Path, buffer);
      log('Extracting last frame...');
      await extractLastFrameFromVideo(clip1Path, lastFramePath);

      // Generate second clip (last frame → original image)
      log('Generating return clip (B→A)...');

      // Get model defaults for steps and guidance
      const modelDefaults2 = getModelDefaults(options.model, openclawConfig);
      const steps2 = resolveVideoSteps(options.model, modelDefaults2, options.steps);
      const guidance2 = options.guidance ?? modelDefaults2?.guidance;

      const projectConfig2 = {
        modelId: options.model,
        positivePrompt: options.prompt,
        negativePrompt: '',
        stylePrompt: '',
        numberOfMedia: 1,
        referenceImage: readFileSync(lastFramePath),
        referenceImageEnd: loopingStartImageBuffer,
        fps: options.fps,
        width: options.width,
        height: options.height,
        tokenType: options.tokenType || 'spark',
        waitForCompletion: false,
        disableNSFWFilter: options.noFilter === true
      };

      if (options.frames) projectConfig2.frames = options.frames;
      else if (options.duration) projectConfig2.duration = options.duration;
      if (Number.isFinite(steps2)) projectConfig2.steps = steps2;
      if (guidance2 !== null && guidance2 !== undefined) projectConfig2.guidance = guidance2;

      // Create a new client for second clip to avoid event conflicts
      const creds = loadCredentials();
      client2 = new SogniClientWrapper({
        network: openclawConfig?.defaultNetwork || 'fast',
        autoConnect: false,
        ...(creds.SOGNI_API_KEY
          ? { apiKey: creds.SOGNI_API_KEY, authType: 'apiKey' }
          : {
              username: creds.SOGNI_USERNAME,
              password: creds.SOGNI_PASSWORD,
              authType: 'token'
            })
      });
      await client2.connect();

      // Create second clip and wait for completion via events
      const clip2Promise = new Promise((resolve, reject) => {
        clip2Timer = setTimeout(() => {
          reject(new Error('Second clip generation timed out'));
        }, options.timeout);

        const fail = (message) => {
          clearTimeout(clip2Timer);
          reject(new Error(message));
        };

        client2.on(ClientEvent.JOB_COMPLETED, async (data) => {
          clearTimeout(clip2Timer);
          try {
            const clip2Url = data.videoUrl;
            if (!clip2Url) {
              reject(new Error('No video URL returned for second clip.'));
              return;
            }

            // Download second clip
            const response2 = await fetch(clip2Url);
            if (!response2.ok) {
              throw new Error(`Failed to download second clip: HTTP ${response2.status} ${response2.statusText}`.trim());
            }
            writeFileSync(clip2Path, Buffer.from(await response2.arrayBuffer()));
            resolve();
          } catch (err) {
            reject(err);
          }
        });

        client2.on(ClientEvent.JOB_FAILED, (data) => {
          fail(data?.error || 'Second clip generation failed');
        });

        client2.on(ClientEvent.PROJECT_FAILED, (data) => {
          fail(data?.message || 'Second clip project failed');
        });

        // Show progress for second clip
        client2.on(ClientEvent.PROJECT_PROGRESS, (data) => {
          if (data.percentage && data.percentage > 0) {
            log(`Progress: ${Math.round(data.percentage)}%`);
          }
        });
      });
      // The promise may reject while createVideoProject is still awaited; mark it
      // observed so Node does not report an unhandled rejection. The later
      // `await clip2Promise` still receives the error.
      clip2Promise.catch(() => {});

      const clip2Result = await client2.createVideoProject(projectConfig2);

      // Check for errors in the response (e.g., insufficient tokens)
      if (clip2Result?.error || clip2Result?.message) {
        throw new Error(clip2Result.error || clip2Result.message);
      }

      await clip2Promise;

      log('Concatenating clips...');
      await buildConcatVideoFromClips(options.output, [clip1Path, clip2Path]);
      log(`Saved looping video to ${options.output}`);
    } finally {
      // Cleanup runs on success and failure alike and is best-effort: a cleanup
      // problem is logged but never replaces the primary error or result.
      clearTimeout(clip2Timer);
      if (client2) {
        try {
          await client2.disconnect();
        } catch (err) {
          log(`Warning: failed to disconnect second client: ${err.message}`);
        }
      }
      try {
        for (const name of readdirSync(tempDir)) {
          unlinkSync(join(tempDir, name));
        }
        rmdirSync(tempDir);
      } catch (err) {
        log(`Warning: could not remove temporary directory ${tempDir}: ${err.message}`);
      }
    }
  } else {
    writeFileSync(options.output, buffer);
    log(`Saved to ${options.output}`);
  }
}
4692
+
4693
+ // Output result
4694
+ if (options.json) {
4695
+ const output = {
4696
+ success: true,
4697
+ type: options.video ? 'video' : 'image',
4698
+ prompt: options.prompt,
4699
+ model: options.model,
4700
+ width: options.width,
4701
+ height: options.height,
4702
+ seed: firstResult.seed ?? options.seed,
4703
+ seedStrategy: options.seedStrategy || null,
4704
+ seeds,
4705
+ urls: urls,
4706
+ localPath: options.output || null,
4707
+ tokenType: options.tokenType || 'spark'
4708
+ };
4709
+ if (options.outputFormat) {
4710
+ output.outputFormat = options.outputFormat;
4711
+ }
4712
+ if (options.sampler) {
4713
+ output.sampler = options.sampler;
4714
+ }
4715
+ if (options.scheduler) {
4716
+ output.scheduler = options.scheduler;
4717
+ }
4718
+ if (options.loras.length > 0) {
4719
+ output.loras = options.loras;
4720
+ }
4721
+ if (options.loraStrengths.length > 0) {
4722
+ output.loraStrengths = options.loraStrengths;
4723
+ }
4724
+ if (options.video) {
4725
+ output.workflow = options.videoWorkflow;
4726
+ output.fps = options.fps;
4727
+ output.duration = options.frames ? options.frames / options.fps : options.duration;
4728
+ if (options.frames) output.frames = options.frames;
4729
+ if (options.targetResolution) output.targetResolution = options.targetResolution;
4730
+ output.strictSize = options.strictSize || false;
4731
+ if (options.autoResizeVideoAssets !== null) {
4732
+ output.autoResizeVideoAssets = options.autoResizeVideoAssets;
4733
+ }
4734
+ if (options.refImage) output.refImage = options.refImage;
4735
+ if (options.refImageEnd) output.refImageEnd = options.refImageEnd;
4736
+ if (options.refAudio) {
4737
+ output.refAudio = options.refAudio;
4738
+ if (options.audioStart !== null) output.audioStart = options.audioStart;
4739
+ if (options.audioDuration !== null) output.audioDuration = options.audioDuration;
4740
+ }
4741
+ if (options.referenceAudioIdentity) {
4742
+ output.referenceAudioIdentity = options.referenceAudioIdentity;
4743
+ if (options._voicePersonaResolvedName || options.voicePersonaName) {
4744
+ output.voicePersonaName = options._voicePersonaResolvedName || options.voicePersonaName;
4745
+ }
4746
+ }
4747
+ if (options.refVideo) {
4748
+ output.refVideo = options.refVideo;
4749
+ if (options.videoStart !== null) output.videoStart = options.videoStart;
4750
+ }
4751
+ if (options.videoControlNetName && !isSeedanceModel(options.model)) {
4752
+ output.controlNet = {
4753
+ name: options.videoControlNetName,
4754
+ strength: resolveVideoControlNetStrength(options.videoControlNetName, options.videoControlNetStrength)
4755
+ };
4756
+ }
4757
+ if (options.sam2Coordinates) output.sam2Coordinates = options.sam2Coordinates;
4758
+ if (options.trimEndFrame) output.trimEndFrame = true;
4759
+ if (options.firstFrameStrength != null) output.firstFrameStrength = options.firstFrameStrength;
4760
+ if (options.lastFrameStrength != null) output.lastFrameStrength = options.lastFrameStrength;
4761
+ if (options._effectiveVideoDims?.width && options._effectiveVideoDims?.height) {
4762
+ output.effectiveWidth = options._effectiveVideoDims.width;
4763
+ output.effectiveHeight = options._effectiveVideoDims.height;
4764
+ output.effectiveFromReference = {
4765
+ width: options._effectiveVideoDims.refWidth,
4766
+ height: options._effectiveVideoDims.refHeight
4767
+ };
4768
+ }
4769
+ if (options._adjustedVideoDims) {
4770
+ output.adjustedVideoDims = options._adjustedVideoDims;
4771
+ }
4772
+ }
4773
+ if (options.contextImages.length > 0) {
4774
+ output.contextImages = options.contextImages;
4775
+ }
4776
+ if (options.photobooth) {
4777
+ output.photobooth = true;
4778
+ output.refImage = options.refImage;
4779
+ output.controlNet = {
4780
+ name: 'instantid',
4781
+ strength: options.cnStrength ?? 0.7,
4782
+ guidanceEnd: options.cnGuidanceEnd ?? 0.6,
4783
+ };
4784
+ }
4785
+ console.log(JSON.stringify(output));
4786
+ } else {
4787
+ urls.forEach(url => console.log(url));
4788
+ }
4789
+ } else {
4790
+ throw new Error('No output generated - may have been filtered');
4791
+ }
4792
+
4793
+ } catch (error) {
4794
+ // Token auto-fallback: if using auto mode and got insufficient balance, retry with the other token
4795
+ const isBalanceError = error.code === 'INSUFFICIENT_BALANCE' || /insufficient/i.test(error.message);
4796
+ if (_isAutoToken && isBalanceError && options.tokenType === 'spark') {
4797
+ log('Insufficient SPARK balance — retrying with SOGNI tokens...');
4798
+ options.tokenType = 'sogni';
4799
+ try {
4800
+ if (client?.isConnected?.()) {
4801
+ await Promise.race([client.disconnect(), new Promise(r => setTimeout(r, 1000))]);
4802
+ }
4803
+ } catch (_) {}
4804
+ return main();
4805
+ }
4806
+
4807
+ exitCode = 1;
4808
+ const shouldJson = options.json || IS_OPENCLAW_INVOCATION;
4809
+ if (shouldJson) {
4810
+ const payload = {
4811
+ success: false,
4812
+ error: error.message,
4813
+ prompt: options.prompt ?? null
4814
+ };
4815
+ if (error.code) payload.errorCode = error.code;
4816
+ if (error.details) payload.errorDetails = error.details;
4817
+ if (error.hint) payload.hint = error.hint;
4818
+ payload.timestamp = new Date().toISOString();
4819
+ payload.node = process.versions.node;
4820
+ payload.cwd = process.cwd();
4821
+ payload.context = {
4822
+ video: options.video || false,
4823
+ workflow: options.video ? (options.videoWorkflow || null) : null,
4824
+ model: options.model || null,
4825
+ width: Number.isFinite(options.width) ? options.width : null,
4826
+ height: Number.isFinite(options.height) ? options.height : null,
4827
+ strictSize: options.video ? (options.strictSize || false) : null,
4828
+ count: Number.isFinite(options.count) ? options.count : null,
4829
+ tokenType: options.tokenType || 'spark',
4830
+ fps: options.video ? options.fps : null,
4831
+ duration: options.video ? (options.frames ? options.frames / options.fps : options.duration) : null,
4832
+ frames: options.video ? (options.frames ?? null) : null,
4833
+ autoResizeVideoAssets: options.video ? (options.autoResizeVideoAssets ?? null) : null,
4834
+ refImage: options.video ? (options.refImage ?? null) : null,
4835
+ refImageEnd: options.video ? (options.refImageEnd ?? null) : null,
4836
+ refAudio: options.video ? (options.refAudio ?? null) : null,
4837
+ referenceAudioIdentity: options.video ? (options.referenceAudioIdentity ?? null) : null,
4838
+ refVideo: options.video ? (options.refVideo ?? null) : null,
4839
+ effectiveWidth: options.video ? (options._effectiveVideoDims?.width ?? null) : null,
4840
+ effectiveHeight: options.video ? (options._effectiveVideoDims?.height ?? null) : null,
4841
+ adjustedVideoDims: options.video ? (options._adjustedVideoDims ?? null) : null
4842
+ };
4843
+ if (IS_OPENCLAW_INVOCATION) payload.openclaw = true;
4844
+ console.log(JSON.stringify(payload));
4845
+ if (!options.json) {
4846
+ console.error(`Error: ${error.message}`);
4847
+ if (error.hint) console.error(`Hint: ${error.hint}`);
4848
+ }
4849
+ } else {
4850
+ console.error(`Error: ${error.message}`);
4851
+ if (error.hint) console.error(`Hint: ${error.hint}`);
4852
+ }
4853
+ } finally {
4854
+ try {
4855
+ if (client?.isConnected?.()) {
4856
+ await Promise.race([
4857
+ client.disconnect(),
4858
+ new Promise(resolve => setTimeout(resolve, 1000))
4859
+ ]);
4860
+ }
4861
+ } catch (e) {}
4862
+ }
4863
+ process.exit(exitCode);
4864
+ }
4865
+
4866
+ main(); // Script entry point: kicks off the CLI run. The returned promise is not awaited here; the function's own tail handles errors and calls process.exit(exitCode).