@sogni-ai/sogni-creative-agent-skill 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/sogni-agent.mjs CHANGED
@@ -1,12 +1,13 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * sogni-agent - Generate images and videos using Sogni AI
3
+ * sogni-agent - Generate images, videos, and music using Sogni AI
4
4
  * Usage: sogni-agent [options] "prompt"
5
5
  */
6
6
 
7
- import { SogniClientWrapper, ClientEvent, getMaxContextImages } from '@sogni-ai/sogni-client-wrapper';
7
+ import { SogniClientWrapper, ClientEvent, getMaxContextImages as getWrapperMaxContextImages } from '@sogni-ai/sogni-client-wrapper';
8
8
  import JSON5 from 'json5';
9
9
  import { createHash, randomBytes } from 'crypto';
10
+ import { createRequire } from 'module';
10
11
  import { readFileSync, writeFileSync, existsSync, mkdirSync, mkdtempSync, statSync, readdirSync, realpathSync, lstatSync, unlinkSync, rmdirSync } from 'fs';
11
12
  import { join, dirname, basename, extname, sep } from 'path';
12
13
  import { homedir, tmpdir } from 'os';
@@ -17,7 +18,11 @@ import { assertSafeUrl } from './ssrf-guard.mjs';
17
18
  import {
18
19
  LTX23_WORKFLOW_MODELS,
19
20
  QUALITY_TIERS,
21
+ SEEDANCE_V2V_REFERENCE_MAX_DURATION_SECONDS,
20
22
  VIDEO_WORKFLOW_DEFAULT_MODELS,
23
+ buildStoryboardVideoHostedToolSequenceInput,
24
+ detectReferenceAudioFormat,
25
+ dimensionsForAspectRatio,
21
26
  dimensionsWithShortSide,
22
27
  getModelDefaults,
23
28
  getVideoPromptGuardrailPlan,
@@ -25,6 +30,7 @@ import {
25
30
  inferVideoWorkflowFromModel,
26
31
  isLtx2Model,
27
32
  isSeedanceModel,
33
+ isSeedanceModelSelection,
28
34
  normalizeVideoWorkflow,
29
35
  planCliVideoBrain,
30
36
  resolveVideoControlNetStrength,
@@ -32,9 +38,13 @@ import {
32
38
  resolveVideoSteps,
33
39
  sanitizeBatchPrompt,
34
40
  selectDefaultVideoModel,
41
+ shouldTrimSeedanceV2VSourceVideo,
35
42
  workflowRequiresImage
36
43
  } from './generated/creative-agent-runtime.mjs';
37
44
 
45
+ const require = createRequire(import.meta.url);
46
+ const { parseCreativeWorkflowSseChunk } = require('@sogni-ai/sogni-client-wrapper');
47
+
38
48
  // ---------------------------------------------------------------------------
39
49
  // Path sanitization — defense-in-depth for any value that becomes a file path
40
50
  // or process argument. execaSync runs argument arrays without shell expansion,
@@ -76,11 +86,48 @@ const DEFAULT_MEMORIES_PATH = join(homedir(), '.config', 'sogni', 'memories.json
76
86
  const DEFAULT_PERSONALITY_PATH = join(homedir(), '.config', 'sogni', 'personality.txt');
77
87
  const DEFAULT_PERSONAS_DIR = join(homedir(), '.config', 'sogni', 'personas');
78
88
  const DEFAULT_PERSONAS_INDEX_PATH = join(homedir(), '.config', 'sogni', 'personas', 'index.json');
89
+ const DEFAULT_API_BASE_URL = 'https://api.sogni.ai';
90
+ const DEFAULT_SAFE_API_HOSTS = Object.freeze(['api.sogni.ai']);
91
+ const LOOPBACK_API_HOSTS = Object.freeze(['localhost', '127.0.0.1', '::1']);
92
+ const DEFAULT_LLM_MODEL = 'qwen3.6-35b-a3b-gguf-iq4xs';
93
+ const SOGNI_APP_SOURCE = 'sogni-creative-agent-skill';
79
94
  const OPENCLAW_CONFIG_PATH = getEnv('OPENCLAW_CONFIG_PATH') || DEFAULT_OPENCLAW_CONFIG_PATH;
80
95
  const IS_OPENCLAW_INVOCATION = Boolean(getEnv('OPENCLAW_PLUGIN_CONFIG'));
81
96
  const RAW_ARGS = process.argv.slice(2);
82
97
  const CLI_WANTS_JSON = RAW_ARGS.includes('--json');
83
98
  const JSON_ERROR_MODE = CLI_WANTS_JSON || IS_OPENCLAW_INVOCATION;
99
+ const SOCKET_EVENT_SUBSCRIPTIONS = Object.freeze({
100
+ modelAvailability: false
101
+ });
102
// Short aliases accepted for music model selection, mapped to canonical model ids.
// Frozen like the other shared lookup constants in this file so a stray write
// cannot silently change model resolution at runtime.
const MUSIC_MODEL_IDS = Object.freeze({
  turbo: 'ace_step_1.5_turbo',
  speed: 'ace_step_1.5_turbo',
  fast: 'ace_step_1.5_turbo',
  sft: 'ace_step_1.5_sft',
  lyrics: 'ace_step_1.5_sft',
  lyric: 'ace_step_1.5_sft'
});

// Per-model parameter specs keyed by canonical model id. Numeric parameters carry
// { min, max, default }; sampler/scheduler carry { allowed, default }.
// Only the outer map is frozen; the nested spec objects are plain objects.
const MUSIC_MODEL_DEFAULTS = Object.freeze({
  'ace_step_1.5_turbo': {
    steps: { min: 4, max: 16, default: 8 },
    shift: { min: 1, max: 6, default: 3 },
    sampler: { allowed: ['euler', 'euler_ancestral'], default: 'euler' },
    scheduler: { allowed: ['simple'], default: 'simple' }
  },
  'ace_step_1.5_sft': {
    steps: { min: 10, max: 100, default: 50 },
    guidance: { min: 1, max: 15, default: 5 },
    shift: { min: 1, max: 6, default: 3 },
    sampler: { allowed: ['euler', 'euler_ancestral', 'er_sde'], default: 'er_sde' },
    scheduler: { allowed: ['simple', 'linear_quadratic'], default: 'linear_quadratic' }
  }
});

// Music duration bounds and default, in seconds.
const MUSIC_DURATION_LIMITS = Object.freeze({ min: 10, max: 600, default: 30 });
// Beats-per-minute bounds and default.
const MUSIC_BPM_LIMITS = Object.freeze({ min: 30, max: 300, default: 120 });
// Inclusive bounds for --prompt-strength.
const MUSIC_PROMPT_STRENGTH_LIMITS = Object.freeze({ min: 0, max: 10 });
// Inclusive bounds for --creativity.
const MUSIC_CREATIVITY_LIMITS = Object.freeze({ min: 0, max: 2 });
// Audio container formats accepted for music output.
const MUSIC_OUTPUT_FORMATS = new Set(['mp3', 'flac', 'wav']);
// Accepted time-signature numerators, stored as strings.
const MUSIC_TIME_SIGNATURES = new Set(['2', '3', '4', '6']);
84
131
 
85
132
  function expandHomePath(rawPath) {
86
133
  if (typeof rawPath !== 'string') return rawPath;
@@ -96,6 +143,18 @@ function resolveConfiguredPath(rawPath, fallbackPath, label) {
96
143
  return sanitizePath(candidate, label);
97
144
  }
98
145
 
146
/**
 * Best-effort request to apply SOCKET_EVENT_SUBSCRIPTIONS on the SDK client
 * exposed as `wrapper.client`.
 *
 * Does nothing when the wrapper, its client, or the
 * `setSocketEventSubscriptions` method is missing. Any failure is ignored on
 * purpose so this optimization can never block generation.
 *
 * @param {object|null|undefined} wrapper - Client wrapper that may expose `.client`.
 * @returns {Promise<void>}
 */
async function disableLiveModelAvailabilityEvents(wrapper) {
  const sdkClient = wrapper?.client;

  try {
    const canConfigure = typeof sdkClient?.setSocketEventSubscriptions === 'function';
    if (!canConfigure) return;
    // Call through the client so the method keeps its `this` binding.
    await sdkClient.setSocketEventSubscriptions(SOCKET_EVENT_SUBSCRIPTIONS);
  } catch {
    // Subscription optimization is best-effort and must not block generation.
  }
}
157
+
99
158
  function isPathWithinBase(basePath, targetPath) {
100
159
  return targetPath === basePath || targetPath.startsWith(`${basePath}${sep}`);
101
160
  }
@@ -226,6 +285,9 @@ function applyCreativeBrainPreflight() {
226
285
  widthFromPrompt = true;
227
286
  heightFromPrompt = true;
228
287
  }
288
+ if (plan.dimensionSource === 'aspect' && plan.aspectRatio && !cliSet.width && !cliSet.height) {
289
+ aspectRatioFromPrompt = plan.aspectRatio;
290
+ }
229
291
  if (
230
292
  Number.isFinite(plan.targetResolution) &&
231
293
  !cliSet.targetResolution &&
@@ -250,6 +312,100 @@ function normalizeSeedStrategy(value) {
250
312
  return null;
251
313
  }
252
314
 
315
/**
 * Normalizes the hosted-chat tool mode into the value used by the request layer.
 *
 * Accepted inputs (case-insensitive, surrounding whitespace ignored):
 *   - 'creative-agent' | 'rich'  -> 'creative-agent'
 *   - 'hosted' | 'true' | true   -> true
 *   - 'none' | 'false' | false   -> false
 * Empty, null and undefined input fall back to 'creative-agent'.
 *
 * @param {string|boolean|null|undefined} value - Raw mode from CLI or config.
 * @returns {'creative-agent'|boolean|null} Normalized mode, or null when unrecognized.
 */
function normalizeApiToolMode(value) {
  // A real boolean (e.g. from JSON config) must keep its meaning. Without this
  // guard `false` would hit the `||` default below and enable tools.
  if (typeof value === 'boolean') return value;

  const normalized = String(value || 'creative-agent').trim().toLowerCase();
  switch (normalized) {
    case 'creative-agent':
    case 'rich':
      return 'creative-agent';
    case 'hosted':
    case 'true':
      return true;
    case 'none':
    case 'false':
      return false;
    default:
      return null;
  }
}
322
+
323
/**
 * Maps a user-supplied workflow kind (or alias) to its canonical name.
 *
 * Matching is case-insensitive, ignores surrounding whitespace, and treats
 * hyphens as underscores, so 'image-to-video' and 'IMAGE_TO_VIDEO' are equal.
 *
 * @param {string|null|undefined} value - Raw workflow kind.
 * @returns {'image_to_video'|'hosted_tool_sequence'|'storyboard_video'|null}
 *   Canonical kind, or null when the value is empty or unrecognized.
 */
function normalizeApiWorkflowKind(value) {
  // Trim first: values from config files or quoted shell arguments can carry
  // stray whitespace that would otherwise make a valid kind unrecognized.
  const normalized = String(value || '').trim().toLowerCase().replace(/-/g, '_');
  switch (normalized) {
    case 'image_to_video':
    case 'i2v':
      return 'image_to_video';
    case 'hosted_tool_sequence':
    case 'tool_sequence':
      return 'hosted_tool_sequence';
    case 'storyboard_video':
    case 'storyboard_to_video':
    case 'gpt_image_2_seedance':
    case 'gpt_image_seedance':
      return 'storyboard_video';
    default:
      return null;
  }
}
332
+
333
/**
 * Joins an API base URL and a path with exactly one slash between them.
 *
 * Trailing slashes on the base are removed and a leading slash is added to the
 * path when missing. A falsy base falls back to DEFAULT_API_BASE_URL.
 *
 * @param {string|null|undefined} baseUrl - API base URL.
 * @param {string} path - Path to append, with or without a leading slash.
 * @returns {string} The combined URL string (not validated here).
 */
function appendApiPath(baseUrl, path) {
  const rawBase = String(baseUrl || DEFAULT_API_BASE_URL);
  // Linear scan instead of a /\/+$/ replace, which can backtrack on long runs
  // of slashes that are not at the end of the string.
  let end = rawBase.length;
  while (end > 0 && rawBase[end - 1] === '/') end -= 1;
  const base = rawBase.slice(0, end);

  // Coerce so a missing or non-string path yields the base plus '/' rather
  // than a TypeError from calling startsWith on a non-string.
  const rawPath = String(path ?? '');
  const suffix = rawPath.startsWith('/') ? rawPath : `/${rawPath}`;
  return `${base}${suffix}`;
}
338
+
339
/**
 * Resolves the API base URL to use, in priority order:
 * `options.apiBaseUrl`, then the SOGNI_API_BASE_URL and SOGNI_REST_ENDPOINT
 * values from getEnv, then DEFAULT_API_BASE_URL.
 *
 * @returns {string} The first truthy candidate.
 */
function getApiBaseUrl() {
  if (options.apiBaseUrl) return options.apiBaseUrl;

  // Environment names are consulted lazily, in order, like the original chain.
  for (const envName of ['SOGNI_API_BASE_URL', 'SOGNI_REST_ENDPOINT']) {
    const fromEnv = getEnv(envName);
    if (fromEnv) return fromEnv;
  }

  return DEFAULT_API_BASE_URL;
}
342
+
343
/**
 * Builds the host allow-list for API requests: DEFAULT_SAFE_API_HOSTS followed
 * by the comma-separated SOGNI_API_ALLOWED_HOSTS entries, each trimmed and
 * lower-cased. Blank entries are dropped and duplicates removed.
 *
 * @returns {string[]} Unique hosts, defaults first.
 */
function getApiAllowedHosts() {
  const rawList = String(getEnv('SOGNI_API_ALLOWED_HOSTS') || '');
  const hosts = new Set(DEFAULT_SAFE_API_HOSTS);

  for (const entry of rawList.split(',')) {
    const host = entry.trim().toLowerCase();
    if (host) hosts.add(host);
  }

  return [...hosts];
}
350
+
351
/**
 * Reports whether the SOGNI_ALLOW_UNSAFE_API_BASE_URL value from getEnv is
 * exactly the string '1'.
 *
 * @returns {boolean}
 */
function allowUnsafeApiBaseUrl() {
  const flag = getEnv('SOGNI_ALLOW_UNSAFE_API_BASE_URL');
  return flag === '1';
}
354
+
355
/**
 * Reports whether a parsed URL points at the local machine.
 *
 * Matches every entry in LOOPBACK_API_HOSTS, plus equivalent spellings the
 * exact list misses: a fully-qualified trailing dot ('localhost.'), any
 * '*.localhost' name, and the whole 127.0.0.0/8 IPv4 range.
 *
 * @param {URL} parsed - URL instance; `hostname` is read.
 * @returns {boolean} True when the hostname is a loopback name or address.
 */
function isLoopbackApiUrl(parsed) {
  const host = parsed.hostname
    .replace(/^\[|\]$/g, '') // URL keeps the brackets around IPv6 literals
    .toLowerCase()
    .replace(/\.$/, ''); // 'localhost.' is the same host as 'localhost'

  if (LOOPBACK_API_HOSTS.includes(host)) return true;
  // Names under the reserved 'localhost' TLD are loopback as well.
  if (host.endsWith('.localhost')) return true;
  // URL serializes IPv4 hosts as dotted quads, so a prefix test covers 127.0.0.0/8.
  return /^127(?:\.\d{1,3}){3}$/.test(host);
}
358
+
359
/**
 * Builds the full URL for an API path and validates that it is safe to call.
 *
 * Checks, in order:
 *   1. The URL must parse (else code INVALID_API_BASE_URL).
 *   2. It must not embed a username or password.
 *   3. Its protocol must be http: or https:.
 *   4. If allowUnsafeApiBaseUrl() is true, the URL is returned without further checks.
 *   5. Loopback hosts are rejected.
 *   6. assertSafeUrl must accept it with https only and the configured allow-list.
 * Failures in steps 2-6 carry code UNSAFE_API_BASE_URL.
 *
 * @param {string} path - API path to append to the configured base URL.
 * @returns {Promise<string>} The validated URL string.
 * @throws {Error} With a `code` property (and `cause` for guard failures).
 */
async function buildSafeApiUrl(path) {
  const url = appendApiPath(getApiBaseUrl(), path);
  const unsafeAllowed = allowUnsafeApiBaseUrl();

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    throw Object.assign(new Error('Invalid Sogni API base URL.'), {
      code: 'INVALID_API_BASE_URL'
    });
  }

  const hasCredentials = Boolean(parsed.username || parsed.password);
  if (hasCredentials) {
    throw Object.assign(new Error('Sogni API base URL must not contain credentials.'), {
      code: 'UNSAFE_API_BASE_URL'
    });
  }

  if (!['http:', 'https:'].includes(parsed.protocol)) {
    throw Object.assign(new Error(`Sogni API URL protocol ${parsed.protocol} is not allowed.`), {
      code: 'UNSAFE_API_BASE_URL'
    });
  }

  // The explicit opt-out skips the loopback and allow-list checks, but not the
  // structural checks above.
  if (unsafeAllowed) return url;

  if (isLoopbackApiUrl(parsed)) {
    throw Object.assign(
      new Error('Loopback Sogni API base URLs require SOGNI_ALLOW_UNSAFE_API_BASE_URL=1 for isolated local testing.'),
      { code: 'UNSAFE_API_BASE_URL' }
    );
  }

  try {
    await assertSafeUrl(url, {
      allowedProtocols: ['https:'],
      allowedHosts: getApiAllowedHosts()
    });
  } catch (err) {
    // Re-wrap so the user sees how to allow a trusted host, keeping the original as cause.
    throw Object.assign(
      new Error(
        `${err.message}. Set SOGNI_API_ALLOWED_HOSTS for a trusted custom API host, or SOGNI_ALLOW_UNSAFE_API_BASE_URL=1 for isolated local testing.`
      ),
      { code: 'UNSAFE_API_BASE_URL', cause: err }
    );
  }

  return url;
}
408
+
253
409
  function generateRandomSeed() {
254
410
  return randomBytes(4).readUInt32BE(0);
255
411
  }
@@ -275,7 +431,7 @@ function computePromptHashSeed(opts) {
275
431
  const payload = {
276
432
  prompt: opts.prompt || '',
277
433
  model: opts.model || '',
278
- workflow: opts.video ? opts.videoWorkflow : 'image',
434
+ workflow: opts.video ? opts.videoWorkflow : opts.music ? 'music' : 'image',
279
435
  width: opts.width,
280
436
  height: opts.height,
281
437
  azimuth: opts.azimuth || '',
@@ -285,6 +441,15 @@ function computePromptHashSeed(opts) {
285
441
  outputFormat: opts.outputFormat || '',
286
442
  sampler: opts.sampler || '',
287
443
  scheduler: opts.scheduler || '',
444
+ musicLyrics: opts.musicLyrics || '',
445
+ musicLanguage: opts.musicLanguage || '',
446
+ musicBpm: opts.musicBpm ?? null,
447
+ musicKeyscale: opts.musicKeyscale || '',
448
+ musicTimesig: opts.musicTimesig || '',
449
+ musicComposerMode: opts.musicComposerMode ?? null,
450
+ musicPromptStrength: opts.musicPromptStrength ?? null,
451
+ musicCreativity: opts.musicCreativity ?? null,
452
+ musicShift: opts.musicShift ?? null,
288
453
  targetResolution: opts.targetResolution ?? null,
289
454
  loras: opts.loras || [],
290
455
  loraStrengths: opts.loraStrengths || [],
@@ -338,6 +503,17 @@ function parseNumberList(raw, flagName) {
338
503
  return entries.map((entry) => parseNumberValue(entry, flagName));
339
504
  }
340
505
 
506
/**
 * Parses a numeric flag value with parseNumberValue and enforces inclusive bounds.
 *
 * When the parsed number is below `limits.min` or above `limits.max`,
 * fatalCliError is invoked with code INVALID_ARGUMENT and the flag details.
 *
 * @param {string} raw - Raw flag value.
 * @param {string} flagName - Flag name used in the error message and details.
 * @param {{min: number, max: number}} limits - Inclusive bounds.
 * @returns {number} The parsed number.
 */
function parseBoundedNumberValue(raw, flagName, limits) {
  const num = parseNumberValue(raw, flagName);
  const { min, max } = limits;

  // Kept as two "outside" comparisons so a NaN is treated exactly as before.
  const outOfRange = num < min || num > max;
  if (outOfRange) {
    const details = { flag: flagName, value: raw, min, max };
    fatalCliError(`${flagName} must be between ${min} and ${max}.`, {
      code: 'INVALID_ARGUMENT',
      details
    });
  }

  return num;
}
516
+
341
517
  function requireFlagValue(argv, index, flagName) {
342
518
  const value = argv[index + 1];
343
519
  if (value === undefined) {
@@ -419,17 +595,12 @@ function isHttpUrl(value) {
419
595
  }
420
596
 
421
597
  function isHttpsUrl(value) {
422
- return typeof value === 'string' && value.startsWith('https://');
423
- }
424
-
425
- function isSeedanceModelSelection(modelId) {
426
- if (!modelId) return false;
427
- return (
428
- isSeedanceModel(modelId) ||
429
- isSeedanceModel(resolveVideoModelAlias(modelId, 't2v')) ||
430
- isSeedanceModel(resolveVideoModelAlias(modelId, 'ia2v')) ||
431
- isSeedanceModel(resolveVideoModelAlias(modelId, 'v2v'))
432
- );
598
+ if (typeof value !== 'string') return false;
599
+ try {
600
+ return new URL(value).protocol === 'https:';
601
+ } catch {
602
+ return false;
603
+ }
433
604
  }
434
605
 
435
606
  function getPngDimensions(buffer) {
@@ -530,6 +701,38 @@ function isWanAnimateVideoModelId(modelId) {
530
701
  );
531
702
  }
532
703
 
704
/**
 * Reports whether a model id is one of the accepted GPT Image 2 spellings.
 * Comparison is case-insensitive and ignores surrounding whitespace.
 *
 * @param {string|null|undefined} modelId - Model id or alias.
 * @returns {boolean}
 */
function isGptImage2ModelSelection(modelId) {
  switch (String(modelId || '').trim().toLowerCase()) {
    case 'gpt-image-2':
    case 'gptimage2':
    case 'gpt-image':
    case 'gpt_image_2':
      return true;
    default:
      return false;
  }
}
708
+
709
/**
 * Resolves a music model id or alias to its canonical model id.
 *
 * The input is trimmed and lower-cased, hyphens become underscores, and the
 * 'ace_step_1_5' spelling is rewritten to 'ace_step_1.5'. The result is looked
 * up in MUSIC_MODEL_IDS (aliases) and then MUSIC_MODEL_DEFAULTS (canonical ids).
 *
 * @param {string|null|undefined} value - Raw model id or alias.
 * @returns {string|null} Canonical model id, or null when empty or unknown.
 */
function normalizeMusicModelId(value) {
  const raw = String(value || '').trim();
  if (!raw) return null;

  const normalized = raw
    .toLowerCase()
    .replace(/-/g, '_')
    .replace(/ace_step_1_5/g, 'ace_step_1.5');

  // Own-property checks only. A bare `table[key]` lookup also finds inherited
  // members, so input like 'constructor' or '__proto__' would come back as a
  // function or object instead of null.
  const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

  if (hasOwn(MUSIC_MODEL_IDS, normalized)) return MUSIC_MODEL_IDS[normalized];
  return hasOwn(MUSIC_MODEL_DEFAULTS, normalized) ? normalized : null;
}
715
+
716
/**
 * Looks up the MUSIC_MODEL_DEFAULTS entry for a music model id or alias.
 *
 * @param {string|null|undefined} modelId - Raw model id or alias.
 * @returns {object|null} The defaults entry, or null when there is none.
 */
function getMusicModelDefaults(modelId) {
  const canonicalId = normalizeMusicModelId(modelId);
  const defaults = MUSIC_MODEL_DEFAULTS[canonicalId];
  return defaults || null;
}
719
+
720
/**
 * Reduces a time signature to its numerator when it is written as 2, 3, 4 or 6,
 * optionally followed by '/4' or '/8' (whitespace around the slash allowed).
 *
 * @param {string|number|null|undefined} value - Raw time signature.
 * @returns {string|null} The numerator as a string, the trimmed input unchanged
 *   when it does not match that form, or null when the input is empty.
 */
function normalizeMusicTimeSignature(value) {
  const raw = String(value || '').trim();
  if (raw === '') return null;

  const parsed = /^([2346])(?:\s*\/\s*(?:4|8))?$/.exec(raw);
  if (parsed === null) {
    // Unrecognized forms pass through untouched.
    return raw;
  }
  return parsed[1];
}
726
+
727
/**
 * Reports whether the model is a GPT Image 2 selection or a Seedance model,
 * the two cases this helper treats as requiring the Spark token.
 *
 * @param {string|null|undefined} modelId - Model id or alias.
 * @returns {boolean} True for GPT Image 2; otherwise the isSeedanceModel result.
 */
function requiresSparkOnlyToken(modelId) {
  if (isGptImage2ModelSelection(modelId)) return true;
  return isSeedanceModel(modelId);
}
730
+
731
/**
 * Returns the maximum number of context images for a model: 16 for GPT Image 2
 * selections, otherwise whatever getWrapperMaxContextImages reports.
 *
 * @param {string|null|undefined} modelId - Model id or alias.
 * @returns {number}
 */
function getMaxContextImages(modelId) {
  return isGptImage2ModelSelection(modelId) ? 16 : getWrapperMaxContextImages(modelId);
}
735
+
533
736
  function videoDurationLimitsLikeWrapper(modelId) {
534
737
  if (isSeedanceModel(modelId)) return { min: 4, max: 15 };
535
738
  if (isLtx2Model(modelId) || isWanAnimateVideoModelId(modelId)) return { min: 1, max: 20 };
@@ -800,7 +1003,7 @@ const openclawConfig = loadOpenClawPluginConfig();
800
1003
  const CREDENTIALS_PATH = resolveConfiguredPath(
801
1004
  getEnv('SOGNI_CREDENTIALS_PATH') || openclawConfig?.credentialsPath,
802
1005
  DEFAULT_CREDENTIALS_PATH,
803
- 'SOGNI credentials path'
1006
+ 'SOGNI API key credentials path'
804
1007
  );
805
1008
  const LAST_RENDER_PATH = resolveConfiguredPath(
806
1009
  getEnv('SOGNI_LAST_RENDER_PATH') || openclawConfig?.lastRenderPath,
@@ -845,6 +1048,16 @@ const options = {
845
1048
  seed: null,
846
1049
  lastSeed: false,
847
1050
  seedStrategy: null,
1051
+ music: false,
1052
+ musicLyrics: null,
1053
+ musicLanguage: null,
1054
+ musicBpm: null,
1055
+ musicKeyscale: null,
1056
+ musicTimesig: null,
1057
+ musicComposerMode: null,
1058
+ musicPromptStrength: null,
1059
+ musicCreativity: null,
1060
+ musicShift: null,
848
1061
  video: false,
849
1062
  videoWorkflow: null,
850
1063
  fps: 16,
@@ -898,6 +1111,24 @@ const options = {
898
1111
  personaVoice: null,
899
1112
  personaVoiceClip: null,
900
1113
  personaPhoto: null, // alias for --ref when used with --persona-add
1114
+ apiChat: false,
1115
+ apiBaseUrl: null,
1116
+ llmModel: DEFAULT_LLM_MODEL,
1117
+ apiTools: 'creative-agent',
1118
+ apiToolExecution: true,
1119
+ apiSystemPrompt: null,
1120
+ apiWorkflowAction: null, // start|list|get|events|stream|cancel
1121
+ apiWorkflowKind: null, // image_to_video|hosted_tool_sequence
1122
+ apiWorkflowInput: null,
1123
+ apiWorkflowTitle: null,
1124
+ apiWorkflowIdempotencyKey: null,
1125
+ apiWorkflowId: null,
1126
+ apiWorkflowWatch: false,
1127
+ apiVideoPrompt: null,
1128
+ apiNegativePrompt: null,
1129
+ apiGenerateAudio: null,
1130
+ apiExpandPrompt: null,
1131
+ storyboardFrames: null,
901
1132
  noFilter: false // Disable NSFW content filter
902
1133
  };
903
1134
  const cliSet = {
@@ -925,6 +1156,16 @@ const cliSet = {
925
1156
  angleDescription: false,
926
1157
  seed: false,
927
1158
  seedStrategy: false,
1159
+ music: false,
1160
+ musicLyrics: false,
1161
+ musicLanguage: false,
1162
+ musicBpm: false,
1163
+ musicKeyscale: false,
1164
+ musicTimesig: false,
1165
+ musicComposerMode: false,
1166
+ musicPromptStrength: false,
1167
+ musicCreativity: false,
1168
+ musicShift: false,
928
1169
  video: false,
929
1170
  workflow: false,
930
1171
  fps: false,
@@ -953,7 +1194,20 @@ const cliSet = {
953
1194
  sam2Coordinates: false,
954
1195
  trimEndFrame: false,
955
1196
  firstFrameStrength: false,
956
- lastFrameStrength: false
1197
+ lastFrameStrength: false,
1198
+ apiBaseUrl: false,
1199
+ llmModel: false,
1200
+ apiTools: false,
1201
+ apiSystemPrompt: false,
1202
+ apiWorkflowKind: false,
1203
+ apiWorkflowInput: false,
1204
+ apiWorkflowTitle: false,
1205
+ apiWorkflowIdempotencyKey: false,
1206
+ apiVideoPrompt: false,
1207
+ apiNegativePrompt: false,
1208
+ apiGenerateAudio: false,
1209
+ apiExpandPrompt: false,
1210
+ storyboardFrames: false
957
1211
  };
958
1212
 
959
1213
  // Parse CLI args
@@ -1099,6 +1353,70 @@ for (let i = 0; i < args.length; i++) {
1099
1353
  cliSet.seedStrategy = true;
1100
1354
  } else if (arg === '--last-seed' || arg === '--reseed') {
1101
1355
  options.lastSeed = true;
1356
+ } else if (arg === '--music' || arg === '--generate-music') {
1357
+ options.music = true;
1358
+ cliSet.music = true;
1359
+ } else if (arg === '--music-model' || arg === '--audio-model') {
1360
+ const raw = requireFlagValue(args, i, arg);
1361
+ i++;
1362
+ options.model = raw;
1363
+ cliSet.model = true;
1364
+ } else if (arg === '--lyrics') {
1365
+ const raw = requireFlagValue(args, i, arg);
1366
+ i++;
1367
+ options.musicLyrics = raw;
1368
+ cliSet.musicLyrics = true;
1369
+ } else if (arg === '--language' || arg === '--lyrics-language') {
1370
+ const raw = requireFlagValue(args, i, arg);
1371
+ i++;
1372
+ options.musicLanguage = raw;
1373
+ cliSet.musicLanguage = true;
1374
+ } else if (arg === '--bpm') {
1375
+ const raw = requireFlagValue(args, i, arg);
1376
+ i++;
1377
+ options.musicBpm = parseBoundedNumberValue(raw, arg, MUSIC_BPM_LIMITS);
1378
+ cliSet.musicBpm = true;
1379
+ } else if (arg === '--keyscale' || arg === '--key-scale' || arg === '--key') {
1380
+ const raw = requireFlagValue(args, i, arg);
1381
+ i++;
1382
+ options.musicKeyscale = raw;
1383
+ cliSet.musicKeyscale = true;
1384
+ } else if (arg === '--timesig' || arg === '--time-signature') {
1385
+ const raw = requireFlagValue(args, i, arg);
1386
+ i++;
1387
+ options.musicTimesig = normalizeMusicTimeSignature(raw);
1388
+ cliSet.musicTimesig = true;
1389
+ } else if (arg === '--composer-mode') {
1390
+ options.musicComposerMode = true;
1391
+ cliSet.musicComposerMode = true;
1392
+ } else if (arg === '--no-composer-mode') {
1393
+ options.musicComposerMode = false;
1394
+ cliSet.musicComposerMode = true;
1395
+ } else if (arg === '--prompt-strength') {
1396
+ const raw = requireFlagValue(args, i, arg);
1397
+ i++;
1398
+ options.musicPromptStrength = parseBoundedNumberValue(raw, arg, MUSIC_PROMPT_STRENGTH_LIMITS);
1399
+ cliSet.musicPromptStrength = true;
1400
+ } else if (arg === '--creativity') {
1401
+ const raw = requireFlagValue(args, i, arg);
1402
+ i++;
1403
+ options.musicCreativity = parseBoundedNumberValue(raw, arg, MUSIC_CREATIVITY_LIMITS);
1404
+ cliSet.musicCreativity = true;
1405
+ } else if (arg === '--music-shift' || arg === '--audio-shift') {
1406
+ const raw = requireFlagValue(args, i, arg);
1407
+ i++;
1408
+ options.musicShift = parseNumberValue(raw, arg);
1409
+ cliSet.musicShift = true;
1410
+ } else if (arg === '--audio-format') {
1411
+ const raw = requireFlagValue(args, i, arg);
1412
+ i++;
1413
+ options.outputFormat = raw;
1414
+ cliSet.outputFormat = true;
1415
+ } else if (arg === '--length') {
1416
+ const raw = requireFlagValue(args, i, arg);
1417
+ i++;
1418
+ options.duration = parsePositiveIntegerValue(raw, arg);
1419
+ cliSet.duration = true;
1102
1420
  } else if (arg === '--video' || arg === '-v') {
1103
1421
  options.video = true;
1104
1422
  cliSet.video = true;
@@ -1278,6 +1596,103 @@ for (let i = 0; i < args.length; i++) {
1278
1596
  } else {
1279
1597
  options.listMedia = 'images';
1280
1598
  }
1599
+ // --- Hosted Sogni API paths ---
1600
+ } else if (arg === '--api-chat') {
1601
+ options.apiChat = true;
1602
+ } else if (arg === '--api-base-url' || arg === '--api-base') {
1603
+ const raw = requireFlagValue(args, i, arg);
1604
+ i++;
1605
+ options.apiBaseUrl = raw;
1606
+ cliSet.apiBaseUrl = true;
1607
+ } else if (arg === '--llm-model') {
1608
+ const raw = requireFlagValue(args, i, arg);
1609
+ i++;
1610
+ options.llmModel = raw;
1611
+ cliSet.llmModel = true;
1612
+ } else if (arg === '--api-tools') {
1613
+ const raw = requireFlagValue(args, i, arg);
1614
+ i++;
1615
+ options.apiTools = raw;
1616
+ cliSet.apiTools = true;
1617
+ } else if (arg === '--no-api-tool-execution') {
1618
+ options.apiToolExecution = false;
1619
+ } else if (arg === '--system') {
1620
+ const raw = requireFlagValue(args, i, arg);
1621
+ i++;
1622
+ options.apiSystemPrompt = raw;
1623
+ cliSet.apiSystemPrompt = true;
1624
+ } else if (arg === '--api-workflow' || arg === '--creative-workflow') {
1625
+ const raw = requireFlagValue(args, i, arg);
1626
+ i++;
1627
+ options.apiWorkflowAction = 'start';
1628
+ options.apiWorkflowKind = raw;
1629
+ cliSet.apiWorkflowKind = true;
1630
+ } else if (arg === '--workflow-input') {
1631
+ const raw = requireFlagValue(args, i, arg);
1632
+ i++;
1633
+ options.apiWorkflowInput = raw;
1634
+ cliSet.apiWorkflowInput = true;
1635
+ } else if (arg === '--workflow-title') {
1636
+ const raw = requireFlagValue(args, i, arg);
1637
+ i++;
1638
+ options.apiWorkflowTitle = raw;
1639
+ cliSet.apiWorkflowTitle = true;
1640
+ } else if (arg === '--workflow-idempotency-key' || arg === '--idempotency-key') {
1641
+ const raw = requireFlagValue(args, i, arg);
1642
+ i++;
1643
+ options.apiWorkflowIdempotencyKey = raw;
1644
+ cliSet.apiWorkflowIdempotencyKey = true;
1645
+ } else if (arg === '--storyboard-frames') {
1646
+ const raw = requireFlagValue(args, i, arg);
1647
+ i++;
1648
+ options.storyboardFrames = parsePositiveIntegerValue(raw, arg);
1649
+ cliSet.storyboardFrames = true;
1650
+ } else if (arg === '--video-prompt') {
1651
+ const raw = requireFlagValue(args, i, arg);
1652
+ i++;
1653
+ options.apiVideoPrompt = raw;
1654
+ cliSet.apiVideoPrompt = true;
1655
+ } else if (arg === '--negative-prompt') {
1656
+ const raw = requireFlagValue(args, i, arg);
1657
+ i++;
1658
+ options.apiNegativePrompt = raw;
1659
+ cliSet.apiNegativePrompt = true;
1660
+ } else if (arg === '--generate-audio') {
1661
+ options.apiGenerateAudio = true;
1662
+ cliSet.apiGenerateAudio = true;
1663
+ } else if (arg === '--no-generate-audio') {
1664
+ options.apiGenerateAudio = false;
1665
+ cliSet.apiGenerateAudio = true;
1666
+ } else if (arg === '--expand-prompt') {
1667
+ options.apiExpandPrompt = true;
1668
+ cliSet.apiExpandPrompt = true;
1669
+ } else if (arg === '--no-expand-prompt') {
1670
+ options.apiExpandPrompt = false;
1671
+ cliSet.apiExpandPrompt = true;
1672
+ } else if (arg === '--watch-workflow' || arg === '--watch') {
1673
+ options.apiWorkflowWatch = true;
1674
+ } else if (arg === '--list-workflows') {
1675
+ options.apiWorkflowAction = 'list';
1676
+ } else if (arg === '--get-workflow') {
1677
+ const raw = requireFlagValue(args, i, arg);
1678
+ i++;
1679
+ options.apiWorkflowAction = 'get';
1680
+ options.apiWorkflowId = raw;
1681
+ } else if (arg === '--workflow-events') {
1682
+ const raw = requireFlagValue(args, i, arg);
1683
+ i++;
1684
+ options.apiWorkflowAction = 'events';
1685
+ options.apiWorkflowId = raw;
1686
+ } else if (arg === '--stream-workflow') {
1687
+ const raw = requireFlagValue(args, i, arg);
1688
+ i++;
1689
+ options.apiWorkflowAction = 'stream';
1690
+ options.apiWorkflowId = raw;
1691
+ } else if (arg === '--cancel-workflow') {
1692
+ const raw = requireFlagValue(args, i, arg);
1693
+ i++;
1694
+ options.apiWorkflowAction = 'cancel';
1695
+ options.apiWorkflowId = raw;
1281
1696
  // --- Memory commands ---
1282
1697
  } else if (arg === '--memory-set') {
1283
1698
  options.memoryAction = 'set';
@@ -1368,7 +1783,7 @@ for (let i = 0; i < args.length; i++) {
1368
1783
  options.showVersion = true;
1369
1784
  } else if (arg === '--help') {
1370
1785
  console.log(`
1371
- sogni-agent - Generate images and videos using Sogni AI
1786
+ sogni-agent - Generate images, videos, and music using Sogni AI
1372
1787
 
1373
1788
  Usage: sogni-agent [options] "prompt"
1374
1789
 
@@ -1378,7 +1793,7 @@ Image Options:
1378
1793
  -m, --model <id> Model (default: z_image_turbo_bf16, overrides --quality)
1379
1794
  -w, --width <px> Width (default: 512)
1380
1795
  -h, --height <px> Height (default: 512)
1381
- -n, --count <num> Number of images (default: 1)
1796
+ -n, --count <num> Number of outputs (default: 1)
1382
1797
  -s, --seed <num> Use specific seed
1383
1798
  --last-seed Reuse seed from previous render
1384
1799
  --seed-strategy <s> Seed strategy: random|prompt-hash
@@ -1391,7 +1806,7 @@ Image Options:
1391
1806
  --distance <key> close-up|medium|wide
1392
1807
  --angle-strength <n> LoRA strength for multiple_angles (default: 0.9)
1393
1808
  --angle-description <text> Optional subject description
1394
- --output-format <f> Image output format: png|jpg
1809
+ --output-format <f> Image output format: png|jpg (webp for gpt-image-2)
1395
1810
  --sampler <name> Sampler (model-dependent)
1396
1811
  --scheduler <name> Scheduler (model-dependent)
1397
1812
  --lora <id> LoRA id (repeatable, edit only)
@@ -1407,6 +1822,23 @@ Photobooth (Face Transfer):
1407
1822
  --cn-strength <n> ControlNet strength (default: 0.8)
1408
1823
  --cn-guidance-end <n> ControlNet guidance end point (default: 0.3)
1409
1824
 
1825
+ Music Options:
1826
+ --music Generate music/audio instead of image
1827
+ --music-model <id> Music model: turbo|sft|ace_step_1.5_turbo|ace_step_1.5_sft
1828
+ --lyrics <text> Optional song lyrics (omit for instrumental)
1829
+ --language <code> Lyrics language code (default: en)
1830
+ --duration <sec> Music duration in seconds (10-600, default: 30)
1831
+ --length <sec> Alias for --duration
1832
+ --bpm <num> Beats per minute (30-300)
1833
+ --keyscale <text> Key/scale, e.g. "C major" or "A minor"
1834
+ --timesig <n> Time signature: 2|3|4|6 (also accepts 4/4)
1835
+ --composer-mode Enable AI composer mode
1836
+ --no-composer-mode Disable AI composer mode
1837
+ --prompt-strength <n> Prompt adherence (0-10)
1838
+ --creativity <n> Composition variation/temperature (0-2)
1839
+ --music-shift <n> Audio model shift parameter (1-6)
1840
+ --audio-format <f> Alias for --output-format: mp3|flac|wav
1841
+
1410
1842
  Video Options:
1411
1843
  --video, -v Generate video instead of image
1412
1844
  --workflow <type> Video workflow: t2v|i2v|s2v|ia2v|a2v|v2v|animate-move|animate-replace
@@ -1435,8 +1867,31 @@ Video Options:
1435
1867
  --looping, --loop Create seamless loop (i2v only): A→B→A
1436
1868
  --last-image Use last generated image as reference
1437
1869
 
1870
+ Hosted API Modes:
1871
+ --api-chat Use /v1/chat/completions with rich creative-agent tools
1872
+ --api-tools <mode> creative-agent|rich|hosted|none (default: creative-agent)
1873
+ --no-api-tool-execution Ask for tool calls/plans but do not execute Sogni tools
1874
+ --llm-model <id> LLM model for --api-chat (default: ${DEFAULT_LLM_MODEL})
1875
+ --system <text> System prompt for --api-chat
1876
+ --api-workflow <kind> Start /v1/creative-agent/workflows: image-to-video|hosted-tool-sequence|storyboard-video
1877
+ --workflow-input <json|path|@path> JSON input for hosted-tool-sequence/custom image-to-video/storyboard-video
1878
+ --workflow-title <text> Title for hosted-tool-sequence or storyboard-video workflow input
1879
+ --workflow-idempotency-key <key> Reuse safely when retrying a workflow start request
1880
+ --storyboard-frames <n> Frame/beat count for --api-workflow storyboard-video
1881
+ --video-prompt <text> Motion prompt for --api-workflow image-to-video
1882
+ --negative-prompt <text> Negative prompt for --api-workflow image-to-video
1883
+ --generate-audio, --no-generate-audio Toggle audio generation for image-to-video workflows
1884
+ --expand-prompt, --no-expand-prompt Toggle prompt expansion for image-to-video workflows
1885
+ --watch-workflow Stream workflow events after starting
1886
+ --list-workflows List recent durable creative workflows
1887
+ --get-workflow <id> Fetch a workflow snapshot
1888
+ --workflow-events <id> Fetch workflow event history
1889
+ --stream-workflow <id> Stream workflow events over SSE
1890
+ --cancel-workflow <id> Cancel a running workflow
1891
+ --api-base-url <url> Sogni API base URL (default: ${DEFAULT_API_BASE_URL})
1892
+
1438
1893
  General:
1439
- -t, --timeout <sec> Timeout in seconds (default: 30, video: 300)
1894
+ -t, --timeout <sec> Timeout in seconds (default: 30, video: 300, music: 600)
1440
1895
  --steps <num> Override steps (model-dependent)
1441
1896
  --guidance <num> Override guidance (model-dependent)
1442
1897
  --token-type <type> Token type: spark|sogni|auto (default: spark, auto retries with alternate)
@@ -1479,6 +1934,7 @@ Personas (named people with reference photos):
1479
1934
 
1480
1935
  Image Models:
1481
1936
  z_image_turbo_bf16 Fast, general purpose (default)
1937
+ gpt-image-2 OpenAI GPT Image 2 text-to-image and edit (up to 16 context images)
1482
1938
  flux1-schnell-fp8 Very fast
1483
1939
  flux2_dev_fp8 High quality (slow)
1484
1940
  qwen_image_edit_2511_fp8 Image editing with context (up to 3 images)
@@ -1491,7 +1947,11 @@ Recommended LTX 2.3 Video Models:
1491
1947
  ltx23-22b-fp8_a2v_distilled Audio-to-video
1492
1948
  ltx23-22b-fp8_v2v_distilled Video-to-video with ControlNet
1493
1949
 
1494
- Seedance 2.0 Video Aliases:
1950
+ Music Models:
1951
+ ace_step_1.5_turbo Default direct music generation
1952
+ ace_step_1.5_sft Experimental model with stronger lyric handling
1953
+
1954
+ Seedance 2.0 Video Model Selectors:
1495
1955
  seedance2 Text-to-video, 4-15s, native audio, HTTPS multimodal refs
1496
1956
  seedance2-fast Fast 720p-capped text-to-video
1497
1957
  seedance2-ia2v Image+audio-to-video
@@ -1522,11 +1982,16 @@ Examples:
1522
1982
  sogni-agent --multi-angle -c subject.jpg --azimuth front-right --elevation eye-level --distance medium "studio portrait"
1523
1983
  sogni-agent --angles-360 -c subject.jpg "studio portrait"
1524
1984
  sogni-agent --video --ref cat.jpg -o cat.mp4 "cat walks around"
1525
- sogni-agent --video "A narrator says \"welcome to the story\" as ocean waves crash"
1985
+ sogni-agent --video 'A narrator says "welcome to the story" as ocean waves crash'
1526
1986
  sogni-agent --video --ref cat.jpg --ref-audio speech.m4a -m wan_v2.2-14b-fp8_s2v_lightx2v "lip sync"
1527
1987
  sogni-agent --video --ref cover.jpg --ref-audio song.mp3 "music video"
1528
1988
  sogni-agent --video --ref-audio song.mp3 "abstract music visualizer"
1529
- sogni-agent --video --reference-audio-identity voice.webm "NARRATOR: \"This is my voice.\""
1989
+ sogni-agent --music --duration 30 "uplifting cinematic synthwave theme for a product launch"
1990
+ sogni-agent --music --lyrics "Rise with the morning light" --bpm 128 --keyscale "C major" --output-format mp3 "bright indie pop chorus"
1991
+ sogni-agent --video --reference-audio-identity voice.webm 'NARRATOR: "This is my voice."'
1992
+ sogni-agent --api-chat "Create a 4-shot product video concept for a red sneaker"
1993
+ sogni-agent --api-workflow image-to-video --video-prompt "slow push-in as it comes alive" "a graphite robot sketch"
1994
+ sogni-agent --api-workflow storyboard-video --storyboard-frames 6 "Create a 12s 9:16 bakery launch video with GPT Image 2 and Seedance"
1530
1995
  sogni-agent --video -m ltx23-22b-fp8_t2v_distilled --duration 20 "A wide cinematic aerial shot opens over steep tropical cliffs at golden hour, warm sunlight grazing the rock faces while sea mist drifts above the water below. Palm trees bend gently along the ridge as waves roll against the shoreline, leaving bright bands of foam across the dark stone. The camera glides forward in one continuous pass, revealing more of the coastline as sunlight flickers across wet surfaces and distant birds wheel through the haze. The scene holds a calm, upscale travel-film mood with smooth stabilized motion and crisp environmental detail."
1531
1996
  sogni-agent --video --ref subject.jpg --ref-video motion.mp4 --workflow animate-move "transfer motion"
1532
1997
  sogni-agent --video --last-image "gentle camera pan"
@@ -1561,6 +2026,7 @@ let widthFromPrompt = false;
1561
2026
  let heightFromPrompt = false;
1562
2027
  let targetResolutionFromPrompt = false;
1563
2028
  let durationFromPrompt = false;
2029
+ let aspectRatioFromPrompt = null;
1564
2030
  let configuredDefaultVideoWorkflow = null;
1565
2031
  if (openclawConfig) {
1566
2032
  const isNumber = (value) => Number.isFinite(value);
@@ -1578,10 +2044,27 @@ if (openclawConfig) {
1578
2044
  if (!cliSet.tokenType && openclawConfig.defaultTokenType) {
1579
2045
  options.tokenType = openclawConfig.defaultTokenType;
1580
2046
  }
2047
+ if (!cliSet.apiBaseUrl && openclawConfig.apiBaseUrl) {
2048
+ options.apiBaseUrl = openclawConfig.apiBaseUrl;
2049
+ }
2050
+ if (!cliSet.llmModel && openclawConfig.defaultLlmModel) {
2051
+ options.llmModel = openclawConfig.defaultLlmModel;
2052
+ }
2053
+ if (!cliSet.apiTools && openclawConfig.defaultApiToolMode) {
2054
+ options.apiTools = openclawConfig.defaultApiToolMode;
2055
+ }
1581
2056
  if (!cliSet.seedStrategy && openclawConfig.seedStrategy) {
1582
2057
  options.seedStrategy = openclawConfig.seedStrategy;
1583
2058
  }
1584
- if (options.video) {
2059
+ if (options.music) {
2060
+ if (!cliSet.duration && isNumber(openclawConfig.defaultMusicDurationSec)) {
2061
+ options.duration = openclawConfig.defaultMusicDurationSec;
2062
+ }
2063
+ if (!cliSet.timeout && isNumber(openclawConfig.defaultMusicTimeoutSec)) {
2064
+ options.timeout = openclawConfig.defaultMusicTimeoutSec * 1000;
2065
+ timeoutFromConfig = true;
2066
+ }
2067
+ } else if (options.video) {
1585
2068
  if (!cliSet.workflow && openclawConfig.defaultVideoWorkflow) {
1586
2069
  configuredDefaultVideoWorkflow = openclawConfig.defaultVideoWorkflow;
1587
2070
  }
@@ -1613,6 +2096,26 @@ if (options.tokenType) {
1613
2096
  options.tokenType = token;
1614
2097
  }
1615
2098
 
2099
+ const normalizedApiToolMode = normalizeApiToolMode(options.apiTools);
2100
+ if (normalizedApiToolMode === null) {
2101
+ fatalCliError('--api-tools must be "creative-agent", "rich", "hosted", or "none".', {
2102
+ code: 'INVALID_ARGUMENT',
2103
+ details: { flag: '--api-tools', value: options.apiTools }
2104
+ });
2105
+ }
2106
+ options.apiTools = normalizedApiToolMode;
2107
+
2108
+ if (options.apiWorkflowKind) {
2109
+ const normalized = normalizeApiWorkflowKind(options.apiWorkflowKind);
2110
+ if (!normalized) {
2111
+ fatalCliError('--api-workflow must be "image-to-video", "hosted-tool-sequence", or "storyboard-video".', {
2112
+ code: 'INVALID_ARGUMENT',
2113
+ details: { flag: '--api-workflow', value: options.apiWorkflowKind }
2114
+ });
2115
+ }
2116
+ options.apiWorkflowKind = normalized;
2117
+ }
2118
+
1616
2119
  if (options.quality) {
1617
2120
  if (!QUALITY_TIERS[options.quality]) {
1618
2121
  fatalCliError('--quality must be "fast", "hq", or "pro".', {
@@ -1620,8 +2123,13 @@ if (options.quality) {
1620
2123
  details: { flag: '--quality', value: options.quality }
1621
2124
  });
1622
2125
  }
2126
+ if (options.music) {
2127
+ fatalCliError('--quality is not used for --music. Use --music-model turbo|sft for music model selection.', {
2128
+ code: 'INVALID_ARGUMENT'
2129
+ });
2130
+ }
1623
2131
  const tier = QUALITY_TIERS[options.quality];
1624
- if (!options.video) {
2132
+ if (!options.video && !options.music) {
1625
2133
  // Only apply model if user didn't explicitly set one.
1626
2134
  if (!cliSet.model) {
1627
2135
  options.model = tier.model;
@@ -1663,6 +2171,26 @@ if (cliSet.guidance && !Number.isFinite(options.guidance)) {
1663
2171
  });
1664
2172
  }
1665
2173
 
2174
+ if (options.music && options.video) {
2175
+ fatalCliError('--music cannot be combined with --video.', { code: 'INVALID_ARGUMENT' });
2176
+ }
2177
+
2178
+ if (options.music && (
2179
+ cliSet.width ||
2180
+ cliSet.height ||
2181
+ options.strictSize ||
2182
+ options.multiAngle ||
2183
+ options.angles360Video ||
2184
+ options.photobooth ||
2185
+ options.contextImages.length > 0 ||
2186
+ options.refImage ||
2187
+ options.refImageEnd
2188
+ )) {
2189
+ fatalCliError('--music cannot be combined with image/video reference or sizing options.', {
2190
+ code: 'INVALID_ARGUMENT'
2191
+ });
2192
+ }
2193
+
1666
2194
  if (options.multiAngle) {
1667
2195
  if (options.video) {
1668
2196
  fatalCliError('--multi-angle is only for image editing.', { code: 'INVALID_ARGUMENT' });
@@ -1758,15 +2286,22 @@ if (options.multiAngle) {
1758
2286
  if (options.outputFormat) {
1759
2287
  const normalized = options.outputFormat.toLowerCase();
1760
2288
  options.outputFormat = normalized === 'jpeg' ? 'jpg' : normalized;
1761
- if (options.video) {
2289
+ if (options.music) {
2290
+ if (!MUSIC_OUTPUT_FORMATS.has(options.outputFormat)) {
2291
+ fatalCliError('Music output format must be "mp3", "flac", or "wav".', {
2292
+ code: 'INVALID_ARGUMENT',
2293
+ details: { outputFormat: options.outputFormat }
2294
+ });
2295
+ }
2296
+ } else if (options.video) {
1762
2297
  if (options.outputFormat !== 'mp4') {
1763
2298
  fatalCliError('Video output format must be "mp4".', {
1764
2299
  code: 'INVALID_ARGUMENT',
1765
2300
  details: { outputFormat: options.outputFormat }
1766
2301
  });
1767
2302
  }
1768
- } else if (!['png', 'jpg'].includes(options.outputFormat)) {
1769
- fatalCliError('Image output format must be "png" or "jpg".', {
2303
+ } else if (!['png', 'jpg', ...(isGptImage2ModelSelection(options.model) ? ['webp'] : [])].includes(options.outputFormat)) {
2304
+ fatalCliError(isGptImage2ModelSelection(options.model) ? 'GPT Image 2 output format must be "png", "jpg", or "webp".' : 'Image output format must be "png" or "jpg".', {
1770
2305
  code: 'INVALID_ARGUMENT',
1771
2306
  details: { outputFormat: options.outputFormat }
1772
2307
  });
@@ -1785,7 +2320,7 @@ if (options.loraStrengths.length > 0 && options.loras.length > 0 &&
1785
2320
  });
1786
2321
  }
1787
2322
 
1788
- if (options.video && options.loras.length > 0) {
2323
+ if ((options.video || options.music) && options.loras.length > 0) {
1789
2324
  fatalCliError('--lora options are image-only.', { code: 'INVALID_ARGUMENT' });
1790
2325
  }
1791
2326
 
@@ -1820,6 +2355,15 @@ if (options.video) {
1820
2355
  options.videoWorkflow = normalized;
1821
2356
  }
1822
2357
 
2358
+ if (
2359
+ options._lastImagePath &&
2360
+ !options.refImage &&
2361
+ (!options.videoWorkflow || workflowRequiresImage(options.videoWorkflow) || isSeedanceModelSelection(options.model))
2362
+ ) {
2363
+ options.refImage = options._lastImagePath;
2364
+ delete options._lastImagePath;
2365
+ }
2366
+
1823
2367
  applyCreativeBrainPreflight();
1824
2368
 
1825
2369
  if (!options.videoWorkflow && isSeedanceModelSelection(options.model)) {
@@ -1864,7 +2408,41 @@ if (options._lastImagePath) {
1864
2408
  }
1865
2409
 
1866
2410
  // Set defaults based on type and context
1867
- if (options.video) {
2411
+ if (options.music) {
2412
+ const configuredMusicModel = options.model || openclawConfig?.defaultMusicModel || 'turbo';
2413
+ options.model = normalizeMusicModelId(configuredMusicModel);
2414
+ if (!options.model) {
2415
+ fatalCliError(`Unknown music model "${configuredMusicModel}". Use turbo, sft, ace_step_1.5_turbo, or ace_step_1.5_sft.`, {
2416
+ code: 'INVALID_ARGUMENT',
2417
+ details: { flag: cliSet.model ? '--model' : 'defaultMusicModel', value: configuredMusicModel }
2418
+ });
2419
+ }
2420
+ const musicDefaults = getMusicModelDefaults(options.model);
2421
+ if (!cliSet.duration || !Number.isFinite(options.duration)) {
2422
+ options.duration = MUSIC_DURATION_LIMITS.default;
2423
+ }
2424
+ if (!options.outputFormat) {
2425
+ options.outputFormat = 'mp3';
2426
+ }
2427
+ if (!cliSet.steps) {
2428
+ options.steps = musicDefaults.steps.default;
2429
+ }
2430
+ if (!cliSet.guidance && musicDefaults.guidance) {
2431
+ options.guidance = musicDefaults.guidance.default;
2432
+ }
2433
+ if (!cliSet.sampler) {
2434
+ options.sampler = musicDefaults.sampler.default;
2435
+ }
2436
+ if (!cliSet.scheduler) {
2437
+ options.scheduler = musicDefaults.scheduler.default;
2438
+ }
2439
+ if (!cliSet.musicShift) {
2440
+ options.musicShift = musicDefaults.shift.default;
2441
+ }
2442
+ if (!cliSet.timeout && !timeoutFromConfig && options.timeout === 30000) {
2443
+ options.timeout = 600000;
2444
+ }
2445
+ } else if (options.video) {
1868
2446
  options.model = options.model || selectDefaultVideoModel(options.videoWorkflow, options, openclawConfig) || 'wan_v2.2-14b-fp8_i2v_lightx2v';
1869
2447
  options.model = resolveVideoModelAlias(options.model, options.videoWorkflow);
1870
2448
  const videoModelDefaults = getModelDefaults(options.model, openclawConfig);
@@ -1892,6 +2470,15 @@ if (options.video) {
1892
2470
  options.width = dims.width;
1893
2471
  options.height = dims.height;
1894
2472
  }
2473
+ if (aspectRatioFromPrompt && !cliSet.width && !cliSet.height) {
2474
+ const dims = dimensionsForAspectRatio(options.width, options.height, aspectRatioFromPrompt);
2475
+ if (dims) {
2476
+ options.width = dims.width;
2477
+ options.height = dims.height;
2478
+ widthFromPrompt = true;
2479
+ heightFromPrompt = true;
2480
+ }
2481
+ }
1895
2482
  if (!cliSet.timeout && !timeoutFromConfig && options.timeout === 30000) {
1896
2483
  options.timeout = 300000; // 5 min for video
1897
2484
  }
@@ -1913,11 +2500,130 @@ if (options.video) {
1913
2500
  options.model = options.model || openclawConfig?.defaultImageModel || 'z_image_turbo_bf16';
1914
2501
  }
1915
2502
 
1916
- if (!options.prompt && !options.estimateVideoCost && !options.multiAngle && !options.showBalance && !options.showVersion && !options.extractLastFrame && !options.concatVideos && !options.listMedia && !options.memoryAction && !options.personalityAction && !(options.personaAction && options.personaAction !== 'generate')) {
2503
+ if (options.music) {
2504
+ const musicDefaults = getMusicModelDefaults(options.model);
2505
+ if (options.duration < MUSIC_DURATION_LIMITS.min || options.duration > MUSIC_DURATION_LIMITS.max) {
2506
+ fatalCliError(`Music duration must be between ${MUSIC_DURATION_LIMITS.min} and ${MUSIC_DURATION_LIMITS.max} seconds.`, {
2507
+ code: 'INVALID_ARGUMENT',
2508
+ details: { duration: options.duration }
2509
+ });
2510
+ }
2511
+ if (options.musicBpm !== null && options.musicBpm !== undefined) {
2512
+ if (options.musicBpm < MUSIC_BPM_LIMITS.min || options.musicBpm > MUSIC_BPM_LIMITS.max) {
2513
+ fatalCliError(`Music BPM must be between ${MUSIC_BPM_LIMITS.min} and ${MUSIC_BPM_LIMITS.max}.`, {
2514
+ code: 'INVALID_ARGUMENT',
2515
+ details: { bpm: options.musicBpm }
2516
+ });
2517
+ }
2518
+ }
2519
+ if (options.musicTimesig && !MUSIC_TIME_SIGNATURES.has(options.musicTimesig)) {
2520
+ fatalCliError('--timesig must be one of 2, 3, 4, or 6.', {
2521
+ code: 'INVALID_ARGUMENT',
2522
+ details: { timesig: options.musicTimesig }
2523
+ });
2524
+ }
2525
+ if (options.steps !== null && options.steps !== undefined) {
2526
+ const { min, max } = musicDefaults.steps;
2527
+ if (!Number.isFinite(options.steps) || options.steps < min || options.steps > max) {
2528
+ fatalCliError(`--steps for ${options.model} must be between ${min} and ${max}.`, {
2529
+ code: 'INVALID_ARGUMENT',
2530
+ details: { model: options.model, steps: options.steps, min, max }
2531
+ });
2532
+ }
2533
+ }
2534
+ if (options.guidance !== null && options.guidance !== undefined && musicDefaults.guidance) {
2535
+ const { min, max } = musicDefaults.guidance;
2536
+ if (!Number.isFinite(options.guidance) || options.guidance < min || options.guidance > max) {
2537
+ fatalCliError(`--guidance for ${options.model} must be between ${min} and ${max}.`, {
2538
+ code: 'INVALID_ARGUMENT',
2539
+ details: { model: options.model, guidance: options.guidance, min, max }
2540
+ });
2541
+ }
2542
+ }
2543
+ if (options.musicShift !== null && options.musicShift !== undefined) {
2544
+ const { min, max } = musicDefaults.shift;
2545
+ if (!Number.isFinite(options.musicShift) || options.musicShift < min || options.musicShift > max) {
2546
+ fatalCliError(`--music-shift for ${options.model} must be between ${min} and ${max}.`, {
2547
+ code: 'INVALID_ARGUMENT',
2548
+ details: { model: options.model, shift: options.musicShift, min, max }
2549
+ });
2550
+ }
2551
+ }
2552
+ if (options.sampler && !musicDefaults.sampler.allowed.includes(options.sampler)) {
2553
+ fatalCliError(`--sampler for ${options.model} must be one of ${musicDefaults.sampler.allowed.join('|')}.`, {
2554
+ code: 'INVALID_ARGUMENT',
2555
+ details: { model: options.model, sampler: options.sampler, allowed: musicDefaults.sampler.allowed }
2556
+ });
2557
+ }
2558
+ if (options.scheduler && !musicDefaults.scheduler.allowed.includes(options.scheduler)) {
2559
+ fatalCliError(`--scheduler for ${options.model} must be one of ${musicDefaults.scheduler.allowed.join('|')}.`, {
2560
+ code: 'INVALID_ARGUMENT',
2561
+ details: { model: options.model, scheduler: options.scheduler, allowed: musicDefaults.scheduler.allowed }
2562
+ });
2563
+ }
2564
+ }
2565
+
2566
+ const apiWorkflowUtilityAction = options.apiWorkflowAction && options.apiWorkflowAction !== 'start';
2567
+ const apiWorkflowStartAction = options.apiWorkflowAction === 'start';
2568
+ const apiWorkflowStartHasExternalInput = options.apiWorkflowAction === 'start' && options.apiWorkflowInput;
2569
+ const personaUtilityAction = Boolean(options.personaAction && options.personaAction !== 'generate');
2570
+ const commandUsesGenerationSeed = !options.apiChat &&
2571
+ !apiWorkflowUtilityAction &&
2572
+ !options.estimateVideoCost &&
2573
+ !options.showBalance &&
2574
+ !options.showVersion &&
2575
+ !options.extractLastFrame &&
2576
+ !options.concatVideos &&
2577
+ !options.listMedia &&
2578
+ !options.memoryAction &&
2579
+ !options.personalityAction &&
2580
+ !personaUtilityAction;
2581
+ if (apiWorkflowStartAction && options.apiWorkflowKind === 'image_to_video' && !options.prompt && !apiWorkflowStartHasExternalInput) {
2582
+ fatalCliError('--api-workflow image-to-video requires a prompt or --workflow-input JSON.', { code: 'INVALID_ARGUMENT' });
2583
+ }
2584
+ if (apiWorkflowStartAction && options.apiWorkflowKind === 'hosted_tool_sequence' && !apiWorkflowStartHasExternalInput) {
2585
+ fatalCliError('--api-workflow hosted-tool-sequence requires --workflow-input JSON.', { code: 'INVALID_ARGUMENT' });
2586
+ }
2587
+ if (apiWorkflowStartAction && options.apiWorkflowKind === 'storyboard_video' && !options.prompt && !apiWorkflowStartHasExternalInput) {
2588
+ fatalCliError('--api-workflow storyboard-video requires a prompt or --workflow-input JSON.', { code: 'INVALID_ARGUMENT' });
2589
+ }
2590
+ if (!options.prompt && !options.apiChat && !apiWorkflowUtilityAction && !apiWorkflowStartAction && !options.estimateVideoCost && !options.multiAngle && !options.showBalance && !options.showVersion && !options.extractLastFrame && !options.concatVideos && !options.listMedia && !options.memoryAction && !options.personalityAction && !personaUtilityAction) {
1917
2591
  fatalCliError('No prompt provided. Use --help for usage.', { code: 'INVALID_ARGUMENT' });
1918
2592
  }
1919
2593
 
1920
- if (!options.video && (options.refAudio || options.refVideo || options.referenceAudioIdentity || options.voicePersonaName || options.videoWorkflow || options.frames || options.targetResolution || options.audioStart !== null || options.audioDuration !== null || options.videoStart !== null)) {
2594
+ if (options.apiChat && !options.prompt && options.contextImages.length === 0 && !options.refImage && !options.refImageEnd) {
2595
+ fatalCliError('--api-chat requires a prompt or an image reference for vision-only planning.', { code: 'INVALID_ARGUMENT' });
2596
+ }
2597
+
2598
+ const apiMediaRefs = getApiModeMediaReferences();
2599
+ const apiImageRefs = apiMediaRefs.filter(ref => ref.kind === 'image');
2600
+ const apiNonImageRefs = apiMediaRefs.filter(ref => ref.kind !== 'image');
2601
+ if (options.apiChat && apiNonImageRefs.length > 0) {
2602
+ fatalCliError(
2603
+ `--api-chat does not support ${formatApiMediaFlags(apiNonImageRefs)}. Use the direct CLI path for audio/video media workflows.`,
2604
+ { code: 'UNSUPPORTED_API_MEDIA_REFERENCE' }
2605
+ );
2606
+ }
2607
+ if (options.apiChat && options.apiToolExecution && apiImageRefs.length > 0) {
2608
+ fatalCliError(
2609
+ '--api-chat with server-side tool execution does not currently support image references. Use the direct CLI path for uploaded-media workflows, or pass --no-api-tool-execution for vision-only chat/planning.',
2610
+ { code: 'UNSUPPORTED_API_UPLOAD_EXECUTION' }
2611
+ );
2612
+ }
2613
+ if (options.apiWorkflowAction && apiMediaRefs.length > 0) {
2614
+ fatalCliError(
2615
+ `Hosted workflow API modes do not accept CLI media reference flags (${formatApiMediaFlags(apiMediaRefs)}). Use --workflow-input JSON for hosted workflow inputs, or use the direct CLI path for local media workflows.`,
2616
+ { code: 'UNSUPPORTED_API_MEDIA_REFERENCE' }
2617
+ );
2618
+ }
2619
+ if (options.apiWorkflowAction === 'start' && options.apiWorkflowKind === 'image_to_video' && options.apiWorkflowTitle) {
2620
+ fatalCliError('--workflow-title is currently only supported with --api-workflow hosted-tool-sequence or storyboard-video.', {
2621
+ code: 'INVALID_ARGUMENT',
2622
+ details: { flag: '--workflow-title', workflow: options.apiWorkflowKind }
2623
+ });
2624
+ }
2625
+
2626
+ if (!options.video && !options.apiChat && !options.apiWorkflowAction && (options.refAudio || options.refVideo || options.referenceAudioIdentity || options.voicePersonaName || options.videoWorkflow || options.frames || options.targetResolution || options.audioStart !== null || options.audioDuration !== null || options.videoStart !== null)) {
1921
2627
  fatalCliError('Video-only options (--workflow/--frames/--target-resolution/--ref-audio/--ref-video/--reference-audio-identity/--voice-persona) require --video.', {
1922
2628
  code: 'INVALID_ARGUMENT'
1923
2629
  });
@@ -2246,8 +2952,8 @@ if (options.contextImages.length > 0 && !options.video) {
2246
2952
  }
2247
2953
  }
2248
2954
 
2249
- // Load last render seed if requested
2250
- if (options.lastSeed) {
2955
+ // Load last render seed if requested for a command that can use it.
2956
+ if (options.lastSeed && commandUsesGenerationSeed) {
2251
2957
  if (existsSync(LAST_RENDER_PATH)) {
2252
2958
  try {
2253
2959
  const lastRender = JSON.parse(readFileSync(LAST_RENDER_PATH, 'utf8'));
@@ -2263,7 +2969,7 @@ if (options.lastSeed) {
2263
2969
  }
2264
2970
  }
2265
2971
 
2266
- if (!options.estimateVideoCost && !options.showVersion && !options.extractLastFrame && !options.concatVideos && !options.listMedia && (options.seed === null || options.seed === undefined)) {
2972
+ if (commandUsesGenerationSeed && (options.seed === null || options.seed === undefined)) {
2267
2973
  const strategy = options.seedStrategy || openclawConfig?.seedStrategy || 'prompt-hash';
2268
2974
  const normalized = normalizeSeedStrategy(strategy) || 'prompt-hash';
2269
2975
  options.seedStrategy = normalized;
@@ -2287,9 +2993,6 @@ function loadCredentials() {
2287
2993
  SOGNI_API_KEY: creds.SOGNI_API_KEY
2288
2994
  };
2289
2995
  }
2290
- if (creds.SOGNI_USERNAME && creds.SOGNI_PASSWORD) {
2291
- return creds;
2292
- }
2293
2996
  }
2294
2997
 
2295
2998
  if (hasEnv('SOGNI_API_KEY')) {
@@ -2297,19 +3000,12 @@ function loadCredentials() {
2297
3000
  SOGNI_API_KEY: getEnv('SOGNI_API_KEY')
2298
3001
  };
2299
3002
  }
2300
-
2301
- if (hasEnv('SOGNI_USERNAME') && hasEnv('SOGNI_PASSWORD')) {
2302
- return {
2303
- SOGNI_USERNAME: getEnv('SOGNI_USERNAME'),
2304
- SOGNI_PASSWORD: getEnv('SOGNI_PASSWORD')
2305
- };
2306
- }
2307
3003
 
2308
- const err = new Error('No Sogni credentials found.');
3004
+ const err = new Error('No Sogni API key found.');
2309
3005
  err.code = 'MISSING_CREDENTIALS';
2310
- err.hint = 'Set SOGNI_API_KEY or SOGNI_USERNAME/SOGNI_PASSWORD, or configure SOGNI_CREDENTIALS_PATH.';
3006
+ err.hint = 'Set SOGNI_API_KEY, or configure SOGNI_CREDENTIALS_PATH with SOGNI_API_KEY. You can find your API key by logging into https://dashboard.sogni.ai and clicking your username.';
2311
3007
  err.details = {
2312
- triedEnv: ['SOGNI_API_KEY', 'SOGNI_USERNAME', 'SOGNI_PASSWORD'],
3008
+ triedEnv: ['SOGNI_API_KEY'],
2313
3009
  triedFile: CREDENTIALS_PATH
2314
3010
  };
2315
3011
  throw err;
@@ -2326,20 +3022,637 @@ function saveLastRender(info) {
2326
3022
  }
2327
3023
  }
2328
3024
 
2329
- // ---------------------------------------------------------------------------
2330
- // Memory system — persistent user preferences on disk
2331
- // ---------------------------------------------------------------------------
2332
- const MEMORIES_PATH = getEnv('SOGNI_MEMORIES_PATH') || DEFAULT_MEMORIES_PATH;
3025
/**
 * Return the API key from a loaded credentials object, or fail with a
 * structured error when none is present.
 *
 * @param {{ SOGNI_API_KEY?: string } | null | undefined} creds - Loaded credentials.
 * @param {string} modeLabel - CLI mode name used as the subject of the error message.
 * @returns {string} The API key.
 * @throws {Error} With `code === 'MISSING_API_KEY'` when no API key is available.
 */
function requireApiKeyCredentials(creds, modeLabel) {
  const apiKey = creds?.SOGNI_API_KEY;
  if (apiKey) return apiKey;

  const err = new Error(`${modeLabel} requires SOGNI_API_KEY API-key authentication.`);
  err.code = 'MISSING_API_KEY';
  // Credential loading only accepts SOGNI_API_KEY, so the hint points at the
  // same remedy instead of advertising username/password auth.
  err.hint = 'Set SOGNI_API_KEY, or configure SOGNI_CREDENTIALS_PATH with SOGNI_API_KEY. You can find your API key by logging into https://dashboard.sogni.ai and clicking your username.';
  throw err;
}
2333
3032
 
2334
- function loadMemories() {
2335
- try {
2336
- if (existsSync(MEMORIES_PATH)) return JSON.parse(readFileSync(MEMORIES_PATH, 'utf8'));
2337
- } catch {}
2338
- return [];
3033
/**
 * Build the HTTP headers sent with Sogni API requests.
 *
 * The key is sent both as a Bearer token and as an `api-key` header.
 * Entries in `extra` are applied last and therefore override the defaults.
 *
 * @param {string} apiKey - Sogni API key.
 * @param {Object} [extra] - Additional or overriding headers.
 * @returns {Object} Header map.
 */
function apiRequestHeaders(apiKey, extra = {}) {
  const baseHeaders = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${apiKey}`,
    'api-key': apiKey
  };
  return Object.assign(baseHeaders, extra);
}
2340
3041
 
2341
- function saveMemories(memories) {
2342
- const dir = dirname(MEMORIES_PATH);
3042
/**
 * Perform an authenticated JSON request against the Sogni API.
 *
 * The response body is parsed as JSON when possible; a non-JSON body is
 * wrapped as `{ message: <text> }`, and an empty body yields `{}`.
 *
 * @param {string} path - API path, resolved through `buildSafeApiUrl`.
 * @param {Object} [request]
 * @param {string} [request.apiKey] - API key used for the auth headers.
 * @param {string} [request.method='GET'] - HTTP method.
 * @param {*} [request.body] - JSON-serializable body; omitted when `undefined`.
 * @param {Object} [request.headers] - Extra headers merged over the defaults.
 * @returns {Promise<*>} Parsed response payload.
 * @throws {Error} With `code === 'API_REQUEST_FAILED'` and `details` on non-2xx responses.
 */
async function fetchApiJson(path, { apiKey, method = 'GET', body = undefined, headers = {} } = {}) {
  const url = await buildSafeApiUrl(path);
  const init = {
    method,
    headers: apiRequestHeaders(apiKey, headers)
  };
  if (body !== undefined) {
    init.body = JSON.stringify(body);
  }

  const response = await fetch(url, init);
  const text = await response.text();
  let payload = {};
  if (text) {
    try {
      payload = JSON.parse(text);
    } catch {
      // Non-JSON bodies (plain text, HTML error pages) are still reported.
      payload = { message: text };
    }
  }

  if (!response.ok) {
    // Only strings are accepted as the message so an object-valued `message`
    // cannot render as "[object Object]"; `{ error: "..." }` is also honoured.
    const candidates = [
      payload?.message,
      payload?.error?.message,
      payload?.error,
      response.statusText
    ];
    const message = candidates.find((value) => typeof value === 'string' && value.trim() !== '')
      || 'Sogni API request failed';
    const err = new Error(message);
    err.code = 'API_REQUEST_FAILED';
    err.details = { url, status: response.status, payload };
    throw err;
  }
  return payload;
}
3068
+
3069
/**
 * Collect every media reference supplied through CLI flags.
 *
 * Context images come first, followed by the single-value flags in a fixed
 * order. Unset (falsy) values are left out.
 *
 * @returns {Array<{flag: string, value: *, kind: string}>} One descriptor per
 *   reference, where `kind` is 'image', 'audio' or 'video'.
 */
function getApiModeMediaReferences() {
  const contextRefs = (options.contextImages || [])
    .filter(Boolean)
    .map((value) => ({ flag: '-c/--context', value, kind: 'image' }));

  const singleValueFlags = [
    ['--ref', options.refImage, 'image'],
    ['--ref-end', options.refImageEnd, 'image'],
    ['--ref-audio', options.refAudio, 'audio'],
    ['--reference-audio-identity', options.referenceAudioIdentity, 'audio'],
    ['--ref-video', options.refVideo, 'video']
  ];
  const singleRefs = singleValueFlags
    .filter(([, value]) => value)
    .map(([flag, value, kind]) => ({ flag, value, kind }));

  return [...contextRefs, ...singleRefs];
}
3081
+
3082
/**
 * Render the distinct flag names of the given media references as a
 * comma-separated list, in first-seen order.
 *
 * @param {Array<{flag: string}>} refs - Media reference descriptors.
 * @returns {string} For example "--ref, --ref-audio".
 */
function formatApiMediaFlags(refs) {
  const uniqueFlags = new Set(refs.map(({ flag }) => flag));
  return Array.from(uniqueFlags).join(', ');
}
3085
+
3086
/**
 * Unwrap an API response envelope.
 *
 * @param {*} payload - Parsed API response.
 * @returns {*} `payload.data` when it is a non-null object, otherwise the
 *   payload itself.
 */
function extractApiEnvelopeData(payload) {
  const inner = payload?.data;
  if (inner && typeof inner === 'object') {
    return inner;
  }
  return payload;
}
3089
+
3090
/**
 * Pull the first assistant message out of a chat-completions response.
 *
 * Looks at the first choice of the unwrapped envelope (`message`, then
 * `delta`), then at the first choice of the raw payload.
 *
 * @param {*} payload - Parsed chat-completions response.
 * @returns {Object} The message object, or `{}` when none is found.
 */
function extractChatMessage(payload) {
  const envelopeChoice = extractApiEnvelopeData(payload)?.choices?.[0];
  const candidates = [
    envelopeChoice?.message,
    envelopeChoice?.delta,
    payload?.choices?.[0]?.message
  ];
  return candidates.find(Boolean) || {};
}
3094
+
3095
/**
 * Pull the creative-workflow list out of a chat-completions response.
 *
 * Accepts both snake_case and camelCase keys, checking the unwrapped
 * envelope before the raw payload.
 *
 * @param {*} payload - Parsed chat-completions response.
 * @returns {*} The first truthy workflows value found, or `[]`.
 */
function extractChatWorkflows(payload) {
  const envelope = extractApiEnvelopeData(payload);
  const candidates = [
    envelope?.creative_workflows,
    envelope?.creativeWorkflows,
    payload?.creative_workflows,
    payload?.creativeWorkflows
  ];
  return candidates.find(Boolean) || [];
}
3099
+
3100
/**
 * Guess a MIME type from the file extension of a local path or URL.
 *
 * The query string is always ignored. For URL-like inputs (`scheme://...`)
 * the fragment is ignored as well, so `https://host/a.png#frag` resolves to
 * `image/png`. Local paths keep `#` because it is a legal filename character.
 *
 * @param {string} pathOrUrl - Local path or URL; nullish values use the fallback.
 * @param {string} [fallback='application/octet-stream'] - Returned when the
 *   extension is not recognised.
 * @returns {string} MIME type.
 */
function mimeTypeForPath(pathOrUrl, fallback = 'application/octet-stream') {
  const raw = String(pathOrUrl || '');
  const looksLikeUrl = /^[a-z][a-z0-9+.-]*:\/\//i.test(raw);
  const clean = raw.split(looksLikeUrl ? /[?#]/ : '?')[0].toLowerCase();

  const extensionTypes = [
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.png', 'image/png'],
    ['.mp3', 'audio/mpeg'],
    ['.wav', 'audio/wav'],
    ['.m4a', 'audio/mp4'],
    ['.webm', 'audio/webm'],
    ['.ogg', 'audio/ogg'],
    ['.flac', 'audio/flac'],
    ['.mp4', 'video/mp4'],
    ['.mov', 'video/quicktime']
  ];
  const match = extensionTypes.find(([extension]) => clean.endsWith(extension));
  return match ? match[1] : fallback;
}
3114
+
3115
/**
 * Load an image reference and encode it as a base64 `data:` URI.
 *
 * Only references whose extension maps to JPEG or PNG are accepted.
 *
 * @param {string} pathOrUrl - Image reference passed to `fetchMediaBuffer`.
 * @returns {Promise<string>} `data:<mime>;base64,<payload>`.
 * @throws {Error} With `code === 'UNSUPPORTED_MEDIA_TYPE'` for other types.
 */
async function imageDataUriFromPathOrUrl(pathOrUrl) {
  const mimeType = mimeTypeForPath(pathOrUrl);
  const supportedTypes = ['image/jpeg', 'image/png'];
  if (!supportedTypes.includes(mimeType)) {
    throw Object.assign(
      new Error(`API chat vision supports PNG or JPEG image references, got ${pathOrUrl}.`),
      { code: 'UNSUPPORTED_MEDIA_TYPE' }
    );
  }

  const bytes = await fetchMediaBuffer(pathOrUrl);
  const encoded = bytes.toString('base64');
  return `data:${mimeType};base64,${encoded}`;
}
3125
+
3126
/**
 * Build the `messages` array for an `--api-chat` request.
 *
 * Without image references the user turn is plain text. With image
 * references it becomes a multi-part turn: one text part followed by one
 * `image_url` part per reference, encoded as data URIs.
 *
 * @returns {Promise<Array<Object>>} System message followed by the user message.
 * @throws {Error} With `code === 'UNSUPPORTED_API_UPLOAD_EXECUTION'` when image
 *   references are combined with server-side tool execution.
 */
async function buildApiChatMessages() {
  const systemMessage = {
    role: 'system',
    content: options.apiSystemPrompt ||
      'You are a concise creative production assistant. Use Sogni creative tools when they help produce concrete media.'
  };
  const attachments = [...options.contextImages, options.refImage, options.refImageEnd]
    .filter((value) => Boolean(value));

  if (attachments.length === 0) {
    return [systemMessage, { role: 'user', content: options.prompt }];
  }

  if (options.apiToolExecution) {
    throw Object.assign(
      new Error('--api-chat with server-side tool execution does not currently support image references. Use the direct CLI path for uploaded-media workflows, or pass --no-api-tool-execution for vision-only chat/planning.'),
      { code: 'UNSUPPORTED_API_UPLOAD_EXECUTION' }
    );
  }

  const parts = [{ type: 'text', text: options.prompt || 'Describe the attached media.' }];
  // Encode sequentially so parts keep the order of the references and the
  // first failing reference aborts the build.
  for (const attachment of attachments) {
    const url = await imageDataUriFromPathOrUrl(attachment);
    parts.push({ type: 'image_url', image_url: { url } });
  }
  return [systemMessage, { role: 'user', content: parts }];
}
3157
+
3158
/**
 * Execute `--api-chat`: send one chat-completions request and print the result.
 *
 * With `options.json` set, a single JSON document is written to stdout.
 * Otherwise the reply content, tool-call names and creative workflows are
 * printed as text.
 *
 * @param {Function} log - Logger used when the response has nothing to show.
 * @returns {Promise<void>}
 */
async function runApiChat(log) {
  const apiKey = requireApiKeyCredentials(loadCredentials(), '--api-chat');
  const requestBody = {
    model: options.llmModel || DEFAULT_LLM_MODEL,
    messages: await buildApiChatMessages(),
    temperature: 0.4,
    max_tokens: 1600,
    token_type: options.tokenType || 'spark',
    // The app source is sent under both snake_case and camelCase keys.
    app_source: SOGNI_APP_SOURCE,
    appSource: SOGNI_APP_SOURCE,
    sogni_tools: options.apiTools,
    sogni_tool_execution: options.apiToolExecution
  };
  const response = await fetchApiJson('/v1/chat/completions', {
    apiKey,
    method: 'POST',
    body: requestBody
  });

  const reply = extractChatMessage(response);
  const workflows = extractChatWorkflows(response);
  const toolCalls = reply.tool_calls || reply.toolCalls || [];

  if (options.json) {
    const report = {
      success: true,
      type: 'api-chat',
      content: reply.content || '',
      toolCalls,
      creativeWorkflows: workflows,
      raw: response
    };
    console.log(JSON.stringify(report));
    return;
  }

  if (reply.content) {
    console.log(reply.content);
  }
  if (toolCalls.length > 0) {
    console.log('\nTool calls:');
    for (const call of toolCalls) {
      const label = call.function?.name || call.name || call.id || 'tool_call';
      console.log(` - ${label}`);
    }
  }
  if (workflows.length > 0) {
    console.log('\nCreative workflows:');
    for (const workflow of workflows) {
      const identifier = workflow.workflowId || workflow.id;
      const status = workflow.status || 'submitted';
      console.log(` - ${identifier}: ${status}`);
    }
  }
  if (!reply.content && toolCalls.length === 0 && workflows.length === 0) {
    log('No API chat content returned.');
  }
}
3210
+
3211
/**
 * Parse the `--workflow-input` value into a JavaScript value.
 *
 * The raw value is read from a file when it starts with `@` (the rest is the
 * path) or when it names an existing path. Otherwise it is treated as inline
 * JSON.
 *
 * @param {string} raw - Raw flag value.
 * @returns {*} Parsed JSON value, or `null` when `raw` is empty.
 * @throws {Error} With `code === 'INVALID_WORKFLOW_INPUT'` when the file cannot
 *   be read or the text is not valid JSON.
 */
function parseWorkflowInput(raw) {
  if (!raw) return null;

  const isFileReference = raw.startsWith('@');
  const candidatePath = expandHomePath(isFileReference ? raw.slice(1) : raw);

  let text = raw;
  if (isFileReference || existsSync(candidatePath)) {
    try {
      text = readFileSync(candidatePath, 'utf8');
    } catch (error) {
      // Report unreadable files (missing, directory, permissions) with the
      // same error code as malformed JSON instead of a raw fs error.
      const err = new Error(`Unable to read --workflow-input file "${candidatePath}": ${error?.message || String(error)}`);
      err.code = 'INVALID_WORKFLOW_INPUT';
      err.cause = error;
      throw err;
    }
  }

  try {
    return JSON.parse(text);
  } catch (error) {
    const err = new Error(`Invalid --workflow-input JSON: ${error?.message || String(error)}`);
    err.code = 'INVALID_WORKFLOW_INPUT';
    throw err;
  }
}
3226
+
3227
/**
 * Build the input for the hosted image-to-video workflow.
 *
 * A truthy parsed `--workflow-input` value is returned unchanged. Otherwise
 * the input is assembled from CLI options, including only fields that were set.
 *
 * @returns {*} Workflow input.
 */
function buildImageToVideoWorkflowInput() {
  const external = parseWorkflowInput(options.apiWorkflowInput);
  if (external) return external;

  // [output key, value, include?] in the order the keys must appear.
  const optionalFields = [
    ['videoPrompt', options.apiVideoPrompt, Boolean(options.apiVideoPrompt)],
    ['negativePrompt', options.apiNegativePrompt, Boolean(options.apiNegativePrompt)],
    ['width', options.width, Number.isFinite(options.width)],
    ['height', options.height, Number.isFinite(options.height)],
    ['duration', options.duration, Number.isFinite(options.duration)],
    ['imageModel', options.model, Boolean(options.model)],
    ['videoModel', options.videoModel, Boolean(options.videoModel)],
    ['numberOfMedia', options.count, Number.isFinite(options.count)],
    ['seed', options.seed, options.seed !== null && options.seed !== undefined],
    ['generateAudio', options.apiGenerateAudio, options.apiGenerateAudio !== null],
    ['expandPrompt', options.apiExpandPrompt, options.apiExpandPrompt !== null]
  ];

  const input = { prompt: options.prompt };
  for (const [key, value, include] of optionalFields) {
    if (include) {
      input[key] = value;
    }
  }
  return input;
}
3246
+
3247
/**
 * Build the input for the hosted tool-sequence workflow from
 * `--workflow-input`, applying `--workflow-title` when the JSON has no title.
 *
 * @returns {*} Parsed workflow input.
 * @throws {Error} With `code === 'MISSING_WORKFLOW_INPUT'` when no input was given.
 */
function buildHostedToolSequenceWorkflowInput() {
  const parsed = parseWorkflowInput(options.apiWorkflowInput);
  if (!parsed) {
    const err = new Error('--api-workflow hosted-tool-sequence requires --workflow-input JSON.');
    err.code = 'MISSING_WORKFLOW_INPUT';
    throw err;
  }

  // The title is only attached to plain JSON objects. Assigning a property on
  // a primitive throws in strict-mode modules, and a property added to an
  // array is dropped when the value is serialized.
  const isPlainObject = typeof parsed === 'object' && !Array.isArray(parsed);
  if (isPlainObject && options.apiWorkflowTitle && !parsed.title) {
    parsed.title = options.apiWorkflowTitle;
  }
  return parsed;
}
3259
+
3260
/**
 * Translate an explicitly passed `--quality` tier into a storyboard image
 * quality.
 *
 * @returns {('high'|'low'|'medium'|undefined)} 'high' for "pro", 'low' for
 *   "fast", 'medium' for any other tier; `undefined` when `--quality` was not
 *   set on the command line.
 */
function storyboardWorkflowImageQualityFromCli() {
  if (!(cliSet.quality && options.quality)) {
    return undefined;
  }
  switch (options.quality) {
    case 'pro':
      return 'high';
    case 'fast':
      return 'low';
    default:
      return 'medium';
  }
}
3266
+
3267
/**
 * Turn a parsed `--workflow-input` value into a storyboard-video hosted
 * tool-sequence input.
 *
 * - Non-object values and arrays yield `null`.
 * - Objects that already carry a `steps` array are returned unchanged.
 * - Otherwise a storyline (`storyline`, `script` or `storyboardScript`) is
 *   required. Remaining fields come from the parsed object first and fall
 *   back to explicitly supplied CLI options.
 *
 * @param {*} parsed - Parsed workflow input.
 * @returns {*} `null`, the parsed object itself, or the result of
 *   `buildStoryboardVideoHostedToolSequenceInput`.
 */
function storyboardWorkflowInputFromParsedValue(parsed) {
  const isRecord = Boolean(parsed) && typeof parsed === 'object' && !Array.isArray(parsed);
  if (!isRecord) return null;
  if (Array.isArray(parsed.steps)) return parsed;

  // First listed key whose value has the requested typeof, else undefined.
  const firstOfType = (type, ...keys) => {
    for (const key of keys) {
      if (typeof parsed[key] === type) return parsed[key];
    }
    return undefined;
  };

  const storyline = firstOfType('string', 'storyline', 'script', 'storyboardScript') ?? null;
  if (!storyline) return null;

  // --model counts as the video model only when it selects Seedance;
  // otherwise it is treated as the image model.
  const cliModelIsSeedance = cliSet.model ? isSeedanceModelSelection(options.model) : false;
  const explicitCliVideoModel = options.videoModel
    || (cliSet.model && cliModelIsSeedance ? options.model : undefined);
  const explicitCliImageModel = cliSet.model && !cliModelIsSeedance ? options.model : undefined;

  let videoDurationSec;
  if (typeof parsed.videoDurationSec === 'number') {
    videoDurationSec = parsed.videoDurationSec;
  } else if (cliSet.duration && Number.isFinite(options.duration)) {
    videoDurationSec = options.duration;
  }

  let videoTargetResolution;
  if (Number.isFinite(parsed.videoTargetResolution)) {
    videoTargetResolution = parsed.videoTargetResolution;
  } else if (cliSet.targetResolution && Number.isFinite(options.targetResolution)) {
    videoTargetResolution = options.targetResolution;
  }

  return buildStoryboardVideoHostedToolSequenceInput({
    storyline,
    userIntentText: firstOfType('string', 'userIntentText', 'prompt') ?? (options.prompt || storyline),
    title: firstOfType('string', 'title') ?? options.apiWorkflowTitle,
    frameCount: firstOfType('number', 'frameCount', 'storyboardFrames') ?? (options.storyboardFrames ?? undefined),
    videoDurationSec,
    videoTargetResolution,
    imageModel: firstOfType('string', 'imageModel') ?? explicitCliImageModel,
    imageQuality: firstOfType('string', 'imageQuality', 'gptImageQuality') ?? storyboardWorkflowImageQualityFromCli(),
    imageOutputFormat: firstOfType('string', 'imageOutputFormat', 'outputFormat')
      ?? (cliSet.outputFormat ? options.outputFormat : undefined),
    videoModel: firstOfType('string', 'videoModel') ?? explicitCliVideoModel,
    generateAudio: firstOfType('boolean', 'generateAudio') ?? (options.apiGenerateAudio ?? undefined),
  });
}
3323
+
3324
/**
 * Compose the chat messages used to ask the LLM for a storyboard storyline.
 *
 * The system message fixes the output structure; the user message carries the
 * original prompt plus duration, beat-count, and (when set on the CLI) target
 * resolution hints. Empty lines/values are omitted from the user message.
 *
 * @returns {{role: string, content: string}[]} A system message followed by a user message.
 */
function buildStoryboardStorylineMessages() {
  const systemSentences = [
    'You write production-ready video storyboard storylines for a GPT Image 2 storyboard sheet that will be rendered into a Seedance 2.0 video.',
    'Return only the storyline/script. Do not call tools, do not ask follow-up questions, and do not include markdown fences.',
    'Use this exact plain-text structure so downstream compilers can parse it: Project Title, Total Duration, then one SCENE NN - Title block per beat.',
    'Each scene block must put each field on its own line: TIME, PURPOSE, VISUAL, ACTION, CAMERA, LIGHTING/STYLE, TRANSITION, DIALOGUE/VO, AUDIO/SFX, MUSIC, VISIBLE TEXT.',
    'When there is no spoken dialogue or voiceover, write DIALOGUE/VO: [no dialogue]. Do not write None, N/A, or leave it blank.',
    'If the user requires exact visible text, repeat that exact text only in the relevant VISIBLE TEXT field and preserve spelling exactly.',
    'Keep it concise enough for one GPT Image 2 storyboard image and one Seedance video prompt, while preserving cause-and-effect story progression.',
  ];

  const userLines = ['Original user request:'];
  if (options.prompt) userLines.push(options.prompt);

  if (cliSet.duration && Number.isFinite(options.duration)) {
    userLines.push(`Target duration: ${options.duration} seconds.`);
  } else {
    userLines.push('Target duration: infer a Seedance-safe duration between 4 and 15 seconds from the request.');
  }

  if (Number.isFinite(options.storyboardFrames)) {
    userLines.push(`Storyboard beat count: exactly ${options.storyboardFrames}.`);
  } else {
    userLines.push('Storyboard beat count: infer a compact 4-8 beat plan unless the user asks otherwise.');
  }

  if (cliSet.targetResolution && Number.isFinite(options.targetResolution)) {
    userLines.push(`Video target short-side resolution: ${options.targetResolution}p.`);
  }

  return [
    { role: 'system', content: systemSentences.join(' ') },
    { role: 'user', content: userLines.join('\n') },
  ];
}
3356
+
3357
/**
 * Ask the chat-completions endpoint for a storyboard storyline.
 *
 * Tool use is disabled in the request so the model answers with plain text.
 *
 * @param {string} apiKey - API key forwarded to `fetchApiJson`.
 * @returns {Promise<{storyline: string, raw: *}>} The trimmed storyline and the raw response payload.
 * @throws {Error} With `code === 'EMPTY_STORYBOARD_STORYLINE'` (and the payload
 *   in `details`) when the response carries no non-empty string content,
 *   including the case where no chat message could be extracted at all.
 */
async function generateStoryboardWorkflowStoryline(apiKey) {
  const payload = await fetchApiJson('/v1/chat/completions', {
    apiKey,
    method: 'POST',
    body: {
      model: options.llmModel || DEFAULT_LLM_MODEL,
      messages: buildStoryboardStorylineMessages(),
      temperature: 0.45,
      max_tokens: 1800,
      token_type: options.tokenType || 'spark',
      app_source: SOGNI_APP_SOURCE,
      appSource: SOGNI_APP_SOURCE,
      sogni_tools: false,
      sogni_tool_execution: false
    }
  });
  const message = extractChatMessage(payload);
  // `message` may be missing for an unexpected payload; treat that like empty
  // content so callers get the coded error instead of a bare TypeError.
  const storyline = typeof message?.content === 'string' ? message.content.trim() : '';
  if (!storyline) {
    const err = new Error('Storyboard-video planning did not return a storyline.');
    err.code = 'EMPTY_STORYBOARD_STORYLINE';
    err.details = { payload };
    throw err;
  }
  return { storyline, raw: payload };
}
3383
+
3384
/**
 * Resolve the storyboard-video plan for a workflow start request.
 *
 * A plan derived from `--workflow-input` is used when available (wrapped as
 * `{ input }` when it has no `input` of its own). Otherwise a storyline is
 * generated through the chat endpoint and combined with the CLI options.
 *
 * @param {string} apiKey - API key used when a storyline has to be generated.
 * @returns {Promise<{plan: object, planningRaw: *}>} `planningRaw` is the raw
 *   planning response, or `null` when no planning call was made.
 */
async function buildStoryboardVideoWorkflowInput(apiKey) {
  const suppliedPlan = storyboardWorkflowInputFromParsedValue(
    parseWorkflowInput(options.apiWorkflowInput),
  );
  if (suppliedPlan) {
    const plan = suppliedPlan.input ? suppliedPlan : { input: suppliedPlan };
    return { plan, planningRaw: null };
  }

  const generated = await generateStoryboardWorkflowStoryline(apiKey);

  let videoModel = options.videoModel;
  if (!videoModel) {
    videoModel = cliSet.model && isSeedanceModelSelection(options.model) ? options.model : undefined;
  }
  let imageModel;
  if (cliSet.model && !isSeedanceModelSelection(options.model)) {
    imageModel = options.model;
  }
  const hasCliDuration = cliSet.duration && Number.isFinite(options.duration);
  const hasCliTargetResolution = cliSet.targetResolution && Number.isFinite(options.targetResolution);

  const plan = buildStoryboardVideoHostedToolSequenceInput({
    storyline: generated.storyline,
    userIntentText: options.prompt,
    title: options.apiWorkflowTitle,
    frameCount: options.storyboardFrames ?? undefined,
    videoDurationSec: hasCliDuration ? options.duration : undefined,
    videoTargetResolution: hasCliTargetResolution ? options.targetResolution : undefined,
    imageModel,
    imageQuality: storyboardWorkflowImageQualityFromCli(),
    imageOutputFormat: cliSet.outputFormat ? options.outputFormat : undefined,
    videoModel,
    generateAudio: options.apiGenerateAudio ?? undefined,
  });
  return { plan, planningRaw: generated.raw };
}
3410
+
3411
/**
 * Pick the workflow object out of an API response.
 *
 * @param {*} payload - Raw API response.
 * @returns {*} The envelope's `workflow`, else `payload.workflow`, else the payload itself.
 */
function workflowFromPayload(payload) {
  const envelope = extractApiEnvelopeData(payload);
  if (envelope?.workflow) return envelope.workflow;
  if (payload?.workflow) return payload.workflow;
  return payload;
}
3415
+
3416
/**
 * Pick the workflow list out of an API response.
 *
 * @param {*} payload - Raw API response.
 * @returns {*} The envelope's `workflows`, else `payload.workflows`, else an empty array.
 */
function workflowsFromPayload(payload) {
  for (const source of [extractApiEnvelopeData(payload), payload]) {
    const list = source?.workflows;
    if (list) return list;
  }
  return [];
}
3420
+
3421
/**
 * Pick the event list out of an API response.
 *
 * @param {*} payload - Raw API response.
 * @returns {*} The envelope's `events`, else `payload.events`, else an empty array.
 */
function eventsFromPayload(payload) {
  const envelope = extractApiEnvelopeData(payload);
  const candidates = [envelope?.events, payload?.events];
  return candidates.find(Boolean) || [];
}
3425
+
3426
/**
 * Print a human-readable summary of a workflow to stdout.
 *
 * Always prints the workflow id (or `(unknown)`), then kind/status/title when
 * present, then one line per artifact when `artifacts` is a non-empty array.
 *
 * @param {object} workflow - Workflow object from the API.
 * @returns {void}
 */
function printWorkflowSummary(workflow) {
  const identifier = workflow.workflowId || workflow.id || '(unknown)';
  console.log(`Workflow: ${identifier}`);

  const optionalFields = [
    ['Kind', 'kind'],
    ['Status', 'status'],
    ['Title', 'title'],
  ];
  for (const [label, field] of optionalFields) {
    if (workflow[field]) console.log(`${label}: ${workflow[field]}`);
  }

  if (!Array.isArray(workflow.artifacts) || workflow.artifacts.length === 0) return;

  console.log('\nArtifacts:');
  for (const artifact of workflow.artifacts) {
    const kindLabel = artifact.type || artifact.mediaType || 'artifact';
    const location = artifact.url || artifact.id || JSON.stringify(artifact);
    console.log(` - ${kindLabel}: ${location}`);
  }
}
3439
+
3440
/**
 * Parse raw SSE text and print one line per frame as `[id] event detail`.
 *
 * Uses `parseCreativeWorkflowSseChunk` when that name resolves to a function
 * in scope, otherwise the local `parseWorkflowSseChunk`. The detail is the
 * frame data's `status`, else its `message`, else nothing.
 *
 * @param {string} raw - Raw SSE text (one or more frames).
 * @returns {void}
 */
function printWorkflowSseFrames(raw) {
  const parse = typeof parseCreativeWorkflowSseChunk === 'function'
    ? parseCreativeWorkflowSseChunk
    : parseWorkflowSseChunk;

  for (const frame of parse(raw)) {
    const payload = frame.data && typeof frame.data === 'object' ? frame.data : {};
    const detail = payload.status || payload.message;
    const suffix = detail ? ` ${detail}` : '';
    console.log(`[${frame.id || '-'}] ${frame.event}${suffix}`);
  }
}
3450
+
3451
/**
 * Parse Server-Sent Events text into frames.
 *
 * Frames are separated by a blank line. Within a frame, lines starting with
 * `:` are comments; `id`, `event`, and `data` fields are recognised and any
 * other field is ignored. Multiple `data` lines are joined with `\n` and the
 * result is JSON-parsed, falling back to `{ message: <text> }` when it is not
 * valid JSON.
 *
 * Chunks that contain no `id`, `event`, or `data` field at all (for example a
 * `: keepalive` heartbeat comment) are skipped instead of being reported as an
 * empty `message` frame.
 *
 * @param {*} raw - SSE text; falsy values are treated as an empty string.
 * @returns {{id: (string|null), event: string, data: *}[]} Parsed frames in order.
 */
function parseWorkflowSseChunk(raw) {
  const frames = [];
  for (const chunk of String(raw || '').split(/\r?\n\r?\n/)) {
    if (!chunk.trim()) continue;

    const frame = { id: null, event: 'message', data: null };
    const dataLines = [];
    let sawField = false;

    for (const line of chunk.split(/\r?\n/)) {
      if (!line || line.startsWith(':')) continue;
      const separator = line.indexOf(':');
      const field = separator >= 0 ? line.slice(0, separator) : line;
      // A single leading space after the colon is part of the syntax, not the value.
      const value = separator >= 0 ? line.slice(separator + 1).replace(/^ /, '') : '';
      if (field === 'id') {
        frame.id = value;
        sawField = true;
      } else if (field === 'event') {
        frame.event = value || 'message';
        sawField = true;
      } else if (field === 'data') {
        dataLines.push(value);
        sawField = true;
      }
    }

    // Comment-only or unknown-field-only chunks carry no event.
    if (!sawField) continue;

    if (dataLines.length > 0) {
      const dataText = dataLines.join('\n');
      try {
        frame.data = JSON.parse(dataText);
      } catch {
        frame.data = { message: dataText };
      }
    }
    frames.push(frame);
  }
  return frames;
}
3478
+
3479
/**
 * Stream a workflow's SSE event feed and print each frame as it arrives.
 *
 * The response body is decoded incrementally; every complete frame (text up
 * to a blank line) is handed to `printWorkflowSseFrames`, and any non-blank
 * remainder is printed once the stream ends.
 *
 * @param {string} apiKey - API key used to build the request headers.
 * @param {string} workflowId - Workflow id (URL-encoded into the path).
 * @returns {Promise<void>}
 * @throws {Error} With `code === 'API_STREAM_FAILED'` when the response is not OK.
 */
async function streamApiWorkflowEvents(apiKey, workflowId) {
  const streamPath = `/v1/creative-agent/workflows/${encodeURIComponent(workflowId)}/events/stream`;
  const url = await buildSafeApiUrl(streamPath);

  const response = await fetch(url, {
    method: 'GET',
    headers: apiRequestHeaders(apiKey, { Accept: 'text/event-stream' })
  });
  if (!response.ok) {
    throw Object.assign(
      new Error(`Workflow stream failed (${response.status} ${response.statusText})`),
      { code: 'API_STREAM_FAILED' },
    );
  }
  if (!response.body) return;

  const frameBoundary = /\r?\n\r?\n/;
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let pending = '';
  try {
    let step = await reader.read();
    while (!step.done) {
      pending += decoder.decode(step.value, { stream: true });
      // Drain every complete frame currently buffered.
      for (let hit = frameBoundary.exec(pending); hit; hit = frameBoundary.exec(pending)) {
        const frameText = pending.slice(0, hit.index);
        pending = pending.slice(hit.index + hit[0].length);
        printWorkflowSseFrames(frameText);
      }
      step = await reader.read();
    }
    // Flush bytes the decoder was still holding for a multi-byte sequence.
    pending += decoder.decode();
    if (pending.trim()) {
      printWorkflowSseFrames(pending);
    }
  } finally {
    try { reader.releaseLock(); } catch {}
  }
}
3518
+
3519
/**
 * Entry point for the `--api-workflow` actions.
 *
 * Depending on `options.apiWorkflowAction` this lists workflows, fetches one
 * (`get`), lists or streams its events (`events` / `stream`), cancels it
 * (`cancel`), or - for any other action - starts a new workflow. Output is a
 * single JSON line when `options.json` is set and plain text otherwise.
 *
 * For a start request the kind defaults to `image_to_video`; a
 * `storyboard_video` request is submitted as a `hosted_tool_sequence` built
 * from the storyboard plan. With `options.apiWorkflowWatch`, the new
 * workflow's events are streamed after it has been created.
 *
 * @returns {Promise<void>}
 * @throws {Error} With `code === 'MISSING_WORKFLOW_ID'` when an action that
 *   targets an existing workflow is used without an id.
 */
async function runApiWorkflow() {
  const apiKey = requireApiKeyCredentials(loadCredentials(), '--api-workflow');
  const tokenType = options.tokenType || 'spark';
  const type = 'api-workflow';
  const action = options.apiWorkflowAction;
  const emitJson = (record) => console.log(JSON.stringify(record));

  if (action === 'list') {
    const listPayload = await fetchApiJson('/v1/creative-agent/workflows?limit=20', { apiKey });
    const workflows = workflowsFromPayload(listPayload);
    if (options.json) {
      emitJson({ success: true, type, action: 'list', workflows, raw: listPayload });
      return;
    }
    for (const entry of workflows) {
      console.log(`${entry.workflowId || entry.id}\t${entry.status || '-'}\t${entry.title || ''}`);
    }
    return;
  }

  if (['get', 'events', 'stream', 'cancel'].includes(action)) {
    const id = options.apiWorkflowId;
    if (!id) {
      throw Object.assign(new Error('Workflow id is required.'), { code: 'MISSING_WORKFLOW_ID' });
    }

    if (action === 'stream') {
      if (options.json) {
        emitJson({ success: true, type, action: 'stream', workflowId: id, note: 'Streaming writes SSE frames as text output.' });
      }
      await streamApiWorkflowEvents(apiKey, id);
      return;
    }

    const suffix = action === 'events' ? '/events' : action === 'cancel' ? '/cancel' : '';
    const existingPayload = await fetchApiJson(
      `/v1/creative-agent/workflows/${encodeURIComponent(id)}${suffix}`,
      { apiKey, method: action === 'cancel' ? 'POST' : 'GET' },
    );

    if (action === 'events') {
      const events = eventsFromPayload(existingPayload);
      if (options.json) {
        emitJson({ success: true, type, action: 'events', workflowId: id, events, raw: existingPayload });
      } else {
        console.log(JSON.stringify(events, null, 2));
      }
      return;
    }

    const existing = workflowFromPayload(existingPayload);
    if (options.json) {
      emitJson({ success: true, type, action, workflow: existing, raw: existingPayload });
    } else {
      printWorkflowSummary(existing);
    }
    return;
  }

  // Anything else starts a new workflow.
  const requestedKind = options.apiWorkflowKind || 'image_to_video';
  let kind = requestedKind;
  let input;
  let storyboardPlan = null;
  let storyboardPlanningRaw = null;

  if (requestedKind === 'storyboard_video') {
    ({ plan: storyboardPlan, planningRaw: storyboardPlanningRaw } = await buildStoryboardVideoWorkflowInput(apiKey));
    kind = 'hosted_tool_sequence';
    input = storyboardPlan.input;
  } else if (requestedKind === 'hosted_tool_sequence') {
    input = buildHostedToolSequenceWorkflowInput();
  } else {
    input = buildImageToVideoWorkflowInput();
  }

  const idempotencyKey = options.apiWorkflowIdempotencyKey;
  const payload = await fetchApiJson('/v1/creative-agent/workflows', {
    apiKey,
    method: 'POST',
    headers: idempotencyKey ? { 'Idempotency-Key': idempotencyKey } : {},
    body: {
      kind,
      input,
      ...(idempotencyKey ? { idempotency_key: idempotencyKey } : {}),
      token_type: tokenType,
      app_source: SOGNI_APP_SOURCE,
      appSource: SOGNI_APP_SOURCE
    }
  });
  const workflow = workflowFromPayload(payload);
  const workflowId = workflow?.workflowId || workflow?.id;

  if (options.json) {
    // Keys are added in the order they must appear in the JSON output.
    const record = { success: true, type, action: 'start', workflowKind: requestedKind };
    if (storyboardPlan) {
      record.storyline = storyboardPlan.storyline;
      record.storyboardPlan = {
        title: storyboardPlan.title,
        frameCount: storyboardPlan.frameCount,
        image: storyboardPlan.image,
        video: storyboardPlan.video,
        warnings: storyboardPlan.warnings,
      };
    }
    record.workflow = workflow;
    record.raw = payload;
    if (storyboardPlanningRaw) record.planningRaw = storyboardPlanningRaw;
    emitJson(record);
  } else {
    if (storyboardPlan?.storyline) {
      console.log('Generated storyline:\n');
      console.log(storyboardPlan.storyline);
      console.log('');
    }
    printWorkflowSummary(workflow);
  }

  if (options.apiWorkflowWatch && workflowId) {
    await streamApiWorkflowEvents(apiKey, workflowId);
  }
}
3641
+
3642
+ // ---------------------------------------------------------------------------
3643
+ // Memory system — persistent user preferences on disk
3644
+ // ---------------------------------------------------------------------------
3645
+ const MEMORIES_PATH = getEnv('SOGNI_MEMORIES_PATH') || DEFAULT_MEMORIES_PATH;
3646
+
3647
/**
 * Load the persisted memories from `MEMORIES_PATH`.
 *
 * Loading is best-effort: a missing file, an unreadable file, invalid JSON, or
 * JSON that is not an array (for example `null` or `{}`) all yield an empty
 * list, so callers always receive an array.
 *
 * @returns {Array} The stored memories, or `[]`.
 */
function loadMemories() {
  try {
    if (existsSync(MEMORIES_PATH)) {
      const stored = JSON.parse(readFileSync(MEMORIES_PATH, 'utf8'));
      if (Array.isArray(stored)) return stored;
    }
  } catch {
    // Deliberately ignored: an unreadable or corrupt file means "no memories".
  }
  return [];
}
3653
+
3654
/**
 * Persist the memories to `MEMORIES_PATH` as pretty-printed JSON.
 *
 * The parent directory is created (recursively) when it does not exist yet.
 *
 * @param {*} memories - Value to serialise with two-space indentation.
 * @returns {void}
 */
function saveMemories(memories) {
  const targetDir = dirname(MEMORIES_PATH);
  if (existsSync(targetDir) === false) {
    mkdirSync(targetDir, { recursive: true });
  }
  const serialized = JSON.stringify(memories, null, 2);
  writeFileSync(MEMORIES_PATH, serialized);
}
@@ -2569,6 +3882,176 @@ async function fetchMediaBuffer(pathOrUrl) {
2569
3882
  }
2570
3883
  }
2571
3884
 
3885
/**
 * Load media from a local path or an http(s) URL as a `Blob`.
 *
 * Remote URLs are checked with `assertSafeUrl` before being fetched; the blob
 * type is the response's `content-type` (parameters stripped) when present,
 * otherwise it is derived from the path. Anything else is read through
 * `fetchMediaBuffer` and typed from the path.
 *
 * @param {string} pathOrUrl - Local path or http(s) URL.
 * @param {string} [fallbackMimeType='application/octet-stream'] - Type passed
 *   to `mimeTypeForPath` as its fallback.
 * @returns {Promise<Blob>}
 * @throws {Error} With `code === 'FETCH_FAILED'` when the HTTP response is not OK.
 */
async function fetchMediaBlob(pathOrUrl, fallbackMimeType = 'application/octet-stream') {
  const isRemote = pathOrUrl.startsWith('http://') || pathOrUrl.startsWith('https://');
  if (!isRemote) {
    const localBytes = await fetchMediaBuffer(pathOrUrl);
    return new Blob([localBytes], { type: mimeTypeForPath(pathOrUrl, fallbackMimeType) });
  }

  await assertSafeUrl(pathOrUrl);
  const response = await fetch(pathOrUrl);
  if (!response.ok) {
    const { status, statusText } = response;
    throw Object.assign(
      new Error(`Failed to fetch media (${status} ${statusText})`),
      { code: 'FETCH_FAILED', details: { url: pathOrUrl, status, statusText } },
    );
  }

  const headerType = response.headers.get('content-type')?.split(';')[0]?.trim();
  const mimeType = headerType || mimeTypeForPath(pathOrUrl, fallbackMimeType);
  const remoteBytes = await response.arrayBuffer();
  return new Blob([remoteBytes], { type: mimeType });
}
3903
+
3904
/**
 * Prepare the voice-identity reference audio as a `Blob`.
 *
 * Local `.wav` / `.wave` files are converted with ffmpeg to mono AAC (96k) in
 * an M4A container and returned as `audio/mp4`; the temporary output file and
 * directory are removed afterwards. Every other input (including remote URLs,
 * whatever their extension) is loaded through `fetchMediaBlob` with an
 * `audio/mp4` fallback type.
 *
 * @param {string} pathOrUrl - Local path or http(s) URL of the voice clip.
 * @returns {Promise<Blob>}
 * @throws {Error} With `code === 'FFMPEG_AUDIO_ID_FAILED'` when the conversion fails.
 */
async function prepareReferenceAudioIdentityMedia(pathOrUrl) {
  const extension = extname(String(pathOrUrl || '').split('?')[0]).toLowerCase();
  const isRemote = pathOrUrl.startsWith('http://') || pathOrUrl.startsWith('https://');
  const isWav = extension === '.wav' || extension === '.wave';
  if (isRemote || !isWav) {
    return fetchMediaBlob(pathOrUrl, 'audio/mp4');
  }

  const sourcePath = sanitizePath(pathOrUrl, '--reference-audio-identity');
  const ffmpegPath = await ensureFfmpegAvailable();
  const workDir = mkdtempSync(join(tmpdir(), 'sogni-audio-id-'));
  const convertedPath = join(workDir, 'voice-identity.m4a');
  try {
    const ffmpegArgs = [
      '-hide_banner',
      '-loglevel', 'error',
      '-y',
      '-i', sourcePath,
      '-vn',
      '-ac', '1',
      '-c:a', 'aac',
      '-b:a', '96k',
      convertedPath
    ];
    const result = await runCommand(ffmpegPath, ffmpegArgs, { captureOutput: true });

    const succeeded = !result.error && result.status === 0 && isNonEmptyFile(convertedPath);
    if (!succeeded) {
      throw Object.assign(new Error('Failed to normalize WAV voice identity audio to M4A.'), {
        code: 'FFMPEG_AUDIO_ID_FAILED',
        hint: 'Provide an .m4a/.mp3/.webm voice clip, or install ffmpeg so WAV clips can be converted.',
        details: { sourcePath, stderr: result.stderr || '', stdout: result.stdout || '', status: result.status },
      });
    }

    return new Blob([readFileSync(convertedPath)], { type: 'audio/mp4' });
  } finally {
    // Best-effort cleanup of the temporary conversion artefacts.
    try { if (existsSync(convertedPath)) unlinkSync(convertedPath); } catch {}
    try { rmdirSync(workDir); } catch {}
  }
}
3942
+
3943
/**
 * Build the path of the temporary `input<ext>` file for a media source.
 *
 * The extension is taken from the source label (anything after `?` is
 * ignored, case is lowered) when it is a dot followed by 1-8 alphanumerics;
 * otherwise `fallbackExt` is used.
 *
 * @param {string} tempDir - Directory that will hold the file.
 * @param {*} sourceLabel - Original path or URL of the media.
 * @param {string} fallbackExt - Extension (with leading dot) to fall back to.
 * @returns {string} `tempDir` joined with `input<ext>`.
 */
function mediaTempInputPath(tempDir, sourceLabel, fallbackExt) {
  const [withoutQuery] = String(sourceLabel || '').split('?');
  const candidate = extname(withoutQuery).toLowerCase();
  const suffix = /^[.][a-z0-9]{1,8}$/i.test(candidate) ? candidate : fallbackExt;
  return join(tempDir, `input${suffix}`);
}
3948
+
3949
/**
 * Transcode reference audio bytes to AAC (192k) in an M4A container using ffmpeg.
 *
 * The bytes are written to a temporary input file, converted, and the output
 * is read back; temporary files and the directory are removed afterwards.
 *
 * @param {Buffer|Uint8Array} buffer - Source audio bytes.
 * @param {string} sourceLabel - Original path/URL, used for the temp file
 *   extension and in error details.
 * @returns {Promise<Buffer>} The converted audio bytes.
 * @throws {Error} With `code === 'FFMPEG_AUDIO_PREP_FAILED'` when ffmpeg fails
 *   or produces no output.
 */
async function transcodeMp3ReferenceAudioBuffer(buffer, sourceLabel) {
  const ffmpegPath = await ensureFfmpegAvailable();
  const workDir = mkdtempSync(join(tmpdir(), 'sogni-ref-audio-'));
  const sourceFile = mediaTempInputPath(workDir, sourceLabel, '.mp3');
  const convertedFile = join(workDir, 'reference-audio.m4a');
  try {
    writeFileSync(sourceFile, buffer);
    const ffmpegArgs = [
      '-hide_banner',
      '-loglevel', 'error',
      '-y',
      '-i', sourceFile,
      '-vn',
      '-c:a', 'aac',
      '-b:a', '192k',
      '-movflags', '+faststart',
      convertedFile
    ];
    const result = await runCommand(ffmpegPath, ffmpegArgs, { captureOutput: true });

    const succeeded = !result.error && result.status === 0 && isNonEmptyFile(convertedFile);
    if (!succeeded) {
      throw Object.assign(new Error('Failed to prepare MP3 reference audio for video generation.'), {
        code: 'FFMPEG_AUDIO_PREP_FAILED',
        hint: 'Install ffmpeg with AAC support, or provide M4A/WAV reference audio.',
        details: { sourceLabel, stderr: result.stderr || '', stdout: result.stdout || '', status: result.status },
      });
    }

    return readFileSync(convertedFile);
  } finally {
    // Best-effort cleanup of the temporary files and directory.
    for (const leftover of [sourceFile, convertedFile]) {
      try { if (existsSync(leftover)) unlinkSync(leftover); } catch {}
    }
    try { rmdirSync(workDir); } catch {}
  }
}
3983
+
3984
/**
 * Make reference audio bytes acceptable for video generation.
 *
 * Audio detected as MP3 is transcoded to M4A (with a note on stderr unless
 * `options.quiet` is set); any other format is returned unchanged.
 *
 * @param {Buffer|Uint8Array} buffer - Reference audio bytes.
 * @param {string} sourceLabel - Original path/URL of the audio.
 * @returns {Promise<Buffer|Uint8Array>} The original or the transcoded bytes.
 */
async function prepareReferenceAudioForVideoBuffer(buffer, sourceLabel) {
  const format = detectReferenceAudioFormat(
    buffer,
    mimeTypeForPath(sourceLabel, 'application/octet-stream'),
  );
  if (format === 'mp3') {
    const converted = await transcodeMp3ReferenceAudioBuffer(buffer, sourceLabel);
    if (!options.quiet) {
      console.error('Prepared MP3 reference audio as M4A for video provider compatibility.');
    }
    return converted;
  }
  return buffer;
}
3995
+
3996
/**
 * Cut a Seedance video-to-video reference clip out of source video bytes.
 *
 * ffmpeg seeks to the start offset (never below 0), keeps at most
 * `SEEDANCE_V2V_REFERENCE_MAX_DURATION_SECONDS` seconds (at least 0.1s; a
 * missing or zero duration means the maximum), drops audio, and re-encodes the
 * first video stream as H.264 / yuv420p. Temporary files are removed afterwards.
 *
 * @param {Buffer|Uint8Array} buffer - Source video bytes.
 * @param {string} sourceLabel - Original path/URL, used for the temp file
 *   extension and in error details.
 * @param {*} startOffset - Start offset in seconds.
 * @param {*} requestedDuration - Requested clip length in seconds.
 * @returns {Promise<Buffer>} The trimmed clip.
 * @throws {Error} With `code === 'FFMPEG_SEEDANCE_V2V_PREP_FAILED'` when ffmpeg
 *   fails or produces no output.
 */
async function trimSeedanceV2VSourceVideoBuffer(buffer, sourceLabel, startOffset, requestedDuration) {
  const ffmpegPath = await ensureFfmpegAvailable();
  const workDir = mkdtempSync(join(tmpdir(), 'sogni-seedance-v2v-'));
  const sourceFile = mediaTempInputPath(workDir, sourceLabel, '.mp4');
  const clipFile = join(workDir, 'seedance-source.mp4');

  const seekSeconds = Math.max(0, Number(startOffset) || 0);
  const cappedSeconds = Math.min(
    SEEDANCE_V2V_REFERENCE_MAX_DURATION_SECONDS,
    Number(requestedDuration) || SEEDANCE_V2V_REFERENCE_MAX_DURATION_SECONDS,
  );
  const clipSeconds = Math.max(0.1, cappedSeconds);

  try {
    writeFileSync(sourceFile, buffer);
    const ffmpegArgs = [
      '-hide_banner',
      '-loglevel', 'error',
      '-y',
      '-ss', String(seekSeconds),
      '-i', sourceFile,
      '-t', String(clipSeconds),
      '-map', '0:v:0',
      '-an',
      '-c:v', 'libx264',
      '-preset', 'veryfast',
      '-crf', '18',
      '-pix_fmt', 'yuv420p',
      '-movflags', '+faststart',
      clipFile
    ];
    const result = await runCommand(ffmpegPath, ffmpegArgs, { captureOutput: true });

    const succeeded = !result.error && result.status === 0 && isNonEmptyFile(clipFile);
    if (!succeeded) {
      throw Object.assign(new Error('Failed to prepare Seedance video-to-video reference clip.'), {
        code: 'FFMPEG_SEEDANCE_V2V_PREP_FAILED',
        hint: 'Install ffmpeg with libx264 support, or provide a reference clip that starts at the desired frame.',
        details: {
          sourceLabel,
          start: seekSeconds,
          duration: clipSeconds,
          stderr: result.stderr || '',
          stdout: result.stdout || '',
          status: result.status,
        },
      });
    }

    return readFileSync(clipFile);
  } finally {
    // Best-effort cleanup of the temporary files and directory.
    for (const leftover of [sourceFile, clipFile]) {
      try { if (existsSync(leftover)) unlinkSync(leftover); } catch {}
    }
    try { rmdirSync(workDir); } catch {}
  }
}
4040
+
4041
/**
 * Append a reference URL to `target` when it may be forwarded as-is.
 *
 * Only values accepted by `isHttpsUrl` are considered; they must additionally
 * pass `assertSafeUrl` restricted to the `https:` protocol.
 *
 * @param {string[]} target - Array that receives the URL.
 * @param {*} pathOrUrl - Candidate reference (local path or URL).
 * @param {string} label - Human-readable name used in the error message.
 * @returns {Promise<boolean>} `true` when the URL was appended, `false` when
 *   the value is not an HTTPS URL (nothing is appended).
 * @throws {Error} With `code === 'INVALID_URL'` when the URL fails the safety
 *   check; the original failure is preserved as `cause`.
 */
async function appendSafeSeedanceReferenceUrl(target, pathOrUrl, label) {
  if (!isHttpsUrl(pathOrUrl)) return false;
  try {
    await assertSafeUrl(pathOrUrl, { allowedProtocols: ['https:'] });
  } catch (error) {
    // Keep the guard's error attached so its stack and details stay available.
    const err = new Error(
      `${label} URL is not safe to forward: ${error?.message || String(error)}`,
      { cause: error },
    );
    err.code = 'INVALID_URL';
    err.details = { url: pathOrUrl, label };
    throw err;
  }
  target.push(pathOrUrl);
  return true;
}
4054
+
2572
4055
  function resolveMultiAngleOutputConfig(outputPath, outputFormat) {
2573
4056
  if (!outputPath) return null;
2574
4057
  const ext = extname(outputPath);
@@ -2815,7 +4298,7 @@ async function runImageEditProjectWithEvents(client, editConfig, expectedCount,
2815
4298
  if (!projectId) projectId = data.projectId;
2816
4299
  const jobData = data.job?.data || {};
2817
4300
  results.push({
2818
- imageUrl: data.imageUrl,
4301
+ resultUrl: data.resultUrl || data.imageUrl,
2819
4302
  seed: jobData.seed,
2820
4303
  jobIndex: data.jobIndex,
2821
4304
  projectId: data.projectId
@@ -2966,7 +4449,7 @@ async function runMultiAngleFlow(client, log) {
2966
4449
  options.timeout,
2967
4450
  azimuth
2968
4451
  );
2969
- const urls = results.map((r) => r.imageUrl).filter(Boolean);
4452
+ const urls = results.map((r) => r.resultUrl).filter(Boolean);
2970
4453
  const seeds = results.map((r) => r.seed ?? options.seed);
2971
4454
 
2972
4455
  if (outputConfig) {
@@ -3284,12 +4767,21 @@ async function ensureSufficientVideoBalance(client, log) {
3284
4767
 
3285
4768
  // ---------------------------------------------------------------------------
3286
4769
  // Token auto-fallback: resolve 'auto' to 'spark', retry with 'sogni' on
3287
- // insufficient balance errors.
4770
+ // insufficient balance errors for native Sogni models. External API-backed
4771
+ // models are Spark-only and must not silently fall back to SOGNI tokens.
3288
4772
  // ---------------------------------------------------------------------------
4773
+ const _requiresSparkOnlyToken = requiresSparkOnlyToken(options.model);
4774
+ if (_requiresSparkOnlyToken && options.tokenType === 'sogni') {
4775
+ if (!options.quiet) {
4776
+ console.error(`${options.model} requires SPARK tokens; using --token-type spark.`);
4777
+ }
4778
+ options.tokenType = 'spark';
4779
+ }
3289
4780
  const _isAutoToken = options.tokenType === 'auto';
3290
4781
  if (_isAutoToken) {
3291
4782
  options.tokenType = 'spark';
3292
4783
  }
4784
+ const _allowAutoTokenFallback = _isAutoToken && !_requiresSparkOnlyToken;
3293
4785
 
3294
4786
  async function main() {
3295
4787
  let exitCode = 0;
@@ -3448,6 +4940,16 @@ async function main() {
3448
4940
  }
3449
4941
  }
3450
4942
 
4943
+ if (options.apiChat) {
4944
+ await runApiChat(log);
4945
+ return;
4946
+ }
4947
+
4948
+ if (options.apiWorkflowAction) {
4949
+ await runApiWorkflow(log);
4950
+ return;
4951
+ }
4952
+
3451
4953
  if (options.extractLastFrame) {
3452
4954
  const videoPath = sanitizePath(options.extractLastFrame, '--extract-last-frame video');
3453
4955
  const outputPath = sanitizePath(options.extractLastFrameOutput, '--extract-last-frame output');
@@ -3574,18 +5076,15 @@ async function main() {
3574
5076
  const creds = loadCredentials();
3575
5077
  log('Connecting to Sogni...');
3576
5078
  client = new SogniClientWrapper({
5079
+ appSource: SOGNI_APP_SOURCE,
3577
5080
  network: openclawConfig?.defaultNetwork || 'fast',
3578
5081
  autoConnect: false,
3579
- ...(creds.SOGNI_API_KEY
3580
- ? { apiKey: creds.SOGNI_API_KEY, authType: 'apiKey' }
3581
- : {
3582
- username: creds.SOGNI_USERNAME,
3583
- password: creds.SOGNI_PASSWORD,
3584
- authType: 'token'
3585
- })
5082
+ apiKey: creds.SOGNI_API_KEY,
5083
+ authType: 'apiKey'
3586
5084
  });
3587
5085
 
3588
5086
  await client.connect();
5087
+ await disableLiveModelAvailabilityEvents(client);
3589
5088
  log('Connected.');
3590
5089
 
3591
5090
  if (options.showBalance) {
@@ -3667,14 +5166,13 @@ async function main() {
3667
5166
  client.on(ClientEvent.JOB_COMPLETED, (data) => {
3668
5167
  const jobData = data.job?.data || {};
3669
5168
  results.push({
3670
- imageUrl: data.imageUrl,
3671
- videoUrl: data.videoUrl,
5169
+ resultUrl: data.resultUrl || (options.music ? data.audioUrl : options.video ? data.videoUrl : data.imageUrl),
3672
5170
  seed: jobData.seed,
3673
5171
  jobIndex: data.jobIndex,
3674
5172
  projectId: data.projectId
3675
5173
  });
3676
5174
  completedJobs++;
3677
- log(`${options.video ? 'Video' : 'Image'} ${completedJobs}/${options.count} completed`);
5175
+ log(`${options.music ? 'Music' : options.video ? 'Video' : 'Image'} ${completedJobs}/${options.count} completed`);
3678
5176
 
3679
5177
  if (completedJobs >= options.count) {
3680
5178
  clearTimeout(timeout);
@@ -3707,8 +5205,8 @@ async function main() {
3707
5205
  reject(new Error(message));
3708
5206
  });
3709
5207
 
3710
- // Progress for video
3711
- if (options.video) {
5208
+ // Progress for longer-running media jobs.
5209
+ if (options.video || options.music) {
3712
5210
  client.on(ClientEvent.PROJECT_PROGRESS, (data) => {
3713
5211
  if (data.percentage && data.percentage > 0) {
3714
5212
  log(`Progress: ${Math.round(data.percentage)}%`);
@@ -3730,20 +5228,50 @@ async function main() {
3730
5228
  const seedanceReferenceImageUrls = [];
3731
5229
  const seedanceReferenceVideoUrls = [];
3732
5230
  const seedanceReferenceAudioUrls = [];
3733
- const useRefImageUrl = isSeedanceVideo && isHttpsUrl(options.refImage);
3734
- const useRefImageEndUrl = isSeedanceVideo && isHttpsUrl(options.refImageEnd);
3735
- const useRefAudioUrl = isSeedanceVideo && isHttpsUrl(options.refAudio);
3736
- const useRefVideoUrl = isSeedanceVideo && isHttpsUrl(options.refVideo);
3737
- if (useRefImageUrl) seedanceReferenceImageUrls.push(options.refImage);
3738
- if (useRefImageEndUrl) seedanceReferenceImageUrls.push(options.refImageEnd);
3739
- if (useRefAudioUrl) seedanceReferenceAudioUrls.push(options.refAudio);
3740
- if (useRefVideoUrl) seedanceReferenceVideoUrls.push(options.refVideo);
5231
+ const useRefImageUrl = isSeedanceVideo && await appendSafeSeedanceReferenceUrl(seedanceReferenceImageUrls, options.refImage, 'Reference image');
5232
+ const useRefImageEndUrl = isSeedanceVideo && await appendSafeSeedanceReferenceUrl(seedanceReferenceImageUrls, options.refImageEnd, 'End reference image');
5233
+ const refAudioFormatByPath = options.refAudio
5234
+ ? detectReferenceAudioFormat(new Uint8Array(), mimeTypeForPath(options.refAudio, 'application/octet-stream'))
5235
+ : 'unknown';
5236
+ const useRefAudioUrl = isSeedanceVideo
5237
+ && refAudioFormatByPath !== 'mp3'
5238
+ && await appendSafeSeedanceReferenceUrl(seedanceReferenceAudioUrls, options.refAudio, 'Reference audio');
5239
+ const useRefVideoUrl = isSeedanceVideo
5240
+ && options.videoStart === null
5241
+ && await appendSafeSeedanceReferenceUrl(seedanceReferenceVideoUrls, options.refVideo, 'Reference video');
3741
5242
 
3742
5243
  let imageBuffer = options.refImage && !useRefImageUrl ? await fetchMediaBuffer(options.refImage) : undefined;
3743
5244
  let endImageBuffer = options.refImageEnd && !useRefImageEndUrl ? await fetchMediaBuffer(options.refImageEnd) : undefined;
3744
- const audioBuffer = options.refAudio && !useRefAudioUrl ? await fetchMediaBuffer(options.refAudio) : undefined;
3745
- const videoBuffer = options.refVideo && !useRefVideoUrl ? await fetchMediaBuffer(options.refVideo) : undefined;
3746
- const audioIdentityBuffer = options.referenceAudioIdentity ? await fetchMediaBuffer(options.referenceAudioIdentity) : undefined;
5245
+ let audioBuffer = options.refAudio && !useRefAudioUrl ? await fetchMediaBuffer(options.refAudio) : undefined;
5246
+ let videoBuffer = options.refVideo && !useRefVideoUrl ? await fetchMediaBuffer(options.refVideo) : undefined;
5247
+ let projectVideoStart = options.videoStart;
5248
+ if (audioBuffer) {
5249
+ audioBuffer = await prepareReferenceAudioForVideoBuffer(audioBuffer, options.refAudio);
5250
+ }
5251
+ if (
5252
+ videoBuffer
5253
+ && isSeedanceVideo
5254
+ && options.videoWorkflow === 'v2v'
5255
+ && shouldTrimSeedanceV2VSourceVideo({
5256
+ sourceDurationSeconds: null,
5257
+ requestedDurationSeconds: options.duration,
5258
+ startOffsetSeconds: options.videoStart ?? 0
5259
+ })
5260
+ ) {
5261
+ videoBuffer = await trimSeedanceV2VSourceVideoBuffer(
5262
+ videoBuffer,
5263
+ options.refVideo,
5264
+ options.videoStart ?? 0,
5265
+ options.duration,
5266
+ );
5267
+ projectVideoStart = null;
5268
+ if (!options.quiet) {
5269
+ console.error('Prepared Seedance V2V reference video clip before upload.');
5270
+ }
5271
+ }
5272
+ const audioIdentityMedia = options.referenceAudioIdentity
5273
+ ? await prepareReferenceAudioIdentityMedia(options.referenceAudioIdentity)
5274
+ : undefined;
3747
5275
  const modelDefaults = getModelDefaults(options.model, openclawConfig);
3748
5276
  const videoDimensionRules = videoDimensionRulesFromDefaults(modelDefaults, options.model);
3749
5277
 
@@ -3823,8 +5351,8 @@ async function main() {
3823
5351
  if (options.audioDuration !== null) {
3824
5352
  projectConfig.audioDuration = options.audioDuration;
3825
5353
  }
3826
- if (audioIdentityBuffer) {
3827
- projectConfig.referenceAudioIdentity = audioIdentityBuffer;
5354
+ if (audioIdentityMedia) {
5355
+ projectConfig.referenceAudioIdentity = audioIdentityMedia;
3828
5356
  }
3829
5357
  if (videoBuffer) {
3830
5358
  projectConfig.referenceVideo = videoBuffer;
@@ -3838,8 +5366,8 @@ async function main() {
3838
5366
  if (seedanceReferenceAudioUrls.length > 0) {
3839
5367
  projectConfig.referenceAudioUrls = seedanceReferenceAudioUrls;
3840
5368
  }
3841
- if (options.videoStart !== null) {
3842
- projectConfig.videoStart = options.videoStart;
5369
+ if (projectVideoStart !== null) {
5370
+ projectConfig.videoStart = projectVideoStart;
3843
5371
  }
3844
5372
  if (options.seed !== null && options.seed !== undefined) {
3845
5373
  projectConfig.seed = options.seed;
@@ -3890,6 +5418,67 @@ async function main() {
3890
5418
  if (videoResult?.error || videoResult?.message) {
3891
5419
  throw new Error(videoResult.error || videoResult.message);
3892
5420
  }
5421
+ } else if (options.music) {
5422
+ log(`Generating music with ${options.model}...`);
5423
+ if (options.seed !== null && options.seed !== undefined) log(`Using seed: ${options.seed}`);
5424
+
5425
+ const projectConfig = {
5426
+ modelId: options.model,
5427
+ positivePrompt: options.prompt,
5428
+ numberOfMedia: options.count,
5429
+ duration: options.duration,
5430
+ steps: options.steps,
5431
+ tokenType: options.tokenType || 'spark',
5432
+ waitForCompletion: false,
5433
+ disableNSFWFilter: options.noFilter === true,
5434
+ outputFormat: options.outputFormat || 'mp3'
5435
+ };
5436
+
5437
+ if (options.guidance !== null && options.guidance !== undefined) {
5438
+ projectConfig.guidance = options.guidance;
5439
+ }
5440
+ if (options.sampler) {
5441
+ projectConfig.sampler = options.sampler;
5442
+ }
5443
+ if (options.scheduler) {
5444
+ projectConfig.scheduler = options.scheduler;
5445
+ }
5446
+ if (options.musicShift !== null && options.musicShift !== undefined) {
5447
+ projectConfig.shift = options.musicShift;
5448
+ }
5449
+ if (options.musicBpm !== null && options.musicBpm !== undefined) {
5450
+ projectConfig.bpm = options.musicBpm;
5451
+ }
5452
+ if (options.musicTimesig) {
5453
+ projectConfig.timesignature = options.musicTimesig;
5454
+ }
5455
+ if (options.musicLanguage) {
5456
+ projectConfig.language = options.musicLanguage;
5457
+ }
5458
+ if (options.musicLyrics) {
5459
+ projectConfig.lyrics = options.musicLyrics;
5460
+ }
5461
+ if (options.musicKeyscale) {
5462
+ projectConfig.keyscale = options.musicKeyscale;
5463
+ }
5464
+ if (options.musicComposerMode !== null && options.musicComposerMode !== undefined) {
5465
+ projectConfig.composerMode = options.musicComposerMode;
5466
+ }
5467
+ if (options.musicPromptStrength !== null && options.musicPromptStrength !== undefined) {
5468
+ projectConfig.promptStrength = options.musicPromptStrength;
5469
+ }
5470
+ if (options.musicCreativity !== null && options.musicCreativity !== undefined) {
5471
+ projectConfig.creativity = options.musicCreativity;
5472
+ }
5473
+ if (options.seed !== null && options.seed !== undefined) {
5474
+ projectConfig.seed = options.seed;
5475
+ }
5476
+
5477
+ const audioResult = await client.createAudioProject(projectConfig);
5478
+
5479
+ if (audioResult?.error || audioResult?.message) {
5480
+ throw new Error(audioResult.error || audioResult.message);
5481
+ }
3893
5482
  } else if (options.contextImages.length > 0) {
3894
5483
  // Image editing with context images
3895
5484
  log(`Editing with ${options.model}...`);
@@ -3903,6 +5492,13 @@ async function main() {
3903
5492
  const modelDefaults = getModelDefaults(options.model, openclawConfig);
3904
5493
  const steps = options.steps ?? modelDefaults?.steps ?? (options.model.includes('lightning') ? 4 : 20);
3905
5494
  const guidance = options.guidance ?? modelDefaults?.guidance ?? (options.model.includes('lightning') ? 3.5 : 7.5);
5495
+ const gptImageQuality = isGptImage2ModelSelection(options.model)
5496
+ ? options.quality === 'pro'
5497
+ ? 'high'
5498
+ : options.quality === 'fast'
5499
+ ? 'low'
5500
+ : 'medium'
5501
+ : null;
3906
5502
 
3907
5503
  const editConfig = {
3908
5504
  modelId: options.model,
@@ -3920,6 +5516,9 @@ async function main() {
3920
5516
  if (options.outputFormat) {
3921
5517
  editConfig.outputFormat = options.outputFormat;
3922
5518
  }
5519
+ if (gptImageQuality) {
5520
+ editConfig.gptImageQuality = gptImageQuality;
5521
+ }
3923
5522
  if (options.sampler) {
3924
5523
  editConfig.sampler = options.sampler;
3925
5524
  }
@@ -3937,7 +5536,11 @@ async function main() {
3937
5536
  editConfig.seed = options.seed;
3938
5537
  }
3939
5538
 
3940
- await client.createImageEditProject(editConfig);
5539
+ if (isGptImage2ModelSelection(options.model)) {
5540
+ await client.createImageProject(editConfig);
5541
+ } else {
5542
+ await client.createImageEditProject(editConfig);
5543
+ }
3941
5544
  } else if (options.photobooth) {
3942
5545
  // Photobooth: face transfer with InstantID ControlNet
3943
5546
  log(`Photobooth with ${options.model}...`);
@@ -3992,6 +5595,13 @@ async function main() {
3992
5595
  const modelDefaults = getModelDefaults(options.model, openclawConfig);
3993
5596
  const guidance = options.guidance ?? modelDefaults?.guidance ?? 1.0;
3994
5597
  const steps = options.steps ?? modelDefaults?.steps;
5598
+ const gptImageQuality = isGptImage2ModelSelection(options.model)
5599
+ ? options.quality === 'pro'
5600
+ ? 'high'
5601
+ : options.quality === 'fast'
5602
+ ? 'low'
5603
+ : 'medium'
5604
+ : null;
3995
5605
 
3996
5606
  const useVariations = options.count > 1 && hasPromptVariations(options.prompt);
3997
5607
  const variationCount = useVariations ? options.count : 1;
@@ -4024,6 +5634,9 @@ async function main() {
4024
5634
  if (options.outputFormat) {
4025
5635
  projectConfig.outputFormat = options.outputFormat;
4026
5636
  }
5637
+ if (gptImageQuality) {
5638
+ projectConfig.gptImageQuality = gptImageQuality;
5639
+ }
4027
5640
  if (options.sampler) {
4028
5641
  projectConfig.sampler = options.sampler;
4029
5642
  }
@@ -4046,18 +5659,18 @@ async function main() {
4046
5659
  await completionPromise;
4047
5660
 
4048
5661
  if (results.length > 0) {
4049
- const urls = results.map(r => options.video ? r.videoUrl : r.imageUrl).filter(Boolean);
5662
+ const urls = results.map(r => r.resultUrl).filter(Boolean);
4050
5663
  const firstResult = results[0];
4051
5664
 
4052
5665
  // Save last render info
4053
5666
  const seeds = results.map(r => r.seed ?? options.seed);
4054
5667
  const renderInfo = {
4055
5668
  timestamp: new Date().toISOString(),
4056
- type: options.video ? 'video' : 'image',
5669
+ type: options.music ? 'music' : options.video ? 'video' : 'image',
4057
5670
  prompt: options.prompt,
4058
5671
  model: options.model,
4059
- width: options.width,
4060
- height: options.height,
5672
+ width: options.music ? null : options.width,
5673
+ height: options.music ? null : options.height,
4061
5674
  seed: firstResult.seed ?? options.seed,
4062
5675
  seedStrategy: options.seedStrategy || null,
4063
5676
  seeds,
@@ -4082,6 +5695,23 @@ async function main() {
4082
5695
  if (options.loraStrengths.length > 0) {
4083
5696
  renderInfo.loraStrengths = options.loraStrengths;
4084
5697
  }
5698
+ if (options.music) {
5699
+ renderInfo.duration = options.duration;
5700
+ renderInfo.bpm = options.musicBpm ?? null;
5701
+ renderInfo.keyscale = options.musicKeyscale || null;
5702
+ renderInfo.timesignature = options.musicTimesig || null;
5703
+ renderInfo.language = options.musicLanguage || null;
5704
+ renderInfo.composerMode = options.musicComposerMode;
5705
+ if (options.musicPromptStrength !== null && options.musicPromptStrength !== undefined) {
5706
+ renderInfo.promptStrength = options.musicPromptStrength;
5707
+ }
5708
+ if (options.musicCreativity !== null && options.musicCreativity !== undefined) {
5709
+ renderInfo.creativity = options.musicCreativity;
5710
+ }
5711
+ if (options.musicShift !== null && options.musicShift !== undefined) {
5712
+ renderInfo.shift = options.musicShift;
5713
+ }
5714
+ }
4085
5715
  if (options.video) {
4086
5716
  renderInfo.workflow = options.videoWorkflow;
4087
5717
  renderInfo.fps = options.fps;
@@ -4182,17 +5812,14 @@ async function main() {
4182
5812
  // Create a new client for second clip to avoid event conflicts
4183
5813
  const creds = loadCredentials();
4184
5814
  const client2 = new SogniClientWrapper({
5815
+ appSource: SOGNI_APP_SOURCE,
4185
5816
  network: openclawConfig?.defaultNetwork || 'fast',
4186
5817
  autoConnect: false,
4187
- ...(creds.SOGNI_API_KEY
4188
- ? { apiKey: creds.SOGNI_API_KEY, authType: 'apiKey' }
4189
- : {
4190
- username: creds.SOGNI_USERNAME,
4191
- password: creds.SOGNI_PASSWORD,
4192
- authType: 'token'
4193
- })
5818
+ apiKey: creds.SOGNI_API_KEY,
5819
+ authType: 'apiKey'
4194
5820
  });
4195
5821
  await client2.connect();
5822
+ await disableLiveModelAvailabilityEvents(client2);
4196
5823
 
4197
5824
  // Create second clip and wait for completion via events
4198
5825
  const clip2Promise = new Promise((resolve, reject) => {
@@ -4203,7 +5830,7 @@ async function main() {
4203
5830
  client2.on(ClientEvent.JOB_COMPLETED, async (data) => {
4204
5831
  try {
4205
5832
  clearTimeout(timeout);
4206
- const clip2Url = data.videoUrl;
5833
+ const clip2Url = data.resultUrl || data.videoUrl;
4207
5834
  if (!clip2Url) {
4208
5835
  reject(new Error('No video URL returned for second clip.'));
4209
5836
  return;
@@ -4262,11 +5889,11 @@ async function main() {
4262
5889
  if (options.json) {
4263
5890
  const output = {
4264
5891
  success: true,
4265
- type: options.video ? 'video' : 'image',
5892
+ type: options.music ? 'music' : options.video ? 'video' : 'image',
4266
5893
  prompt: options.prompt,
4267
5894
  model: options.model,
4268
- width: options.width,
4269
- height: options.height,
5895
+ width: options.music ? null : options.width,
5896
+ height: options.music ? null : options.height,
4270
5897
  seed: firstResult.seed ?? options.seed,
4271
5898
  seedStrategy: options.seedStrategy || null,
4272
5899
  seeds,
@@ -4289,6 +5916,23 @@ async function main() {
4289
5916
  if (options.loraStrengths.length > 0) {
4290
5917
  output.loraStrengths = options.loraStrengths;
4291
5918
  }
5919
+ if (options.music) {
5920
+ output.duration = options.duration;
5921
+ output.bpm = options.musicBpm ?? null;
5922
+ output.keyscale = options.musicKeyscale || null;
5923
+ output.timesignature = options.musicTimesig || null;
5924
+ output.language = options.musicLanguage || null;
5925
+ output.composerMode = options.musicComposerMode;
5926
+ if (options.musicPromptStrength !== null && options.musicPromptStrength !== undefined) {
5927
+ output.promptStrength = options.musicPromptStrength;
5928
+ }
5929
+ if (options.musicCreativity !== null && options.musicCreativity !== undefined) {
5930
+ output.creativity = options.musicCreativity;
5931
+ }
5932
+ if (options.musicShift !== null && options.musicShift !== undefined) {
5933
+ output.shift = options.musicShift;
5934
+ }
5935
+ }
4292
5936
  if (options.video) {
4293
5937
  output.workflow = options.videoWorkflow;
4294
5938
  output.fps = options.fps;
@@ -4361,7 +6005,7 @@ async function main() {
4361
6005
  } catch (error) {
4362
6006
  // Token auto-fallback: if using auto mode and got insufficient balance, retry with the other token
4363
6007
  const isBalanceError = error.code === 'INSUFFICIENT_BALANCE' || /insufficient/i.test(error.message);
4364
- if (_isAutoToken && isBalanceError && options.tokenType === 'spark') {
6008
+ if (_allowAutoTokenFallback && isBalanceError && options.tokenType === 'spark') {
4365
6009
  log('Insufficient SPARK balance — retrying with SOGNI tokens...');
4366
6010
  options.tokenType = 'sogni';
4367
6011
  try {