npm - @sogni-ai/sogni-creative-agent-skill - Versions diffs - 3.3.0 → 3.3.2 - Mend

@sogni-ai/sogni-creative-agent-skill 3.3.0 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/SKILL.md +1 -1
package/generated/creative-agent-runtime.mjs +7 -7
package/openclaw.plugin.json +1 -1
package/package.json +2 -2
package/skill-package.json +1 -1
package/sogni-agent.mjs +434 -14
package/version.mjs +1 -1

package/SKILL.md CHANGED

@@ -2,7 +2,7 @@
 name: sogni-creative-agent-skill
 description: "Sogni Creative Agent Skill: agent skill and CLI for image, video, and music generation using Sogni AI's decentralized GPU network. Supports personas (named people with saved reference photos and voice clips), persistent memories (user preferences across sessions), custom personality, style transfer, angle synthesis, and multi-step creative workflows. Ask the agent to \"draw\", \"generate\", \"create an image\", \"make a video/animate\", \"make music\", \"apply a style\", or \"generate me as a superhero\"."
 metadata:
-  version: "3.1.1"
+  version: "3.3.2"
   homepage: https://sogni.ai
   clawdbot:
     emoji: "🎨"

package/generated/creative-agent-runtime.mjs CHANGED

@@ -314,12 +314,12 @@ const GATING_POLICIES = [
         "trigger": {
             "allOf": [
                 "has_active_persona",
-                "requests_video_generation",
+                "requests_persona_video_generation",
                 "no_persona_image_in_session"
             ],
             "sources": {
                 "has_active_persona": "session_state",
-                "requests_video_generation": "planner",
+                "requests_persona_video_generation": "planner",
                 "no_persona_image_in_session": "session_state"
             }
         },
@@ -2184,7 +2184,7 @@ const PROMPT_CONTRACTS = [
         "contractId": "generate_video_v1",
         "version": "1.1.0",
         "toolName": "generate_video",
-        "baseDescription": "generate_video produces text-to-video clips and Seedance multimodal reference videos.\nUse for text-only video generation with no source image input. For Seedance, also use this\ntool when uploaded/generated images, videos, or audio are loose references. Use animate_photo\nonly when a non-Seedance source image must become the first frame of an LTX/WAN animation.\n\nSEEDANCE UPLOADED STORYBOARD DEFAULT: When the user uploads a storyboard, shot sheet,\nmood board, or trailer concept image and asks to make a movie trailer/video/clip from it,\ndefault to one Seedance generate_video call with referenceImageIndices=[-1]. Do not first\nextract panels with edit_image, do not generate replacement keyframes, and do not make four\nseparate LTX animate_photo clips unless the user explicitly asks for separate clips or LTX.\nUse seedance2 when premium Spark access is available; if premium access is unavailable,\nexplain the limitation or use the best non-Seedance fallback the user accepts.\n\nSTORYTELLING / COMMERCIAL / TRAILER PROMPTS: For creative video requests, turn the brief\ninto timed, causally connected visual beats before writing the final prompt. Default social\nvideo is 15s 9:16 with a strong first 1-2s, visible escalation, payoff, and brand/CTA/final\nimage. Commercials should show audience desire/problem, transformation, proof/benefit, and\nCTA. Trailers should follow hook → world → disruption → escalation → reveal → title/CTA.\nEvery beat must be generatable: subject, setting, action, camera, lighting, audio, and text\nrole where relevant. Avoid vague \"cinematic\" filler, feature dumps, and beautiful images with\nno visible change.\n\nVIDEO PROMPT QUOTING: ONLY use double quotes for spoken dialogue in video prompts. Never\nquote on-screen text, titles, captions, or visual text elements — describe them without\nquotes. 
Quotes signal speech to the model and confuse audio generation.\n\nSTORYBOARD TEXT: Structural headings, section numbers, slide titles, panel titles, and\ncaptions in storyboard references may become short audio-only narration/VO or\nkey-message beats, but they are not subtitles, title cards, lower thirds, or visible\noverlays unless the user explicitly asks for visible text, on-screen text, a title\ncard, subtitle, lower third, signage, or CTA. Keep narration as separate brief phrases\nwith pauses; do not concatenate storyboard labels into run-on voiceover.\n\nDIALOGUE DURATION: Spoken dialogue must fit the clip. Estimate 2.5 words per second\nnatural delivery plus ~1s per acting beat. Hard maximum 3.75 words/second.\nCheck: dialogue words ÷ 2.5 + beats ≤ duration. Do not submit oversized dialogue.\n\nLATEST USER DURATION WINS: In follow-up turns, use the newest duration the user states,\neven if a previous assistant message mentioned a longer script/runtime. For example, if\nhistory says \"the full script is 66 seconds\" but the user now says \"do a 30 second version\",\ngenerate the 30 second version. Do not ask a clarification question just because history\ncontains another duration; treat the latest user request as the override.\n\nSEEDANCE SHORT-DURATION LIMIT: Seedance supports 4-15s clips. If the user explicitly asks\nfor Seedance below 4s, do not silently round up. Ask whether they prefer a 4s Seedance clip\nor an exact-duration LTX clip. If the user did not explicitly ask for Seedance, choose the\nmodel/tool that can satisfy the requested duration exactly.",
+        "baseDescription": "generate_video produces text-to-video clips and Seedance multimodal reference videos.\nUse for text-only video generation with no source image input. For Seedance, also use this\ntool when uploaded/generated images, videos, or audio are loose references. Use animate_photo\nonly when a non-Seedance source image must become the first frame of an LTX/WAN animation.\n\nSEEDANCE UPLOADED STORYBOARD DEFAULT: When the user uploads a storyboard, shot sheet,\nmood board, or trailer concept image and asks to make a movie trailer/video/clip from it,\ndefault to one Seedance generate_video call with referenceImageIndices=[-1]. Do not first\nextract panels with edit_image, do not generate replacement keyframes, and do not make four\nseparate LTX animate_photo clips unless the user explicitly asks for separate clips or LTX.\nUse seedance2 when premium Spark access is available; if premium access is unavailable,\nexplain the limitation or use the best non-Seedance fallback the user accepts.\n\nEXACT / INCLUDED VIDEO PROMPTS: If the user asks for a Seedance video using uploaded or\ngenerated references and says to use a prompt exactly, pass only that literal quoted prompt\nto generate_video and set skipPromptProcessing=true plus expandPrompt=false. Do not treat\nwords inside the literal prompt, such as storyboard, script, thumbnails, or panels, as a\nrequest to create a storyboard image. If the user includes a timecoded script inside a\nvideo request, keep it in the generate_video prompt. Explicit constraints like no storyboard\npanels, no subtitles, or no captions are constraints on the video render, not instructions\nto call edit_image or generate_image.\n\nSTORYTELLING / COMMERCIAL / TRAILER PROMPTS: For creative video requests, turn the brief\ninto timed, causally connected visual beats before writing the final prompt. Default social\nvideo is 15s 9:16 with a strong first 1-2s, visible escalation, payoff, and brand/CTA/final\nimage. 
Commercials should show audience desire/problem, transformation, proof/benefit, and\nCTA. Trailers should follow hook → world → disruption → escalation → reveal → title/CTA.\nEvery beat must be generatable: subject, setting, action, camera, lighting, audio, and text\nrole where relevant. Avoid vague \"cinematic\" filler, feature dumps, and beautiful images with\nno visible change.\n\nVIDEO PROMPT QUOTING: ONLY use double quotes for spoken dialogue in video prompts. Never\nquote on-screen text, titles, captions, or visual text elements — describe them without\nquotes. Quotes signal speech to the model and confuse audio generation.\n\nSTORYBOARD TEXT: Structural headings, section numbers, slide titles, panel titles, and\ncaptions in storyboard references may become short audio-only narration/VO or\nkey-message beats, but they are not subtitles, title cards, lower thirds, or visible\noverlays unless the user explicitly asks for visible text, on-screen text, a title\ncard, subtitle, lower third, signage, or CTA. Keep narration as separate brief phrases\nwith pauses; do not concatenate storyboard labels into run-on voiceover.\n\nDIALOGUE DURATION: Spoken dialogue must fit the clip. Estimate 2.5 words per second\nnatural delivery plus ~1s per acting beat. Hard maximum 3.75 words/second.\nCheck: dialogue words ÷ 2.5 + beats ≤ duration. Do not submit oversized dialogue.\n\nLATEST USER DURATION WINS: In follow-up turns, use the newest duration the user states,\neven if a previous assistant message mentioned a longer script/runtime. For example, if\nhistory says \"the full script is 66 seconds\" but the user now says \"do a 30 second version\",\ngenerate the 30 second version. Do not ask a clarification question just because history\ncontains another duration; treat the latest user request as the override.\n\nSEEDANCE SHORT-DURATION LIMIT: Seedance supports 4-15s clips. If the user explicitly asks\nfor Seedance below 4s, do not silently round up. 
Ask whether they prefer a 4s Seedance clip\nor an exact-duration LTX clip. If the user did not explicitly ask for Seedance, choose the\nmodel/tool that can satisfy the requested duration exactly.",
         "parameterDocs": {
             "prompt": "Video prompt. Use double quotes ONLY for spoken dialogue. Describe visual text without quotes.",
             "duration": "Clip duration in seconds. Plan dialogue word count against the 3.75 words/second ceiling."
@@ -2216,7 +2216,7 @@ const PROMPT_CONTRACTS = [
         "contractId": "video_to_video_v1",
         "version": "1.0.0",
         "toolName": "video_to_video",
-        "baseDescription": "video_to_video transforms an uploaded video. Use for uploaded-video restyling, enhancement,\nupscaling/remastering, motion transfer from video to image, subject replacement, edge/pose/\ndepth-guided restyle, or explicit Seedance V2V transforms.\n\nThis tool requires an uploaded video source. Do not use it for generated video indices. For\ngenerated or uploaded partial edits use replace_video_segment; for appended time use\nextend_video; for logos/text overlays use overlay_video; for stitching use stitch_video.\n\nChoose controlMode by intent. Use detailer for quality-only enhancement without restyling.\nUse seedance-v2v only when the user asks to transform/enhance/remaster an uploaded video\nwith Seedance. For detailer, describe the original scene plus quality terms, not new content.",
+        "baseDescription": "video_to_video transforms an uploaded video. Use for uploaded-video restyling, enhancement,\nupscaling/remastering, motion transfer from video to image, subject replacement, edge/pose/\ndepth-guided restyle, or explicit Seedance V2V transforms.\n\nThis tool requires an uploaded video source. Do not use it for generated video indices. For\ngenerated or uploaded partial edits use replace_video_segment; for appended time use\nextend_video; for logos/text overlays use overlay_video; for stitching use stitch_video.\n\nChoose controlMode by intent. Use detailer for quality-only enhancement without restyling.\nUse seedance-v2v only when the user asks to transform/enhance/remaster an uploaded video\nwith Seedance, including Seedance-fast uploaded-video upscale/remaster requests. For detailer,\ndescribe the original scene plus quality terms, not new content.",
         "parameterDocs": {
             "prompt": "Describe the target appearance in present tense. For detailer, describe the original content plus quality qualifiers only.",
             "videoSourceIndex": "Uploaded video index. Omit when there is one uploaded video; use 0 for first uploaded video or -1 if using negative upload notation.",
@@ -2240,7 +2240,7 @@ const PROMPT_CONTRACTS = [
         "contractId": "replace_video_segment_v1",
         "version": "1.0.0",
         "toolName": "replace_video_segment",
-        "baseDescription": "Use replace_video_segment when the user wants to regenerate a specific time range of an\nexisting video: \"regenerate from Xs to Ys\", \"redo the last N seconds\", \"swap out the middle\",\n\"fix the [start/middle/end] of the video\", or \"replace the [bumper/intro/outro/end card/\ntag/sting] at the [start/end] of the video\". Use explicit startSeconds and endSeconds; use\n-1 sentinels when exact base duration is unknown — the handler probes and resolves.\n\nWhen the replacement is already another uploaded or generated video clip, still use\nreplace_video_segment but pass replacementVideoIndex. Example: \"splice video 2 into video 1\nat 5s\" means videoIndex=-1, replacementVideoIndex=-2, startSeconds=5, endSeconds=5.\nUse endSeconds=startSeconds for insertion; use a wider endSeconds only when the user says to\nreplace/remove that base-video range. Do not use stitch_video for \"into the middle\"/\"insert\"\nrequests, because stitch_video only concatenates full clips end-to-end.\n\nFor time-sliced interleaving from existing videos — \"alternate 1s from each video\", \"weave\none-second clips from video 1 and video 2\", \"cut back and forth every N seconds\" — do NOT\nuse stitch_video and do NOT omit replacementVideoIndex. Start with the first requested video\nas the base, then call replace_video_segment once for each window that should come from the\nother video. Set replacementVideoIndex to that other existing video and set\nreplacementStartSeconds/replacementEndSeconds to the next source slice from that\nreplacement video. For ordinary\nalternation, preserve the base duration: set endSeconds=startSeconds+sliceDuration, not\nendSeconds=startSeconds insertion, unless the user explicitly asks to lengthen the output by\ninserting extra slices. Skip no-op windows that already come from the base video; only splice\nwindows that should come from a different source. 
Example for two 10s uploads alternating every 1s starting with video\n1: replace base windows 1..2, 3..4,\n5..6, 7..8, and 9..10 with slices 0..1, 1..2, 2..3, 3..4, and 4..5 from video 2. After\neach successful splice, target the newest composite video index for the next splice.\nThe -1 time sentinel applies only to base startSeconds/endSeconds when the base duration is\nunknown. Never use -1 for replacementStartSeconds or replacementEndSeconds; source windows\nmust use concrete non-negative seconds. For uploaded/generated videos with duration metadata,\nuse that known duration directly; do not call analyze_video just to learn the clip length for\nroutine alternating slices. Do not add a final tail splice with an unknown source end — stop at\nthe known clip duration or skip a no-op tail window.\n\nDo NOT call generate_video or animate_photo to re-render an existing video just to change\npart of it (the bumper, the intro, the end card, a single scene, the last few seconds, etc.).\nUse replace_video_segment — it preserves the unchanged portion, keeps the original audio\noutside the replaced window, and costs far less.\n\nAuto-detects the base video's model, so OMIT videoModel unless the user explicitly demands\na different model. Short requested windows are supported by rendering with model-specific\nhandles and trimming the rendered clip before splicing, so still pass the user's exact\nstartSeconds/endSeconds.",
+        "baseDescription": "Use replace_video_segment when the user wants to regenerate a specific time range of an\nexisting video: \"regenerate from Xs to Ys\", \"redo the last N seconds\", \"swap out the middle\",\n\"fix the [start/middle/end] of the video\", or \"replace the [bumper/intro/outro/end card/\ntag/sting] at the [start/end] of the video\". Use explicit startSeconds and endSeconds.\nFor relative requests like \"last 3 seconds\", resolve against the known base duration when\nduration metadata or prior tool arguments provide it. For \"bumper/end card/outro at the end\"\nwithout exact seconds, use the known storyboard timing when available; otherwise choose a\nsmall end-card window such as the final 1-3 seconds based on the base duration. If the base\nduration/window is genuinely unknown, inspect the video first or ask for the missing window;\ndo not submit ambiguous placeholder times.\n\nWhen the replacement is already another uploaded or generated video clip, still use\nreplace_video_segment but pass replacementVideoIndex. Example: \"splice video 2 into video 1\nat 5s\" means videoIndex=-1, replacementVideoIndex=-2, startSeconds=5, endSeconds=5.\nUse endSeconds=startSeconds for insertion; use a wider endSeconds only when the user says to\nreplace/remove that base-video range. Do not use stitch_video for \"into the middle\"/\"insert\"\nrequests, because stitch_video only concatenates full clips end-to-end.\n\nFor time-sliced interleaving from existing videos — \"alternate 1s from each video\", \"weave\none-second clips from video 1 and video 2\", \"cut back and forth every N seconds\" — do NOT\nuse stitch_video and do NOT omit replacementVideoIndex. Start with the first requested video\nas the base, then call replace_video_segment once for each window that should come from the\nother video. Set replacementVideoIndex to that other existing video and set\nreplacementStartSeconds/replacementEndSeconds to the next source slice from that\nreplacement video. 
For ordinary\nalternation, preserve the base duration: set endSeconds=startSeconds+sliceDuration, not\nendSeconds=startSeconds insertion, unless the user explicitly asks to lengthen the output by\ninserting extra slices. Skip no-op windows that already come from the base video; only splice\nwindows that should come from a different source. Example for two 10s uploads alternating every 1s starting with video\n1: replace base windows 1..2, 3..4,\n5..6, 7..8, and 9..10 with slices 0..1, 1..2, 2..3, 3..4, and 4..5 from video 2. After\neach successful splice, target the newest composite video index for the next splice.\nThe -1 time sentinel applies only to base startSeconds/endSeconds when the base duration is\nunknown. Never use -1 for replacementStartSeconds or replacementEndSeconds; source windows\nmust use concrete non-negative seconds. For uploaded/generated videos with duration metadata,\nuse that known duration directly; do not call analyze_video just to learn the clip length for\nroutine alternating slices. Do not add a final tail splice with an unknown source end — stop at\nthe known clip duration or skip a no-op tail window.\n\nDo NOT call generate_video or animate_photo to re-render an existing video just to change\npart of it (the bumper, the intro, the end card, a single scene, the last few seconds, etc.).\nUse replace_video_segment — it preserves the unchanged portion, keeps the original audio\noutside the replaced window, and costs far less.\n\nAuto-detects the base video's model, so OMIT videoModel unless the user explicitly demands\na different model. Short requested windows are supported by rendering with model-specific\nhandles and trimming the rendered clip before splicing, so still pass the user's exact\nstartSeconds/endSeconds.",
         "parameterDocs": {
             "startSeconds": "Start of segment to replace in seconds. Use -1 sentinel if exact base duration is unknown.",
             "endSeconds": "End of segment to replace in seconds. Use the same value as startSeconds for insertion with replacementVideoIndex.",
@@ -2433,9 +2433,9 @@ const PROMPT_CONTRACTS = [
         "contractId": "finalize_response_v1",
         "version": "1.1.0",
         "toolName": "finalize_response",
-        "baseDescription": "finalize_response marks the turn complete and stops the tool loop. Use after the requested\nworkflow succeeds, partially succeeds, fails with a surfaced error, or needs no tool action.\n\nWhen the user asked for a script, storyboard, ad concept, trailer, creator video, meme/parody,\nor music prompt and no media tool is required, deliver the final creative in a clean Markdown\ncontract: title, concept/objective, audience if relevant, timed beats or script, audio/text\nnotes, generation prompt(s), CTA, and brief assumptions. For revisions, apply the feedback\ndirectly while preserving approved elements and rejected constraints.\n\nDo not call any other tool after finalize_response. Keep the summary short and grounded in\nactual tool results; do not claim exact metadata that no tool returned.",
+        "baseDescription": "finalize_response marks the turn complete and stops the tool loop. Use after the requested\nworkflow succeeds, partially succeeds, fails with a surfaced error, or needs no tool action.\n\nWhen the user asked for a script, storyboard, ad concept, trailer, creator video, meme/parody,\nor music prompt and no media tool is required, deliver the final creative in a clean Markdown\ncontract: title, concept/objective, audience if relevant, timed beats or script, audio/text\nnotes, generation prompt(s), CTA, and brief assumptions. For revisions, apply the feedback\ndirectly while preserving approved elements and rejected constraints.\n\nDo not call any other tool after finalize_response. Keep the summary short and grounded in\nactual tool results; do not claim exact metadata that no tool returned.\nFor no-action/text-only answers, such as product, feature, model, pricing, or capability\nquestions, the summary is the final answer the user sees. Provide the substantive answer\nthere; never leave it empty and never use a placeholder like \"Done.\"",
         "parameterDocs": {
-            "summary": "Short user-visible closeout. Mention produced media or the concrete blocker; avoid duplicating prior tool output.",
+            "summary": "User-visible closeout. For no-action/text-only answers, include the complete substantive answer here. For media workflows, mention produced media or the concrete blocker; avoid duplicating prior tool output.",
             "outcome": "success, partial, asked_user, failed, or no_action based on the actual turn outcome."
         }
     },

package/openclaw.plugin.json CHANGED

@@ -2,7 +2,7 @@
   "id": "sogni-creative-agent-skill",
   "name": "Sogni Creative Agent Skill — Image, Video & Music Generation",
   "description": "Agent skill and CLI for Sogni AI image, video, and music generation.",
-  "version": "3.1.1",
+  "version": "3.3.2",
   "skills": [
     "."
   ],

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@sogni-ai/sogni-creative-agent-skill",
-  "version": "3.3.0",
+  "version": "3.3.2",
   "description": "Sogni Creative Agent Skill: agent skill and CLI for Sogni AI image, video, and music generation.",
   "type": "module",
   "main": "sogni-agent.mjs",
@@ -67,7 +67,7 @@
     "sogni-agent.mjs"
   ],
   "dependencies": {
-    "@sogni-ai/sogni-intelligence-client": "^2.4.0",
+    "@sogni-ai/sogni-intelligence-client": "^2.4.1",
     "execa": "^9.6.1",
     "json5": "^2.2.3",
     "sharp": "^0.34.5"

package/skill-package.json CHANGED

@@ -3,7 +3,7 @@
   "private": true,
   "type": "module",
   "dependencies": {
-    "@sogni-ai/sogni-intelligence-client": "^2.4.0",
+    "@sogni-ai/sogni-intelligence-client": "^2.4.1",
     "execa": "^9.6.1",
     "json5": "^2.2.3",
     "sharp": "^0.34.5"

package/sogni-agent.mjs CHANGED

@@ -66,9 +66,15 @@ import {
 import {
   extractToolCallProgressUpdate
 } from '@sogni-ai/sogni-intelligence-client/chatRun';
+import {
+  SEEDANCE_R2V_REFERENCE_AUDIO_MAX_DURATION_SECONDS,
+  prepareSeedanceV2VSourceVideo as prepareSharedSeedanceV2VSourceVideo
+} from '@sogni-ai/sogni-intelligence-client/media';
 import {
   SEEDANCE_REFERENCE_LIMITS,
   SeedanceReferenceLimitError,
+  seedanceTerminalGenerationFailurePayloadFromError,
+  seedanceTerminalPolicyPayloadFromError,
   validateSeedanceReferenceCounts
 } from '@sogni-ai/sogni-intelligence-client/tools';
@@ -220,15 +226,19 @@ function isPathWithinBase(basePath, targetPath) {
 }
 function buildCliErrorPayload({ message, code, details, hint, prompt }) {
-  const classified = classifySkillError({ message, code });
+  const classified = classifyCliError({ message, code });
   const payload = {
     success: false,
-    error: message || 'Unknown error',
+    error: classified.message || message || 'Unknown error',
     errorType: classified.error_type,
     errorCategory: classified.category,
     retryable: classified.retryable,
     prompt: prompt ?? null
   };
+  if (classified.metadata) payload.metadata = classified.metadata;
+  if (classified.technicalError && classified.technicalError !== payload.error) {
+    payload.technicalError = classified.technicalError;
+  }
   if (code) payload.errorCode = code;
   if (details) payload.errorDetails = details;
   if (hint) payload.hint = hint;
@@ -239,11 +249,71 @@ function buildCliErrorPayload({ message, code, details, hint, prompt }) {
   return payload;
 }
+function cliErrorMessage(error) {
+  if (typeof error === 'string') return error;
+  if (error instanceof Error) return error.message || String(error);
+  if (error && typeof error === 'object') {
+    const record = error;
+    if (typeof record.message === 'string') return record.message;
+    if (typeof record.error === 'string') return record.error;
+  }
+  return String(error ?? 'Unknown error');
+}
+function seedanceFriendlyGenerationMessage(payload) {
+  const raw = [
+    payload?.message,
+    payload?.vendorError,
+    payload?.vendorErrorCode
+  ].filter(Boolean).join(' ');
+  if (/\baudio\s+format\b[\s\S]{0,120}\b(?:not valid|invalid)\b/i.test(raw)) {
+    return 'Seedance rejected the audio reference format for this model. Try a different audio file, trim/convert the clip, or use a non-Seedance audio-driven workflow such as LTX sound-to-video.';
+  }
+  return payload?.message || 'Seedance could not complete this video.';
+}
+function classifyCliError(error) {
+  const rawMessage = cliErrorMessage(error);
+  const seedancePolicyPayload = seedanceTerminalPolicyPayloadFromError(error);
+  if (seedancePolicyPayload) {
+    return {
+      error_type: 'SAFETY_REJECTED',
+      category: 'content_refused',
+      message: seedancePolicyPayload.message,
+      retryable: false,
+      metadata: seedancePolicyPayload,
+      technicalError: rawMessage
+    };
+  }
+  const seedanceGenerationPayload = seedanceTerminalGenerationFailurePayloadFromError(error);
+  if (seedanceGenerationPayload) {
+    const vendorCode = seedanceGenerationPayload.vendorErrorCode;
+    const isInvalidParameter = vendorCode === 'InvalidParameter' ||
+      seedanceGenerationPayload.error === 'seedance_reference_audio_too_long';
+    return {
+      error_type: isInvalidParameter ? 'PARAMETER_INVALID' : 'GPU_WORKER_FAILED',
+      category: isInvalidParameter ? 'schema_validation' : 'transient_failure',
+      message: seedanceFriendlyGenerationMessage(seedanceGenerationPayload),
+      retryable: !isInvalidParameter,
+      metadata: seedanceGenerationPayload,
+      technicalError: rawMessage
+    };
+  }
+  return classifySkillError(error);
+}
 function addCanonicalErrorFields(payload, error) {
-  const classified = classifySkillError(error);
+  const classified = classifyCliError(error);
+  payload.error = classified.message;
   payload.errorType = classified.error_type;
   payload.errorCategory = classified.category;
   payload.retryable = classified.retryable;
+  if (classified.metadata) payload.metadata = classified.metadata;
+  if (classified.technicalError && classified.technicalError !== classified.message) {
+    payload.technicalError = classified.technicalError;
+  }
   return payload;
 }
@@ -3653,6 +3723,12 @@ function apiMediaReferenceEndpoint(ref, action) {
     : `/v1/media/${action}Url`;
 }
+function apiMediaReferenceV2Endpoint(ref, action) {
+  return ref.kind === 'image'
+    ? `/v2/image/${action}Url`
+    : `/v2/media/${action}Url`;
+}
 function apiMediaReferenceUrlPath(ref, file, index, action, jobId) {
   const params = new URLSearchParams();
   params.set('type', apiMediaReferenceUploadType(ref, index));
@@ -3666,6 +3742,19 @@ function apiMediaReferenceUrlPath(ref, file, index, action, jobId) {
   return `${apiMediaReferenceEndpoint(ref, action)}?${params.toString()}`;
 }
+function apiMediaReferenceV2UrlPath(ref, file, index, action, jobId) {
+  const params = new URLSearchParams();
+  params.set('type', apiMediaReferenceUploadType(ref, index));
+  params.set('jobId', jobId);
+  params.set('contentType', file.mimeType);
+  if (ref.kind === 'image') {
+    params.set('imageId', `media_ref_${index + 1}`);
+  } else {
+    params.set('id', `media_ref_${index + 1}`);
+  }
+  return `${apiMediaReferenceV2Endpoint(ref, action)}?${params.toString()}`;
+}
 function apiStoredMediaUrl(payload, key) {
   const data = extractApiEnvelopeData(payload);
   const value = data?.[key] || payload?.[key];
@@ -3676,6 +3765,41 @@ function apiStoredMediaUrl(payload, key) {
   throw err;
 }
+function apiStoredMediaUploadPost(payload) {
+  const data = extractApiEnvelopeData(payload);
+  const url = data?.url || data?.uploadUrl;
+  if (typeof url === 'string' && url) {
+    const fields = data?.fields && typeof data.fields === 'object' ? data.fields : {};
+    return { url, fields };
+  }
+  const err = new Error('Sogni API did not return a presigned POST URL for media reference upload.');
+  err.code = 'MEDIA_UPLOAD_FAILED';
+  err.details = { payload };
+  throw err;
+}
+async function postApiMediaUploadForm(uploadPayload, file) {
+  const { url, fields } = apiStoredMediaUploadPost(uploadPayload);
+  const form = new FormData();
+  for (const [key, value] of Object.entries(fields)) {
+    if (value === undefined || value === null) continue;
+    form.append(key, String(value));
+  }
+  const body = file.buffer || readFileSync(file.filePath);
+  form.append('file', new Blob([body], { type: file.mimeType }), file.filename);
+  const response = await fetch(url, {
+    method: 'POST',
+    body: form,
+  });
+  if (!response.ok) {
+    const err = new Error(`Failed to upload ${file.filename} (${response.status} ${response.statusText}).`);
+    err.code = 'MEDIA_UPLOAD_FAILED';
+    err.details = { uploadUrl: url, status: response.status, statusText: response.statusText };
+    throw err;
+  }
+}
 async function putApiMediaUpload(uploadUrl, file) {
   const response = await fetch(uploadUrl, {
     method: 'PUT',
@@ -3766,6 +3890,31 @@ async function uploadPreparedApiMediaReference(ref, index, apiKey, file) {
   };
 }
+async function uploadPreparedApiMediaReferenceV2(ref, index, apiKey, file) {
+  if (!apiKey) {
+    const err = new Error(`${ref.flag} media references require SOGNI_API_KEY so the CLI can upload them before execution.`);
+    err.code = 'MISSING_API_KEY';
+    throw err;
+  }
+  const jobId = `sogni-agent-${Date.now()}-${index + 1}-${randomBytes(4).toString('hex')}`;
+  const uploadPayload = await fetchApiJson(apiMediaReferenceV2UrlPath(ref, file, index, 'upload', jobId), { apiKey });
+  await postApiMediaUploadForm(uploadPayload, file);
+  const downloadPayload = await fetchApiJson(apiMediaReferenceV2UrlPath(ref, file, index, 'download', jobId), { apiKey });
+  const url = apiStoredMediaUrl(downloadPayload, 'downloadUrl');
+  return {
+    url,
+    filename: file.filename,
+    byte_length: file.byteLength,
+    mime_type: file.mimeType,
+    prompt_label: file.filename,
+    storage: {
+      jobId,
+      type: apiMediaReferenceUploadType(ref, index),
+      version: 'v2',
+    },
+  };
+}
 async function uploadLocalApiMediaReference(ref, index, apiKey) {
   return uploadPreparedApiMediaReference(ref, index, apiKey, localApiMediaReferenceFile(ref));
 }
@@ -5419,6 +5568,235 @@ async function prepareReferenceAudioForVideoBuffer(buffer, sourceLabel) {
   return prepared;
 }
+function mediaFilenameFromSource(sourceLabel, fallbackName) {
+  const raw = String(sourceLabel || '');
+  try {
+    if (isHttpUrl(raw)) {
+      const pathname = new URL(raw).pathname;
+      const name = basename(decodeURIComponent(pathname));
+      return name || fallbackName;
+    }
+  } catch {
+    // Fall through to path handling.
+  }
+  const name = basename(raw.split('?')[0]);
+  return name || fallbackName;
+}
+function withMediaExtension(filename, extension) {
+  const cleanExtension = extension.startsWith('.') ? extension : `.${extension}`;
+  const currentExt = extname(filename);
+  const base = currentExt ? filename.slice(0, -currentExt.length) : filename;
+  return `${base || 'reference'}${cleanExtension}`;
+}
+async function probeLocalMediaDurationSeconds(pathOrUrl) {
+  if (isHttpUrl(pathOrUrl)) return undefined;
+  const ffprobePath = getEnv('FFPROBE_PATH') || 'ffprobe';
+  sanitizePath(ffprobePath, 'FFPROBE_PATH');
+  const result = await runCommand(ffprobePath, [
+    '-v', 'error',
+    '-show_entries', 'format=duration',
+    '-of', 'default=noprint_wrappers=1:nokey=1',
+    pathOrUrl,
+  ], { captureOutput: true });
+  if (result.error || result.status !== 0) return undefined;
+  const parsed = Number(String(result.stdout || '').trim());
+  return Number.isFinite(parsed) && parsed >= 0 ? parsed : undefined;
+}
+async function transcodeSeedanceReferenceAudioToMp3(request) {
+  const ffmpegPath = await ensureFfmpegAvailable();
+  const tempDir = mkdtempSync(join(tmpdir(), 'sogni-seedance-audio-'));
+  const inputPath = mediaTempInputPath(tempDir, request.filename, '.audio');
+  const outputPath = join(tempDir, 'reference-audio.mp3');
+  try {
+    writeFileSync(inputPath, Buffer.from(request.data));
+    const result = await runCommand(ffmpegPath, [
+      '-hide_banner',
+      '-loglevel', 'error',
+      '-y',
+      '-i', inputPath,
+      '-vn',
+      '-ac', '2',
+      '-ar', '44100',
+      '-c:a', 'libmp3lame',
+      '-b:a', '128k',
+      outputPath
+    ], { captureOutput: true });
+    if (result.error || result.status !== 0 || !isNonEmptyFile(outputPath)) {
+      const err = new Error('Failed to convert Seedance reference audio to MP3.');
+      err.code = 'FFMPEG_SEEDANCE_AUDIO_PREP_FAILED';
+      err.hint = 'Seedance accepts MP3 audio references only. Install ffmpeg with MP3 support or provide an MP3 clip.';
+      err.details = { sourceLabel: request.filename, stderr: result.stderr || '', stdout: result.stdout || '', status: result.status };
+      throw err;
+    }
+    return { data: readFileSync(outputPath), mimeType: 'audio/mpeg' };
+  } finally {
+    try { if (existsSync(inputPath)) unlinkSync(inputPath); } catch {}
+    try { if (existsSync(outputPath)) unlinkSync(outputPath); } catch {}
+    try { rmdirSync(tempDir); } catch {}
+  }
+}
+async function trimSeedanceReferenceAudioToMp3(request) {
+  const ffmpegPath = await ensureFfmpegAvailable();
+  const tempDir = mkdtempSync(join(tmpdir(), 'sogni-seedance-audio-'));
+  const inputPath = mediaTempInputPath(tempDir, request.filename, '.audio');
+  const outputPath = join(tempDir, 'reference-audio.mp3');
+  const start = Math.max(0, Number(request.start) || 0);
+  const duration = Math.max(
+    0.1,
+    Math.min(15, Number(request.duration) || 15),
+  );
+  try {
+    writeFileSync(inputPath, Buffer.from(request.data));
+    const result = await runCommand(ffmpegPath, [
+      '-hide_banner',
+      '-loglevel', 'error',
+      '-y',
+      '-ss', String(start),
+      '-i', inputPath,
+      '-t', String(duration),
+      '-vn',
+      '-ac', '2',
+      '-ar', '44100',
+      '-c:a', 'libmp3lame',
+      '-b:a', '128k',
+      outputPath
+    ], { captureOutput: true });
+    if (result.error || result.status !== 0 || !isNonEmptyFile(outputPath)) {
+      const err = new Error('Failed to trim Seedance reference audio to MP3.');
+      err.code = 'FFMPEG_SEEDANCE_AUDIO_TRIM_FAILED';
+      err.hint = 'Seedance accepts MP3 audio references only and short audio windows. Try a shorter MP3 clip.';
+      err.details = { sourceLabel: request.filename, start, duration, stderr: result.stderr || '', stdout: result.stdout || '', status: result.status };
+      throw err;
+    }
+    return { data: readFileSync(outputPath), mimeType: 'audio/mpeg' };
+  } finally {
+    try { if (existsSync(inputPath)) unlinkSync(inputPath); } catch {}
+    try { if (existsSync(outputPath)) unlinkSync(outputPath); } catch {}
+    try { rmdirSync(tempDir); } catch {}
+  }
+}
+async function trimSeedanceV2VSourceVideo(request) {
+  return {
+    data: await trimSeedanceV2VSourceVideoBuffer(
+      Buffer.from(request.data),
+      request.filename,
+      request.start,
+      request.duration,
+    ),
+    mimeType: 'video/mp4',
+  };
+}
+function seedanceReferenceAudioWindow() {
+  const requestedDuration = options.audioDuration ?? options.duration;
+  const maxDurationSeconds = Math.min(
+    Number.isFinite(Number(requestedDuration)) && Number(requestedDuration) > 0
+      ? Number(requestedDuration)
+      : SEEDANCE_R2V_REFERENCE_AUDIO_MAX_DURATION_SECONDS,
+    15,
+  );
+  return {
+    maxDurationSeconds,
+    startOffsetSeconds: options.audioStart ?? 0,
+  };
+}
+async function prepareSeedanceReferenceAudioUploadFile(pathOrUrl, buffer) {
+  const filename = mediaFilenameFromSource(pathOrUrl, 'reference-audio');
+  const rawMimeType = mimeTypeForPath(pathOrUrl, 'application/octet-stream');
+  const mimeType = normalizeReferenceAudioMimeType(rawMimeType) || rawMimeType;
+  const sourceFormat = detectReferenceAudioFormat(buffer, mimeType);
+  const sourceDurationSeconds = await probeLocalMediaDurationSeconds(pathOrUrl);
+  const window = seedanceReferenceAudioWindow();
+  const shouldTrim =
+    window.startOffsetSeconds > 0 ||
+    (Number.isFinite(sourceDurationSeconds) && sourceDurationSeconds > window.maxDurationSeconds);
+  let prepared = { data: buffer, mimeType: 'audio/mpeg' };
+  let action = null;
+  if (shouldTrim) {
+    prepared = await trimSeedanceReferenceAudioToMp3({
+      data: buffer,
+      filename,
+      inputMimeType: mimeType,
+      sourceFormat,
+      duration: window.maxDurationSeconds,
+      start: window.startOffsetSeconds,
+    });
+    action = 'trimmed and converted';
+  } else if (sourceFormat !== 'mp3') {
+    prepared = await transcodeSeedanceReferenceAudioToMp3({
+      data: buffer,
+      filename,
+      inputMimeType: mimeType,
+      sourceFormat,
+    });
+    action = 'converted';
+  }
+  if (!options.quiet && action) {
+    console.error(`Prepared Seedance reference audio as ${action} MP3 before upload.`);
+  }
+  const data = Buffer.from(prepared.data);
+  return {
+    buffer: data,
+    filename: withMediaExtension(filename, 'mp3'),
+    byteLength: data.length,
+    mimeType: 'audio/mpeg',
+  };
+}
+async function prepareSeedanceReferenceVideoUploadFile(pathOrUrl, buffer) {
+  const filename = mediaFilenameFromSource(pathOrUrl, 'reference-video.mp4');
+  const rawMimeType = mimeTypeForPath(pathOrUrl, 'video/mp4');
+  const sourceDurationSeconds = await probeLocalMediaDurationSeconds(pathOrUrl);
+  const requestedDuration = Number.isFinite(Number(options.duration))
+    ? Number(options.duration)
+    : SEEDANCE_V2V_REFERENCE_MAX_DURATION_SECONDS;
+  const prepared = await prepareSharedSeedanceV2VSourceVideo(
+    buffer,
+    rawMimeType,
+    filename,
+    sourceDurationSeconds,
+    requestedDuration,
+    options.videoStart ?? 0,
+    { trimVideo: trimSeedanceV2VSourceVideo },
+  );
+  if (!options.quiet && prepared.trimmed) {
+    console.error('Prepared Seedance V2V reference video clip before upload.');
+  }
+  const data = Buffer.from(prepared.data);
+  return {
+    buffer: data,
+    filename: withMediaExtension(filename, 'mp4'),
+    byteLength: data.length,
+    mimeType: prepared.mimeType || 'video/mp4',
+  };
+}
+async function uploadSeedanceReferenceAudioUrl(pathOrUrl, apiKey, index = 0) {
+  const ref = { flag: '--ref-audio', value: pathOrUrl, kind: 'audio' };
+  const buffer = await fetchMediaBuffer(pathOrUrl);
+  const file = await prepareSeedanceReferenceAudioUploadFile(pathOrUrl, buffer);
+  const uploaded = await uploadPreparedApiMediaReferenceV2(ref, index, apiKey, file);
+  return uploaded.url;
+}
+async function uploadSeedanceReferenceVideoUrl(pathOrUrl, apiKey, index = 0) {
+  const ref = { flag: '--ref-video', value: pathOrUrl, kind: 'video' };
+  const buffer = await fetchMediaBuffer(pathOrUrl);
+  const file = await prepareSeedanceReferenceVideoUploadFile(pathOrUrl, buffer);
+  const uploaded = await uploadPreparedApiMediaReferenceV2(ref, index, apiKey, file);
+  return uploaded.url;
+}
 async function trimSeedanceV2VSourceVideoBuffer(buffer, sourceLabel, startOffset, requestedDuration) {
   const ffmpegPath = await ensureFfmpegAvailable();
   const tempDir = mkdtempSync(join(tmpdir(), 'sogni-seedance-v2v-'));
@@ -6733,12 +7111,41 @@ async function main() {
               || mimeTypeForPath(options.refAudio, 'application/octet-stream')
           )
         : 'unknown';
-      const useRefAudioUrl = isSeedanceVideo
-        && refAudioFormatByPath !== 'mp3'
-        && await appendSafeSeedanceReferenceUrl(seedanceReferenceAudioUrls, options.refAudio, 'Reference audio');
-      const useRefVideoUrl = isSeedanceVideo
-        && options.videoStart === null
-        && await appendSafeSeedanceReferenceUrl(seedanceReferenceVideoUrls, options.refVideo, 'Reference video');
+      let projectVideoStart = options.videoStart;
+      let useRefAudioUrl = false;
+      if (isSeedanceVideo && options.refAudio) {
+        const shouldUploadAudio =
+          !isHttpsUrl(options.refAudio) ||
+          refAudioFormatByPath !== 'mp3' ||
+          options.audioStart !== null ||
+          options.audioDuration !== null;
+        if (shouldUploadAudio) {
+          const uploadedAudioUrl = await uploadSeedanceReferenceAudioUrl(
+            options.refAudio,
+            creds.SOGNI_API_KEY,
+            0,
+          );
+          seedanceReferenceAudioUrls.push(uploadedAudioUrl);
+          useRefAudioUrl = true;
+        } else {
+          useRefAudioUrl = await appendSafeSeedanceReferenceUrl(seedanceReferenceAudioUrls, options.refAudio, 'Reference audio');
+        }
+      }
+      let useRefVideoUrl = false;
+      if (isSeedanceVideo && options.refVideo) {
+        if (isHttpsUrl(options.refVideo) && options.videoStart === null) {
+          useRefVideoUrl = await appendSafeSeedanceReferenceUrl(seedanceReferenceVideoUrls, options.refVideo, 'Reference video');
+        } else {
+          const uploadedVideoUrl = await uploadSeedanceReferenceVideoUrl(
+            options.refVideo,
+            creds.SOGNI_API_KEY,
+            0,
+          );
+          seedanceReferenceVideoUrls.push(uploadedVideoUrl);
+          useRefVideoUrl = true;
+          projectVideoStart = null;
+        }
+      }
       // Seedance loose-reference extras: -c/--context images beyond start/end,
       // plus repeated --ref-audio / --ref-video entries past the first. The
@@ -6757,7 +7164,7 @@ async function main() {
           }
           await appendSafeSeedanceReferenceUrl(seedanceReferenceImageUrls, ctxImage, 'Seedance image reference');
         }
-        for (const extraAudio of options.refAudios) {
+        for (const [extraAudioIndex, extraAudio] of options.refAudios.entries()) {
           if (!isHttpsUrl(extraAudio)) {
             fatalCliError(
               `Additional --ref-audio "${extraAudio}" must be an HTTPS URL. ` +
@@ -6765,7 +7172,21 @@ async function main() {
               { code: 'INVALID_ARGUMENT', details: { flag: '--ref-audio', value: extraAudio } },
             );
           }
-          await appendSafeSeedanceReferenceUrl(seedanceReferenceAudioUrls, extraAudio, 'Seedance audio reference');
+          const extraAudioFormat = detectReferenceAudioFormat(
+            new Uint8Array(),
+            normalizeReferenceAudioMimeType(mimeTypeForPath(extraAudio, 'application/octet-stream'))
+              || mimeTypeForPath(extraAudio, 'application/octet-stream')
+          );
+          if (extraAudioFormat !== 'mp3') {
+            const uploadedAudioUrl = await uploadSeedanceReferenceAudioUrl(
+              extraAudio,
+              creds.SOGNI_API_KEY,
+              extraAudioIndex + 1,
+            );
+            seedanceReferenceAudioUrls.push(uploadedAudioUrl);
+          } else {
+            await appendSafeSeedanceReferenceUrl(seedanceReferenceAudioUrls, extraAudio, 'Seedance audio reference');
+          }
         }
         for (const extraVideo of options.refVideos) {
           if (!isHttpsUrl(extraVideo)) {
@@ -6783,7 +7204,6 @@ async function main() {
       let endImageBuffer = options.refImageEnd && !useRefImageEndUrl ? await fetchMediaBuffer(options.refImageEnd) : undefined;
       let audioBuffer = options.refAudio && !useRefAudioUrl ? await fetchMediaBuffer(options.refAudio) : undefined;
       let videoBuffer = options.refVideo && !useRefVideoUrl ? await fetchMediaBuffer(options.refVideo) : undefined;
-      let projectVideoStart = options.videoStart;
       if (audioBuffer) {
         audioBuffer = await prepareReferenceAudioForVideoBuffer(audioBuffer, options.refAudio);
       }
@@ -6884,10 +7304,10 @@ async function main() {
       if (audioBuffer) {
         projectConfig.referenceAudio = audioBuffer;
       }
-      if (options.audioStart !== null) {
+      if (options.audioStart !== null && !useRefAudioUrl) {
         projectConfig.audioStart = options.audioStart;
       }
-      if (options.audioDuration !== null) {
+      if (options.audioDuration !== null && !useRefAudioUrl) {
         projectConfig.audioDuration = options.audioDuration;
       }
       if (audioIdentityMedia) {

package/version.mjs CHANGED Viewed

	@@ -1 +1 @@
1	- export const PACKAGE_VERSION = '3.3.0';
1	+ export const PACKAGE_VERSION = '3.3.2';