npm - @vibeframe/mcp-server - Versions diffs - 0.104.2 → 0.105.1 - Mend

@vibeframe/mcp-server 0.104.2 → 0.105.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/index.js +303 -46
package/package.json +3 -3

package/dist/index.js CHANGED Viewed

@@ -7613,7 +7613,7 @@ YAML cues that drive narration, backdrop generation, and timing.
 \`\`\`yaml
 narration: "Introduce the promise in one crisp sentence."
-backdrop: "Cinematic abstract technology backdrop, precise light, premium editorial feel"
+backdrop: "Topic-aligned editorial background plate, abstract visual system, no readable text, no logos, no consumer products, clean negative space for HTML overlays"
 duration: 4
 \`\`\`
@@ -7624,7 +7624,7 @@ screen and one spoken breath.
 \`\`\`yaml
 narration: "Show the mechanism or proof point that makes the promise believable."
-backdrop: "Layered interface details, subtle motion trails, high-contrast product storytelling"
+backdrop: "Topic-aligned analytical background plate, abstract dashboard structure, no readable text, no product photos, no shoes, no unrelated objects"
 duration: 4
 \`\`\`
@@ -7635,7 +7635,7 @@ before/after.
 \`\`\`yaml
 narration: "Close with the action the viewer should remember."
-backdrop: "Resolved hero frame, confident final composition, clean negative space"
+backdrop: "Resolved editorial background plate, confident final composition, clean negative space, no readable text, no logos, no unrelated products"
 duration: 4
 \`\`\`
@@ -7688,6 +7688,33 @@ consult this file \u2014 run the generate command directly.
 Browse named styles: \`vibe scene list-styles\`. Re-seed from one with
 \`vibe scene init . --visual-style "Swiss Pulse"\` (idempotent).
+## Brief and local media
+\`brief.md\` is raw intent, not a strict schema. It may contain messy notes,
+pasted research, links, product requirements, or a one-line idea. \`vibe init
+--from brief.md\` uses it only to seed \`STORYBOARD.md\` and \`DESIGN.md\`;
+after init, those two files are the working source of truth.
+Use \`media/\` for user-provided source files: product photos, screenshots,
+logos, B-roll, recorded narration, or reference clips. Keep those inputs
+inside this project so build references stay project-relative. Do not put user
+media in \`references/\`; that directory is reserved for local composition
+rules installed by VibeFrame.
+When a beat should reuse a local file, reference it from \`STORYBOARD.md\`
+with a project-relative path:
+\`\`\`yaml
+backdrop: "media/product-shot.png" # existing still image
+video: "media/broll.mp4"           # existing video/B-roll
+narration: "media/voice.wav"       # existing recorded narration
+asset: "media/logo.png"            # generic local asset reference
+\`\`\`
+Use text cues when you want VibeFrame to generate an asset. Use path cues
+when you want VibeFrame to reuse a local file. Avoid absolute paths or parent
+directory references; copy files into \`media/\` first.
 ## Provider keys and project scope
 Use VibeFrame CLI generation for project assets:
@@ -7735,9 +7762,11 @@ the framework-level minimum, not the cinematic craft layer.
 - \`DESIGN.md\` \u2014 visual identity contract (palette, type, motion, transitions)
 - \`STORYBOARD.md\` \u2014 per-beat narration/backdrop/duration cues for \`vibe build\`
+- \`media/\` \u2014 user-provided source files (photos, logos, clips, voice recordings)
 - \`index.html\` \u2014 root composition (timeline)
 - \`compositions/scene-*.html\` \u2014 per-scene HTML authored by you or the agent
-- \`assets/\` \u2014 shared media (narration audio, images, video)
+- \`assets/\` \u2014 generated/canonical build media (narration audio, images, video)
+- \`references/\` \u2014 composition rule docs installed by VibeFrame, not user media
 - \`transcript.json\` \u2014 Whisper word-level transcript (if narration exists)
 - \`hyperframes.json\` \u2014 HF registry config (speak to both toolchains)
 - \`vibe.config.json\` \u2014 canonical VibeFrame config (providers, budget)
@@ -7768,7 +7797,11 @@ npx hyperframes render
    \`\`\`
 4. Videos use \`muted\` with a separate \`<audio>\` element for the audio track.
 5. Sub-compositions use \`data-composition-src="compositions/file.html"\`.
-6. Only deterministic logic \u2014 no \`Date.now()\`, \`Math.random()\`, or network fetches.
+6. For render-stable text, do not apply continuous \`scale\`, \`x\`, \`y\`, or
+   \`filter\` tweens to \`.scene-content\` or any ancestor containing live text.
+   Animate background/media layers instead; text/cards should enter briefly and
+   then hold still at their final CSS positions.
+7. Only deterministic logic \u2014 no \`Date.now()\`, \`Math.random()\`, or network fetches.
 ## Linting \u2014 run after changes
@@ -7878,7 +7911,11 @@ async function scaffoldSceneProject(opts) {
     if (await pathExists(metaPath)) {
       skipped2.push(metaPath);
     } else {
-      await writeFile(metaPath, JSON.stringify(buildHyperframesMeta(name, now), null, 2) + "\n", "utf-8");
+      await writeFile(
+        metaPath,
+        JSON.stringify(buildHyperframesMeta(name, now), null, 2) + "\n",
+        "utf-8"
+      );
       created.push(metaPath);
     }
     const rootPath = resolve2(dir, "index.html");
@@ -7893,11 +7930,7 @@ async function scaffoldSceneProject(opts) {
   if (await pathExists(vibeConfigJsonPath)) {
     skipped2.push(vibeConfigJsonPath);
   } else {
-    await writeFile(
-      vibeConfigJsonPath,
-      projectConfigJson({ name, aspect }),
-      "utf-8"
-    );
+    await writeFile(vibeConfigJsonPath, projectConfigJson({ name, aspect }), "utf-8");
     created.push(vibeConfigJsonPath);
   }
   const vibePath = resolve2(dir, "vibe.project.yaml");
@@ -7936,11 +7969,7 @@ ${existing}`, "utf-8");
   if (await pathExists(designPath)) {
     skipped2.push(designPath);
   } else {
-    await writeFile(
-      designPath,
-      buildDesignMd({ name, style: opts.visualStyle }),
-      "utf-8"
-    );
+    await writeFile(designPath, buildDesignMd({ name, style: opts.visualStyle }), "utf-8");
     created.push(designPath);
   }
   const storyboardPath = resolve2(dir, "STORYBOARD.md");
@@ -24322,6 +24351,53 @@ function pickReferenceImageUrl(input3) {
     return input3;
   return void 0;
 }
+function isFalFileInput(value) {
+  return value.startsWith("http://") || value.startsWith("https://") || value.startsWith("data:");
+}
+async function normaliseReferences(input3, client) {
+  if (!input3)
+    return [];
+  const references = [];
+  for (const ref of input3) {
+    if (!isFalFileInput(ref.url))
+      continue;
+    references.push({
+      ...ref,
+      url: ref.url.startsWith("data:") ? await uploadDataUri(client, ref.url) : ref.url
+    });
+  }
+  return references;
+}
+async function uploadDataUri(client, dataUri) {
+  const blob = dataUriToBlob(dataUri);
+  return client.storage.upload(blob, {
+    lifecycle: { expiresIn: "1h" }
+  });
+}
+function dataUriToBlob(dataUri) {
+  const match2 = /^data:([^;,]+)?(;base64)?,(.*)$/s.exec(dataUri);
+  if (!match2)
+    throw new Error("Invalid data URI reference.");
+  const mimeType = match2[1] || "application/octet-stream";
+  const isBase64 = Boolean(match2[2]);
+  const payload = match2[3] ?? "";
+  const buffer = isBase64 ? Buffer.from(payload, "base64") : Buffer.from(decodeURIComponent(payload), "utf-8");
+  return new Blob([new Uint8Array(buffer)], { type: mimeType });
+}
+function groupReferences(references) {
+  const image_urls = [];
+  const video_urls = [];
+  const audio_urls = [];
+  for (const ref of references) {
+    if (ref.kind === "image")
+      image_urls.push(ref.url);
+    if (ref.kind === "video")
+      video_urls.push(ref.url);
+    if (ref.kind === "audio")
+      audio_urls.push(ref.url);
+  }
+  return { image_urls, video_urls, audio_urls };
+}
 function normaliseAspect(value) {
   if (!value)
     return "auto";
@@ -24344,7 +24420,7 @@ function normaliseDuration(value) {
     return "auto";
   return Math.max(4, Math.min(15, Math.round(value)));
 }
-var import_client, ENDPOINT_TEXT_TO_VIDEO, ENDPOINT_IMAGE_TO_VIDEO, DEFAULT_VARIANT, VALID_RESOLUTIONS, VALID_ASPECTS, FalProvider, falProvider;
+var import_client, ENDPOINT_TEXT_TO_VIDEO, ENDPOINT_IMAGE_TO_VIDEO, ENDPOINT_REFERENCE_TO_VIDEO, DEFAULT_VARIANT, VALID_RESOLUTIONS, VALID_ASPECTS, FalProvider, falProvider;
 var init_FalProvider = __esm({
   "../ai-providers/dist/fal/FalProvider.js"() {
     "use strict";
@@ -24357,6 +24433,10 @@ var init_FalProvider = __esm({
       "seedance-2.0": "bytedance/seedance-2.0/image-to-video",
       "seedance-2.0-fast": "bytedance/seedance-2.0/fast/image-to-video"
     };
+    ENDPOINT_REFERENCE_TO_VIDEO = {
+      "seedance-2.0": "bytedance/seedance-2.0/reference-to-video",
+      "seedance-2.0-fast": "bytedance/seedance-2.0/fast/reference-to-video"
+    };
     DEFAULT_VARIANT = "seedance-2.0";
     VALID_RESOLUTIONS = ["480p", "720p", "1080p"];
     VALID_ASPECTS = ["21:9", "16:9", "4:3", "1:1", "3:4", "9:16", "auto"];
@@ -24365,7 +24445,7 @@ var init_FalProvider = __esm({
         this.id = "seedance";
         this.name = "fal.ai (Seedance 2.0)";
         this.description = "fal.ai hosting ByteDance Seedance 2.0 \u2014 Artificial Analysis #2 on both text-to-video and image-to-video leaderboards";
-        this.capabilities = ["text-to-video", "image-to-video"];
+        this.capabilities = ["text-to-video", "image-to-video", "reference-to-video"];
         this.iconUrl = "/icons/fal.svg";
         this.isAvailable = true;
       }
@@ -24404,9 +24484,11 @@ var init_FalProvider = __esm({
             error: `Unknown Seedance variant: ${variant}. Valid: ${Object.keys(ENDPOINT_TEXT_TO_VIDEO).join(", ")}.`
           };
         }
-        const referenceImage = pickReferenceImageUrl(options?.referenceImage);
+        const references = await normaliseReferences(options?.references, this.client);
+        const hasReferences = references.length > 0;
+        const referenceImage = hasReferences ? void 0 : pickReferenceImageUrl(options?.referenceImage);
         const isImageToVideo = !!referenceImage;
-        const endpointId = isImageToVideo ? ENDPOINT_IMAGE_TO_VIDEO[variant] : ENDPOINT_TEXT_TO_VIDEO[variant];
+        const endpointId = hasReferences ? ENDPOINT_REFERENCE_TO_VIDEO[variant] : isImageToVideo ? ENDPOINT_IMAGE_TO_VIDEO[variant] : ENDPOINT_TEXT_TO_VIDEO[variant];
         const aspect = normaliseAspect(options?.aspectRatio);
         const resolution = normaliseResolution(options?.resolution);
         const duration = normaliseDuration(options?.duration);
@@ -24416,13 +24498,26 @@ var init_FalProvider = __esm({
           resolution,
           duration
         };
-        if (referenceImage)
+        if (hasReferences) {
+          const grouped = groupReferences(references);
+          if (grouped.image_urls.length > 0)
+            input3.image_urls = grouped.image_urls;
+          if (grouped.video_urls.length > 0)
+            input3.video_urls = grouped.video_urls;
+          if (grouped.audio_urls.length > 0)
+            input3.audio_urls = grouped.audio_urls;
+        } else if (referenceImage) {
           input3.image_url = referenceImage;
+        }
         if (options?.negativePrompt)
           input3.negative_prompt = options.negativePrompt;
         if (typeof options?.seed === "number")
           input3.seed = options.seed;
-        if (options?.lastFrame)
+        if (typeof options?.generateAudio === "boolean")
+          input3.generate_audio = options.generateAudio;
+        if (options?.endUserId)
+          input3.end_user_id = options.endUserId;
+        if (!hasReferences && options?.lastFrame)
           input3.end_image_url = options.lastFrame;
         try {
           const out = await this.client.subscribe(endpointId, { input: input3, logs: false });
@@ -24469,14 +24564,15 @@ var init_fal = __esm({
       // Review this alias at the 1.0 cut.
       aliases: ["fal"],
       models: ["seedance-2.0", "seedance-2.0-fast"],
-      capabilities: ["text-to-video", "image-to-video", "native-audio"],
+      capabilities: ["text-to-video", "image-to-video", "reference-to-video", "native-audio"],
       apiKey: "fal",
       kinds: ["video"],
       resolverPriority: { video: 1 },
       commandsUnlocked: [
         "generate video -p seedance (Seedance 2.0 via fal.ai \u2014 default since v0.57)",
         "generate video -p seedance --seedance-model fast (lower-latency variant)",
-        "generate video -p seedance -i <image> (image-to-video)"
+        "generate video -p seedance -i <image> (image-to-video)",
+        "generate video -p seedance --ref-images <images...> (reference-to-video)"
       ]
     });
   }
@@ -284700,7 +284796,7 @@ ${newComment.split("\n").map((c) => ` * ${c}`).join("\n")}
           /*mapfn*/
           (name) => ts_FindAllReferences_exports.getReferenceEntriesForNode(-1, name, program2, program2.getSourceFiles(), cancellationToken)
         );
-        const groupedReferences = groupReferences(references);
+        const groupedReferences = groupReferences2(references);
         if (!every(
           groupedReferences.declarations,
           /*callback*/
@@ -284709,7 +284805,7 @@ ${newComment.split("\n").map((c) => ` * ${c}`).join("\n")}
           groupedReferences.valid = false;
         }
         return groupedReferences;
-        function groupReferences(referenceEntries) {
+        function groupReferences2(referenceEntries) {
           const classReferences = { accessExpressions: [], typeUsages: [] };
           const groupedReferences2 = { functionCalls: [], declarations: [], classReferences, valid: true };
           const functionSymbols = map3(functionNames, getSymbolTargetAtLocation);
@@ -449179,13 +449275,18 @@ Requirements (non-negotiable):
   producer's seek lands past the timeline's natural end and visibility state
   goes stale \u2014 the hold phase renders BLACK. Anchor the timeline to the full
   beat duration via either:
-    1. A subtle idle motion spanning 0\u2192duration on a parent element, e.g.
-       \`tl.fromTo(".scene-content", { scale: 1.0 }, { scale: 1.015, duration: <beat>, ease: "none" }, 0);\`
+    1. A subtle idle motion spanning 0\u2192duration on a background/media layer,
+       e.g. \`tl.fromTo(".backdrop", { scale: 1.0 }, { scale: 1.015, duration: <beat>, ease: "none" }, 0);\`
        (Ken-Burns, breathing opacity, gradient drift \u2014 should be barely
        perceptible so it doesn't compete with entry/exit beats).
     2. OR an explicit \`tl.set(target, { ...natural state... }, <beat - 0.001>)\`
        anchor at the end.
   This is the #2 source of "text disappears mid-beat" bugs after \`.clip\` sizing.
+- Do not apply continuous \`scale\`, \`x\`, \`y\`, \`filter\`, or other transform
+  tweens to \`.scene-content\` or any ancestor that contains live text/cards.
+  Animate the backdrop/media plane instead; let text enter briefly, then hold
+  still at its final CSS position. Continuous transforms on text ancestors can
+  create subpixel shimmer in screenshot-captured renders.
 - Timed children inside the composition have \`class="clip"\` plus
   \`data-start\`, \`data-duration\`, \`data-track-index\`.
 - If \`assets/backdrop-${ctx.beat.id}.png\` exists, use that local file as the
@@ -449255,7 +449356,9 @@ Reference shape (verbatim \u2014 match this skeleton exactly, no DOCTYPE / html
       const tl = gsap.timeline({ paused: true });
       // Idle motion spanning full beat duration \u2014 required to keep timeline
       // length aligned with data-duration (otherwise hold phase goes black).
-      tl.fromTo(".scene-content", { scale: 1.0 }, { scale: 1.015, duration: <sec>, ease: "none" }, 0);
+      // Keep continuous motion on the background/media layer so live text does
+      // not shimmer from subpixel resampling.
+      tl.fromTo(".backdrop", { scale: 1.0 }, { scale: 1.015, duration: <sec>, ease: "none" }, 0);
       // entry tweens
       window.__timelines["${compositionId}"] = tl;
     </script>
@@ -451197,6 +451300,32 @@ var init_build_asset_metadata = __esm({
   }
 });
+// ../cli/src/commands/_shared/build-backdrop-prompt.ts
+function augmentBackdropPrompt(cue) {
+  const trimmed = cue.trim();
+  const lower = trimmed.toLowerCase();
+  const requestsTextOrMarks = /\b(text|typography|title|headline|label|caption|logo|logos|wordmark|brand mark|brand marks)\b/.test(lower);
+  const forbidsTextOrMarks = /\b(no|without|avoid)\s+(readable\s+)?(text|typography|titles?|headlines?|labels?|captions?|brand\s+logos?|logos?|wordmarks?|brand\s+marks?)\b/.test(
+    lower
+  );
+  const allowsTextOrMarks = requestsTextOrMarks && !forbidsTextOrMarks;
+  const overlayContract = allowsTextOrMarks ? "The image is a video background or end-card plate; do not add any text, logos, charts, or UI beyond what the scene cue explicitly requests." : "The image is a background only; HTML overlays will provide all final text, charts, logos, and UI labels.";
+  const textRule = allowsTextOrMarks ? "If text, logos, or brand marks are explicitly requested, keep them minimal, legible, and do not invent extras." : "No readable text, labels, UI copy, logos, brand marks, watermarks, or invented typography.";
+  return [
+    "Create a 16:9 video background plate for a HyperFrames scene.",
+    overlayContract,
+    `Scene cue: ${trimmed}`,
+    textRule,
+    "Avoid unrelated consumer product photography, shoes, packaging, food, people, celebrity faces, advertisements, and random objects unless explicitly requested by the scene cue.",
+    "Leave generous negative space for overlay text and cards. Keep the result topic-aligned, editorial, cinematic, and non-distracting."
+  ].join(" ");
+}
+var init_build_backdrop_prompt = __esm({
+  "../cli/src/commands/_shared/build-backdrop-prompt.ts"() {
+    "use strict";
+  }
+});
 // ../cli/src/commands/_shared/storyboard-edit.ts
 function validateStoryboardMarkdown(markdown) {
   const parsed = parseStoryboard(markdown);
@@ -451481,6 +451610,7 @@ async function createBuildPlan(opts) {
     const voice = stringOrUndefined3(cue.voice) ?? resolved.voice;
     const narrationText = stringOrUndefined3(cue.narration);
     const backdropPrompt = stringOrUndefined3(cue.backdrop);
+    const augmentedBackdropPrompt = backdropPrompt ? augmentBackdropPrompt(backdropPrompt) : null;
     const videoPrompt = stringOrUndefined3(cue.video);
     const musicPrompt = stringOrUndefined3(cue.music);
     const genericReference = resolveGenericAssetReference(projectDir, cue.asset);
@@ -451500,9 +451630,9 @@ async function createBuildPlan(opts) {
       voice,
       ext: resolved.narration.resolved === "elevenlabs" ? "mp3" : "wav"
     }) : null;
-    const backdropCache = backdropPrompt && !backdropReference ? backdropCacheDescriptor({
+    const backdropCache = augmentedBackdropPrompt && !backdropReference ? backdropCacheDescriptor({
       beatId: beat.id,
-      cue: backdropPrompt,
+      cue: augmentedBackdropPrompt,
       provider: resolved.image.resolved,
       quality: imageQuality,
       size: imageSize2,
@@ -452032,6 +452162,7 @@ var init_build_plan = __esm({
     init_build_asset_reference();
     init_build_cache();
     init_build_asset_metadata();
+    init_build_backdrop_prompt();
     init_composer_resolve();
     init_storyboard_parse();
     init_project_config();
@@ -452432,6 +452563,9 @@ async function executeVideoGenerate(options) {
     prompt: prompt3,
     provider = "kling",
     image,
+    refImages,
+    refVideos,
+    refAudio,
     duration = 5,
     ratio = "16:9",
     seed,
@@ -452440,6 +452574,7 @@ async function executeVideoGenerate(options) {
     resolution,
     veoModel = "3.1-fast",
     seedanceModel = "quality",
+    generateAudio,
     output: output3,
     wait = true,
     apiKey
@@ -452478,8 +452613,13 @@ async function executeVideoGenerate(options) {
     if (provider === "seedance" || provider === "fal") {
       const fal = new FalProvider();
       await fal.initialize({ apiKey: key2 });
+      const references = await prepareSeedanceReferences({
+        refImages,
+        refVideos,
+        refAudio
+      });
       let falImage = referenceImage;
-      if (falImage && falImage.startsWith("data:")) {
+      if (falImage && falImage.startsWith("data:") && references.length === 0) {
         const uploadHost = await resolveUploadHost();
         const upload = await uploadHost.uploadImage(referenceImageBuffer, {
           filename: image,
@@ -452490,11 +452630,14 @@ async function executeVideoGenerate(options) {
       const model = seedanceModel === "fast" || seedanceModel === "seedance-2.0-fast" ? "seedance-2.0-fast" : "seedance-2.0";
       const result = await fal.generateVideo(prompt3, {
         prompt: prompt3,
-        referenceImage: falImage,
+        referenceImage: references.length > 0 ? void 0 : falImage,
+        references: references.length > 0 ? references : void 0,
         duration,
         aspectRatio: ratio,
         negativePrompt: negative,
-        model
+        model,
+        resolution,
+        generateAudio
       });
       if (result.status === "failed")
         return { success: false, error: result.error || "Seedance generation failed" };
@@ -452671,6 +452814,54 @@ async function executeVideoGenerate(options) {
     };
   }
 }
+async function prepareSeedanceReferences(opts) {
+  const references = [];
+  for (const sourcePath of opts.refImages ?? []) {
+    references.push({
+      kind: "image",
+      url: await fileInputToUrlOrDataUri(sourcePath, "image/png"),
+      sourcePath
+    });
+  }
+  for (const sourcePath of opts.refVideos ?? []) {
+    references.push({
+      kind: "video",
+      url: await fileInputToUrlOrDataUri(sourcePath, "video/mp4"),
+      sourcePath
+    });
+  }
+  for (const sourcePath of opts.refAudio ?? []) {
+    references.push({
+      kind: "audio",
+      url: await fileInputToUrlOrDataUri(sourcePath, "audio/mpeg"),
+      sourcePath
+    });
+  }
+  return references;
+}
+async function fileInputToUrlOrDataUri(input3, fallbackMimeType) {
+  if (input3.startsWith("http://") || input3.startsWith("https://") || input3.startsWith("data:")) {
+    return input3;
+  }
+  const absPath = resolve28(process.cwd(), input3);
+  const buffer = await readFile14(absPath);
+  return `data:${mimeTypeForPath(input3, fallbackMimeType)};base64,${buffer.toString("base64")}`;
+}
+function mimeTypeForPath(path14, fallback2) {
+  const ext = path14.toLowerCase().split(".").pop();
+  const mimeTypes = {
+    jpg: "image/jpeg",
+    jpeg: "image/jpeg",
+    png: "image/png",
+    gif: "image/gif",
+    webp: "image/webp",
+    mp4: "video/mp4",
+    mov: "video/quicktime",
+    mp3: "audio/mpeg",
+    wav: "audio/wav"
+  };
+  return mimeTypes[ext || ""] || fallback2;
+}
 async function executeVideoStatus(options) {
   const {
     taskId,
@@ -454786,8 +454977,9 @@ async function dispatchNarration(beat, ctx) {
 async function dispatchBackdrop(beat, ctx) {
   const reference = assetReferenceForBeat(ctx.projectDir, "backdrop", beat);
   if (reference) return referencePrimitiveOutcome("backdrop", beat, ctx, reference);
-  const prompt3 = stringOrUndefined4(beat.cues?.backdrop);
-  if (!prompt3) return { status: "no-cue" };
+  const cue = stringOrUndefined4(beat.cues?.backdrop);
+  if (!cue) return { status: "no-cue" };
+  const prompt3 = augmentBackdropPrompt(cue);
   const rel = `assets/backdrop-${beat.id}.png`;
   const abs = join33(ctx.projectDir, rel);
   const size = ctx.imageSize ?? "1536x1024";
@@ -455765,6 +455957,7 @@ var init_scene_build = __esm({
     init_root_sync();
     init_build_cache();
     init_build_asset_metadata();
+    init_build_backdrop_prompt();
     init_ai_video();
     init_music();
     init_status_jobs();
@@ -463884,10 +464077,10 @@ var init_provider_resolver = __esm({
 // ../cli/src/commands/_shared/openai-image.ts
 function resolveOpenAIImageModel(modelAlias) {
-  const isGptImage2 = modelAlias === "2" || modelAlias === "gpt-image-2";
+  const isGptImage15 = modelAlias === "1.5" || modelAlias === "gpt-image-1.5";
   return {
-    openaiModel: isGptImage2 ? "gpt-image-2" : void 0,
-    modelLabel: isGptImage2 ? "GPT Image 2" : "GPT Image 1.5"
+    openaiModel: isGptImage15 ? "gpt-image-1.5" : "gpt-image-2",
+    modelLabel: isGptImage15 ? "GPT Image 1.5" : "GPT Image 2"
   };
 }
 async function executeOpenAIImageGenerate(prompt3, options, ctx) {
@@ -463924,7 +464117,7 @@ function registerImageCommand(parent) {
     "1:1"
   ).option("--quality <quality>", "Quality: standard, hd (openai only)", "standard").option("--style <style>", "Style: vivid, natural (openai only)", "vivid").option("--count <n>", "Number of images to generate", "1").option(
     "-m, --model <model>",
-    "Model. Gemini: flash, 3.1-flash, latest, pro. OpenAI: 1.5 (default), 2 (gpt-image-2)"
+    "Model. Gemini: flash, 3.1-flash, latest, pro. OpenAI: 2 (default), 1.5"
   ).option("--dry-run", "Preview parameters without executing").addHelpText(
     "after",
     `
@@ -465358,8 +465551,8 @@ function registerVideoCommand(parent) {
     "quality"
   ).option("--negative <prompt>", "Negative prompt - what to avoid (Kling/Veo)").option("--resolution <res>", "Video resolution: 720p, 1080p, 4k (Veo only)").option("--last-frame <path>", "Last frame image for frame interpolation (Veo only)").option(
     "--ref-images <paths...>",
-    "Reference images for character consistency (Veo 3.1 only, max 3)"
-  ).option("--person <mode>", "Person generation: allow_all, allow_adult (Veo only)").option("--veo-model <model>", "Veo model: 3.0, 3.1, 3.1-fast (default: 3.1-fast)", "3.1-fast").option(
+    "Reference images for Seedance reference-to-video or Veo character consistency"
+  ).option("--ref-videos <paths...>", "Reference videos for Seedance reference-to-video").option("--ref-audio <paths...>", "Reference audio for Seedance reference-to-video").option("--no-generate-audio", "Disable native audio when the provider supports it").option("--person <mode>", "Person generation: allow_all, allow_adult (Veo only)").option("--veo-model <model>", "Veo model: 3.0, 3.1, 3.1-fast (default: 3.1-fast)", "3.1-fast").option(
     "--runway-model <model>",
     "Runway model: gen4.5 (default, text+image-to-video), gen4_turbo (image-to-video only)",
     "gen4.5"
@@ -465502,7 +465695,11 @@ Examples:
               negative: options.negative,
               resolution: options.resolution,
               veoModel: options.veoModel,
-              seedanceModel: options.seedanceModel
+              seedanceModel: options.seedanceModel,
+              refImages: options.refImages,
+              refVideos: options.refVideos,
+              refAudio: options.refAudio,
+              generateAudio: options.generateAudio
             }
           }
         });
@@ -465789,8 +465986,13 @@ Examples:
       } else if (provider === "seedance") {
         const fal = new FalProvider();
         await fal.initialize({ apiKey });
+        const seedanceReferences = await prepareSeedanceReferences2({
+          refImages: options.refImages,
+          refVideos: options.refVideos,
+          refAudio: options.refAudio
+        });
         let falImage = referenceImage;
-        if (falImage && falImage.startsWith("data:")) {
+        if (falImage && falImage.startsWith("data:") && seedanceReferences.length === 0) {
           try {
             const uploadHost = await resolveUploadHost();
             spinner2.text = `Uploading image via ${uploadHost.provider} for Seedance...`;
@@ -465813,11 +466015,14 @@ Examples:
         const falModel = seedanceModel === "fast" || seedanceModel === "seedance-2.0-fast" ? "seedance-2.0-fast" : "seedance-2.0";
         result = await fal.generateVideo(prompt3, {
           prompt: prompt3,
-          referenceImage: falImage,
+          referenceImage: seedanceReferences.length > 0 ? void 0 : falImage,
+          references: seedanceReferences.length > 0 ? seedanceReferences : void 0,
           duration: options.duration ? parseInt(options.duration) : void 0,
           aspectRatio: options.ratio,
           negativePrompt: options.negative,
-          model: falModel
+          model: falModel,
+          resolution: options.resolution,
+          generateAudio: options.generateAudio
         });
         finalResult = result;
       }
@@ -465872,6 +466077,54 @@ Examples:
     }
   });
 }
+async function prepareSeedanceReferences2(opts) {
+  const references = [];
+  for (const sourcePath of opts.refImages ?? []) {
+    references.push({
+      kind: "image",
+      url: await fileInputToUrlOrDataUri2(sourcePath, "image/png"),
+      sourcePath
+    });
+  }
+  for (const sourcePath of opts.refVideos ?? []) {
+    references.push({
+      kind: "video",
+      url: await fileInputToUrlOrDataUri2(sourcePath, "video/mp4"),
+      sourcePath
+    });
+  }
+  for (const sourcePath of opts.refAudio ?? []) {
+    references.push({
+      kind: "audio",
+      url: await fileInputToUrlOrDataUri2(sourcePath, "audio/mpeg"),
+      sourcePath
+    });
+  }
+  return references;
+}
+async function fileInputToUrlOrDataUri2(input3, fallbackMimeType) {
+  if (input3.startsWith("http://") || input3.startsWith("https://") || input3.startsWith("data:")) {
+    return input3;
+  }
+  const absPath = resolve63(process.cwd(), input3);
+  const buffer = await readFile32(absPath);
+  return `data:${mimeTypeForPath2(input3, fallbackMimeType)};base64,${buffer.toString("base64")}`;
+}
+function mimeTypeForPath2(path14, fallback2) {
+  const ext = path14.toLowerCase().split(".").pop();
+  const mimeTypes = {
+    jpg: "image/jpeg",
+    jpeg: "image/jpeg",
+    png: "image/png",
+    gif: "image/gif",
+    webp: "image/webp",
+    mp4: "video/mp4",
+    mov: "video/quicktime",
+    mp3: "audio/mpeg",
+    wav: "audio/wav"
+  };
+  return mimeTypes[ext || ""] || fallback2;
+}
 async function recordVideoNoWaitJob(opts) {
   return createAndWriteJobRecord({
     jobType: "generate-video",
@@ -472211,6 +472464,9 @@ var generateVideoTool = defineTool({
       "Video provider (default: seedance when FAL_API_KEY is configured, otherwise first configured provider)"
     ),
     image: z5.string().optional().describe("Reference image path for image-to-video"),
+    refImages: z5.array(z5.string()).optional().describe("Reference images for Seedance reference-to-video"),
+    refVideos: z5.array(z5.string()).optional().describe("Reference videos for Seedance reference-to-video"),
+    refAudio: z5.array(z5.string()).optional().describe("Reference audio files for Seedance reference-to-video"),
     duration: z5.number().optional().describe("Duration in seconds (default: 5; Seedance accepts 4-15)"),
     ratio: z5.string().optional().describe("Aspect ratio: 16:9, 9:16, 1:1 (default: 16:9)"),
     mode: z5.string().optional().describe("Kling mode: std or pro"),
@@ -472219,6 +472475,7 @@ var generateVideoTool = defineTool({
     veoModel: z5.string().optional().describe("Veo model: 3.0, 3.1, 3.1-fast"),
     runwayModel: z5.string().optional().describe("Runway model: gen4.5, gen4_turbo"),
     seedanceModel: z5.string().optional().describe("Seedance variant: quality or fast (fal.ai only)"),
+    generateAudio: z5.boolean().optional().describe("Generate native synchronized audio when supported"),
     output: z5.string().optional().describe("Output file path (downloads video)"),
     wait: z5.boolean().optional().describe("Wait for completion (default: true)")
   }),

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vibeframe/mcp-server",
-  "version": "0.104.2",
+  "version": "0.105.1",
   "description": "VibeFrame MCP Server - AI-native video editing via Model Context Protocol",
   "type": "module",
   "bin": {
@@ -57,8 +57,8 @@
     "tsx": "^4.21.0",
     "typescript": "^5.3.3",
     "vitest": "^1.2.2",
-    "@vibeframe/core": "0.104.2",
-    "@vibeframe/cli": "0.104.2"
+    "@vibeframe/core": "0.105.1",
+    "@vibeframe/cli": "0.105.1"
   },
   "engines": {
     "node": ">=20"