@koda-sl/baker-cli 0.90.1 → 0.91.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  defaultRegistry,
10
10
  generateCatalog,
11
11
  validateCanvasDeep
12
- } from "./chunk-2E4H2GIJ.js";
12
+ } from "./chunk-LMVDA3EZ.js";
13
13
 
14
14
  // src/cli.ts
15
15
  import { defineCommand as defineCommand141, runMain } from "citty";
@@ -8287,6 +8287,24 @@ import { promisify as promisify2 } from "util";
8287
8287
  var execFileAsync2 = promisify2(execFile2);
8288
8288
  var PYSCENEDETECT_THRESHOLD = 18;
8289
8289
  var PYSCENEDETECT_MIN_SCENE_LEN_S = 0.25;
8290
+ var PYSCENEDETECT_RECHECK_THRESHOLD = 27;
8291
+ var PYSCENEDETECT_RECHECK_MIN_SCENE_LEN_S = 0.6;
8292
/**
 * Heuristic: does a list of scene-cut timestamps look like one continuous shot
 * that the detector chopped into too many pieces?
 *
 * The cuts are reduced to the gaps between consecutive cuts (the first gap is
 * measured from 0), and the list is flagged when the median gap is shorter than
 * `opts.medianGapS`.
 *
 * @param {Iterable<number>} cuts - Cut timestamps; non-finite and non-positive
 *   entries are ignored, order does not matter.
 * @param {{ minCuts?: number, medianGapS?: number } | null} [opts]
 *   `minCuts` (default 6): fewer usable cuts than this is never flagged.
 *   `medianGapS` (default 2): median gap strictly below this is flagged.
 * @returns {boolean} true when the cut list looks over-segmented.
 */
function isLikelyOverSegmented(cuts, opts = {}) {
  // `opts?.` tolerates an explicit null (default parameters only cover undefined).
  const minCuts = opts?.minCuts ?? 6;
  const maxMedianGap = opts?.medianGapS ?? 2;
  // De-duplicate first: a repeated timestamp is not an extra cut, and the
  // zero-length gap it would create drags the median down artificially.
  const usable = [...new Set(cuts ?? [])]
    .filter((c) => Number.isFinite(c) && c > 0)
    .sort((a, b) => a - b);
  // Explicit empty guard so the median below is never computed over nothing.
  if (usable.length === 0 || usable.length < minCuts) return false;
  const gaps = usable
    .map((c, i) => c - (i === 0 ? 0 : usable[i - 1]))
    .sort((a, b) => a - b);
  const mid = Math.floor(gaps.length / 2);
  const median = gaps.length % 2 ? gaps[mid] : (gaps[mid - 1] + gaps[mid]) / 2;
  return median < maxMedianGap;
}
8290
8308
  function timecodeToSeconds(tc) {
8291
8309
  const m = tc.trim().match(/^(\d+):(\d{1,2}):(\d{1,2}(?:\.\d+)?)$/);
8292
8310
  if (!m) return null;
@@ -8306,9 +8324,7 @@ function parsePySceneDetectCsvCuts(csv) {
8306
8324
  }
8307
8325
  return [...new Set(cuts)].sort((a, b) => a - b);
8308
8326
  }
8309
- async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
8310
- const threshold = opts.threshold ?? PYSCENEDETECT_THRESHOLD;
8311
- const minSceneLenS = opts.minSceneLenS ?? PYSCENEDETECT_MIN_SCENE_LEN_S;
8327
+ async function runSceneDetectOnce(filePath, threshold, minSceneLenS, timeoutMs) {
8312
8328
  const outDir = await mkdtemp(join2(tmpdir(), "baker-scenedetect-"));
8313
8329
  try {
8314
8330
  await execFileAsync2(
@@ -8326,7 +8342,7 @@ async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
8326
8342
  "list-scenes",
8327
8343
  "--quiet"
8328
8344
  ],
8329
- { encoding: "utf-8", maxBuffer: 32 * 1024 * 1024, timeout: opts.timeout_ms ?? 12e4 }
8345
+ { encoding: "utf-8", maxBuffer: 32 * 1024 * 1024, timeout: timeoutMs }
8330
8346
  );
8331
8347
  const csvName = (await readdir2(outDir)).find((f) => f.toLowerCase().endsWith(".csv"));
8332
8348
  if (!csvName) return [];
@@ -8335,6 +8351,22 @@ async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
8335
8351
  await rm(outDir, { recursive: true, force: true });
8336
8352
  }
8337
8353
  }
8354
/**
 * Detect scene cuts for a video file, with one adaptive re-check.
 *
 * A first pass runs with the caller's threshold / minimum scene length (or the
 * module defaults). When the caller did NOT pin a threshold and the result looks
 * over-segmented, a second pass runs with the stricter re-check constants and
 * its result replaces the first.
 *
 * @param {string} filePath - Video to analyse (passed through to runSceneDetectOnce).
 * @param {{ threshold?: number|null, minSceneLenS?: number|null, timeout_ms?: number|null }} [opts]
 *   `threshold` pins the detector threshold and disables the re-check;
 *   `timeout_ms` defaults to 120000 and applies to each pass separately.
 * @returns {Promise<number[]>} Cut list as returned by runSceneDetectOnce.
 */
async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
  // "Pinned" must agree with the `??` fallback below: a null threshold runs at
  // the default, so it must not also switch the adaptive re-check off.
  const pinned = opts.threshold !== void 0 && opts.threshold !== null;
  const threshold = opts.threshold ?? PYSCENEDETECT_THRESHOLD;
  const minSceneLenS = opts.minSceneLenS ?? PYSCENEDETECT_MIN_SCENE_LEN_S;
  const timeoutMs = opts.timeout_ms ?? 12e4;
  const cuts = await runSceneDetectOnce(filePath, threshold, minSceneLenS, timeoutMs);
  if (pinned || !isLikelyOverSegmented(cuts)) return cuts;
  // Default pass looks like a single shot chopped up: retry with the stricter settings.
  return runSceneDetectOnce(
    filePath,
    PYSCENEDETECT_RECHECK_THRESHOLD,
    PYSCENEDETECT_RECHECK_MIN_SCENE_LEN_S,
    timeoutMs
  );
}
8338
8370
 
8339
8371
  // src/engine/scaffold/video.ts
8340
8372
  import { z as z3 } from "zod";
@@ -8438,7 +8470,7 @@ var FIXED_TTS_MODEL = "elevenlabs/eleven_v3";
8438
8470
  var FIXED_SFX_MODEL = "elevenlabs/eleven_text_to_sound_v2";
8439
8471
  var FIXED_MUSIC_MODEL = "elevenlabs/music-v1";
8440
8472
  var FIXED_VOICE_CONVERT_MODEL = "elevenlabs/eleven_multilingual_sts_v2";
8441
- var MUSIC_BED_GAIN_DB = -12;
8473
+ var MUSIC_BED_GAIN_DB = -20;
8442
8474
  var AMBIENT_BED_GAIN_DB = -20;
8443
8475
  var TRANSITION_DEFAULT_S = 0.4;
8444
8476
  var XFADE_BY_TYPE = {
@@ -8821,7 +8853,16 @@ function todoPath2(el, label) {
8821
8853
  return `[TODO: drop one real source image for ${label} (${el.type})${desc}${expr} \u2014 reused across every frame it appears in${fresh}${same}]`;
8822
8854
  }
8823
8855
  function buildElementSlots(elements) {
8824
- const usedIds = /* @__PURE__ */ new Set(["prompt", "spine", "overlaid", "audio_mix", "final", "music_bed"]);
8856
+ const usedIds = /* @__PURE__ */ new Set([
8857
+ "prompt",
8858
+ "spine",
8859
+ "overlaid",
8860
+ "captions",
8861
+ "captions_transcript",
8862
+ "audio_mix",
8863
+ "final",
8864
+ "music_bed"
8865
+ ]);
8825
8866
  const slots = [];
8826
8867
  assignElementLabels2(elements).forEach(({ el, label }, i) => {
8827
8868
  let id = sanitizeId2(`el_${label}`, `el_${i}`);
@@ -8892,6 +8933,9 @@ function buildFramePrompt(edge, sceneIndex, framePrompt, present, hasAnchor, mod
8892
8933
  "a studio) contains any text or graphics, DO NOT reproduce them \u2014 render the subject/scene",
8893
8934
  "only, leaving the regions where overlays will sit clean. Imperfect/garbled letterforms or",
8894
8935
  "stray icons are the worst outcome; leave those areas blank.",
8936
+ "A SCREEN/UI surface \u2014 an app, website, chat, dashboard, or phone display \u2014 is NEVER",
8937
+ "rendered here: leave any phone/screen OFF or blank-screened. The real interface is",
8938
+ "composited later as a screenshot or a brand HTML block, never AI-generated.",
8895
8939
  "",
8896
8940
  "FRAMING \u2014 ONE UNCUT FRAME:",
8897
8941
  "Render ONE single uncut photographic frame: NO split screen, NO panels, NO dividing line,",
@@ -8959,8 +9003,13 @@ function ingestFrameRef(url, edge, ctx, nodes) {
8959
9003
  function buildFrameRef(edge, url, framePrompt, present, ctx, nodes) {
8960
9004
  const tag = ctx.tag ?? "";
8961
9005
  if (ctx.reuse && url) return ingestFrameRef(url, edge, ctx, nodes);
8962
- const hasOriginal = Boolean(url);
8963
- const originalRef = hasOriginal && url ? ingestFrameRef(url, edge, ctx, nodes) : void 0;
9006
+ const hasPersonOrAnimal = present.some((s) => {
9007
+ const t = s.type.toLowerCase();
9008
+ return t === "person" || t === "animal";
9009
+ });
9010
+ const useOriginalAnchor = Boolean(url) && !hasPersonOrAnimal;
9011
+ const hasOriginal = useOriginalAnchor;
9012
+ const originalRef = useOriginalAnchor && url ? ingestFrameRef(url, edge, ctx, nodes) : void 0;
8964
9013
  const reference = [...present.map((s) => s.ref), ...originalRef ? [originalRef] : []];
8965
9014
  const genParams = {
8966
9015
  model: ctx.imageModel,
@@ -9151,12 +9200,21 @@ function emitSceneClip(i, scene, present, mode, nativeTurn, ambientBroll, frames
9151
9200
  return { ref: `$ref:s${i}${tag}_clip_trim.video`, scene_s: lengths.dur, out };
9152
9201
  }
9153
9202
  var COMPOSITE_LAYOUTS = /* @__PURE__ */ new Set(["split_screen", "pip", "keyed_overlay"]);
9203
// Case-insensitive, whole-word vocabulary that marks a composition region as a
// screen/UI surface (app, website, chat, dashboard, screen recording, ...).
var UI_SURFACE_RE = /\b(?:app|ui|web ?site|web ?page|website|browser|chat|interface|mock-?up|in[- ]?app|dashboard|app screen|phone screen|screen[- ]?(?:recording|capture|grab|share))\b/i;
/**
 * Tell whether a composition region describes a screen/UI surface.
 *
 * @param {{ panel?: unknown, summary?: unknown, frame_prompt?: unknown }} r - Region to inspect.
 * @returns {boolean} true when panel, summary or frame_prompt contains a UI keyword.
 */
function regionIsUiSurface(r) {
  // Missing fields contribute an empty string; the three are searched as one text.
  const haystack = [r.panel, r.summary, r.frame_prompt].map((field) => field ?? "").join(" ");
  return UI_SURFACE_RE.test(haystack);
}
9207
/**
 * Tell whether a composite is essentially "a UI surface plus at most one other panel".
 *
 * @param {object[]} regions - Composition regions.
 * @returns {boolean} true when at least one region is a UI surface and no more
 *   than one region is not.
 */
function isUiOnlyComposite(regions) {
  const uiCount = regions.reduce((count, region) => count + (regionIsUiSurface(region) ? 1 : 0), 0);
  const otherCount = regions.length - uiCount;
  return uiCount >= 1 && otherCount <= 1;
}
9154
9211
/**
 * Resolve a scene's layered (multi-region) composition, if it has one.
 *
 * @param {{ composition?: { layout?: string, regions?: unknown[] } }} scene - Scene to inspect.
 * @returns {{ layout: string, regions: object[], comp: object } | null}
 *   The lower-cased layout, its object regions and the raw composition, or null
 *   when the layout is not a composite layout, fewer than two object regions
 *   exist, or the composite is UI-only.
 */
function layeredComposition(scene) {
  const { composition } = scene;
  const layoutName = (composition?.layout ?? "").toLowerCase();
  if (!COMPOSITE_LAYOUTS.has(layoutName)) return null;
  // Keep only real objects; null and primitive entries are dropped.
  const objectRegions = (composition?.regions ?? []).filter(
    (region) => typeof region === "object" && Boolean(region)
  );
  if (objectRegions.length < 2 || isUiOnlyComposite(objectRegions)) return null;
  return { layout: layoutName, regions: objectRegions, comp: composition ?? {} };
}
9162
9220
  function splitAxisOf(comp, regions) {
@@ -9320,24 +9378,15 @@ function emitFlashHold(i, scene, slots, ctx, lengths, out, ar, nodes, clips) {
9320
9378
  });
9321
9379
  clips.push({ ref: `$ref:s${i}_clip.video`, scene_s: lengths.dur, out });
9322
9380
  }
9323
/**
 * Build the "emotional arc" suffix appended to the music-bed prompt.
 *
 * @param {{ scenes: Array<{ narrative_role?: string }> }} blueprint - Video blueprint.
 * @returns {string} Two newlines followed by the arc instruction (scene
 *   narrative roles joined with arrows), or "" when no scene has a role.
 */
function musicArcDigest(blueprint) {
  // Collect only the scenes that actually carry a narrative role.
  const roles = blueprint.scenes.flatMap((scene) => (scene.narrative_role ? [scene.narrative_role] : []));
  const arc = roles.length > 0 ? roles.join(" \u2192 ") : "";
  if (!arc) return "";
  return `\n\nEmotional arc across scenes: ${arc}. Shape the bed's energy to this arc, swelling on the payoff. Purely instrumental \u2014 no vocals, no singing, no spoken words.`;
}
9339
9388
  function musicBedPrompt(blueprint, musicPrompt) {
9340
- const digest = musicScriptDigest(blueprint);
9389
+ const digest = musicArcDigest(blueprint);
9341
9390
  const track2 = blueprint.global?.music?.identified_track;
9342
9391
  const title = track2?.title?.trim();
9343
9392
  const vibe = title ? `
@@ -9881,13 +9930,21 @@ function buildSfxMusic(blueprint, nodes) {
9881
9930
  const musicPrompt = blueprint.global?.music?.music_prompt;
9882
9931
  if (musicPrompt) {
9883
9932
  const total = blueprint.source?.duration_s ?? lastSceneEnd(blueprint);
9884
- const startAt = Math.min(Math.max(blueprint.global?.music?.starts_at_s ?? 0, 0), Math.max(total - 0.5, 0));
9933
+ const hookEnd = blueprint.scenes[0]?.end_s ?? 0;
9934
+ const startAt = Math.min(Math.max(blueprint.global?.music?.starts_at_s ?? 0, hookEnd), Math.max(total - 0.5, 0));
9885
9935
  const totalMs = Math.round((total - startAt) * 1e3);
9886
9936
  const musicMs = Math.min(Math.max(totalMs, 3e3), ELEVENLABS_MAX_MUSIC_LENGTH_MS);
9887
9937
  nodes.push({
9888
9938
  id: "music_bed",
9889
9939
  type: "music",
9890
- params: { model: FIXED_MUSIC_MODEL, prompt: musicBedPrompt(blueprint, musicPrompt), music_length_ms: musicMs }
9940
+ // force_instrumental: the model is vocal-capable; without this it can SING the
9941
+ // mood (and feeding it the script made it sing the ad). The voice owns the words.
9942
+ params: {
9943
+ model: FIXED_MUSIC_MODEL,
9944
+ prompt: musicBedPrompt(blueprint, musicPrompt),
9945
+ music_length_ms: musicMs,
9946
+ force_instrumental: true
9947
+ }
9891
9948
  });
9892
9949
  tracks.push({
9893
9950
  slot: "music",
@@ -9931,9 +9988,20 @@ function normalizeAnim(animation) {
9931
9988
  const mapped = animation === "slide" ? "slide_up" : animation;
9932
9989
  return SUPPORTED_ANIMS.has(mapped) ? mapped : void 0;
9933
9990
  }
9991
// Normalised position names that sit in the middle of the frame; positionClass
// sends these to bottom-center instead.
var FACE_ZONE_POSITIONS = /* @__PURE__ */ new Set([
  "center",
  "centre",
  "mid-center",
  "mid-centre",
  "middle-center",
  "center-center",
  "mid",
  "middle"
]);
/**
 * Map a free-form position label to its `pos-*` CSS class.
 *
 * The label is lower-cased, every run of non-letters becomes a single `-`, and
 * leading/trailing dashes are stripped so inputs such as " Center " or
 * "(center)" normalise to "center" rather than "-center-". Missing or empty
 * labels, and any label in FACE_ZONE_POSITIONS, resolve to "bottom-center".
 *
 * @param {string | null | undefined} position - Position label.
 * @returns {string} CSS class of the form `pos-<normalised-position>`.
 */
function positionClass(position) {
  const normalized = (position ?? "bottom_center")
    .toLowerCase()
    .replace(/[^a-z]+/g, "-")
    // Stray separators would defeat the face-zone lookup and yield a malformed class.
    .replace(/^-+|-+$/g, "");
  const safe = normalized === "" || FACE_ZONE_POSITIONS.has(normalized) ? "bottom-center" : normalized;
  return `pos-${safe}`;
}
9938
10006
  function collectCaptions(blueprint) {
9939
10007
  return blueprint.scenes.flatMap((scene) => {
@@ -10004,6 +10072,26 @@ function floatingStub(fe, sceneStart) {
10004
10072
  `<img class="ov ${positionClass(fe.position)}" src="your-${slug}.png" data-start="${at}" data-dur="${dur}" alt="" /> -->`
10005
10073
  ].join("\n");
10006
10074
  }
10075
/**
 * Emit an HTML comment stub telling the author how to build the phone/UI
 * picture-in-picture for a scene whose composite is UI-only.
 *
 * @param {{ composition?: object, start_s?: number, end_s?: number }} scene - Scene to inspect.
 * @returns {string} The multi-line HTML comment, or "" when the scene's layout
 *   is not a composite layout, has fewer than two object regions, or is not
 *   UI-only.
 */
function uiPipStub(scene) {
  const composition = scene.composition;
  const layoutName = (composition?.layout ?? "").toLowerCase();
  if (!COMPOSITE_LAYOUTS.has(layoutName)) return "";
  const objectRegions = (composition?.regions ?? []).filter(
    (region) => typeof region === "object" && Boolean(region)
  );
  if (objectRegions.length < 2) return "";
  if (!isUiOnlyComposite(objectRegions)) return "";
  const uiRegion = objectRegions.find(regionIsUiSurface);
  const at = scene.start_s ?? 0;
  // No end time: assume a 2.5s scene. Duration is rounded to centiseconds, floor 0.5s.
  const endS = scene.end_s ?? at + 2.5;
  const dur = Math.max(0.5, Math.round((endS - at) * 100) / 100);
  const rawLabel = uiRegion?.summary || uiRegion?.frame_prompt || uiRegion?.panel || "the app screen";
  const label = commentSafe(rawLabel);
  const lines = [];
  lines.push(`<!-- PHONE UI @ ${at}s for ${dur}s \u2014 the app/site screen this scene shows: ${label}.`);
  lines.push(" Build it as a REAL surface, NEVER AI: capture the live page \u2014");
  lines.push(" baker images screenshot https://<brand-domain>/<path> (image-library skill)");
  lines.push(" \u2014 OR hand-build a brand-accurate HTML screen; then frame it in a phone mockup:");
  lines.push(" npx hyperframes add phone-scroll (writes compositions/phone-scroll.html)");
  lines.push(" drop the screenshot as screenshot.png in this dir and nest it as a PIP clip:");
  lines.push(
    ` <div data-composition-src="compositions/phone-scroll.html" data-start="${at}" data-duration="${dur}" data-track-index="2" data-width="1080" data-height="1920"></div> -->`
  );
  return lines.join("\n");
}
10007
10095
  function buildOverlayHtml(input) {
10008
10096
  const blueprint = VideoBlueprint.parse(input);
10009
10097
  const blocks = [
@@ -10029,6 +10117,8 @@ function buildOverlayHtml(input) {
10029
10117
  const sceneStart = scene.start_s ?? 0;
10030
10118
  const floats = z3.array(FloatingElement).safeParse(scene.floating_elements ?? []);
10031
10119
  const parts = (floats.success ? floats.data.map((fe) => floatingStub(fe, sceneStart)) : []).filter(Boolean);
10120
+ const pip = uiPipStub(scene);
10121
+ if (pip) parts.push(pip);
10032
10122
  if (parts.length > 0) blocks.push(parts.join("\n"));
10033
10123
  }
10034
10124
  return blocks.join("\n\n");
@@ -10110,13 +10200,14 @@ function scaffoldVideoCanvas(input, elementsInput, opts) {
10110
10200
  params: { source: "path", path: todoPath2(elements[i], slot.label), expect: "image" }
10111
10201
  });
10112
10202
  });
10113
- if (opts.actorSheets) applyActorSheets(slots, nodes);
10203
+ applyActorSheets(slots, nodes);
10114
10204
  const { clips, voTracks, vo_segments, talking_scenes } = buildTimeline(blueprint, slots, opts, nodes);
10115
10205
  let videoRef = buildSpine(clips, nodes);
10116
10206
  let videoNode = "spine";
10117
10207
  const overlays = blueprint.scenes.flatMap((s) => s.overlays ?? []);
10118
10208
  const floating = blueprint.scenes.flatMap((s) => s.floating_elements ?? []);
10119
- if (overlays.length > 0 || floating.length > 0) {
10209
+ const hasUiPip = blueprint.scenes.some((s) => uiPipStub(s) !== "");
10210
+ if (overlays.length > 0 || floating.length > 0 || hasUiPip) {
10120
10211
  nodes.push({
10121
10212
  id: "overlaid",
10122
10213
  type: "hyperframe_render",
@@ -10126,10 +10217,28 @@ function scaffoldVideoCanvas(input, elementsInput, opts) {
10126
10217
  videoRef = "$ref:overlaid.video";
10127
10218
  videoNode = "overlaid";
10128
10219
  }
10220
+ if (opts.captionsCompositionPath && opts.transcriptPath) {
10221
+ nodes.push({
10222
+ id: "captions_transcript",
10223
+ type: "ingest",
10224
+ params: { source: "path", path: opts.transcriptPath, expect: "json" }
10225
+ });
10226
+ nodes.push({
10227
+ id: "captions",
10228
+ type: "hyperframe_render",
10229
+ inputs: { background: videoRef, transcript: "$ref:captions_transcript.asset" },
10230
+ params: { composition: opts.captionsCompositionPath }
10231
+ });
10232
+ videoRef = "$ref:captions.video";
10233
+ videoNode = "captions";
10234
+ }
10129
10235
  const tracks = [...voTracks, ...buildSfxMusic(blueprint, nodes)];
10130
10236
  if (tracks.length > 0) {
10131
10237
  const mixInputs = {};
10132
10238
  for (const t of tracks) mixInputs[t.slot] = t.ref;
10239
+ const musicTrack = tracks.find((t) => t.kind === "music");
10240
+ const voiceSlots = tracks.filter((t) => t.kind === "vo").map((t) => t.slot);
10241
+ const duck = musicTrack && voiceSlots.length > 0 ? { duck: { track: musicTrack.slot, against: voiceSlots } } : {};
10133
10242
  nodes.push({
10134
10243
  id: "audio_mix",
10135
10244
  type: "audio_timeline",
@@ -10140,7 +10249,8 @@ function scaffoldVideoCanvas(input, elementsInput, opts) {
10140
10249
  start_s: t.start_s,
10141
10250
  ...t.gain_db !== void 0 ? { gain_db: t.gain_db } : {}
10142
10251
  })),
10143
- total_ms: Math.round((blueprint.source?.duration_s ?? lastSceneEnd(blueprint)) * 1e3)
10252
+ total_ms: Math.round((blueprint.source?.duration_s ?? lastSceneEnd(blueprint)) * 1e3),
10253
+ ...duck
10144
10254
  }
10145
10255
  });
10146
10256
  nodes.push({
@@ -10432,6 +10542,7 @@ function resolveShippedCanvasDir(name, startDir, exists = existsSync3, maxDepth
10432
10542
 
10433
10543
  // src/commands/canvas/scaffold-video.ts
10434
10544
  var SHIPPED_COMPOSITION_DIR = resolveShippedCanvasDir("video-overlay-composition", import.meta.dirname);
10545
+ var SHIPPED_CAPTIONS_DIR = resolveShippedCanvasDir("tiktok-captions-composition", import.meta.dirname);
10435
10546
  function resolveModel2(kind, preferred) {
10436
10547
  const ids = Object.keys(MODEL_REGISTRY[kind]);
10437
10548
  return ids.includes(preferred) ? preferred : ids[0] ?? preferred;
@@ -10463,6 +10574,24 @@ async function loadAssetText2(ref, label) {
10463
10574
  }
10464
10575
  throw new Error(`${label}: output had no readable path or url`);
10465
10576
  }
10577
/**
 * Load the deconstruct transcript text without ever failing the caller.
 *
 * @param {unknown} ref - Asset reference for the transcript; may be absent.
 * @returns {Promise<string | undefined>} The text from loadAssetText2, or
 *   undefined when there is no reference or loading throws.
 */
async function loadTranscriptBestEffort(ref) {
  let transcriptText;
  if (ref) {
    try {
      transcriptText = await loadAssetText2(ref, "deconstruct transcript");
    } catch {
      // Deliberately swallowed: a load failure is reported as "no transcript".
      transcriptText = void 0;
    }
  }
  return transcriptText;
}
10585
/**
 * Stage the caption assets next to the scaffolded canvas.
 *
 * Writes the trimmed transcript (plus a trailing newline) to
 * `<outDir>/transcript.json` and recursively copies SHIPPED_CAPTIONS_DIR to
 * `<outDir>/tiktok-captions-composition`.
 *
 * @param {string} outDir - Output directory of the scaffold.
 * @param {string | undefined} transcript - Transcript text.
 * @returns {Promise<{ compositionPath?: string, transcriptPath?: string }>}
 *   Both staged paths, or an empty object when the transcript is missing,
 *   blank, or exactly "[]".
 */
async function stageCaptions(outDir, transcript) {
  const body = transcript?.trim();
  const hasCues = Boolean(body) && body !== "[]";
  if (!hasCues) return {};
  const transcriptPath = path5.join(outDir, "transcript.json");
  const compositionPath = path5.join(outDir, "tiktok-captions-composition");
  await writeFile2(transcriptPath, `${body}\n`, "utf8");
  await cp(SHIPPED_CAPTIONS_DIR, compositionPath, { recursive: true });
  return { compositionPath, transcriptPath };
}
10466
10595
  function parseElements2(raw) {
10467
10596
  const parsed = JSON.parse(raw);
10468
10597
  if (Array.isArray(parsed)) return parsed;
@@ -10568,10 +10697,12 @@ async function runAnalysisPasses(deconstructCanvas, selectModel) {
10568
10697
  }
10569
10698
  };
10570
10699
  let blueprint;
10700
+ let transcript;
10571
10701
  try {
10572
10702
  const r1 = await engine.run(deconstructCanvas, {});
10573
10703
  addCredits(r1.stats);
10574
10704
  blueprint = JSON.parse(await loadAssetText2(r1.outputs_by_node.deconstruct?.analysis, "deconstruct output"));
10705
+ transcript = await loadTranscriptBestEffort(r1.outputs_by_node.deconstruct?.transcript);
10575
10706
  } catch (e) {
10576
10707
  if (e instanceof ValidationError) return fail2("validation", JSON.stringify(e.issues));
10577
10708
  if (e instanceof SyntaxError) return fail2("read_outputs", e.message);
@@ -10582,7 +10713,7 @@ async function runAnalysisPasses(deconstructCanvas, selectModel) {
10582
10713
  const r2 = await engine.run(buildSelectCanvas(selectModel, slimJson), {});
10583
10714
  addCredits(r2.stats);
10584
10715
  const elements = parseElements2(await loadAssetText2(r2.outputs_by_node.select?.text, "selection output"));
10585
- return { blueprint, elements, creditsSpent: sawCredits ? credits : void 0 };
10716
+ return { blueprint, elements, transcript, creditsSpent: sawCredits ? credits : void 0 };
10586
10717
  } catch (e) {
10587
10718
  if (e instanceof ValidationError) return fail2("validation", JSON.stringify(e.issues));
10588
10719
  if (e instanceof SyntaxError) return fail2("read_outputs", e.message);
@@ -10602,14 +10733,10 @@ var scaffoldVideoCommand = defineCommand76({
10602
10733
  type: "boolean",
10603
10734
  description: "Give silent b-roll scenes native diegetic ambient mixed deep under the music bed (off by default)"
10604
10735
  },
10605
- "actor-sheets": {
10606
- type: "boolean",
10607
- description: "Lock a recast person/animal that recurs across \u22652 scenes to ONE turnaround sheet grounding every frame"
10608
- },
10609
10736
  "max-scenes": { type: "string", description: "Cap the number of scenes the deconstruct emits" },
10610
10737
  "shot-threshold": {
10611
10738
  type: "string",
10612
- description: "PySceneDetect content threshold (default 18; lower = more/softer cuts, higher = fewer)"
10739
+ description: "PySceneDetect content threshold. Default is adaptive (18, auto re-checked at 27 when a continuous shot looks over-segmented); pinning a value disables the re-check. Lower = more/softer cuts, higher = fewer."
10613
10740
  },
10614
10741
  language: { type: "string", description: "Transcript/dialogue language hint (e.g. fr, en)" },
10615
10742
  focus: { type: "string", description: "Known provenance/emphasis to ground the deconstruct" },
@@ -10641,7 +10768,7 @@ var scaffoldVideoCommand = defineCommand76({
10641
10768
  focus: args.focus ? String(args.focus) : void 0,
10642
10769
  shotCuts
10643
10770
  });
10644
- const { blueprint, elements, creditsSpent } = await runAnalysisPasses(deconstructCanvas, selectModel);
10771
+ const { blueprint, elements, transcript, creditsSpent } = await runAnalysisPasses(deconstructCanvas, selectModel);
10645
10772
  await mkdir(outDir, { recursive: true });
10646
10773
  const annotated = annotateBlueprintWithElements(blueprint, elements);
10647
10774
  await writeFile2(blueprintPath, `${JSON.stringify(annotated, null, 2)}
@@ -10659,14 +10786,16 @@ var scaffoldVideoCommand = defineCommand76({
10659
10786
  );
10660
10787
  }
10661
10788
  await writeFile2(indexPath, injected, "utf8");
10789
+ const captions = await stageCaptions(outDir, transcript);
10662
10790
  const opts = {
10663
10791
  imageModel,
10664
10792
  videoModel,
10665
10793
  overlayCompositionPath: compositionDest,
10794
+ captionsCompositionPath: captions.compositionPath,
10795
+ transcriptPath: captions.transcriptPath,
10666
10796
  blueprintPath,
10667
10797
  frames,
10668
- ambient: Boolean(args.ambient),
10669
- actorSheets: Boolean(args["actor-sheets"])
10798
+ ambient: Boolean(args.ambient)
10670
10799
  };
10671
10800
  let canvas;
10672
10801
  let report;