@koda-sl/baker-cli 0.90.1 → 0.91.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/canvas/video-overlay-composition/index.html +31 -5
- package/dist/{chunk-2E4H2GIJ.js → chunk-LMVDA3EZ.js} +59 -5
- package/dist/chunk-LMVDA3EZ.js.map +1 -0
- package/dist/cli.js +166 -37
- package/dist/cli.js.map +1 -1
- package/dist/engine/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-2E4H2GIJ.js.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
defaultRegistry,
|
|
10
10
|
generateCatalog,
|
|
11
11
|
validateCanvasDeep
|
|
12
|
-
} from "./chunk-2E4H2GIJ.js";
|
|
12
|
+
} from "./chunk-LMVDA3EZ.js";
|
|
13
13
|
|
|
14
14
|
// src/cli.ts
|
|
15
15
|
import { defineCommand as defineCommand141, runMain } from "citty";
|
|
@@ -8287,6 +8287,24 @@ import { promisify as promisify2 } from "util";
|
|
|
8287
8287
|
var execFileAsync2 = promisify2(execFile2);
|
|
8288
8288
|
var PYSCENEDETECT_THRESHOLD = 18;
|
|
8289
8289
|
var PYSCENEDETECT_MIN_SCENE_LEN_S = 0.25;
|
|
8290
|
+
var PYSCENEDETECT_RECHECK_THRESHOLD = 27;
|
|
8291
|
+
var PYSCENEDETECT_RECHECK_MIN_SCENE_LEN_S = 0.6;
|
|
8292
|
+
function isLikelyOverSegmented(cuts, opts = {}) {
|
|
8293
|
+
const minCuts = opts.minCuts ?? 6;
|
|
8294
|
+
const maxMedianGap = opts.medianGapS ?? 2;
|
|
8295
|
+
const sorted = [...cuts].filter((c) => Number.isFinite(c) && c > 0).sort((a, b) => a - b);
|
|
8296
|
+
if (sorted.length < minCuts) return false;
|
|
8297
|
+
const gaps = [];
|
|
8298
|
+
let prev = 0;
|
|
8299
|
+
for (const c of sorted) {
|
|
8300
|
+
gaps.push(c - prev);
|
|
8301
|
+
prev = c;
|
|
8302
|
+
}
|
|
8303
|
+
gaps.sort((a, b) => a - b);
|
|
8304
|
+
const mid = Math.floor(gaps.length / 2);
|
|
8305
|
+
const median = gaps.length % 2 ? gaps[mid] : (gaps[mid - 1] + gaps[mid]) / 2;
|
|
8306
|
+
return median < maxMedianGap;
|
|
8307
|
+
}
|
|
8290
8308
|
function timecodeToSeconds(tc) {
|
|
8291
8309
|
const m = tc.trim().match(/^(\d+):(\d{1,2}):(\d{1,2}(?:\.\d+)?)$/);
|
|
8292
8310
|
if (!m) return null;
|
|
@@ -8306,9 +8324,7 @@ function parsePySceneDetectCsvCuts(csv) {
|
|
|
8306
8324
|
}
|
|
8307
8325
|
return [...new Set(cuts)].sort((a, b) => a - b);
|
|
8308
8326
|
}
|
|
8309
|
-
async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
|
|
8310
|
-
const threshold = opts.threshold ?? PYSCENEDETECT_THRESHOLD;
|
|
8311
|
-
const minSceneLenS = opts.minSceneLenS ?? PYSCENEDETECT_MIN_SCENE_LEN_S;
|
|
8327
|
+
async function runSceneDetectOnce(filePath, threshold, minSceneLenS, timeoutMs) {
|
|
8312
8328
|
const outDir = await mkdtemp(join2(tmpdir(), "baker-scenedetect-"));
|
|
8313
8329
|
try {
|
|
8314
8330
|
await execFileAsync2(
|
|
@@ -8326,7 +8342,7 @@ async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
|
|
|
8326
8342
|
"list-scenes",
|
|
8327
8343
|
"--quiet"
|
|
8328
8344
|
],
|
|
8329
|
-
{ encoding: "utf-8", maxBuffer: 32 * 1024 * 1024, timeout:
|
|
8345
|
+
{ encoding: "utf-8", maxBuffer: 32 * 1024 * 1024, timeout: timeoutMs }
|
|
8330
8346
|
);
|
|
8331
8347
|
const csvName = (await readdir2(outDir)).find((f) => f.toLowerCase().endsWith(".csv"));
|
|
8332
8348
|
if (!csvName) return [];
|
|
@@ -8335,6 +8351,22 @@ async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
|
|
|
8335
8351
|
await rm(outDir, { recursive: true, force: true });
|
|
8336
8352
|
}
|
|
8337
8353
|
}
|
|
8354
|
+
async function detectSceneCutsPySceneDetect(filePath, opts = {}) {
|
|
8355
|
+
const pinned = opts.threshold !== void 0;
|
|
8356
|
+
const threshold = opts.threshold ?? PYSCENEDETECT_THRESHOLD;
|
|
8357
|
+
const minSceneLenS = opts.minSceneLenS ?? PYSCENEDETECT_MIN_SCENE_LEN_S;
|
|
8358
|
+
const timeoutMs = opts.timeout_ms ?? 12e4;
|
|
8359
|
+
const cuts = await runSceneDetectOnce(filePath, threshold, minSceneLenS, timeoutMs);
|
|
8360
|
+
if (!pinned && isLikelyOverSegmented(cuts)) {
|
|
8361
|
+
return await runSceneDetectOnce(
|
|
8362
|
+
filePath,
|
|
8363
|
+
PYSCENEDETECT_RECHECK_THRESHOLD,
|
|
8364
|
+
PYSCENEDETECT_RECHECK_MIN_SCENE_LEN_S,
|
|
8365
|
+
timeoutMs
|
|
8366
|
+
);
|
|
8367
|
+
}
|
|
8368
|
+
return cuts;
|
|
8369
|
+
}
|
|
8338
8370
|
|
|
8339
8371
|
// src/engine/scaffold/video.ts
|
|
8340
8372
|
import { z as z3 } from "zod";
|
|
@@ -8438,7 +8470,7 @@ var FIXED_TTS_MODEL = "elevenlabs/eleven_v3";
|
|
|
8438
8470
|
var FIXED_SFX_MODEL = "elevenlabs/eleven_text_to_sound_v2";
|
|
8439
8471
|
var FIXED_MUSIC_MODEL = "elevenlabs/music-v1";
|
|
8440
8472
|
var FIXED_VOICE_CONVERT_MODEL = "elevenlabs/eleven_multilingual_sts_v2";
|
|
8441
|
-
var MUSIC_BED_GAIN_DB = -
|
|
8473
|
+
var MUSIC_BED_GAIN_DB = -20;
|
|
8442
8474
|
var AMBIENT_BED_GAIN_DB = -20;
|
|
8443
8475
|
var TRANSITION_DEFAULT_S = 0.4;
|
|
8444
8476
|
var XFADE_BY_TYPE = {
|
|
@@ -8821,7 +8853,16 @@ function todoPath2(el, label) {
|
|
|
8821
8853
|
return `[TODO: drop one real source image for ${label} (${el.type})${desc}${expr} \u2014 reused across every frame it appears in${fresh}${same}]`;
|
|
8822
8854
|
}
|
|
8823
8855
|
function buildElementSlots(elements) {
|
|
8824
|
-
const usedIds = /* @__PURE__ */ new Set([
|
|
8856
|
+
const usedIds = /* @__PURE__ */ new Set([
|
|
8857
|
+
"prompt",
|
|
8858
|
+
"spine",
|
|
8859
|
+
"overlaid",
|
|
8860
|
+
"captions",
|
|
8861
|
+
"captions_transcript",
|
|
8862
|
+
"audio_mix",
|
|
8863
|
+
"final",
|
|
8864
|
+
"music_bed"
|
|
8865
|
+
]);
|
|
8825
8866
|
const slots = [];
|
|
8826
8867
|
assignElementLabels2(elements).forEach(({ el, label }, i) => {
|
|
8827
8868
|
let id = sanitizeId2(`el_${label}`, `el_${i}`);
|
|
@@ -8892,6 +8933,9 @@ function buildFramePrompt(edge, sceneIndex, framePrompt, present, hasAnchor, mod
|
|
|
8892
8933
|
"a studio) contains any text or graphics, DO NOT reproduce them \u2014 render the subject/scene",
|
|
8893
8934
|
"only, leaving the regions where overlays will sit clean. Imperfect/garbled letterforms or",
|
|
8894
8935
|
"stray icons are the worst outcome; leave those areas blank.",
|
|
8936
|
+
"A SCREEN/UI surface \u2014 an app, website, chat, dashboard, or phone display \u2014 is NEVER",
|
|
8937
|
+
"rendered here: leave any phone/screen OFF or blank-screened. The real interface is",
|
|
8938
|
+
"composited later as a screenshot or a brand HTML block, never AI-generated.",
|
|
8895
8939
|
"",
|
|
8896
8940
|
"FRAMING \u2014 ONE UNCUT FRAME:",
|
|
8897
8941
|
"Render ONE single uncut photographic frame: NO split screen, NO panels, NO dividing line,",
|
|
@@ -8959,8 +9003,13 @@ function ingestFrameRef(url, edge, ctx, nodes) {
|
|
|
8959
9003
|
function buildFrameRef(edge, url, framePrompt, present, ctx, nodes) {
|
|
8960
9004
|
const tag = ctx.tag ?? "";
|
|
8961
9005
|
if (ctx.reuse && url) return ingestFrameRef(url, edge, ctx, nodes);
|
|
8962
|
-
const
|
|
8963
|
-
|
|
9006
|
+
const hasPersonOrAnimal = present.some((s) => {
|
|
9007
|
+
const t = s.type.toLowerCase();
|
|
9008
|
+
return t === "person" || t === "animal";
|
|
9009
|
+
});
|
|
9010
|
+
const useOriginalAnchor = Boolean(url) && !hasPersonOrAnimal;
|
|
9011
|
+
const hasOriginal = useOriginalAnchor;
|
|
9012
|
+
const originalRef = useOriginalAnchor && url ? ingestFrameRef(url, edge, ctx, nodes) : void 0;
|
|
8964
9013
|
const reference = [...present.map((s) => s.ref), ...originalRef ? [originalRef] : []];
|
|
8965
9014
|
const genParams = {
|
|
8966
9015
|
model: ctx.imageModel,
|
|
@@ -9151,12 +9200,21 @@ function emitSceneClip(i, scene, present, mode, nativeTurn, ambientBroll, frames
|
|
|
9151
9200
|
return { ref: `$ref:s${i}${tag}_clip_trim.video`, scene_s: lengths.dur, out };
|
|
9152
9201
|
}
|
|
9153
9202
|
var COMPOSITE_LAYOUTS = /* @__PURE__ */ new Set(["split_screen", "pip", "keyed_overlay"]);
|
|
9203
|
+
var UI_SURFACE_RE = /\b(?:app|ui|web ?site|web ?page|website|browser|chat|interface|mock-?up|in[- ]?app|dashboard|app screen|phone screen|screen[- ]?(?:recording|capture|grab|share))\b/i;
|
|
9204
|
+
function regionIsUiSurface(r) {
|
|
9205
|
+
return UI_SURFACE_RE.test(`${r.panel ?? ""} ${r.summary ?? ""} ${r.frame_prompt ?? ""}`);
|
|
9206
|
+
}
|
|
9207
|
+
function isUiOnlyComposite(regions) {
|
|
9208
|
+
const ui = regions.filter(regionIsUiSurface).length;
|
|
9209
|
+
return ui >= 1 && regions.length - ui <= 1;
|
|
9210
|
+
}
|
|
9154
9211
|
function layeredComposition(scene) {
|
|
9155
9212
|
const comp = scene.composition;
|
|
9156
9213
|
const layout = (comp?.layout ?? "").toLowerCase();
|
|
9157
9214
|
if (!COMPOSITE_LAYOUTS.has(layout)) return null;
|
|
9158
9215
|
const regions = (comp?.regions ?? []).filter((r) => Boolean(r) && typeof r === "object");
|
|
9159
9216
|
if (regions.length < 2) return null;
|
|
9217
|
+
if (isUiOnlyComposite(regions)) return null;
|
|
9160
9218
|
return { layout, regions, comp: comp ?? {} };
|
|
9161
9219
|
}
|
|
9162
9220
|
function splitAxisOf(comp, regions) {
|
|
@@ -9320,24 +9378,15 @@ function emitFlashHold(i, scene, slots, ctx, lengths, out, ar, nodes, clips) {
|
|
|
9320
9378
|
});
|
|
9321
9379
|
clips.push({ ref: `$ref:s${i}_clip.video`, scene_s: lengths.dur, out });
|
|
9322
9380
|
}
|
|
9323
|
-
function
|
|
9324
|
-
const lines = blueprint.scenes.flatMap((s) => (s.dialogue ?? []).map((d) => d.line?.trim())).filter((l) => Boolean(l));
|
|
9325
|
-
const script = lines.join(" ").slice(0, 500);
|
|
9381
|
+
function musicArcDigest(blueprint) {
|
|
9326
9382
|
const roles = blueprint.scenes.map((s) => s.narrative_role).filter((r) => Boolean(r));
|
|
9327
9383
|
const arc = roles.length > 0 ? roles.join(" \u2192 ") : "";
|
|
9328
|
-
|
|
9329
|
-
if (script) {
|
|
9330
|
-
parts.push(
|
|
9331
|
-
`Ad script (the bed must SUPPORT these words \u2014 leave room for the voice, swell on the payoff): "${script}"`
|
|
9332
|
-
);
|
|
9333
|
-
}
|
|
9334
|
-
if (arc) parts.push(`Emotional arc across scenes: ${arc}. Shape the bed's energy to this arc.`);
|
|
9335
|
-
return parts.length > 0 ? `
|
|
9384
|
+
return arc ? `
|
|
9336
9385
|
|
|
9337
|
-
${
|
|
9386
|
+
Emotional arc across scenes: ${arc}. Shape the bed's energy to this arc, swelling on the payoff. Purely instrumental \u2014 no vocals, no singing, no spoken words.` : "";
|
|
9338
9387
|
}
|
|
9339
9388
|
function musicBedPrompt(blueprint, musicPrompt) {
|
|
9340
|
-
const digest =
|
|
9389
|
+
const digest = musicArcDigest(blueprint);
|
|
9341
9390
|
const track2 = blueprint.global?.music?.identified_track;
|
|
9342
9391
|
const title = track2?.title?.trim();
|
|
9343
9392
|
const vibe = title ? `
|
|
@@ -9881,13 +9930,21 @@ function buildSfxMusic(blueprint, nodes) {
|
|
|
9881
9930
|
const musicPrompt = blueprint.global?.music?.music_prompt;
|
|
9882
9931
|
if (musicPrompt) {
|
|
9883
9932
|
const total = blueprint.source?.duration_s ?? lastSceneEnd(blueprint);
|
|
9884
|
-
const
|
|
9933
|
+
const hookEnd = blueprint.scenes[0]?.end_s ?? 0;
|
|
9934
|
+
const startAt = Math.min(Math.max(blueprint.global?.music?.starts_at_s ?? 0, hookEnd), Math.max(total - 0.5, 0));
|
|
9885
9935
|
const totalMs = Math.round((total - startAt) * 1e3);
|
|
9886
9936
|
const musicMs = Math.min(Math.max(totalMs, 3e3), ELEVENLABS_MAX_MUSIC_LENGTH_MS);
|
|
9887
9937
|
nodes.push({
|
|
9888
9938
|
id: "music_bed",
|
|
9889
9939
|
type: "music",
|
|
9890
|
-
|
|
9940
|
+
// force_instrumental: the model is vocal-capable; without this it can SING the
|
|
9941
|
+
// mood (and feeding it the script made it sing the ad). The voice owns the words.
|
|
9942
|
+
params: {
|
|
9943
|
+
model: FIXED_MUSIC_MODEL,
|
|
9944
|
+
prompt: musicBedPrompt(blueprint, musicPrompt),
|
|
9945
|
+
music_length_ms: musicMs,
|
|
9946
|
+
force_instrumental: true
|
|
9947
|
+
}
|
|
9891
9948
|
});
|
|
9892
9949
|
tracks.push({
|
|
9893
9950
|
slot: "music",
|
|
@@ -9931,9 +9988,20 @@ function normalizeAnim(animation) {
|
|
|
9931
9988
|
const mapped = animation === "slide" ? "slide_up" : animation;
|
|
9932
9989
|
return SUPPORTED_ANIMS.has(mapped) ? mapped : void 0;
|
|
9933
9990
|
}
|
|
9991
|
+
var FACE_ZONE_POSITIONS = /* @__PURE__ */ new Set([
|
|
9992
|
+
"center",
|
|
9993
|
+
"centre",
|
|
9994
|
+
"mid-center",
|
|
9995
|
+
"mid-centre",
|
|
9996
|
+
"middle-center",
|
|
9997
|
+
"center-center",
|
|
9998
|
+
"mid",
|
|
9999
|
+
"middle"
|
|
10000
|
+
]);
|
|
9934
10001
|
function positionClass(position) {
|
|
9935
10002
|
const p = (position ?? "bottom_center").toLowerCase().replace(/[^a-z]+/g, "-");
|
|
9936
|
-
|
|
10003
|
+
const safe = FACE_ZONE_POSITIONS.has(p) ? "bottom-center" : p;
|
|
10004
|
+
return `pos-${safe}`;
|
|
9937
10005
|
}
|
|
9938
10006
|
function collectCaptions(blueprint) {
|
|
9939
10007
|
return blueprint.scenes.flatMap((scene) => {
|
|
@@ -10004,6 +10072,26 @@ function floatingStub(fe, sceneStart) {
|
|
|
10004
10072
|
`<img class="ov ${positionClass(fe.position)}" src="your-${slug}.png" data-start="${at}" data-dur="${dur}" alt="" /> -->`
|
|
10005
10073
|
].join("\n");
|
|
10006
10074
|
}
|
|
10075
|
+
function uiPipStub(scene) {
|
|
10076
|
+
const comp = scene.composition;
|
|
10077
|
+
const layout = (comp?.layout ?? "").toLowerCase();
|
|
10078
|
+
if (!COMPOSITE_LAYOUTS.has(layout)) return "";
|
|
10079
|
+
const regions = (comp?.regions ?? []).filter((r) => Boolean(r) && typeof r === "object");
|
|
10080
|
+
if (regions.length < 2 || !isUiOnlyComposite(regions)) return "";
|
|
10081
|
+
const ui = regions.find(regionIsUiSurface);
|
|
10082
|
+
const at = scene.start_s ?? 0;
|
|
10083
|
+
const dur = Math.max(0.5, Math.round(((scene.end_s ?? at + 2.5) - at) * 100) / 100);
|
|
10084
|
+
const label = commentSafe(ui?.summary || ui?.frame_prompt || ui?.panel || "the app screen");
|
|
10085
|
+
return [
|
|
10086
|
+
`<!-- PHONE UI @ ${at}s for ${dur}s \u2014 the app/site screen this scene shows: ${label}.`,
|
|
10087
|
+
" Build it as a REAL surface, NEVER AI: capture the live page \u2014",
|
|
10088
|
+
" baker images screenshot https://<brand-domain>/<path> (image-library skill)",
|
|
10089
|
+
" \u2014 OR hand-build a brand-accurate HTML screen; then frame it in a phone mockup:",
|
|
10090
|
+
" npx hyperframes add phone-scroll (writes compositions/phone-scroll.html)",
|
|
10091
|
+
" drop the screenshot as screenshot.png in this dir and nest it as a PIP clip:",
|
|
10092
|
+
` <div data-composition-src="compositions/phone-scroll.html" data-start="${at}" data-duration="${dur}" data-track-index="2" data-width="1080" data-height="1920"></div> -->`
|
|
10093
|
+
].join("\n");
|
|
10094
|
+
}
|
|
10007
10095
|
function buildOverlayHtml(input) {
|
|
10008
10096
|
const blueprint = VideoBlueprint.parse(input);
|
|
10009
10097
|
const blocks = [
|
|
@@ -10029,6 +10117,8 @@ function buildOverlayHtml(input) {
|
|
|
10029
10117
|
const sceneStart = scene.start_s ?? 0;
|
|
10030
10118
|
const floats = z3.array(FloatingElement).safeParse(scene.floating_elements ?? []);
|
|
10031
10119
|
const parts = (floats.success ? floats.data.map((fe) => floatingStub(fe, sceneStart)) : []).filter(Boolean);
|
|
10120
|
+
const pip = uiPipStub(scene);
|
|
10121
|
+
if (pip) parts.push(pip);
|
|
10032
10122
|
if (parts.length > 0) blocks.push(parts.join("\n"));
|
|
10033
10123
|
}
|
|
10034
10124
|
return blocks.join("\n\n");
|
|
@@ -10110,13 +10200,14 @@ function scaffoldVideoCanvas(input, elementsInput, opts) {
|
|
|
10110
10200
|
params: { source: "path", path: todoPath2(elements[i], slot.label), expect: "image" }
|
|
10111
10201
|
});
|
|
10112
10202
|
});
|
|
10113
|
-
|
|
10203
|
+
applyActorSheets(slots, nodes);
|
|
10114
10204
|
const { clips, voTracks, vo_segments, talking_scenes } = buildTimeline(blueprint, slots, opts, nodes);
|
|
10115
10205
|
let videoRef = buildSpine(clips, nodes);
|
|
10116
10206
|
let videoNode = "spine";
|
|
10117
10207
|
const overlays = blueprint.scenes.flatMap((s) => s.overlays ?? []);
|
|
10118
10208
|
const floating = blueprint.scenes.flatMap((s) => s.floating_elements ?? []);
|
|
10119
|
-
|
|
10209
|
+
const hasUiPip = blueprint.scenes.some((s) => uiPipStub(s) !== "");
|
|
10210
|
+
if (overlays.length > 0 || floating.length > 0 || hasUiPip) {
|
|
10120
10211
|
nodes.push({
|
|
10121
10212
|
id: "overlaid",
|
|
10122
10213
|
type: "hyperframe_render",
|
|
@@ -10126,10 +10217,28 @@ function scaffoldVideoCanvas(input, elementsInput, opts) {
|
|
|
10126
10217
|
videoRef = "$ref:overlaid.video";
|
|
10127
10218
|
videoNode = "overlaid";
|
|
10128
10219
|
}
|
|
10220
|
+
if (opts.captionsCompositionPath && opts.transcriptPath) {
|
|
10221
|
+
nodes.push({
|
|
10222
|
+
id: "captions_transcript",
|
|
10223
|
+
type: "ingest",
|
|
10224
|
+
params: { source: "path", path: opts.transcriptPath, expect: "json" }
|
|
10225
|
+
});
|
|
10226
|
+
nodes.push({
|
|
10227
|
+
id: "captions",
|
|
10228
|
+
type: "hyperframe_render",
|
|
10229
|
+
inputs: { background: videoRef, transcript: "$ref:captions_transcript.asset" },
|
|
10230
|
+
params: { composition: opts.captionsCompositionPath }
|
|
10231
|
+
});
|
|
10232
|
+
videoRef = "$ref:captions.video";
|
|
10233
|
+
videoNode = "captions";
|
|
10234
|
+
}
|
|
10129
10235
|
const tracks = [...voTracks, ...buildSfxMusic(blueprint, nodes)];
|
|
10130
10236
|
if (tracks.length > 0) {
|
|
10131
10237
|
const mixInputs = {};
|
|
10132
10238
|
for (const t of tracks) mixInputs[t.slot] = t.ref;
|
|
10239
|
+
const musicTrack = tracks.find((t) => t.kind === "music");
|
|
10240
|
+
const voiceSlots = tracks.filter((t) => t.kind === "vo").map((t) => t.slot);
|
|
10241
|
+
const duck = musicTrack && voiceSlots.length > 0 ? { duck: { track: musicTrack.slot, against: voiceSlots } } : {};
|
|
10133
10242
|
nodes.push({
|
|
10134
10243
|
id: "audio_mix",
|
|
10135
10244
|
type: "audio_timeline",
|
|
@@ -10140,7 +10249,8 @@ function scaffoldVideoCanvas(input, elementsInput, opts) {
|
|
|
10140
10249
|
start_s: t.start_s,
|
|
10141
10250
|
...t.gain_db !== void 0 ? { gain_db: t.gain_db } : {}
|
|
10142
10251
|
})),
|
|
10143
|
-
total_ms: Math.round((blueprint.source?.duration_s ?? lastSceneEnd(blueprint)) * 1e3)
|
|
10252
|
+
total_ms: Math.round((blueprint.source?.duration_s ?? lastSceneEnd(blueprint)) * 1e3),
|
|
10253
|
+
...duck
|
|
10144
10254
|
}
|
|
10145
10255
|
});
|
|
10146
10256
|
nodes.push({
|
|
@@ -10432,6 +10542,7 @@ function resolveShippedCanvasDir(name, startDir, exists = existsSync3, maxDepth
|
|
|
10432
10542
|
|
|
10433
10543
|
// src/commands/canvas/scaffold-video.ts
|
|
10434
10544
|
var SHIPPED_COMPOSITION_DIR = resolveShippedCanvasDir("video-overlay-composition", import.meta.dirname);
|
|
10545
|
+
var SHIPPED_CAPTIONS_DIR = resolveShippedCanvasDir("tiktok-captions-composition", import.meta.dirname);
|
|
10435
10546
|
function resolveModel2(kind, preferred) {
|
|
10436
10547
|
const ids = Object.keys(MODEL_REGISTRY[kind]);
|
|
10437
10548
|
return ids.includes(preferred) ? preferred : ids[0] ?? preferred;
|
|
@@ -10463,6 +10574,24 @@ async function loadAssetText2(ref, label) {
|
|
|
10463
10574
|
}
|
|
10464
10575
|
throw new Error(`${label}: output had no readable path or url`);
|
|
10465
10576
|
}
|
|
10577
|
+
async function loadTranscriptBestEffort(ref) {
|
|
10578
|
+
if (!ref) return void 0;
|
|
10579
|
+
try {
|
|
10580
|
+
return await loadAssetText2(ref, "deconstruct transcript");
|
|
10581
|
+
} catch {
|
|
10582
|
+
return void 0;
|
|
10583
|
+
}
|
|
10584
|
+
}
|
|
10585
|
+
async function stageCaptions(outDir, transcript) {
|
|
10586
|
+
const text = transcript?.trim();
|
|
10587
|
+
if (!text || text === "[]") return {};
|
|
10588
|
+
const transcriptPath = path5.join(outDir, "transcript.json");
|
|
10589
|
+
await writeFile2(transcriptPath, `${text}
|
|
10590
|
+
`, "utf8");
|
|
10591
|
+
const compositionPath = path5.join(outDir, "tiktok-captions-composition");
|
|
10592
|
+
await cp(SHIPPED_CAPTIONS_DIR, compositionPath, { recursive: true });
|
|
10593
|
+
return { compositionPath, transcriptPath };
|
|
10594
|
+
}
|
|
10466
10595
|
function parseElements2(raw) {
|
|
10467
10596
|
const parsed = JSON.parse(raw);
|
|
10468
10597
|
if (Array.isArray(parsed)) return parsed;
|
|
@@ -10568,10 +10697,12 @@ async function runAnalysisPasses(deconstructCanvas, selectModel) {
|
|
|
10568
10697
|
}
|
|
10569
10698
|
};
|
|
10570
10699
|
let blueprint;
|
|
10700
|
+
let transcript;
|
|
10571
10701
|
try {
|
|
10572
10702
|
const r1 = await engine.run(deconstructCanvas, {});
|
|
10573
10703
|
addCredits(r1.stats);
|
|
10574
10704
|
blueprint = JSON.parse(await loadAssetText2(r1.outputs_by_node.deconstruct?.analysis, "deconstruct output"));
|
|
10705
|
+
transcript = await loadTranscriptBestEffort(r1.outputs_by_node.deconstruct?.transcript);
|
|
10575
10706
|
} catch (e) {
|
|
10576
10707
|
if (e instanceof ValidationError) return fail2("validation", JSON.stringify(e.issues));
|
|
10577
10708
|
if (e instanceof SyntaxError) return fail2("read_outputs", e.message);
|
|
@@ -10582,7 +10713,7 @@ async function runAnalysisPasses(deconstructCanvas, selectModel) {
|
|
|
10582
10713
|
const r2 = await engine.run(buildSelectCanvas(selectModel, slimJson), {});
|
|
10583
10714
|
addCredits(r2.stats);
|
|
10584
10715
|
const elements = parseElements2(await loadAssetText2(r2.outputs_by_node.select?.text, "selection output"));
|
|
10585
|
-
return { blueprint, elements, creditsSpent: sawCredits ? credits : void 0 };
|
|
10716
|
+
return { blueprint, elements, transcript, creditsSpent: sawCredits ? credits : void 0 };
|
|
10586
10717
|
} catch (e) {
|
|
10587
10718
|
if (e instanceof ValidationError) return fail2("validation", JSON.stringify(e.issues));
|
|
10588
10719
|
if (e instanceof SyntaxError) return fail2("read_outputs", e.message);
|
|
@@ -10602,14 +10733,10 @@ var scaffoldVideoCommand = defineCommand76({
|
|
|
10602
10733
|
type: "boolean",
|
|
10603
10734
|
description: "Give silent b-roll scenes native diegetic ambient mixed deep under the music bed (off by default)"
|
|
10604
10735
|
},
|
|
10605
|
-
"actor-sheets": {
|
|
10606
|
-
type: "boolean",
|
|
10607
|
-
description: "Lock a recast person/animal that recurs across \u22652 scenes to ONE turnaround sheet grounding every frame"
|
|
10608
|
-
},
|
|
10609
10736
|
"max-scenes": { type: "string", description: "Cap the number of scenes the deconstruct emits" },
|
|
10610
10737
|
"shot-threshold": {
|
|
10611
10738
|
type: "string",
|
|
10612
|
-
description: "PySceneDetect content threshold (
|
|
10739
|
+
description: "PySceneDetect content threshold. Default is adaptive (18, auto re-checked at 27 when a continuous shot looks over-segmented); pinning a value disables the re-check. Lower = more/softer cuts, higher = fewer."
|
|
10613
10740
|
},
|
|
10614
10741
|
language: { type: "string", description: "Transcript/dialogue language hint (e.g. fr, en)" },
|
|
10615
10742
|
focus: { type: "string", description: "Known provenance/emphasis to ground the deconstruct" },
|
|
@@ -10641,7 +10768,7 @@ var scaffoldVideoCommand = defineCommand76({
|
|
|
10641
10768
|
focus: args.focus ? String(args.focus) : void 0,
|
|
10642
10769
|
shotCuts
|
|
10643
10770
|
});
|
|
10644
|
-
const { blueprint, elements, creditsSpent } = await runAnalysisPasses(deconstructCanvas, selectModel);
|
|
10771
|
+
const { blueprint, elements, transcript, creditsSpent } = await runAnalysisPasses(deconstructCanvas, selectModel);
|
|
10645
10772
|
await mkdir(outDir, { recursive: true });
|
|
10646
10773
|
const annotated = annotateBlueprintWithElements(blueprint, elements);
|
|
10647
10774
|
await writeFile2(blueprintPath, `${JSON.stringify(annotated, null, 2)}
|
|
@@ -10659,14 +10786,16 @@ var scaffoldVideoCommand = defineCommand76({
|
|
|
10659
10786
|
);
|
|
10660
10787
|
}
|
|
10661
10788
|
await writeFile2(indexPath, injected, "utf8");
|
|
10789
|
+
const captions = await stageCaptions(outDir, transcript);
|
|
10662
10790
|
const opts = {
|
|
10663
10791
|
imageModel,
|
|
10664
10792
|
videoModel,
|
|
10665
10793
|
overlayCompositionPath: compositionDest,
|
|
10794
|
+
captionsCompositionPath: captions.compositionPath,
|
|
10795
|
+
transcriptPath: captions.transcriptPath,
|
|
10666
10796
|
blueprintPath,
|
|
10667
10797
|
frames,
|
|
10668
|
-
ambient: Boolean(args.ambient),
|
|
10669
|
-
actorSheets: Boolean(args["actor-sheets"])
|
|
10798
|
+
ambient: Boolean(args.ambient)
|
|
10670
10799
|
};
|
|
10671
10800
|
let canvas;
|
|
10672
10801
|
let report;
|