@koda-sl/baker-cli 0.74.0 → 0.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -8
- package/canvas/end-card-composition/index.html +66 -0
- package/canvas/end-card-composition/meta.json +19 -0
- package/canvas/feature-reveal-composition/index.html +83 -0
- package/canvas/feature-reveal-composition/meta.json +18 -0
- package/canvas/lower-third-composition/index.html +75 -0
- package/canvas/lower-third-composition/meta.json +18 -0
- package/canvas/stat-counter-composition/index.html +73 -0
- package/canvas/stat-counter-composition/meta.json +20 -0
- package/canvas/title-card-composition/index.html +90 -0
- package/canvas/title-card-composition/meta.json +20 -0
- package/canvas/video-call-composition/index.html +136 -0
- package/canvas/video-call-composition/meta.json +26 -0
- package/canvas/video-overlay-composition/index.html +39 -2
- package/dist/{chunk-JIDZ37KG.js → chunk-NBNUNCY7.js} +552 -313
- package/dist/chunk-NBNUNCY7.js.map +1 -0
- package/dist/cli.js +640 -114
- package/dist/cli.js.map +1 -1
- package/dist/engine/index.d.ts +6 -0
- package/dist/engine/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-JIDZ37KG.js.map +0 -1
|
@@ -621,7 +621,7 @@ ${originalIndentation}`;
|
|
|
621
621
|
});
|
|
622
622
|
|
|
623
623
|
// src/engine/index.ts
|
|
624
|
-
import
|
|
624
|
+
import path15 from "path";
|
|
625
625
|
|
|
626
626
|
// src/engine/client/http.ts
|
|
627
627
|
var BackendHttpError = class extends Error {
|
|
@@ -667,14 +667,14 @@ var HttpClient = class {
|
|
|
667
667
|
this.fetchFn = opts.fetchFn ?? fetch;
|
|
668
668
|
this.sleepFn = opts.sleepFn ?? ((ms) => new Promise((r) => setTimeout(r, ms)));
|
|
669
669
|
}
|
|
670
|
-
async postJson(
|
|
671
|
-
return await this.requestJson("POST",
|
|
670
|
+
async postJson(path16, body, signal) {
|
|
671
|
+
return await this.requestJson("POST", path16, body, signal);
|
|
672
672
|
}
|
|
673
|
-
async getJson(
|
|
674
|
-
return await this.requestJson("GET",
|
|
673
|
+
async getJson(path16, signal) {
|
|
674
|
+
return await this.requestJson("GET", path16, void 0, signal);
|
|
675
675
|
}
|
|
676
|
-
async requestJson(method,
|
|
677
|
-
const url = `${this.baseUrl}${
|
|
676
|
+
async requestJson(method, path16, body, signal) {
|
|
677
|
+
const url = `${this.baseUrl}${path16.startsWith("/") ? path16 : `/${path16}`}`;
|
|
678
678
|
for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
|
|
679
679
|
const outcome = await this.attempt(method, url, body, attempt, signal);
|
|
680
680
|
if (outcome.kind === "value") return outcome.value;
|
|
@@ -786,8 +786,8 @@ var BackendClient = class {
|
|
|
786
786
|
);
|
|
787
787
|
}
|
|
788
788
|
getArtifact(kind, name, version, signal) {
|
|
789
|
-
const
|
|
790
|
-
return this.http.getJson(
|
|
789
|
+
const path16 = version ? `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}/${encodeURIComponent(version)}` : `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}`;
|
|
790
|
+
return this.http.getJson(path16, signal);
|
|
791
791
|
}
|
|
792
792
|
};
|
|
793
793
|
|
|
@@ -1242,6 +1242,21 @@ var MODEL_REGISTRY = {
|
|
|
1242
1242
|
}
|
|
1243
1243
|
}
|
|
1244
1244
|
},
|
|
1245
|
+
audio_voice_convert: {
|
|
1246
|
+
"elevenlabs/eleven_multilingual_sts_v2": {
|
|
1247
|
+
// Speech-to-speech / Voice Changer: re-voice an existing audio clip in the
|
|
1248
|
+
// TARGET voice, preserving timing/prosody. Used to normalize a talking-head
|
|
1249
|
+
// clip's native (generator-chosen) voice into ONE consistent brand voice.
|
|
1250
|
+
label: "ElevenLabs Voice Changer (multilingual STS v2)",
|
|
1251
|
+
inputs: [{ kind: "audio", mimes: FAL_AUDIO_MIMES }],
|
|
1252
|
+
required: ["voice"],
|
|
1253
|
+
params: {
|
|
1254
|
+
voice: { kind: "string" },
|
|
1255
|
+
output_format: { kind: "string", enum: ELEVENLABS_OUTPUT_FORMATS },
|
|
1256
|
+
remove_background_noise: { kind: "boolean" }
|
|
1257
|
+
}
|
|
1258
|
+
}
|
|
1259
|
+
},
|
|
1245
1260
|
music: {
|
|
1246
1261
|
"elevenlabs/music-v1": {
|
|
1247
1262
|
label: "ElevenLabs Music v1 (Compose)",
|
|
@@ -1512,11 +1527,30 @@ var VideoMeta = z.object({
|
|
|
1512
1527
|
speaker: z.string().optional()
|
|
1513
1528
|
})
|
|
1514
1529
|
).default([]),
|
|
1515
|
-
// Scenes with a single on-camera speaker
|
|
1516
|
-
//
|
|
1517
|
-
// `
|
|
1518
|
-
//
|
|
1519
|
-
|
|
1530
|
+
// Scenes with a single on-camera speaker, voiced NATIVELY by the video model
|
|
1531
|
+
// (Seedance generate_audio) then re-voiced to one brand voice. Each entry names
|
|
1532
|
+
// the `audio_voice_convert` node the validator confirms is present — the
|
|
1533
|
+
// native-audio replacement for the retired post-hoc lipsync check. A bare scene
|
|
1534
|
+
// index (legacy) or `{ scene, lipsync_node }` (legacy veed canvases) still parse.
|
|
1535
|
+
talking_scenes: z.array(
|
|
1536
|
+
z.union([
|
|
1537
|
+
z.number(),
|
|
1538
|
+
z.object({
|
|
1539
|
+
scene: z.number(),
|
|
1540
|
+
voice_convert_node: z.string(),
|
|
1541
|
+
// Advisory: the scene's visual length vs the estimated spoken length, so
|
|
1542
|
+
// a reviewer can see a native line that may run past its cut. Not gated.
|
|
1543
|
+
scene_s: z.number().optional(),
|
|
1544
|
+
est_speech_s: z.number().optional()
|
|
1545
|
+
}),
|
|
1546
|
+
z.object({ scene: z.number(), lipsync_node: z.string() })
|
|
1547
|
+
])
|
|
1548
|
+
).default([]),
|
|
1549
|
+
// Advisory, NOT gated by the validator: the reviewable "which graphic fires
|
|
1550
|
+
// on which spoken beat" map emitted by scaffold-video (per-scene window,
|
|
1551
|
+
// spoken line, storyboard frames, scheduled graphics). Free-form rows so the
|
|
1552
|
+
// schema stays decoupled from the scaffold's exact shape.
|
|
1553
|
+
motion_board: z.array(z.unknown()).optional()
|
|
1520
1554
|
}).strict().optional();
|
|
1521
1555
|
var CanvasMetadata = z.object({
|
|
1522
1556
|
name: z.string().optional(),
|
|
@@ -2193,7 +2227,8 @@ function resolveRefKind(ctx, refStr) {
|
|
|
2193
2227
|
if (!targetDef) return null;
|
|
2194
2228
|
const targetParams = targetDef.params.safeParse(target.params ?? {});
|
|
2195
2229
|
const resolvedKinds = resolveOutputKinds(targetDef.outputKinds, targetParams.success ? targetParams.data : {});
|
|
2196
|
-
const
|
|
2230
|
+
const declaredKind = target.params?.outputs?.[expr.output]?.kind;
|
|
2231
|
+
const kind = resolvedKinds[expr.output] ?? declaredKind;
|
|
2197
2232
|
return kind && MODEL_INPUT_KINDS.has(kind) ? kind : null;
|
|
2198
2233
|
}
|
|
2199
2234
|
function checkOneRef(ctx, n, i, refStr, jsonPath, field) {
|
|
@@ -2286,6 +2321,24 @@ function estimateCredits(ctx) {
|
|
|
2286
2321
|
}
|
|
2287
2322
|
return total;
|
|
2288
2323
|
}
|
|
2324
|
+
/**
 * Decide whether a `metadata.video.talking_scenes` entry is backed by the
 * nodes it requires in the canvas graph.
 *
 * Three entry shapes are accepted:
 *  - `{ scene, voice_convert_node }`: the scene's `s<scene>_clip` node must be a
 *    `video_generate` with `params.generate_audio === true`, AND the named node
 *    must exist with type `audio_voice_convert`.
 *  - any other object (legacy `{ scene, lipsync_node }`): the named node must
 *    exist with type `video_lipsync`.
 *  - a bare scene index (legacy): some `video_lipsync` node must take the
 *    scene's trim or clip video as its `inputs.video`.
 *
 * @param ctx   validation context; only `ctx.canvas.nodes` is read
 * @param entry the talking_scenes entry (number or object)
 * @param scene the scene index the entry refers to
 * @returns true when the required node(s) are present
 */
function talkingSceneSatisfied(ctx, entry, scene) {
  const graphNodes = ctx.canvas.nodes;
  const isObjectEntry = typeof entry === "object";

  if (isObjectEntry && "voice_convert_node" in entry) {
    const clipId = `s${scene}_clip`;
    const hasNativeAudioClip = graphNodes.some((node) => {
      if (node.id !== clipId) return false;
      if (node.type !== "video_generate") return false;
      return node.params?.generate_audio === true;
    });
    const hasVoiceConvert = graphNodes.some(
      (node) => node.id === entry.voice_convert_node && node.type === "audio_voice_convert"
    );
    return hasNativeAudioClip && hasVoiceConvert;
  }

  if (isObjectEntry) {
    return graphNodes.some((node) => node.id === entry.lipsync_node && node.type === "video_lipsync");
  }

  // Legacy bare index: accept a lipsync fed by either the trimmed or raw clip.
  const trimRef = `$ref:s${scene}_trim.video`;
  const clipRef = `$ref:s${scene}_clip.video`;
  return graphNodes.some((node) => {
    if (node.type !== "video_lipsync") return false;
    const source = node.inputs?.video;
    return source === trimRef || source === clipRef;
  });
}
|
|
2289
2342
|
function checkVideoInvariants(ctx) {
|
|
2290
2343
|
const meta = ctx.canvas.metadata?.video;
|
|
2291
2344
|
if (!meta) return;
|
|
@@ -2312,16 +2365,11 @@ function checkVideoInvariants(ctx) {
|
|
|
2312
2365
|
}
|
|
2313
2366
|
for (const entry of meta.talking_scenes) {
|
|
2314
2367
|
const scene = typeof entry === "number" ? entry : entry.scene;
|
|
2315
|
-
|
|
2316
|
-
if (n.type !== "video_lipsync") return false;
|
|
2317
|
-
const video = n.inputs?.video;
|
|
2318
|
-
return video === `$ref:s${scene}_trim.video` || video === `$ref:s${scene}_clip.video`;
|
|
2319
|
-
}) : ctx.canvas.nodes.some((n) => n.id === entry.lipsync_node && n.type === "video_lipsync");
|
|
2320
|
-
if (!synced) {
|
|
2368
|
+
if (!talkingSceneSatisfied(ctx, entry, scene)) {
|
|
2321
2369
|
ctx.issues.push({
|
|
2322
2370
|
path: "metadata.video.talking_scenes",
|
|
2323
2371
|
code: STAGE_CODES.LIPSYNC_MISSING,
|
|
2324
|
-
message: `scene ${scene}
|
|
2372
|
+
message: `scene ${scene} is a single-on-camera-speaker talking head but its clip lacks native audio (generate_audio) or the audio_voice_convert node is missing \u2014 the voice won't be brand-consistent / lips may drift`
|
|
2325
2373
|
});
|
|
2326
2374
|
}
|
|
2327
2375
|
}
|
|
@@ -2355,9 +2403,9 @@ function checkOutputRef(ctx) {
|
|
|
2355
2403
|
function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
|
|
2356
2404
|
for (const issue of err.issues) {
|
|
2357
2405
|
const tail2 = pathToString(issue.path);
|
|
2358
|
-
const
|
|
2406
|
+
const path16 = pathPrefix ? tail2 ? `${pathPrefix}.${tail2}` : pathPrefix : tail2;
|
|
2359
2407
|
issues.push({
|
|
2360
|
-
path:
|
|
2408
|
+
path: path16,
|
|
2361
2409
|
code,
|
|
2362
2410
|
message: issue.message,
|
|
2363
2411
|
received: issue.code === "invalid_type" ? issue.received : void 0,
|
|
@@ -2366,8 +2414,8 @@ function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
|
|
|
2366
2414
|
});
|
|
2367
2415
|
}
|
|
2368
2416
|
}
|
|
2369
|
-
function pathToString(
|
|
2370
|
-
return
|
|
2417
|
+
/**
 * Render a zod issue path (array of keys and indices) as a dotted string.
 * Numeric segments become `[n]`, every other segment becomes `.key`, and the
 * leading dot (if any) is dropped — e.g. `["nodes", 2, "id"]` -> `nodes[2].id`.
 */
function pathToString(path16) {
  const joined = path16.reduce((acc, segment) => {
    const piece = typeof segment === "number" ? `[${segment}]` : `.${String(segment)}`;
    return acc + piece;
  }, "");
  return joined.startsWith(".") ? joined.slice(1) : joined;
}
|
|
2372
2420
|
function buildDepGraph(canvas) {
|
|
2373
2421
|
const graph = /* @__PURE__ */ new Map();
|
|
@@ -3995,12 +4043,12 @@ var fontSpecimenNode = defineNode({
|
|
|
3995
4043
|
});
|
|
3996
4044
|
|
|
3997
4045
|
// src/engine/nodes/local/hyperframe.ts
|
|
3998
|
-
import { execFile as
|
|
3999
|
-
import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as
|
|
4046
|
+
import { execFile as execFile4 } from "child_process";
|
|
4047
|
+
import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as readFile8, rm as rm4, stat as stat5, writeFile as writeFile5 } from "fs/promises";
|
|
4000
4048
|
import { createRequire as createRequire2 } from "module";
|
|
4001
4049
|
import { cpus, tmpdir as tmpdir4 } from "os";
|
|
4002
|
-
import
|
|
4003
|
-
import { promisify as
|
|
4050
|
+
import path11 from "path";
|
|
4051
|
+
import { promisify as promisify4 } from "util";
|
|
4004
4052
|
import { z as z10 } from "zod";
|
|
4005
4053
|
|
|
4006
4054
|
// src/engine/engine/composition-hash.ts
|
|
@@ -4194,6 +4242,148 @@ function defaultFilenameForInput(key, kind) {
|
|
|
4194
4242
|
return `${key}.png`;
|
|
4195
4243
|
}
|
|
4196
4244
|
|
|
4245
|
+
// src/engine/nodes/local/lib/hyperframe-check.ts
|
|
4246
|
+
import { execFile as execFile3 } from "child_process";
|
|
4247
|
+
import { readFile as readFile7 } from "fs/promises";
|
|
4248
|
+
import path9 from "path";
|
|
4249
|
+
import { promisify as promisify3 } from "util";
|
|
4250
|
+
var execFileAsync = promisify3(execFile3);
|
|
4251
|
+
var NEVER_BLOCK = [/contrast/i, /\bwcag\b/i, /missing_local_asset/i, /font[_-]?family/i, /font[_-]?face/i];
|
|
4252
|
+
var UNAVAILABLE = /unknown command|command not found|not found|Did you mean|Unknown argument|ENOENT/i;
|
|
4253
|
+
/**
 * True when a finding matches one of the NEVER_BLOCK patterns. The patterns
 * are tested against the finding's code and message joined by a space.
 */
function isAdvisory(code, message) {
  const haystack = `${code} ${message}`;
  for (const pattern of NEVER_BLOCK) {
    if (pattern.test(haystack)) return true;
  }
  return false;
}
|
|
4257
|
+
/**
 * Best-effort JSON extraction from CLI output.
 *
 * Tries the whole trimmed text first; if that does not parse, retries on the
 * span from the first `{` to the last `}` so a JSON report surrounded by other
 * output can still be read.
 *
 * @param raw raw stdout text (may be empty / nullish)
 * @returns the parsed value, or null when nothing parseable was found
 */
function parseCheckJson(raw) {
  if (!raw) return null;
  const text = raw.trim();
  const candidates = [text];
  const open = text.indexOf("{");
  const close = text.lastIndexOf("}");
  if (open >= 0 && close > open) {
    candidates.push(text.slice(open, close + 1));
  }
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON — fall through to the next candidate (if any).
    }
  }
  return null;
}
|
|
4275
|
+
/**
 * Convert a parsed lint report into normalized findings.
 *
 * Reads `json.findings`; anything that is not an array yields an empty list.
 * A finding is "blocking" only when its severity is exactly "error" and it is
 * not advisory per isAdvisory(); everything else is reported as "warning".
 * Missing code/message default to "" and missing severity to "info".
 */
function classifyLint(json) {
  const reported = json?.findings;
  if (!Array.isArray(reported)) return [];
  return Array.from(reported, (finding) => {
    const code = String(finding?.code ?? "");
    const message = String(finding?.message ?? "");
    const rawSeverity = String(finding?.severity ?? "info");
    // isAdvisory is only consulted for errors, matching the short-circuit.
    const isBlocking = rawSeverity === "error" && !isAdvisory(code, message);
    return { source: "lint", code, message, severity: isBlocking ? "blocking" : "warning" };
  });
}
|
|
4289
|
+
/**
 * Convert a parsed inspect report into normalized findings.
 *
 * Reads `json.issues`; anything that is not an array yields an empty list.
 * Per issue: code falls back to `type`, then "overflow"; message falls back to
 * `detail`, then the JSON-serialized issue. When the issue carries no
 * severity, the report-level `ok === false` makes it an "error", otherwise a
 * "warning". Only "error" maps to "blocking".
 */
function classifyInspect(json) {
  const reported = json?.issues;
  if (!Array.isArray(reported)) return [];
  const reportFailed = json?.ok === false;
  return Array.from(reported, (issue) => {
    const code = String(issue?.code ?? issue?.type ?? "overflow");
    const message = String(issue?.message ?? issue?.detail ?? JSON.stringify(issue));
    let rawSeverity;
    if (issue?.severity) {
      rawSeverity = String(issue.severity);
    } else {
      rawSeverity = reportFailed ? "error" : "warning";
    }
    return {
      source: "inspect",
      code,
      message,
      severity: rawSeverity === "error" ? "blocking" : "warning",
    };
  });
}
|
|
4303
|
+
/**
 * Parse and classify the raw lint and inspect outputs, then split the
 * combined findings (lint first, inspect second) into blocking and warning
 * lists, preserving their relative order.
 *
 * @returns `{ blocking, warnings }`
 */
function classifyCheckOutput(lintRaw, inspectRaw) {
  const lintFindings = classifyLint(parseCheckJson(lintRaw));
  const inspectFindings = classifyInspect(parseCheckJson(inspectRaw));
  const blocking = [];
  const warnings = [];
  for (const finding of lintFindings.concat(inspectFindings)) {
    if (finding.severity === "blocking") blocking.push(finding);
    if (finding.severity === "warning") warnings.push(finding);
  }
  return { blocking, warnings };
}
|
|
4310
|
+
/** Build the argument vector for a JSON-output `hyperframes lint` run on `dir`. */
function buildLintArgs(dir) {
  const argv = ["hyperframes", "lint"];
  argv.push(dir, "--json");
  return argv;
}
|
|
4313
|
+
/**
 * Build the argument vector for a JSON-output `hyperframes inspect` run on
 * `dir`, passing `samples` (stringified) as the `--samples` value.
 */
function buildInspectArgs(dir, samples) {
  const sampleCount = String(samples);
  const flags = ["--json", "--samples", sampleCount];
  return ["hyperframes", "inspect", dir, ...flags];
}
|
|
4316
|
+
/**
 * Build the argument vector for a `hyperframes snapshot` run on `dir`, with
 * `frames` (stringified) as the `--frames` value and `--describe false`.
 */
function buildSnapshotArgs(dir, frames) {
  const frameCount = String(frames);
  const flags = ["--frames", frameCount, "--describe", "false"];
  return ["hyperframes", "snapshot", dir, ...flags];
}
|
|
4319
|
+
/**
 * True when the HTML contains a `data-composition-src=` attribute outside of
 * HTML comments (comments are stripped before matching).
 */
function usesNestedCompositions(indexHtml) {
  const liveMarkup = indexHtml.replaceAll(/<!--[\s\S]*?-->/g, "");
  return liveMarkup.search(/data-composition-src\s*=/) !== -1;
}
|
|
4323
|
+
/**
 * Run one `npx <args>` check command and return its stdout.
 *
 * This is fail-open: the function never throws for a failed command.
 *  - On success, returns stdout.
 *  - On failure, if the error still carries stdout containing a `{`, that
 *    stdout is returned so the caller can parse the report out of it.
 *  - On any other failure, returns null and the caller treats the check as
 *    having produced no output.
 *
 * @param args      argument vector passed to `npx`
 * @param timeoutMs execFile timeout in milliseconds
 * @returns stdout text, or null when no usable output was produced
 */
async function runOne(args, timeoutMs) {
  try {
    const { stdout } = await execFileAsync("npx", args, { timeout: timeoutMs, maxBuffer: 64 * 1024 * 1024 });
    return stdout;
  } catch (e) {
    if (e.stdout?.includes("{")) return e.stdout;
    // Every remaining failure is treated identically (no output), so there is
    // nothing to gain from inspecting stderr/message here.
    return null;
  }
}
|
|
4335
|
+
/**
 * Run an `npx <args>` snapshot command as a smoke test and report the outcome
 * without throwing.
 *
 * @param args      argument vector passed to `npx`
 * @param timeoutMs execFile timeout in milliseconds
 * @returns `{ ok, unavailable, message }`:
 *   - success: `ok: true`, empty message;
 *   - failure whose stderr/message matches UNAVAILABLE: `unavailable: true`
 *     with the full stderr+message text;
 *   - any other failure: `ok: false` with stderr (or message, or
 *     "snapshot failed") truncated to 800 characters.
 */
async function runSnapshotSmoke(args, timeoutMs) {
  const execOptions = { timeout: timeoutMs, maxBuffer: 64 * 1024 * 1024 };
  try {
    await execFileAsync("npx", args, execOptions);
  } catch (failure) {
    const diagnostics = `${failure.stderr ?? ""} ${failure.message ?? ""}`;
    const unavailable = UNAVAILABLE.test(diagnostics);
    const message = unavailable
      ? diagnostics
      : (failure.stderr || failure.message || "snapshot failed").slice(0, 800);
    return { ok: false, unavailable, message };
  }
  return { ok: true, unavailable: false, message: "" };
}
|
|
4346
|
+
/**
 * Pre-render gate: run `hyperframes lint` and `hyperframes inspect` on a
 * staged composition directory and fail on blocking findings.
 *
 * Flow:
 *  1. Run lint and inspect in parallel. If neither produced output, log and
 *     return (the check is skipped, not failed).
 *  2. Log every warning; throw if any finding is blocking.
 *  3. If `index.html` uses nested compositions, run a snapshot smoke test.
 *     An unavailable snapshot command is logged and skipped; any other
 *     failure throws.
 *
 * @param opts.dir       staged composition directory
 * @param opts.nodeId    label used as the prefix of log and error messages
 * @param opts.ctx       execution context; only `ctx.log` is used
 * @param opts.timeoutMs per-command timeout in milliseconds (may be undefined)
 * @param opts.samples   inspect sample count (default 5); the smoke test uses
 *                       at most 3 frames
 * @throws Error when a blocking finding exists or the smoke test fails
 */
async function runHyperframesCheck(opts) {
  const { dir, nodeId, ctx, timeoutMs, samples = 5 } = opts;
  const [lintRaw, inspectRaw] = await Promise.all([
    runOne(buildLintArgs(dir), timeoutMs),
    runOne(buildInspectArgs(dir, samples), timeoutMs)
  ]);
  if (lintRaw === null && inspectRaw === null) {
    ctx.log(`${nodeId}: hyperframes lint/inspect unavailable \u2014 skipping pre-render check`);
    return;
  }
  const { blocking, warnings } = classifyCheckOutput(lintRaw ?? "", inspectRaw ?? "");
  for (const w of warnings) {
    ctx.log(`${nodeId}: hyperframe check warning [${w.source}/${w.code}] ${w.message}`);
  }
  if (blocking.length > 0) {
    const detail = blocking.map((b) => `\u2022 [${b.source}/${b.code}] ${b.message}`).join("\n");
    throw new Error(`${nodeId}: pre-render check failed (${blocking.length} blocking)\n${detail}`);
  }
  let indexHtml = "";
  try {
    indexHtml = await readFile7(path9.join(dir, "index.html"), "utf-8");
  } catch {
    // A missing or unreadable index.html just means there is nothing to
    // smoke-test here.
    indexHtml = "";
  }
  if (indexHtml && usesNestedCompositions(indexHtml)) {
    // Give the snapshot at least two minutes. `timeoutMs ?? 0` keeps the
    // result a real number when the caller passed no timeout:
    // Math.max(undefined, …) is NaN, which execFile rejects as an invalid
    // timeout and which would then be misreported as a render failure.
    const snapshotTimeoutMs = Math.max(timeoutMs ?? 0, 12e4);
    const snap = await runSnapshotSmoke(buildSnapshotArgs(dir, Math.min(samples, 3)), snapshotTimeoutMs);
    if (snap.unavailable) {
      ctx.log(`${nodeId}: hyperframes snapshot unavailable \u2014 skipping nested-composition smoke test`);
    } else if (!snap.ok) {
      throw new Error(
        `${nodeId}: nested-composition smoke test failed \u2014 an embedded block did not render. Check the host\u2194block id match, that the block's <style>/<script> live inside its <template>, and that it styles #root (not a class).\n${snap.message}`
      );
    } else {
      ctx.log(`${nodeId}: nested-composition smoke test passed`);
    }
  }
  ctx.log(`${nodeId}: pre-render check passed (${warnings.length} warning${warnings.length === 1 ? "" : "s"})`);
}
|
|
4386
|
+
|
|
4197
4387
|
// src/engine/nodes/local/lib/hyperframe-errors.ts
|
|
4198
4388
|
var KNOWN_ERROR_PATTERNS = [
|
|
4199
4389
|
{
|
|
@@ -4237,6 +4427,29 @@ ${stderr.slice(0, 1500)}`;
|
|
|
4237
4427
|
return null;
|
|
4238
4428
|
}
|
|
4239
4429
|
|
|
4430
|
+
// src/engine/nodes/local/lib/hyperframe-meta.ts
|
|
4431
|
+
import { writeFile as writeFile4 } from "fs/promises";
|
|
4432
|
+
import path10 from "path";
|
|
4433
|
+
/**
 * Write `<tmp>/meta.json` (overwriting any existing file) with the fields
 * id, name, duration, width, height and fps, pretty-printed with a 2-space
 * indent.
 *
 * @param tmp      staged composition directory
 * @param nodeId   written as `id`
 * @param meta     composition meta; `meta.id` is written as `name`, and
 *                 width/height/fps are copied through
 * @param duration written as `duration`
 */
async function ensureHyperframesMetaJson(tmp, nodeId, meta, duration) {
  const target = path10.join(tmp, "meta.json");
  const { id: name, width, height, fps } = meta;
  const descriptor = { id: nodeId, name, duration, width, height, fps };
  const serialized = JSON.stringify(descriptor, null, 2);
  await writeFile4(target, serialized, "utf-8");
}
|
|
4452
|
+
|
|
4240
4453
|
// src/engine/nodes/local/lib/templating.ts
|
|
4241
4454
|
var PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
4242
4455
|
function substituteVariables(source, values) {
|
|
@@ -4272,25 +4485,34 @@ function literalize(value) {
|
|
|
4272
4485
|
}
|
|
4273
4486
|
|
|
4274
4487
|
// src/engine/nodes/local/hyperframe.ts
|
|
4275
|
-
var
|
|
4488
|
+
var execFileAsync2 = promisify4(execFile4);
|
|
4276
4489
|
var require_2 = createRequire2(import.meta.url);
|
|
4277
4490
|
var HyperframeParams = z10.object({
|
|
4278
4491
|
composition: z10.string().min(1),
|
|
4492
|
+
// Output container. mp4 (default) for delivery; webm/mov render WITH
|
|
4493
|
+
// transparency (alpha) when the composition background is transparent —
|
|
4494
|
+
// use for motion-graphic overlays dropped into Premiere/AE/Nuke.
|
|
4495
|
+
format: z10.enum(["mp4", "webm", "mov"]).optional().default("mp4"),
|
|
4279
4496
|
timeout_ms: z10.number().int().positive().optional().default(10 * 60 * 1e3)
|
|
4280
4497
|
}).catchall(z10.unknown());
|
|
4281
4498
|
var HyperframeInputs = z10.record(z10.string(), z10.custom()).optional().default({});
|
|
4282
4499
|
var HyperframeOutputs = z10.object({
|
|
4283
4500
|
video: z10.custom()
|
|
4284
4501
|
}).strict();
|
|
4285
|
-
var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "timeout_ms"]);
|
|
4502
|
+
var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "format", "timeout_ms"]);
|
|
4503
|
+
var MIME_BY_FORMAT = {
|
|
4504
|
+
mp4: "video/mp4",
|
|
4505
|
+
webm: "video/webm",
|
|
4506
|
+
mov: "video/quicktime"
|
|
4507
|
+
};
|
|
4286
4508
|
var ENGINE_INJECTED_TOKENS = /* @__PURE__ */ new Set(["duration"]);
|
|
4287
4509
|
var hyperframeRenderNode = defineNode({
|
|
4288
4510
|
id: "hyperframe_render",
|
|
4289
|
-
version: "6.
|
|
4511
|
+
version: "6.1.0",
|
|
4290
4512
|
category: "video",
|
|
4291
4513
|
location: "local",
|
|
4292
4514
|
summary: "Render an mp4 by composing an HTML/CSS/GSAP composition over upstream assets. Point `params.composition` at a directory containing `index.html` + `meta.json`. All variables are passed as primitives in `params` and substituted into the composition before render.",
|
|
4293
|
-
when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality
|
|
4515
|
+
when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality and worker count are fixed by the engine. Set `params.format` to `webm` or `mov` for a transparent (alpha) overlay to composite in another editor; defaults to `mp4`. Runs a pre-render `hyperframes lint`/`inspect` gate (overflow/structural errors block; contrast warns).",
|
|
4294
4516
|
inputs: HyperframeInputs,
|
|
4295
4517
|
params: HyperframeParams,
|
|
4296
4518
|
outputs: HyperframeOutputs,
|
|
@@ -4309,7 +4531,7 @@ var hyperframeRenderNode = defineNode({
|
|
|
4309
4531
|
const compositionDir = await resolveCompositionDir(params.composition);
|
|
4310
4532
|
const meta = await loadCompositionMeta(compositionDir);
|
|
4311
4533
|
const compositionParams = validateAndParseDynamicParams(meta, params);
|
|
4312
|
-
const tmp = await mkdtemp4(
|
|
4534
|
+
const tmp = await mkdtemp4(path11.join(tmpdir4(), "hf-render-"));
|
|
4313
4535
|
try {
|
|
4314
4536
|
await copyComposition(compositionDir, tmp);
|
|
4315
4537
|
await vendorGsap(tmp, ctx);
|
|
@@ -4317,15 +4539,16 @@ var hyperframeRenderNode = defineNode({
|
|
|
4317
4539
|
const duration = stagedDuration ?? meta.default_duration;
|
|
4318
4540
|
const substitutionValues = await buildSubstitutionValues(compositionParams, meta, duration);
|
|
4319
4541
|
await substituteCompositionFiles(tmp, substitutionValues);
|
|
4320
|
-
await
|
|
4321
|
-
|
|
4542
|
+
await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, duration);
|
|
4543
|
+
await runHyperframesCheck({ dir: tmp, nodeId: "hyperframe_render", ctx, timeoutMs: params.timeout_ms });
|
|
4544
|
+
const outputPath = path11.join(tmp, `output.${params.format}`);
|
|
4322
4545
|
await runRender({ tmp, outputPath, params, meta, ctx });
|
|
4323
|
-
const bytes = await
|
|
4546
|
+
const bytes = await readFile8(outputPath);
|
|
4324
4547
|
ctx.log(`rendered ${bytes.length} bytes`);
|
|
4325
4548
|
const ref = await ctx.assets.ingestBytes({
|
|
4326
4549
|
bytes: Buffer.from(bytes),
|
|
4327
4550
|
kind: "video",
|
|
4328
|
-
mime:
|
|
4551
|
+
mime: MIME_BY_FORMAT[params.format],
|
|
4329
4552
|
metadata: {
|
|
4330
4553
|
width: meta.width,
|
|
4331
4554
|
height: meta.height,
|
|
@@ -4342,10 +4565,10 @@ var hyperframeRenderNode = defineNode({
|
|
|
4342
4565
|
}
|
|
4343
4566
|
});
|
|
4344
4567
|
async function resolveCompositionDir(composition) {
|
|
4345
|
-
const compositionPath =
|
|
4568
|
+
const compositionPath = path11.isAbsolute(composition) ? composition : path11.resolve(process.cwd(), composition);
|
|
4346
4569
|
const s = await stat5(compositionPath);
|
|
4347
4570
|
if (s.isDirectory()) return compositionPath;
|
|
4348
|
-
return
|
|
4571
|
+
return path11.dirname(compositionPath);
|
|
4349
4572
|
}
|
|
4350
4573
|
async function validateCompositionParams(rawParams) {
|
|
4351
4574
|
const issues = [];
|
|
@@ -4412,7 +4635,7 @@ async function copyComposition(srcDir, destDir) {
|
|
|
4412
4635
|
await cp(srcDir, destDir, {
|
|
4413
4636
|
recursive: true,
|
|
4414
4637
|
filter: (src) => {
|
|
4415
|
-
const name =
|
|
4638
|
+
const name = path11.basename(src);
|
|
4416
4639
|
if (name === ".cache" || name === "node_modules" || name === ".git") return false;
|
|
4417
4640
|
return true;
|
|
4418
4641
|
}
|
|
@@ -4421,7 +4644,7 @@ async function copyComposition(srcDir, destDir) {
|
|
|
4421
4644
|
async function vendorGsap(tmp, ctx) {
|
|
4422
4645
|
try {
|
|
4423
4646
|
const gsapMin = require_2.resolve("gsap/dist/gsap.min.js");
|
|
4424
|
-
await copyFile4(gsapMin,
|
|
4647
|
+
await copyFile4(gsapMin, path11.join(tmp, "gsap.min.js"));
|
|
4425
4648
|
} catch (e) {
|
|
4426
4649
|
ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
|
|
4427
4650
|
}
|
|
@@ -4436,7 +4659,7 @@ async function stageInputs2(tmp, inputs, meta, ctx) {
|
|
|
4436
4659
|
await stageAsset(ref, tmp, filename);
|
|
4437
4660
|
ctx.log(`staged ${spec.kind} \u2192 ${filename}`);
|
|
4438
4661
|
if (spec.kind === "video" && primaryDuration === null) {
|
|
4439
|
-
primaryDuration = await probeDurationSeconds(
|
|
4662
|
+
primaryDuration = await probeDurationSeconds(path11.join(tmp, filename));
|
|
4440
4663
|
}
|
|
4441
4664
|
}
|
|
4442
4665
|
return primaryDuration;
|
|
@@ -4482,34 +4705,15 @@ function coerceImageParam(value) {
|
|
|
4482
4705
|
throw new Error("hyperframe_render: image param must be a URL string or AssetRef");
|
|
4483
4706
|
}
|
|
4484
4707
|
async function substituteCompositionFiles(tmp, values) {
|
|
4485
|
-
const entryPath =
|
|
4486
|
-
const original = await
|
|
4708
|
+
const entryPath = path11.join(tmp, "index.html");
|
|
4709
|
+
const original = await readFile8(entryPath, "utf-8");
|
|
4487
4710
|
const { output, missing } = substituteVariables(original, values);
|
|
4488
4711
|
if (missing.length > 0) {
|
|
4489
4712
|
throw new Error(
|
|
4490
4713
|
`hyperframe_render: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}. Add to params or to meta.json's params with a default.`
|
|
4491
4714
|
);
|
|
4492
4715
|
}
|
|
4493
|
-
await
|
|
4494
|
-
}
|
|
4495
|
-
async function ensureMetaJson(tmp, nodeId, meta, duration) {
|
|
4496
|
-
const metaPath = path9.join(tmp, "meta.json");
|
|
4497
|
-
await writeFile4(
|
|
4498
|
-
metaPath,
|
|
4499
|
-
JSON.stringify(
|
|
4500
|
-
{
|
|
4501
|
-
id: nodeId,
|
|
4502
|
-
name: meta.id,
|
|
4503
|
-
duration,
|
|
4504
|
-
width: meta.width,
|
|
4505
|
-
height: meta.height,
|
|
4506
|
-
fps: meta.fps
|
|
4507
|
-
},
|
|
4508
|
-
null,
|
|
4509
|
-
2
|
|
4510
|
-
),
|
|
4511
|
-
"utf-8"
|
|
4512
|
-
);
|
|
4716
|
+
await writeFile5(entryPath, output, "utf-8");
|
|
4513
4717
|
}
|
|
4514
4718
|
var MAX_WORKERS = 4;
|
|
4515
4719
|
function workerCount() {
|
|
@@ -4517,10 +4721,10 @@ function workerCount() {
|
|
|
4517
4721
|
}
|
|
4518
4722
|
async function runRender(opts) {
|
|
4519
4723
|
const { tmp, outputPath, params, meta, ctx } = opts;
|
|
4520
|
-
const args = buildRenderArgs(tmp, outputPath, meta);
|
|
4521
|
-
ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps from ${
|
|
4724
|
+
const args = buildRenderArgs(tmp, outputPath, meta, params.format);
|
|
4725
|
+
ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps ${params.format} from ${path11.basename(tmp)}`);
|
|
4522
4726
|
try {
|
|
4523
|
-
await
|
|
4727
|
+
await execFileAsync2("npx", args, { timeout: params.timeout_ms, maxBuffer: 64 * 1024 * 1024 });
|
|
4524
4728
|
} catch (e) {
|
|
4525
4729
|
const stderr = e.stderr ?? "";
|
|
4526
4730
|
const stdout = e.stdout ?? "";
|
|
@@ -4530,7 +4734,7 @@ async function runRender(opts) {
|
|
|
4530
4734
|
${friendly ?? detail.slice(0, 4e3)}`);
|
|
4531
4735
|
}
|
|
4532
4736
|
}
|
|
4533
|
-
function buildRenderArgs(tmp, outputPath, meta) {
|
|
4737
|
+
function buildRenderArgs(tmp, outputPath, meta, format) {
|
|
4534
4738
|
return [
|
|
4535
4739
|
"hyperframes",
|
|
4536
4740
|
"render",
|
|
@@ -4542,13 +4746,13 @@ function buildRenderArgs(tmp, outputPath, meta) {
|
|
|
4542
4746
|
"--quality",
|
|
4543
4747
|
"high",
|
|
4544
4748
|
"--format",
|
|
4545
|
-
|
|
4749
|
+
format,
|
|
4546
4750
|
"--workers",
|
|
4547
4751
|
String(workerCount())
|
|
4548
4752
|
];
|
|
4549
4753
|
}
|
|
4550
4754
|
async function probeDurationSeconds(filePath) {
|
|
4551
|
-
const { stdout } = await
|
|
4755
|
+
const { stdout } = await execFileAsync2(
|
|
4552
4756
|
"ffprobe",
|
|
4553
4757
|
["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", filePath],
|
|
4554
4758
|
{ encoding: "utf-8" }
|
|
@@ -4561,14 +4765,14 @@ async function probeDurationSeconds(filePath) {
|
|
|
4561
4765
|
}
|
|
4562
4766
|
|
|
4563
4767
|
// src/engine/nodes/local/hyperframe-snapshot.ts
|
|
4564
|
-
import { execFile as
|
|
4565
|
-
import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as
|
|
4768
|
+
import { execFile as execFile5 } from "child_process";
|
|
4769
|
+
import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as readFile9, rm as rm5, writeFile as writeFile6 } from "fs/promises";
|
|
4566
4770
|
import { createRequire as createRequire3 } from "module";
|
|
4567
4771
|
import { tmpdir as tmpdir5 } from "os";
|
|
4568
|
-
import
|
|
4569
|
-
import { promisify as
|
|
4772
|
+
import path12 from "path";
|
|
4773
|
+
import { promisify as promisify5 } from "util";
|
|
4570
4774
|
import { z as z11 } from "zod";
|
|
4571
|
-
var _execFileAsync =
|
|
4775
|
+
var _execFileAsync = promisify5(execFile5);
|
|
4572
4776
|
var require_3 = createRequire3(import.meta.url);
|
|
4573
4777
|
var WaitForSpec = z11.discriminatedUnion("kind", [
|
|
4574
4778
|
z11.object({ kind: z11.literal("auto") }),
|
|
@@ -4589,7 +4793,7 @@ var NODE_OWNED_PARAM_KEYS2 = /* @__PURE__ */ new Set(["composition", "wait_for",
|
|
|
4589
4793
|
var DEVICE_SCALE_FACTOR2 = 2;
|
|
4590
4794
|
var hyperframeSnapshotNode = defineNode({
|
|
4591
4795
|
id: "hyperframe_snapshot",
|
|
4592
|
-
version: "4.
|
|
4796
|
+
version: "4.1.0",
|
|
4593
4797
|
category: "image",
|
|
4594
4798
|
location: "local",
|
|
4595
4799
|
summary: "Render an HTML/CSS composition to a static PNG via headless Chromium at 2x device-scale (retina). Same composition model as `hyperframe_render` \u2014 point `params.composition` at a directory containing `index.html` + `meta.json`.",
|
|
@@ -4612,14 +4816,22 @@ var hyperframeSnapshotNode = defineNode({
|
|
|
4612
4816
|
const compositionDir = await resolveCompositionDir(params.composition);
|
|
4613
4817
|
const meta = await loadCompositionMeta(compositionDir);
|
|
4614
4818
|
const compositionParams = validateAndParseDynamicParams2(meta, params);
|
|
4615
|
-
const tmp = await mkdtemp5(
|
|
4819
|
+
const tmp = await mkdtemp5(path12.join(tmpdir5(), "hf-snap-"));
|
|
4616
4820
|
try {
|
|
4617
4821
|
await copyComposition2(compositionDir, tmp);
|
|
4618
4822
|
await vendorGsap2(tmp, ctx);
|
|
4619
4823
|
await stageInputs3(tmp, inputs, meta, ctx);
|
|
4620
4824
|
const substitutionValues = await buildSubstitutionValues2(compositionParams, meta);
|
|
4621
4825
|
await substituteCompositionFiles2(tmp, substitutionValues);
|
|
4622
|
-
|
|
4826
|
+
await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, meta.default_duration);
|
|
4827
|
+
await runHyperframesCheck({
|
|
4828
|
+
dir: tmp,
|
|
4829
|
+
nodeId: "hyperframe_snapshot",
|
|
4830
|
+
ctx,
|
|
4831
|
+
timeoutMs: params.timeout_ms,
|
|
4832
|
+
samples: 1
|
|
4833
|
+
});
|
|
4834
|
+
const entryPath = path12.join(tmp, "index.html");
|
|
4623
4835
|
const entryUrl = `file://${entryPath}`;
|
|
4624
4836
|
ctx.log(`snapshotting ${meta.width}x${meta.height}@${DEVICE_SCALE_FACTOR2}x wait=${params.wait_for.kind}`);
|
|
4625
4837
|
const pwSpecifier = ["play", "wright"].join("");
|
|
@@ -4680,7 +4892,7 @@ async function copyComposition2(srcDir, destDir) {
|
|
|
4680
4892
|
await cp(srcDir, destDir, {
|
|
4681
4893
|
recursive: true,
|
|
4682
4894
|
filter: (src) => {
|
|
4683
|
-
const name =
|
|
4895
|
+
const name = path12.basename(src);
|
|
4684
4896
|
if (name === ".cache" || name === "node_modules" || name === ".git") return false;
|
|
4685
4897
|
return true;
|
|
4686
4898
|
}
|
|
@@ -4689,7 +4901,7 @@ async function copyComposition2(srcDir, destDir) {
|
|
|
4689
4901
|
async function vendorGsap2(tmp, ctx) {
|
|
4690
4902
|
try {
|
|
4691
4903
|
const gsapMin = require_3.resolve("gsap/dist/gsap.min.js");
|
|
4692
|
-
await copyFile5(gsapMin,
|
|
4904
|
+
await copyFile5(gsapMin, path12.join(tmp, "gsap.min.js"));
|
|
4693
4905
|
} catch (e) {
|
|
4694
4906
|
ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
|
|
4695
4907
|
}
|
|
@@ -4723,15 +4935,15 @@ function coerceImageParam2(value) {
|
|
|
4723
4935
|
throw new Error("hyperframe_snapshot: image param must be a URL string or AssetRef");
|
|
4724
4936
|
}
|
|
4725
4937
|
async function substituteCompositionFiles2(tmp, values) {
|
|
4726
|
-
const entryPath =
|
|
4727
|
-
const original = await
|
|
4938
|
+
const entryPath = path12.join(tmp, "index.html");
|
|
4939
|
+
const original = await readFile9(entryPath, "utf-8");
|
|
4728
4940
|
const { output, missing } = substituteVariables(original, values);
|
|
4729
4941
|
if (missing.length > 0) {
|
|
4730
4942
|
throw new Error(
|
|
4731
4943
|
`hyperframe_snapshot: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}.`
|
|
4732
4944
|
);
|
|
4733
4945
|
}
|
|
4734
|
-
await
|
|
4946
|
+
await writeFile6(entryPath, output, "utf-8");
|
|
4735
4947
|
}
|
|
4736
4948
|
async function waitForReady(page, waitFor, timeoutMs) {
|
|
4737
4949
|
switch (waitFor.kind) {
|
|
@@ -4765,10 +4977,10 @@ async function waitForReady(page, waitFor, timeoutMs) {
|
|
|
4765
4977
|
}
|
|
4766
4978
|
|
|
4767
4979
|
// src/engine/nodes/local/imagemagick.ts
|
|
4768
|
-
import { execFile as
|
|
4769
|
-
import { promisify as
|
|
4980
|
+
import { execFile as execFile6 } from "child_process";
|
|
4981
|
+
import { promisify as promisify6 } from "util";
|
|
4770
4982
|
import { z as z12 } from "zod";
|
|
4771
|
-
var
|
|
4983
|
+
var execFileAsync3 = promisify6(execFile6);
|
|
4772
4984
|
var OutputDecl2 = z12.object({
|
|
4773
4985
|
kind: z12.enum(["image", "video", "audio"]),
|
|
4774
4986
|
ext: z12.string().min(1).max(8)
|
|
@@ -4784,7 +4996,7 @@ async function resolveBin() {
|
|
|
4784
4996
|
if (resolvedBin) return resolvedBin;
|
|
4785
4997
|
for (const candidate of ["magick", "convert"]) {
|
|
4786
4998
|
try {
|
|
4787
|
-
await
|
|
4999
|
+
await execFileAsync3(candidate, ["-version"], { encoding: "utf-8" });
|
|
4788
5000
|
resolvedBin = candidate;
|
|
4789
5001
|
return candidate;
|
|
4790
5002
|
} catch {
|
|
@@ -4834,34 +5046,60 @@ var textNode = defineNode({
|
|
|
4834
5046
|
execute: ({ params }) => Promise.resolve({ text: params.value })
|
|
4835
5047
|
});
|
|
4836
5048
|
|
|
4837
|
-
// src/engine/nodes/remote/
|
|
5049
|
+
// src/engine/nodes/remote/audioVoiceConvert.ts
|
|
4838
5050
|
import { z as z14 } from "zod";
|
|
4839
|
-
var
|
|
4840
|
-
|
|
4841
|
-
|
|
5051
|
+
var AudioVoiceConvertParams = z14.object({
|
|
5052
|
+
model: z14.literal("elevenlabs/eleven_multilingual_sts_v2"),
|
|
5053
|
+
/** Target voice id. Splice an upstream `voice_select` via `"{{voice_ref}}"`. */
|
|
5054
|
+
voice: z14.string().min(1),
|
|
5055
|
+
output_format: z14.string().optional(),
|
|
5056
|
+
/** Strip the source clip's background noise before re-voicing. */
|
|
5057
|
+
remove_background_noise: z14.boolean().optional()
|
|
5058
|
+
}).strict();
|
|
5059
|
+
var audioVoiceConvertNode = delegated({
|
|
5060
|
+
id: "audio_voice_convert",
|
|
5061
|
+
version: "1.0.0",
|
|
5062
|
+
category: "audio",
|
|
5063
|
+
summary: "Voice Changer / speech-to-speech via ElevenLabs (eleven_multilingual_sts_v2). Re-voices an existing audio clip in a TARGET voice while preserving timing/prosody.",
|
|
5064
|
+
when_to_use: 'Use to normalize a generator-chosen voice (e.g. a Seedance talking-head clip\'s native audio) into ONE consistent brand voice across every scene \u2014 the cadence is preserved so any lip-sync stays valid. Wire `inputs.voice_ref: $ref:<voice_select>.voice_id` and set `params.voice: "{{voice_ref}}"`.',
|
|
5065
|
+
inputs: z14.object({
|
|
5066
|
+
audio: AudioRef,
|
|
5067
|
+
voice_ref: TextRef.optional()
|
|
5068
|
+
}).strict(),
|
|
5069
|
+
params: AudioVoiceConvertParams,
|
|
5070
|
+
outputs: z14.object({ audio: AudioRef }).strict(),
|
|
5071
|
+
outputKinds: { audio: "audio" },
|
|
5072
|
+
cost: () => ({ credits: 1, seconds_estimate: 20 })
|
|
5073
|
+
});
|
|
5074
|
+
|
|
5075
|
+
// src/engine/nodes/remote/dialogue.ts
|
|
5076
|
+
import { z as z15 } from "zod";
|
|
5077
|
+
var DialogueInput = z15.object({
|
|
5078
|
+
text: z15.string().min(1),
|
|
5079
|
+
voice_id: z15.string().min(1)
|
|
4842
5080
|
});
|
|
4843
5081
|
var DIALOGUE_MODELS = ["elevenlabs/eleven_v3"];
|
|
4844
|
-
var DialogueParams =
|
|
4845
|
-
model:
|
|
5082
|
+
var DialogueParams = z15.object({
|
|
5083
|
+
model: z15.enum(DIALOGUE_MODELS),
|
|
4846
5084
|
/**
|
|
4847
5085
|
* Ordered list of lines, each tagged with the voice that should speak it.
|
|
4848
5086
|
* Up to 10 unique voice_ids; total text across all lines should stay under
|
|
4849
5087
|
* ~2000 characters for best quality (ElevenLabs guidance).
|
|
4850
5088
|
*/
|
|
4851
|
-
inputs:
|
|
4852
|
-
language_code:
|
|
5089
|
+
inputs: z15.array(DialogueInput).min(1).max(50),
|
|
5090
|
+
language_code: z15.string().optional(),
|
|
4853
5091
|
/** ElevenLabs voice/model settings passthrough (e.g. `{ stability: 0.5 }`). */
|
|
4854
|
-
settings:
|
|
4855
|
-
seed:
|
|
4856
|
-
apply_text_normalization:
|
|
5092
|
+
settings: z15.record(z15.string(), z15.unknown()).optional(),
|
|
5093
|
+
seed: z15.number().int().min(0).max(4294967295).optional(),
|
|
5094
|
+
apply_text_normalization: z15.enum(["auto", "on", "off"]).optional(),
|
|
4857
5095
|
/**
|
|
4858
5096
|
* When true, hits `/v1/text-to-dialogue/with-timestamps` and emits a
|
|
4859
5097
|
* separate `timestamps` output — character-level alignment plus
|
|
4860
5098
|
* per-voice segment markers usable for captions, lipsync, or
|
|
4861
5099
|
* beat-matched cuts in ad creatives.
|
|
4862
5100
|
*/
|
|
4863
|
-
with_timestamps:
|
|
4864
|
-
output_format:
|
|
5101
|
+
with_timestamps: z15.boolean().optional(),
|
|
5102
|
+
output_format: z15.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
|
|
4865
5103
|
}).strict().refine((p) => p.inputs.reduce((sum, line) => sum + line.text.length, 0) <= ELEVENLABS_MAX_TEXT_CHARS, {
|
|
4866
5104
|
message: `total dialogue text exceeds ${ELEVENLABS_MAX_TEXT_CHARS} characters`,
|
|
4867
5105
|
path: ["inputs"]
|
|
@@ -4872,9 +5110,9 @@ var dialogueNode = delegated({
|
|
|
4872
5110
|
category: "audio",
|
|
4873
5111
|
summary: "Multi-voice dialogue / VO with ElevenLabs Eleven v3. Each line is tagged with a `voice_id`, so you can render two-character scripts (e.g. ad VO + customer testimonial reaction) in a single call. Setting `with_timestamps: true` adds character-level alignment for caption rendering and lipsync-friendly cuts.",
|
|
4874
5112
|
when_to_use: "Use for any ad creative or website video VO that needs more than narration \u2014 interviews, two-actor scripts, character ads, testimonial reads. For single-voice flat reads the existing `tts` node is cheaper and simpler; reach for `dialogue` when you need multiple speakers in one stitched track or word-level timing for downstream lipsync / captions.",
|
|
4875
|
-
inputs:
|
|
5113
|
+
inputs: z15.object({}).loose(),
|
|
4876
5114
|
params: DialogueParams,
|
|
4877
|
-
outputs:
|
|
5115
|
+
outputs: z15.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
|
|
4878
5116
|
outputKinds: { audio: "audio", timestamps: "json" },
|
|
4879
5117
|
cost: ({ params }) => {
|
|
4880
5118
|
const chars = params.inputs.reduce((sum, line) => sum + line.text.length, 0);
|
|
@@ -4883,7 +5121,7 @@ var dialogueNode = delegated({
|
|
|
4883
5121
|
});
|
|
4884
5122
|
|
|
4885
5123
|
// src/engine/nodes/remote/image.ts
|
|
4886
|
-
import { z as
|
|
5124
|
+
import { z as z16 } from "zod";
|
|
4887
5125
|
var IMAGE_GENERATE_MODELS2 = [
|
|
4888
5126
|
"openai/gpt-5.4-image-2",
|
|
4889
5127
|
"google/gemini-3.5-flash",
|
|
@@ -4891,16 +5129,16 @@ var IMAGE_GENERATE_MODELS2 = [
|
|
|
4891
5129
|
"google/gemini-3-pro-image-preview",
|
|
4892
5130
|
"recraft/recraft-v4.1-pro-vector"
|
|
4893
5131
|
];
|
|
4894
|
-
var ImageGenerateParams =
|
|
4895
|
-
model:
|
|
4896
|
-
prompt:
|
|
4897
|
-
aspect_ratio:
|
|
4898
|
-
image_size:
|
|
5132
|
+
var ImageGenerateParams = z16.object({
|
|
5133
|
+
model: z16.enum(IMAGE_GENERATE_MODELS2),
|
|
5134
|
+
prompt: z16.string().min(1),
|
|
5135
|
+
aspect_ratio: z16.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
|
|
5136
|
+
image_size: z16.enum(["0.5K", "1K", "2K", "4K"]).optional(),
|
|
4899
5137
|
// Recraft v4 vector controls — forwarded into `image_config`. Registry
|
|
4900
5138
|
// rejects them on non-Recraft models.
|
|
4901
|
-
strength:
|
|
4902
|
-
rgb_colors:
|
|
4903
|
-
background_rgb_color:
|
|
5139
|
+
strength: z16.number().min(0).max(1).optional(),
|
|
5140
|
+
rgb_colors: z16.array(z16.array(z16.number().int().min(0).max(255))).optional(),
|
|
5141
|
+
background_rgb_color: z16.array(z16.number().int().min(0).max(255)).optional()
|
|
4904
5142
|
}).strict();
|
|
4905
5143
|
var imageGenerateNode = delegated({
|
|
4906
5144
|
id: "image_generate",
|
|
@@ -4910,22 +5148,22 @@ var imageGenerateNode = delegated({
|
|
|
4910
5148
|
when_to_use: "Use for hero shots, product photography, illustrations, and vector logos. `recraft/recraft-v4.1-pro-vector` for crisp vector / logo work; `openai/gpt-5.4-image-2` for photorealistic; Gemini variants for fast iteration and editing via the `reference` input. `reference` accepts ONE image or an ARRAY of images \u2014 wire several to combine references in a single generation (e.g. a subject sheet + a font specimen + the original ad). Every reference is forwarded to the model in array order.",
|
|
4911
5149
|
// `reference` is one image or an ordered array of images. The backend forwards
|
|
4912
5150
|
// each as a separate `image_url` to the provider (OpenRouter accepts many).
|
|
4913
|
-
inputs:
|
|
5151
|
+
inputs: z16.object({ reference: z16.union([ImageRef, z16.array(ImageRef).min(1)]).optional() }).loose(),
|
|
4914
5152
|
params: ImageGenerateParams,
|
|
4915
|
-
outputs:
|
|
5153
|
+
outputs: z16.object({ images: z16.array(ImageRef).min(1) }).strict(),
|
|
4916
5154
|
outputKinds: { images: "image" },
|
|
4917
5155
|
cost: () => ({ credits: 5, seconds_estimate: 10 })
|
|
4918
5156
|
});
|
|
4919
5157
|
|
|
4920
5158
|
// src/engine/nodes/remote/imageAspectAdapt.ts
|
|
4921
|
-
import { z as
|
|
5159
|
+
import { z as z17 } from "zod";
|
|
4922
5160
|
var ASPECT_ADAPT_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
|
|
4923
5161
|
var ASPECT_ADAPT_FORMATS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"];
|
|
4924
|
-
var ImageAspectAdaptParams =
|
|
4925
|
-
model:
|
|
4926
|
-
formats:
|
|
4927
|
-
guidance:
|
|
4928
|
-
image_size:
|
|
5162
|
+
var ImageAspectAdaptParams = z17.object({
|
|
5163
|
+
model: z17.enum(ASPECT_ADAPT_MODELS),
|
|
5164
|
+
formats: z17.array(z17.enum(ASPECT_ADAPT_FORMATS)).min(1).max(6).refine((formats) => new Set(formats).size === formats.length, { message: "formats must be unique" }),
|
|
5165
|
+
guidance: z17.string().min(1).optional(),
|
|
5166
|
+
image_size: z17.enum(["0.5K", "1K", "2K", "4K"]).optional()
|
|
4929
5167
|
}).strict();
|
|
4930
5168
|
var imageAspectAdaptNode = delegated({
|
|
4931
5169
|
id: "image_aspect_adapt",
|
|
@@ -4933,9 +5171,9 @@ var imageAspectAdaptNode = delegated({
|
|
|
4933
5171
|
category: "image",
|
|
4934
5172
|
summary: "Adapt ONE creative into multiple aspect ratios (Meta: 9:16 stories, 1:1 feed, 4:5, 16:9\u2026) in a single step. AI recomposes the layout per format \u2014 identical subject, text, logos, colors, and style; the scene is extended/restructured, never stretched or cropped. Formats that already match the source ratio pass through unchanged at zero cost. Outputs are ordered exactly as `formats`.",
|
|
4935
5173
|
when_to_use: "Use after a hero creative exists (image_generate, ingest, image_search) to fan it out to every placement format \u2014 wire the creative into `source` and list the target ratios in `formats`. Cost is estimated per format; formats matching the source ratio are free pass-throughs. Pick `google/gemini-3.1-flash-image-preview` (Nano Banana flash) while iterating, `google/gemini-3-pro-image-preview` (Nano Banana Pro) for final-quality adaptation.",
|
|
4936
|
-
inputs:
|
|
5174
|
+
inputs: z17.object({ source: ImageRef }).loose(),
|
|
4937
5175
|
params: ImageAspectAdaptParams,
|
|
4938
|
-
outputs:
|
|
5176
|
+
outputs: z17.object({ images: z17.array(ImageRef).min(1) }).strict(),
|
|
4939
5177
|
outputKinds: { images: "image" },
|
|
4940
5178
|
cost: ({ params }) => {
|
|
4941
5179
|
const p = params;
|
|
@@ -4948,12 +5186,12 @@ var imageAspectAdaptNode = delegated({
|
|
|
4948
5186
|
});
|
|
4949
5187
|
|
|
4950
5188
|
// src/engine/nodes/remote/imageBackgroundRemove.ts
|
|
4951
|
-
import { z as
|
|
4952
|
-
var ImageBackgroundRemoveParams =
|
|
4953
|
-
model:
|
|
4954
|
-
model_variant:
|
|
4955
|
-
operating_resolution:
|
|
4956
|
-
mask_only:
|
|
5189
|
+
import { z as z18 } from "zod";
|
|
5190
|
+
var ImageBackgroundRemoveParams = z18.object({
|
|
5191
|
+
model: z18.literal("fal/birefnet-v2").optional().default("fal/birefnet-v2"),
|
|
5192
|
+
model_variant: z18.enum(["General Use (Light)", "General Use (Heavy)", "Matting", "Portrait", "DIS", "HRSOD", "COD"]).optional().default("General Use (Light)"),
|
|
5193
|
+
operating_resolution: z18.enum(["1024x1024", "2048x2048", "2304x2304"]).optional(),
|
|
5194
|
+
mask_only: z18.boolean().optional().default(false)
|
|
4957
5195
|
}).strict();
|
|
4958
5196
|
var imageBackgroundRemoveNode = delegated({
|
|
4959
5197
|
id: "image_background_remove",
|
|
@@ -4961,11 +5199,11 @@ var imageBackgroundRemoveNode = delegated({
|
|
|
4961
5199
|
category: "image",
|
|
4962
5200
|
summary: "Remove the background from an image and return a transparent PNG (or the segmentation mask). Powered by fal.ai `fal-ai/birefnet/v2`.",
|
|
4963
5201
|
when_to_use: "Use to extract subjects from photos for use as overlays in hyperframe compositions, product shots, or compositing pipelines. Set `mask_only:true` to return the binary mask instead of the alpha-cut image.",
|
|
4964
|
-
inputs:
|
|
5202
|
+
inputs: z18.object({
|
|
4965
5203
|
image: ImageRef
|
|
4966
5204
|
}).strict(),
|
|
4967
5205
|
params: ImageBackgroundRemoveParams,
|
|
4968
|
-
outputs:
|
|
5206
|
+
outputs: z18.object({
|
|
4969
5207
|
image: ImageRef,
|
|
4970
5208
|
mask: ImageRef.optional()
|
|
4971
5209
|
}).strict(),
|
|
@@ -4974,7 +5212,7 @@ var imageBackgroundRemoveNode = delegated({
|
|
|
4974
5212
|
});
|
|
4975
5213
|
|
|
4976
5214
|
// src/engine/nodes/remote/imageDescribe.ts
|
|
4977
|
-
import { z as
|
|
5215
|
+
import { z as z19 } from "zod";
|
|
4978
5216
|
var IMAGE_DESCRIBE_MODELS = ["~google/gemini-pro-latest", "~google/gemini-flash-latest"];
|
|
4979
5217
|
var imageDescribeNode = delegated({
|
|
4980
5218
|
id: "image_describe",
|
|
@@ -4982,31 +5220,31 @@ var imageDescribeNode = delegated({
|
|
|
4982
5220
|
category: "vision",
|
|
4983
5221
|
summary: "Reverse-engineer an image into an exhaustive, replication-grade JSON description: who the advertiser is and what they sell (source_context), composition, non-person subjects with expression/treatment, deeply detailed people, brand-identified logos (named by brand, not appearance), camera optics, lighting, color palette WITH per-color brand-ownership (brand vs borrowed-functional) and purpose, materials, visible text, ad signals (proof badges/CTA/price), the persuasion engine (ad_intent), style, post-processing.",
|
|
4984
5222
|
when_to_use: 'Use to turn a reference image into a structured blueprint you can inject into downstream prompts via `{{slot}}` \u2014 e.g. restyle a competitor ad onto your own product, lock a look across a series, or feed exact palette/lighting into image_generate. Purpose-built for market adaptation: logos are identified by brand ("Trustpilot", never "green star"), people and animals carry expression/emotion/intent detail, and each color is tagged brand vs borrowed-functional so a recolor can keep the reds/yellows that do a job. The extraction prompt is baked in; use `focus` to emphasise aspects and `context` to pass known provenance (advertiser, category, market) so source_context and color ownership are grounded. Pick `~google/gemini-pro-latest` for the densest extraction (recommended for ad / market-adaptation passes), `~google/gemini-flash-latest` for cheap/fast passes. The output is rich \u2014 raise `max_tokens` (e.g. 8000+) for dense ads so the JSON isn\'t truncated.',
|
|
4985
|
-
inputs:
|
|
4986
|
-
params:
|
|
4987
|
-
model:
|
|
4988
|
-
focus:
|
|
4989
|
-
context:
|
|
4990
|
-
temperature:
|
|
4991
|
-
max_tokens:
|
|
5223
|
+
inputs: z19.object({ image: ImageRef }).loose(),
|
|
5224
|
+
params: z19.object({
|
|
5225
|
+
model: z19.enum(IMAGE_DESCRIBE_MODELS),
|
|
5226
|
+
focus: z19.string().optional(),
|
|
5227
|
+
context: z19.string().optional(),
|
|
5228
|
+
temperature: z19.number().min(0).max(2).optional(),
|
|
5229
|
+
max_tokens: z19.number().int().positive().optional()
|
|
4992
5230
|
}).strict(),
|
|
4993
|
-
outputs:
|
|
5231
|
+
outputs: z19.object({ description: JsonRef }).strict(),
|
|
4994
5232
|
outputKinds: { description: "json" },
|
|
4995
5233
|
cost: () => ({ credits: 2, seconds_estimate: 10 })
|
|
4996
5234
|
});
|
|
4997
5235
|
|
|
4998
5236
|
// src/engine/nodes/remote/imageReferenceSheet.ts
|
|
4999
|
-
import { z as
|
|
5237
|
+
import { z as z20 } from "zod";
|
|
5000
5238
|
var REFERENCE_SHEET_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
|
|
5001
|
-
var ImageReferenceSheetParams =
|
|
5002
|
-
model:
|
|
5003
|
-
subject_description:
|
|
5004
|
-
subject_type:
|
|
5005
|
-
views:
|
|
5006
|
-
style:
|
|
5007
|
-
prompt_override:
|
|
5008
|
-
aspect_ratio:
|
|
5009
|
-
image_size:
|
|
5239
|
+
var ImageReferenceSheetParams = z20.object({
|
|
5240
|
+
model: z20.enum(REFERENCE_SHEET_MODELS),
|
|
5241
|
+
subject_description: z20.string().min(1),
|
|
5242
|
+
subject_type: z20.enum(["character", "person", "product"]),
|
|
5243
|
+
views: z20.array(z20.string().min(1)).min(2).max(6).optional(),
|
|
5244
|
+
style: z20.string().optional(),
|
|
5245
|
+
prompt_override: z20.string().min(1).optional(),
|
|
5246
|
+
aspect_ratio: z20.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
|
|
5247
|
+
image_size: z20.enum(["0.5K", "1K", "2K", "4K"]).optional()
|
|
5010
5248
|
}).strict();
|
|
5011
5249
|
var imageReferenceSheetNode = delegated({
|
|
5012
5250
|
id: "image_reference_sheet",
|
|
@@ -5014,9 +5252,9 @@ var imageReferenceSheetNode = delegated({
|
|
|
5014
5252
|
category: "image",
|
|
5015
5253
|
summary: "Fuse 1\u20136 images of a single subject (person, character, or product) into ONE multi-view reference sheet \u2014 a labeled turnaround grid (FRONT / SIDE / BACK\u2026) in consistent style and lighting. Curated models: Gemini 3 Pro Image (best fusion + labels), Gemini 3.1 Flash Image (cheap iteration).",
|
|
5016
5254
|
when_to_use: "Use before image_generate / video_generate when a subject must stay consistent across many creatives \u2014 wire the `sheet` output into their `reference` input instead of re-describing the subject per prompt. `subject_description` should be the exact wording you reuse downstream. Pick `google/gemini-3-pro-image-preview` for final 6-view sheets at 2K+, `google/gemini-3.1-flash-image-preview` while iterating.",
|
|
5017
|
-
inputs:
|
|
5255
|
+
inputs: z20.object({ references: z20.array(ImageRef).min(1).max(6) }).loose(),
|
|
5018
5256
|
params: ImageReferenceSheetParams,
|
|
5019
|
-
outputs:
|
|
5257
|
+
outputs: z20.object({ sheet: ImageRef }).strict(),
|
|
5020
5258
|
outputKinds: { sheet: "image" },
|
|
5021
5259
|
cost: ({ params }) => ({
|
|
5022
5260
|
credits: params?.model === "google/gemini-3-pro-image-preview" ? 20 : 5,
|
|
@@ -5025,10 +5263,10 @@ var imageReferenceSheetNode = delegated({
|
|
|
5025
5263
|
});
|
|
5026
5264
|
|
|
5027
5265
|
// src/engine/nodes/remote/imageSearch.ts
|
|
5028
|
-
import { z as
|
|
5029
|
-
var ImageSearchParams =
|
|
5030
|
-
prompt:
|
|
5031
|
-
count:
|
|
5266
|
+
import { z as z21 } from "zod";
|
|
5267
|
+
var ImageSearchParams = z21.object({
|
|
5268
|
+
prompt: z21.string().min(1),
|
|
5269
|
+
count: z21.number().int().min(1).max(20).default(5)
|
|
5032
5270
|
}).strict();
|
|
5033
5271
|
var imageSearchNode = delegated({
|
|
5034
5272
|
id: "image_search",
|
|
@@ -5036,15 +5274,15 @@ var imageSearchNode = delegated({
|
|
|
5036
5274
|
category: "image",
|
|
5037
5275
|
summary: "Agentic image search across Google Images, stock photography (Freepik), and Pinterest. An LLM agent picks the search tools and queries, selects the best matches, and the results are downloaded into canvas assets.",
|
|
5038
5276
|
when_to_use: "Use to gather real-world reference or inspiration images for a prompt (e.g. several photos of an australian shepherd) so a later step or the user can pick the best one. Not for creating new imagery \u2014 use image_generate for that.",
|
|
5039
|
-
inputs:
|
|
5277
|
+
inputs: z21.object({}).loose(),
|
|
5040
5278
|
params: ImageSearchParams,
|
|
5041
|
-
outputs:
|
|
5279
|
+
outputs: z21.object({ images: z21.array(ImageRef).min(1) }).strict(),
|
|
5042
5280
|
outputKinds: { images: "image" },
|
|
5043
5281
|
cost: ({ params }) => ({ credits: Math.ceil(2 + params.count / 2), seconds_estimate: 30 })
|
|
5044
5282
|
});
|
|
5045
5283
|
|
|
5046
5284
|
// src/engine/nodes/remote/imageSelect.ts
|
|
5047
|
-
import { z as
|
|
5285
|
+
import { z as z22 } from "zod";
|
|
5048
5286
|
var IMAGE_SELECT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
|
|
5049
5287
|
var imageSelectNode = delegated({
|
|
5050
5288
|
id: "image_select",
|
|
@@ -5052,15 +5290,15 @@ var imageSelectNode = delegated({
|
|
|
5052
5290
|
category: "vision",
|
|
5053
5291
|
summary: "Pick the best `count` images out of 2+ candidates with a vision LLM, judged against a prompt. Outputs a passthrough subset of the input refs (no new pixels) plus the model's comparative reasoning.",
|
|
5054
5292
|
when_to_use: "Use after fanning out several image_generate variants (or any pool of 2+ images) to keep only the strongest before expensive downstream steps \u2014 video generation, reference sheets, final delivery. `count` fixes the output size, so `images#0`\u2026`images#count-1` are always safe to wire. Pick `~google/gemini-flash-latest` for cheap/fast picks and `~google/gemini-pro-latest` for harder aesthetic judgement.",
|
|
5055
|
-
inputs:
|
|
5056
|
-
params:
|
|
5057
|
-
model:
|
|
5058
|
-
prompt:
|
|
5059
|
-
count:
|
|
5060
|
-
temperature:
|
|
5061
|
-
max_tokens:
|
|
5293
|
+
inputs: z22.object({ images: z22.array(ImageRef).min(2) }).loose(),
|
|
5294
|
+
params: z22.object({
|
|
5295
|
+
model: z22.enum(IMAGE_SELECT_MODELS),
|
|
5296
|
+
prompt: z22.string().min(1),
|
|
5297
|
+
count: z22.number().int().min(1).default(1),
|
|
5298
|
+
temperature: z22.number().min(0).max(2).optional(),
|
|
5299
|
+
max_tokens: z22.number().int().positive().optional()
|
|
5062
5300
|
}).strict(),
|
|
5063
|
-
outputs:
|
|
5301
|
+
outputs: z22.object({ images: z22.array(ImageRef).min(1), reasoning: TextRef }).strict(),
|
|
5064
5302
|
outputKinds: { images: "image", reasoning: "text" },
|
|
5065
5303
|
cost: () => ({ credits: 1, seconds_estimate: 5 }),
|
|
5066
5304
|
// Arity is only knowable at validate time when `images` is a literal array
|
|
@@ -5085,34 +5323,34 @@ var imageSelectNode = delegated({
|
|
|
5085
5323
|
});
|
|
5086
5324
|
|
|
5087
5325
|
// src/engine/nodes/remote/music.ts
|
|
5088
|
-
import { z as
|
|
5326
|
+
import { z as z23 } from "zod";
|
|
5089
5327
|
var MUSIC_MODELS = ["elevenlabs/music-v1", "elevenlabs/video-background-music-v1"];
|
|
5090
|
-
var MusicParams =
|
|
5091
|
-
model:
|
|
5328
|
+
var MusicParams = z23.object({
|
|
5329
|
+
model: z23.enum(MUSIC_MODELS),
|
|
5092
5330
|
/** Free-form prompt. Used by `elevenlabs/music-v1` (compose-detailed). */
|
|
5093
|
-
prompt:
|
|
5331
|
+
prompt: z23.string().optional(),
|
|
5094
5332
|
/**
|
|
5095
5333
|
* Structured composition plan (intro / hook / verse / outro sections with
|
|
5096
5334
|
* per-section styles + durations). Mutually exclusive with `prompt`.
|
|
5097
5335
|
*/
|
|
5098
|
-
composition_plan:
|
|
5336
|
+
composition_plan: z23.record(z23.string(), z23.unknown()).optional(),
|
|
5099
5337
|
/** Target length when using `prompt`. 3000–454545ms (capped by the $10 per-node cost limit). */
|
|
5100
|
-
music_length_ms:
|
|
5101
|
-
seed:
|
|
5338
|
+
music_length_ms: z23.number().int().min(3e3).max(ELEVENLABS_MAX_MUSIC_LENGTH_MS).optional(),
|
|
5339
|
+
seed: z23.number().int().optional(),
|
|
5102
5340
|
/** Prompt mode only — forces an instrumental (no vocals) track. */
|
|
5103
|
-
force_instrumental:
|
|
5341
|
+
force_instrumental: z23.boolean().optional(),
|
|
5104
5342
|
/** composition_plan only — honor exact section durations. */
|
|
5105
|
-
respect_sections_durations:
|
|
5343
|
+
respect_sections_durations: z23.boolean().optional(),
|
|
5106
5344
|
/** Emit word-level timestamps alongside the audio. */
|
|
5107
|
-
with_timestamps:
|
|
5345
|
+
with_timestamps: z23.boolean().optional(),
|
|
5108
5346
|
/**
|
|
5109
5347
|
* video-to-music only — short description of the desired score
|
|
5110
5348
|
* ("upbeat synth, fast cuts, 80s") used to bias the model.
|
|
5111
5349
|
*/
|
|
5112
|
-
description:
|
|
5350
|
+
description: z23.string().max(1e3).optional(),
|
|
5113
5351
|
/** video-to-music only — up to 10 style tags. */
|
|
5114
|
-
tags:
|
|
5115
|
-
output_format:
|
|
5352
|
+
tags: z23.array(z23.string()).max(10).optional(),
|
|
5353
|
+
output_format: z23.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
|
|
5116
5354
|
}).strict();
|
|
5117
5355
|
var musicNode = delegated({
|
|
5118
5356
|
id: "music",
|
|
@@ -5120,9 +5358,9 @@ var musicNode = delegated({
|
|
|
5120
5358
|
category: "audio",
|
|
5121
5359
|
summary: "Generate music for ad creatives and website video content. `elevenlabs/music-v1` composes from a text prompt or structured composition plan; `elevenlabs/video-background-music-v1` scores an existing video clip provided via `inputs.video`.",
|
|
5122
5360
|
when_to_use: "Use to produce background music or a full score for video ads, hero-section reels, or any motion content. Prefer the video-to-music model when you already have a cut and want music timed to it; use compose-detailed when you have only a brief or want section-level control (intro / hook / outro). Pair the resulting audio with `video_generate` or `video_lipsync` at compose time.",
|
|
5123
|
-
inputs:
|
|
5361
|
+
inputs: z23.object({ video: VideoRef.optional() }).loose(),
|
|
5124
5362
|
params: MusicParams,
|
|
5125
|
-
outputs:
|
|
5363
|
+
outputs: z23.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
|
|
5126
5364
|
outputKinds: { audio: "audio", timestamps: "json" },
|
|
5127
5365
|
cost: ({ params }) => {
|
|
5128
5366
|
const seconds = params.music_length_ms ? Math.ceil(params.music_length_ms / 1e3) : 30;
|
|
@@ -5153,25 +5391,25 @@ var musicNode = delegated({
|
|
|
5153
5391
|
});
|
|
5154
5392
|
|
|
5155
5393
|
// src/engine/nodes/remote/soundEffect.ts
|
|
5156
|
-
import { z as
|
|
5394
|
+
import { z as z24 } from "zod";
|
|
5157
5395
|
var SOUND_EFFECT_MODELS = ["elevenlabs/eleven_text_to_sound_v2"];
|
|
5158
|
-
var SoundEffectParams =
|
|
5159
|
-
model:
|
|
5396
|
+
var SoundEffectParams = z24.object({
|
|
5397
|
+
model: z24.enum(SOUND_EFFECT_MODELS),
|
|
5160
5398
|
/** Prompt describing the SFX ("metal door slam", "soft UI tap", "ocean waves"). */
|
|
5161
|
-
text:
|
|
5399
|
+
text: z24.string().min(1),
|
|
5162
5400
|
/**
|
|
5163
5401
|
* Target length in seconds. 0.5–30. Leave unset to let the model pick the
|
|
5164
5402
|
* natural length for the described effect.
|
|
5165
5403
|
*/
|
|
5166
|
-
duration_seconds:
|
|
5404
|
+
duration_seconds: z24.number().min(0.5).max(30).optional(),
|
|
5167
5405
|
/**
|
|
5168
5406
|
* 0–1. Higher = stick closer to the prompt at the cost of variety; lower
|
|
5169
5407
|
* = let the model interpret more freely. Defaults to 0.3 on the provider.
|
|
5170
5408
|
*/
|
|
5171
|
-
prompt_influence:
|
|
5409
|
+
prompt_influence: z24.number().min(0).max(1).optional(),
|
|
5172
5410
|
/** Only valid on `eleven_text_to_sound_v2` — produce a seamless loop. */
|
|
5173
|
-
loop:
|
|
5174
|
-
output_format:
|
|
5411
|
+
loop: z24.boolean().optional(),
|
|
5412
|
+
output_format: z24.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
|
|
5175
5413
|
}).strict();
|
|
5176
5414
|
var soundEffectNode = delegated({
|
|
5177
5415
|
id: "sound_effect",
|
|
@@ -5179,9 +5417,9 @@ var soundEffectNode = delegated({
|
|
|
5179
5417
|
category: "audio",
|
|
5180
5418
|
summary: "Generate short sound effects from a text prompt via ElevenLabs Text-to-Sound. Use for whooshes, impacts, UI clicks, ambient beds, or signature stingers in ad creatives and product videos.",
|
|
5181
5419
|
when_to_use: "Reach for this when you need a punch-in SFX layered against `video_generate` or `hyperframe_render` output \u2014 e.g. a logo whoosh on a hero shot, a click on a CTA cut, a swelling ambient bed under VO. Set `loop: true` for atmospheric beds that need to tile under longer footage; leave `duration_seconds` unset and the model picks a natural length.",
|
|
5182
|
-
inputs:
|
|
5420
|
+
inputs: z24.object({}).loose(),
|
|
5183
5421
|
params: SoundEffectParams,
|
|
5184
|
-
outputs:
|
|
5422
|
+
outputs: z24.object({ audio: AudioRef }).strict(),
|
|
5185
5423
|
outputKinds: { audio: "audio" },
|
|
5186
5424
|
cost: ({ params }) => {
|
|
5187
5425
|
const seconds = params.duration_seconds ?? 5;
|
|
@@ -5190,7 +5428,7 @@ var soundEffectNode = delegated({
|
|
|
5190
5428
|
});
|
|
5191
5429
|
|
|
5192
5430
|
// src/engine/nodes/remote/textGenerate.ts
|
|
5193
|
-
import { z as
|
|
5431
|
+
import { z as z25 } from "zod";
|
|
5194
5432
|
var TEXT_GENERATE_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
|
|
5195
5433
|
var textGenerateNode = delegated({
|
|
5196
5434
|
id: "text_generate",
|
|
@@ -5198,58 +5436,58 @@ var textGenerateNode = delegated({
|
|
|
5198
5436
|
category: "language",
|
|
5199
5437
|
summary: "Single-turn LLM text generation via OpenRouter. Returns a text response.",
|
|
5200
5438
|
when_to_use: 'Use for any short text generation step in a canvas \u2014 ad copy, hooks, headlines, JSON outputs for downstream nodes. Pick `~google/gemini-flash-latest` for cheap/fast work and `~google/gemini-pro-latest` for harder reasoning. When the output must be JSON for a downstream `{{slot}}` (e.g. the ad-blueprint transform), set `response_format: "json_object"` so the model returns clean JSON with no markdown fences or prose. Set `web_search: true` to let the model search the live web first (OpenRouter `:online`) \u2014 useful when the transform must adapt copy to the target brand\'s real facts (current pricing, the trust signals it actually has) rather than guess.',
|
|
5201
|
-
inputs:
|
|
5202
|
-
params:
|
|
5203
|
-
model:
|
|
5204
|
-
prompt:
|
|
5205
|
-
system:
|
|
5206
|
-
response_format:
|
|
5207
|
-
web_search:
|
|
5208
|
-
temperature:
|
|
5209
|
-
max_tokens:
|
|
5439
|
+
inputs: z25.object({}).loose(),
|
|
5440
|
+
params: z25.object({
|
|
5441
|
+
model: z25.enum(TEXT_GENERATE_MODELS),
|
|
5442
|
+
prompt: z25.string().min(1),
|
|
5443
|
+
system: z25.string().optional(),
|
|
5444
|
+
response_format: z25.enum(["text", "json_object"]).optional(),
|
|
5445
|
+
web_search: z25.boolean().optional(),
|
|
5446
|
+
temperature: z25.number().min(0).max(2).optional(),
|
|
5447
|
+
max_tokens: z25.number().int().positive().optional()
|
|
5210
5448
|
}).strict(),
|
|
5211
|
-
outputs:
|
|
5449
|
+
outputs: z25.object({ text: TextRef }).strict(),
|
|
5212
5450
|
outputKinds: { text: "text" },
|
|
5213
5451
|
cost: () => ({ credits: 1, seconds_estimate: 3 })
|
|
5214
5452
|
});
|
|
5215
5453
|
|
|
5216
5454
|
// src/engine/nodes/remote/tts.ts
|
|
5217
|
-
import { z as
|
|
5455
|
+
import { z as z26 } from "zod";
|
|
5218
5456
|
var TTS_MODELS = ["elevenlabs/eleven_v3"];
|
|
5219
|
-
var TtsVoiceSettings =
|
|
5220
|
-
stability:
|
|
5221
|
-
similarity_boost:
|
|
5222
|
-
style:
|
|
5223
|
-
use_speaker_boost:
|
|
5224
|
-
speed:
|
|
5457
|
+
var TtsVoiceSettings = z26.object({
|
|
5458
|
+
stability: z26.number().min(0).max(1).optional(),
|
|
5459
|
+
similarity_boost: z26.number().min(0).max(1).optional(),
|
|
5460
|
+
style: z26.number().min(0).max(1).optional(),
|
|
5461
|
+
use_speaker_boost: z26.boolean().optional(),
|
|
5462
|
+
speed: z26.number().min(0.25).max(4).optional()
|
|
5225
5463
|
}).strict();
|
|
5226
|
-
var TtsPronunciationLocator =
|
|
5227
|
-
pronunciation_dictionary_id:
|
|
5228
|
-
version_id:
|
|
5464
|
+
var TtsPronunciationLocator = z26.object({
|
|
5465
|
+
pronunciation_dictionary_id: z26.string().min(1),
|
|
5466
|
+
version_id: z26.string().nullable().optional()
|
|
5229
5467
|
}).strict();
|
|
5230
|
-
var TtsParams =
|
|
5231
|
-
model:
|
|
5232
|
-
text:
|
|
5233
|
-
voice:
|
|
5468
|
+
var TtsParams = z26.object({
|
|
5469
|
+
model: z26.enum(TTS_MODELS),
|
|
5470
|
+
text: z26.string().min(1).max(ELEVENLABS_MAX_TEXT_CHARS),
|
|
5471
|
+
voice: z26.string().min(1),
|
|
5234
5472
|
/** Provider output_format (mp3 family only — assets are stored as audio/mpeg). */
|
|
5235
|
-
output_format:
|
|
5236
|
-
seed:
|
|
5473
|
+
output_format: z26.enum(ELEVENLABS_OUTPUT_FORMATS).optional(),
|
|
5474
|
+
seed: z26.number().int().min(0).max(4294967295).optional(),
|
|
5237
5475
|
// Top-level shortcuts; structured form is `voice_settings`.
|
|
5238
|
-
stability:
|
|
5239
|
-
similarity_boost:
|
|
5476
|
+
stability: z26.number().min(0).max(1).optional(),
|
|
5477
|
+
similarity_boost: z26.number().min(0).max(1).optional(),
|
|
5240
5478
|
voice_settings: TtsVoiceSettings.optional(),
|
|
5241
5479
|
/** ISO 639-1 language code. eleven_v3 supports language hints. */
|
|
5242
|
-
language_code:
|
|
5243
|
-
pronunciation_dictionary_locators:
|
|
5244
|
-
apply_text_normalization:
|
|
5480
|
+
language_code: z26.string().optional(),
|
|
5481
|
+
pronunciation_dictionary_locators: z26.array(TtsPronunciationLocator).max(3).optional(),
|
|
5482
|
+
apply_text_normalization: z26.enum(["auto", "on", "off"]).optional(),
|
|
5245
5483
|
/** Currently Japanese-only. Adds latency. */
|
|
5246
|
-
apply_language_text_normalization:
|
|
5484
|
+
apply_language_text_normalization: z26.boolean().optional(),
|
|
5247
5485
|
/**
|
|
5248
5486
|
* When true, hits `/v1/text-to-speech/{voice_id}/with-timestamps` and
|
|
5249
5487
|
* adds a `timestamps` output (character-level alignment) for caption
|
|
5250
5488
|
* rendering, lipsync, and beat-matched cuts.
|
|
5251
5489
|
*/
|
|
5252
|
-
with_timestamps:
|
|
5490
|
+
with_timestamps: z26.boolean().optional()
|
|
5253
5491
|
}).strict();
|
|
5254
5492
|
var ttsNode = delegated({
|
|
5255
5493
|
id: "tts",
|
|
@@ -5257,9 +5495,9 @@ var ttsNode = delegated({
|
|
|
5257
5495
|
category: "audio",
|
|
5258
5496
|
summary: "Single-voice text-to-speech via ElevenLabs Eleven v3. Optional character-level timestamps for caption rendering and beat-matched cuts.",
|
|
5259
5497
|
when_to_use: "Use for single-speaker VO \u2014 ad reads, hero-section narration, product walkthroughs. Reach for `dialogue` when you need multiple voices in one stitched track. Set `with_timestamps: true` when downstream needs character-level alignment (captions, lipsync).",
|
|
5260
|
-
inputs:
|
|
5498
|
+
inputs: z26.object({}).loose(),
|
|
5261
5499
|
params: TtsParams,
|
|
5262
|
-
outputs:
|
|
5500
|
+
outputs: z26.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
|
|
5263
5501
|
outputKinds: { audio: "audio", timestamps: "json" },
|
|
5264
5502
|
cost: ({ params }) => ({
|
|
5265
5503
|
credits: Math.max(1, Math.ceil(params.text.length * 15e-4)),
|
|
@@ -5268,23 +5506,23 @@ var ttsNode = delegated({
|
|
|
5268
5506
|
});
|
|
5269
5507
|
|
|
5270
5508
|
// src/engine/nodes/remote/video.ts
|
|
5271
|
-
import { z as
|
|
5509
|
+
import { z as z27 } from "zod";
|
|
5272
5510
|
var VIDEO_GENERATE_MODELS = ["bytedance/seedance-2.0", "google/veo-3.1-fast"];
|
|
5273
|
-
var VideoGenerateParams =
|
|
5274
|
-
model:
|
|
5275
|
-
prompt:
|
|
5276
|
-
duration:
|
|
5277
|
-
resolution:
|
|
5511
|
+
var VideoGenerateParams = z27.object({
|
|
5512
|
+
model: z27.enum(VIDEO_GENERATE_MODELS),
|
|
5513
|
+
prompt: z27.string().min(1),
|
|
5514
|
+
duration: z27.number().int().positive().optional(),
|
|
5515
|
+
resolution: z27.string().optional(),
|
|
5278
5516
|
// Union of ratios accepted by at least one curated model (registry gates
|
|
5279
5517
|
// per-model). 3:2/2:3 are deliberately absent: no registered model takes them.
|
|
5280
|
-
aspect_ratio:
|
|
5281
|
-
generate_audio:
|
|
5282
|
-
seed:
|
|
5518
|
+
aspect_ratio: z27.enum(["16:9", "9:16", "1:1", "4:3", "3:4", "21:9", "9:21"]).optional(),
|
|
5519
|
+
generate_audio: z27.boolean().optional(),
|
|
5520
|
+
seed: z27.number().int().nonnegative().optional(),
|
|
5283
5521
|
// Veo-only passthroughs (routed via `provider.options.google-vertex.parameters`).
|
|
5284
|
-
negative_prompt:
|
|
5285
|
-
person_generation:
|
|
5286
|
-
enhance_prompt:
|
|
5287
|
-
conditioning_scale:
|
|
5522
|
+
negative_prompt: z27.string().optional(),
|
|
5523
|
+
person_generation: z27.string().optional(),
|
|
5524
|
+
enhance_prompt: z27.boolean().optional(),
|
|
5525
|
+
conditioning_scale: z27.number().optional()
|
|
5288
5526
|
}).strict();
|
|
5289
5527
|
var videoGenerateNode = delegated({
|
|
5290
5528
|
id: "video_generate",
|
|
@@ -5292,23 +5530,23 @@ var videoGenerateNode = delegated({
|
|
|
5292
5530
|
category: "video",
|
|
5293
5531
|
summary: "Generate video for ad creatives. Two curated models: `bytedance/seedance-2.0` (production quality, photorealistic humans via fal.ai) and `google/veo-3.1-fast` (cheap/fast for iteration and tests). Async with polling.",
|
|
5294
5532
|
when_to_use: "Use `bytedance/seedance-2.0` for final ad output (photoreal subjects, image-to-video with first/last frames). Use `google/veo-3.1-fast` while iterating to keep cost low. Each model has different supported durations, resolutions, and aspect ratios \u2014 see the README per-model section.",
|
|
5295
|
-
inputs:
|
|
5533
|
+
inputs: z27.object({
|
|
5296
5534
|
first_frame: ImageRef.optional(),
|
|
5297
5535
|
last_frame: ImageRef.optional(),
|
|
5298
5536
|
reference: ImageRef.optional()
|
|
5299
5537
|
}).loose(),
|
|
5300
5538
|
params: VideoGenerateParams,
|
|
5301
|
-
outputs:
|
|
5539
|
+
outputs: z27.object({ video: VideoRef }).strict(),
|
|
5302
5540
|
outputKinds: { video: "video" },
|
|
5303
5541
|
cost: () => ({ credits: 50, seconds_estimate: 120 })
|
|
5304
5542
|
});
|
|
5305
5543
|
|
|
5306
5544
|
// src/engine/nodes/remote/videoBackgroundRemove.ts
|
|
5307
|
-
import { z as
|
|
5308
|
-
var VideoBackgroundRemoveParams =
|
|
5309
|
-
model:
|
|
5310
|
-
edge_refinement:
|
|
5311
|
-
output_codec:
|
|
5545
|
+
import { z as z28 } from "zod";
|
|
5546
|
+
var VideoBackgroundRemoveParams = z28.object({
|
|
5547
|
+
model: z28.literal("fal/veed-video-background-removal").optional().default("fal/veed-video-background-removal"),
|
|
5548
|
+
edge_refinement: z28.boolean().optional().default(true),
|
|
5549
|
+
output_codec: z28.enum(["vp9", "h264"]).optional().default("vp9")
|
|
5312
5550
|
}).strict();
|
|
5313
5551
|
var videoBackgroundRemoveNode = delegated({
|
|
5314
5552
|
id: "video_background_remove",
|
|
@@ -5316,18 +5554,18 @@ var videoBackgroundRemoveNode = delegated({
|
|
|
5316
5554
|
category: "video",
|
|
5317
5555
|
summary: "Remove the background from a video and return a transparent VP9-with-alpha WebM (or H264 RGB+alpha pair). Drops directly into a hyperframe composition as `<video src='...'>` for chroma-keyed picture-in-picture overlays. Powered by fal.ai `veed/video-background-removal/fast`.",
|
|
5318
5556
|
when_to_use: "Use when you need a talking-head or subject to float over a custom background in a hyperframe composition. Pair with hyperframe_render(composition: screencast-with-talker) for screencast-with-narrator videos. Output is `video/webm` with alpha \u2014 feed straight into `<video src>` in a composition.",
|
|
5319
|
-
inputs:
|
|
5557
|
+
inputs: z28.object({
|
|
5320
5558
|
video: VideoRef
|
|
5321
5559
|
}).strict(),
|
|
5322
5560
|
params: VideoBackgroundRemoveParams,
|
|
5323
|
-
outputs:
|
|
5561
|
+
outputs: z28.object({ video: VideoRef }).strict(),
|
|
5324
5562
|
outputKinds: { video: "video" },
|
|
5325
5563
|
// $0.012 per 30 frames (edge refinement on) — assume ~30fps; refine via fal dashboard.
|
|
5326
5564
|
cost: () => ({ credits: 50, seconds_estimate: 60 })
|
|
5327
5565
|
});
|
|
5328
5566
|
|
|
5329
5567
|
// src/engine/nodes/remote/videoDeconstruct.ts
|
|
5330
|
-
import { z as
|
|
5568
|
+
import { z as z29 } from "zod";
|
|
5331
5569
|
var VIDEO_DECONSTRUCT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
|
|
5332
5570
|
var videoDeconstructNode = delegated({
|
|
5333
5571
|
id: "video_deconstruct",
|
|
@@ -5335,24 +5573,24 @@ var videoDeconstructNode = delegated({
|
|
|
5335
5573
|
category: "video",
|
|
5336
5574
|
summary: 'Deconstruct a video into a replication-grade blueprint: scene boundaries, the real start/end frame of every scene (extracted from the video as images), and an exhaustive JSON analysis \u2014 per-scene action detail, camera motion, generation-ready frame/motion prompts, overlay text with full typographic style, floating elements, deeply detailed cast (perceived demographics, ethnicity/skin-tone, styling, market-recasting notes), brand-identified logos (named by brand and what they signal, not by appearance, with on-screen timestamps), dialogue with voice descriptions, music spec, SFX list, plus a word-level transcript. `mode:"index"` is the cheap structure-first pass: scene boundaries + global blueprint only (one LLM call, no frames).',
|
|
5337
5575
|
when_to_use: 'Use to reverse-engineer a reference video (e.g. a competitor ad) so a new canvas can reproduce or remix it scene by scene. Agent loop: (1) optionally run `mode:"index"` to see the structure cheaply (scene count, boundaries, transcript) before planning; (2) run the full deconstruct; (3) read `analysis` and author the reproduction canvas. The blueprint maps 1:1 onto generation nodes: `analysis.scenes[i]` aligns positionally with `start_frames#i`/`end_frames#i`; per scene, `start_frame_prompt`/`end_frame_prompt` feed image_generate (overlay text is excluded from them by contract \u2014 recomposite it from `overlays`), `motion_prompt` + the two frames feed video_generate (first_frame/last_frame), `dialogue[].voice_description` casts tts/dialogue voices, `global.music.music_prompt` feeds music, `sfx[].sound_effect_prompt` feeds sound_effect, and `overlays`/`floating_elements` drive an ffmpeg/hyperframe overlay pass. Long videos (over ~8 min single-shot): run `mode:"index"` first, then several full nodes IN PARALLEL each with a `start_s`/`end_s` window (\u2264480s, snap edges to index scene boundaries), and merge by concatenating `analysis.scenes`; over-length errors include suggested windows. Inject fields into downstream prompts via `{{slot}}`. Pick `~google/gemini-pro-latest` for the densest extraction, `~google/gemini-flash-latest` for cheap/fast passes.',
|
|
5338
|
-
inputs:
|
|
5339
|
-
params:
|
|
5340
|
-
model:
|
|
5341
|
-
mode:
|
|
5342
|
-
language:
|
|
5343
|
-
max_scenes:
|
|
5344
|
-
focus:
|
|
5345
|
-
start_s:
|
|
5346
|
-
end_s:
|
|
5576
|
+
inputs: z29.object({ video: VideoRef }).loose(),
|
|
5577
|
+
params: z29.object({
|
|
5578
|
+
model: z29.enum(VIDEO_DECONSTRUCT_MODELS),
|
|
5579
|
+
mode: z29.enum(["full", "index"]).optional(),
|
|
5580
|
+
language: z29.string().min(2).max(8).optional(),
|
|
5581
|
+
max_scenes: z29.number().int().min(1).max(60).optional(),
|
|
5582
|
+
focus: z29.string().optional(),
|
|
5583
|
+
start_s: z29.number().min(0).optional(),
|
|
5584
|
+
end_s: z29.number().positive().optional(),
|
|
5347
5585
|
// Transcript provider for the blueprint's dialogue/transcript. Default
|
|
5348
5586
|
// Groq Whisper; "deepgram" routes to Nova-3 so words carry punctuation.
|
|
5349
|
-
transcriber:
|
|
5587
|
+
transcriber: z29.enum(["groq", "deepgram"]).optional()
|
|
5350
5588
|
}).strict(),
|
|
5351
|
-
outputs:
|
|
5589
|
+
outputs: z29.object({
|
|
5352
5590
|
analysis: JsonRef,
|
|
5353
5591
|
// Absent in mode:"index" (structure only, no Mux frame extraction).
|
|
5354
|
-
start_frames:
|
|
5355
|
-
end_frames:
|
|
5592
|
+
start_frames: z29.array(ImageRef).min(1).optional(),
|
|
5593
|
+
end_frames: z29.array(ImageRef).min(1).optional(),
|
|
5356
5594
|
transcript: JsonRef
|
|
5357
5595
|
}).strict(),
|
|
5358
5596
|
outputKinds: { analysis: "json", start_frames: "image", end_frames: "image", transcript: "json" },
|
|
@@ -5360,38 +5598,38 @@ var videoDeconstructNode = delegated({
|
|
|
5360
5598
|
});
|
|
5361
5599
|
|
|
5362
5600
|
// src/engine/nodes/remote/videoLipsync.ts
|
|
5363
|
-
import { z as
|
|
5364
|
-
var FalLipsyncParams =
|
|
5365
|
-
model:
|
|
5601
|
+
import { z as z30 } from "zod";
|
|
5602
|
+
var FalLipsyncParams = z30.object({
|
|
5603
|
+
model: z30.literal("fal/veed-lipsync")
|
|
5366
5604
|
}).strict();
|
|
5367
|
-
var VideoLipsyncParams =
|
|
5605
|
+
var VideoLipsyncParams = z30.discriminatedUnion("model", [FalLipsyncParams]);
|
|
5368
5606
|
var videoLipsyncNode = delegated({
|
|
5369
5607
|
id: "video_lipsync",
|
|
5370
5608
|
version: "1.0.0",
|
|
5371
5609
|
category: "video",
|
|
5372
5610
|
summary: "Lip-sync a video to an audio track. Currently backed by VEED via fal.ai (`fal/veed-lipsync`). $0.40/min of output.",
|
|
5373
|
-
inputs:
|
|
5611
|
+
inputs: z30.object({
|
|
5374
5612
|
video: VideoRef,
|
|
5375
5613
|
audio: AudioRef
|
|
5376
5614
|
}).strict(),
|
|
5377
5615
|
params: VideoLipsyncParams,
|
|
5378
|
-
outputs:
|
|
5616
|
+
outputs: z30.object({ video: VideoRef }).strict(),
|
|
5379
5617
|
outputKinds: { video: "video" },
|
|
5380
5618
|
cost: () => ({ credits: 20, seconds_estimate: 120 })
|
|
5381
5619
|
});
|
|
5382
5620
|
|
|
5383
5621
|
// src/engine/nodes/remote/videoTranscribe.ts
|
|
5384
|
-
import { mkdtemp as mkdtemp6, readFile as
|
|
5622
|
+
import { mkdtemp as mkdtemp6, readFile as readFile10, rm as rm6 } from "fs/promises";
|
|
5385
5623
|
import { tmpdir as tmpdir6 } from "os";
|
|
5386
|
-
import
|
|
5387
|
-
import { z as
|
|
5624
|
+
import path13 from "path";
|
|
5625
|
+
import { z as z31 } from "zod";
|
|
5388
5626
|
|
|
5389
5627
|
// src/engine/nodes/local/lib/ffmpeg.ts
|
|
5390
|
-
import { execFile as
|
|
5391
|
-
import { promisify as
|
|
5392
|
-
var
|
|
5628
|
+
import { execFile as execFile7 } from "child_process";
|
|
5629
|
+
import { promisify as promisify7 } from "util";
|
|
5630
|
+
var execFileAsync4 = promisify7(execFile7);
|
|
5393
5631
|
async function probeVideo(filePath) {
|
|
5394
|
-
const { stdout } = await
|
|
5632
|
+
const { stdout } = await execFileAsync4(
|
|
5395
5633
|
"ffprobe",
|
|
5396
5634
|
["-v", "error", "-show_streams", "-show_format", "-of", "json", filePath],
|
|
5397
5635
|
{ encoding: "utf-8", maxBuffer: 8 * 1024 * 1024 }
|
|
@@ -5449,7 +5687,7 @@ function parseFrameRate(rate) {
|
|
|
5449
5687
|
}
|
|
5450
5688
|
async function runFfmpeg(args, opts) {
|
|
5451
5689
|
try {
|
|
5452
|
-
await
|
|
5690
|
+
await execFileAsync4("ffmpeg", args, {
|
|
5453
5691
|
timeout: opts.timeout_ms,
|
|
5454
5692
|
maxBuffer: 64 * 1024 * 1024
|
|
5455
5693
|
});
|
|
@@ -5463,21 +5701,21 @@ ${detail.slice(-4e3)}`);
|
|
|
5463
5701
|
}
|
|
5464
5702
|
|
|
5465
5703
|
// src/engine/nodes/remote/videoTranscribe.ts
|
|
5466
|
-
var VideoTranscribeParams =
|
|
5467
|
-
language:
|
|
5704
|
+
var VideoTranscribeParams = z31.object({
|
|
5705
|
+
language: z31.string().min(2).max(8).optional(),
|
|
5468
5706
|
// Provider choice is explicit (no env-based silent branching). Default Groq
|
|
5469
5707
|
// Whisper; "deepgram" routes to Deepgram Nova-3, which additionally emits a
|
|
5470
5708
|
// `rich` JSON output with punctuated words + paragraph/sentence grouping.
|
|
5471
|
-
transcriber:
|
|
5709
|
+
transcriber: z31.enum(["groq", "deepgram"]).optional()
|
|
5472
5710
|
}).strict();
|
|
5473
|
-
var VideoTranscribeInputs =
|
|
5711
|
+
var VideoTranscribeInputs = z31.object({
|
|
5474
5712
|
video: VideoRef
|
|
5475
5713
|
}).strict();
|
|
5476
|
-
var VideoTranscribeOutputs =
|
|
5477
|
-
transcript:
|
|
5714
|
+
var VideoTranscribeOutputs = z31.object({
|
|
5715
|
+
transcript: z31.custom(),
|
|
5478
5716
|
// Only emitted by the Deepgram path: full punctuated words + paragraph /
|
|
5479
5717
|
// sentence grouping with speaker indices. Absent for the default Groq path.
|
|
5480
|
-
rich:
|
|
5718
|
+
rich: z31.custom().optional()
|
|
5481
5719
|
}).strict();
|
|
5482
5720
|
var AUDIO_EXTRACT_TIMEOUT_MS = 6e4;
|
|
5483
5721
|
var videoTranscribeNode = defineNode({
|
|
@@ -5515,14 +5753,14 @@ async function tryExtractAudio(inputs, ctx) {
|
|
|
5515
5753
|
ctx.log("video_transcribe: no audio track detected, sending full video");
|
|
5516
5754
|
return null;
|
|
5517
5755
|
}
|
|
5518
|
-
tmpDir = await mkdtemp6(
|
|
5519
|
-
const audioPath =
|
|
5756
|
+
tmpDir = await mkdtemp6(path13.join(tmpdir6(), "vtx-"));
|
|
5757
|
+
const audioPath = path13.join(tmpDir, "audio.mp3");
|
|
5520
5758
|
ctx.log("video_transcribe: extracting audio (mono 16kHz mp3)");
|
|
5521
5759
|
await runFfmpeg(
|
|
5522
5760
|
["-i", video.path, "-vn", "-ac", "1", "-ar", "16000", "-b:a", "64k", "-f", "mp3", "-y", audioPath],
|
|
5523
5761
|
{ timeout_ms: AUDIO_EXTRACT_TIMEOUT_MS }
|
|
5524
5762
|
);
|
|
5525
|
-
const bytes = await
|
|
5763
|
+
const bytes = await readFile10(audioPath);
|
|
5526
5764
|
if (bytes.byteLength === 0) {
|
|
5527
5765
|
ctx.log("video_transcribe: extracted audio is empty, sending full video");
|
|
5528
5766
|
return null;
|
|
@@ -5562,29 +5800,29 @@ async function tryExtractAudio(inputs, ctx) {
|
|
|
5562
5800
|
}
|
|
5563
5801
|
|
|
5564
5802
|
// src/engine/nodes/remote/voiceSelect.ts
|
|
5565
|
-
import { z as
|
|
5803
|
+
import { z as z32 } from "zod";
|
|
5566
5804
|
var voiceSelectNode = delegated({
|
|
5567
5805
|
id: "voice_select",
|
|
5568
5806
|
version: "1.0.0",
|
|
5569
5807
|
category: "audio",
|
|
5570
5808
|
summary: 'Cast an ElevenLabs voice from a natural-language description (e.g. "warm, authoritative female narrator, American accent"). Lists the account\'s voices and ranks them against the brief, emitting the best `voice_id` as a bare-string text asset plus a ranked `candidates` JSON.',
|
|
5571
5809
|
when_to_use: 'Use to turn a voice description (e.g. from a `video_deconstruct` blueprint\'s `voice_description`) into a usable ElevenLabs voice id, then feed it into a `tts` node by wiring `inputs.voice_ref: $ref:<this>.voice_id` and setting `params.voice: "{{voice_ref}}"` \u2014 the engine splices the id in at run time. Review `candidates` (json) to pick a different voice. Optional `gender`/`age`/`accent`/`language` hints sharpen the ranking.',
|
|
5572
|
-
inputs:
|
|
5573
|
-
params:
|
|
5574
|
-
description:
|
|
5575
|
-
gender:
|
|
5576
|
-
age:
|
|
5577
|
-
accent:
|
|
5578
|
-
language:
|
|
5579
|
-
limit:
|
|
5810
|
+
inputs: z32.object({}).loose(),
|
|
5811
|
+
params: z32.object({
|
|
5812
|
+
description: z32.string().min(1),
|
|
5813
|
+
gender: z32.string().optional(),
|
|
5814
|
+
age: z32.string().optional(),
|
|
5815
|
+
accent: z32.string().optional(),
|
|
5816
|
+
language: z32.string().optional(),
|
|
5817
|
+
limit: z32.number().int().min(1).max(20).optional()
|
|
5580
5818
|
}).strict(),
|
|
5581
|
-
outputs:
|
|
5819
|
+
outputs: z32.object({ voice_id: TextRef, candidates: JsonRef }).strict(),
|
|
5582
5820
|
outputKinds: { voice_id: "text", candidates: "json" },
|
|
5583
5821
|
cost: () => ({ credits: 0, seconds_estimate: 5 })
|
|
5584
5822
|
});
|
|
5585
5823
|
|
|
5586
5824
|
// src/engine/schema/catalog.ts
|
|
5587
|
-
import { z as
|
|
5825
|
+
import { z as z33 } from "zod";
|
|
5588
5826
|
function generateCatalog(registry, opts = {}) {
|
|
5589
5827
|
const entries = registry.all().map((def) => {
|
|
5590
5828
|
const cost = def.cost ? safeCost(def) : void 0;
|
|
@@ -5595,9 +5833,9 @@ function generateCatalog(registry, opts = {}) {
|
|
|
5595
5833
|
summary: def.summary,
|
|
5596
5834
|
when_to_use: def.when_to_use,
|
|
5597
5835
|
location: def.location,
|
|
5598
|
-
inputs:
|
|
5599
|
-
params:
|
|
5600
|
-
outputs:
|
|
5836
|
+
inputs: z33.toJSONSchema(def.inputs, { unrepresentable: "any" }),
|
|
5837
|
+
params: z33.toJSONSchema(def.params, { unrepresentable: "any" }),
|
|
5838
|
+
outputs: z33.toJSONSchema(def.outputs, { unrepresentable: "any" }),
|
|
5601
5839
|
cost_estimate_credits: cost?.credits,
|
|
5602
5840
|
runtime_estimate_seconds: cost?.seconds_estimate
|
|
5603
5841
|
};
|
|
@@ -5629,19 +5867,19 @@ function safeCost(def) {
|
|
|
5629
5867
|
|
|
5630
5868
|
// src/engine/storage/cache-store.ts
|
|
5631
5869
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
5632
|
-
import { mkdir as mkdir3, readFile as
|
|
5633
|
-
import
|
|
5870
|
+
import { mkdir as mkdir3, readFile as readFile11, rename as rename2, writeFile as writeFile7 } from "fs/promises";
|
|
5871
|
+
import path14 from "path";
|
|
5634
5872
|
var LocalCacheStore = class {
|
|
5635
5873
|
rootDir;
|
|
5636
5874
|
constructor(rootDir) {
|
|
5637
5875
|
this.rootDir = rootDir;
|
|
5638
5876
|
}
|
|
5639
5877
|
filePath(cacheKey) {
|
|
5640
|
-
return
|
|
5878
|
+
return path14.join(this.rootDir, `${cacheKey}.json`);
|
|
5641
5879
|
}
|
|
5642
5880
|
async get(cacheKey) {
|
|
5643
5881
|
try {
|
|
5644
|
-
const buf = await
|
|
5882
|
+
const buf = await readFile11(this.filePath(cacheKey), "utf8");
|
|
5645
5883
|
return JSON.parse(buf);
|
|
5646
5884
|
} catch (e) {
|
|
5647
5885
|
if (e.code === "ENOENT") return null;
|
|
@@ -5650,9 +5888,9 @@ var LocalCacheStore = class {
|
|
|
5650
5888
|
}
|
|
5651
5889
|
async put(entry) {
|
|
5652
5890
|
const dest = this.filePath(entry.cacheKey);
|
|
5653
|
-
await mkdir3(
|
|
5891
|
+
await mkdir3(path14.dirname(dest), { recursive: true });
|
|
5654
5892
|
const tmp = `${dest}.tmp-${process.pid}-${randomUUID2()}`;
|
|
5655
|
-
await
|
|
5893
|
+
await writeFile7(tmp, JSON.stringify(entry, null, 0));
|
|
5656
5894
|
await rename2(tmp, dest);
|
|
5657
5895
|
}
|
|
5658
5896
|
};
|
|
@@ -5686,6 +5924,7 @@ var REMOTE_NODES = [
|
|
|
5686
5924
|
imageSelectNode,
|
|
5687
5925
|
videoGenerateNode,
|
|
5688
5926
|
ttsNode,
|
|
5927
|
+
audioVoiceConvertNode,
|
|
5689
5928
|
musicNode,
|
|
5690
5929
|
dialogueNode,
|
|
5691
5930
|
soundEffectNode,
|
|
@@ -5703,14 +5942,14 @@ function defaultRegistry() {
|
|
|
5703
5942
|
}
|
|
5704
5943
|
function createEngineFromEnv(opts = {}) {
|
|
5705
5944
|
const cwd = opts.cwd ?? process.cwd();
|
|
5706
|
-
const cacheDir = opts.cacheDir ??
|
|
5707
|
-
const outputsDir = opts.outputsDir ??
|
|
5945
|
+
const cacheDir = opts.cacheDir ?? path15.join(cwd, "canvas", ".cache");
|
|
5946
|
+
const outputsDir = opts.outputsDir ?? path15.join(cwd, "canvas");
|
|
5708
5947
|
const creds = requireCredentialsFromEnv();
|
|
5709
5948
|
return new Engine({
|
|
5710
5949
|
registry: defaultRegistry(),
|
|
5711
5950
|
client: new BackendClient({ baseUrl: creds.url, apiKey: creds.apiKey }),
|
|
5712
|
-
assets: new LocalAssetStore(
|
|
5713
|
-
cache: new LocalCacheStore(
|
|
5951
|
+
assets: new LocalAssetStore(path15.join(cacheDir, "assets")),
|
|
5952
|
+
cache: new LocalCacheStore(path15.join(cacheDir, "index")),
|
|
5714
5953
|
outputsDir,
|
|
5715
5954
|
log: opts.log
|
|
5716
5955
|
});
|
|
@@ -5731,4 +5970,4 @@ export {
|
|
|
5731
5970
|
defaultRegistry,
|
|
5732
5971
|
createEngineFromEnv
|
|
5733
5972
|
};
|
|
5734
|
-
//# sourceMappingURL=chunk-
|
|
5973
|
+
//# sourceMappingURL=chunk-NBNUNCY7.js.map
|