@koda-sl/baker-cli 0.74.0 → 0.79.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -8
- package/canvas/end-card-composition/index.html +66 -0
- package/canvas/end-card-composition/meta.json +19 -0
- package/canvas/feature-reveal-composition/index.html +83 -0
- package/canvas/feature-reveal-composition/meta.json +18 -0
- package/canvas/lower-third-composition/index.html +75 -0
- package/canvas/lower-third-composition/meta.json +18 -0
- package/canvas/stat-counter-composition/index.html +73 -0
- package/canvas/stat-counter-composition/meta.json +20 -0
- package/canvas/title-card-composition/index.html +90 -0
- package/canvas/title-card-composition/meta.json +20 -0
- package/dist/{chunk-JIDZ37KG.js → chunk-CCO34ACK.js} +507 -307
- package/dist/chunk-CCO34ACK.js.map +1 -0
- package/dist/cli.js +624 -109
- package/dist/cli.js.map +1 -1
- package/dist/engine/index.d.ts +6 -0
- package/dist/engine/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-JIDZ37KG.js.map +0 -1
|
@@ -621,7 +621,7 @@ ${originalIndentation}`;
|
|
|
621
621
|
});
|
|
622
622
|
|
|
623
623
|
// src/engine/index.ts
|
|
624
|
-
import
|
|
624
|
+
import path14 from "path";
|
|
625
625
|
|
|
626
626
|
// src/engine/client/http.ts
|
|
627
627
|
var BackendHttpError = class extends Error {
|
|
@@ -667,14 +667,14 @@ var HttpClient = class {
|
|
|
667
667
|
this.fetchFn = opts.fetchFn ?? fetch;
|
|
668
668
|
this.sleepFn = opts.sleepFn ?? ((ms) => new Promise((r) => setTimeout(r, ms)));
|
|
669
669
|
}
|
|
670
|
-
async postJson(
|
|
671
|
-
return await this.requestJson("POST",
|
|
670
|
+
async postJson(path15, body, signal) {
|
|
671
|
+
return await this.requestJson("POST", path15, body, signal);
|
|
672
672
|
}
|
|
673
|
-
async getJson(
|
|
674
|
-
return await this.requestJson("GET",
|
|
673
|
+
async getJson(path15, signal) {
|
|
674
|
+
return await this.requestJson("GET", path15, void 0, signal);
|
|
675
675
|
}
|
|
676
|
-
async requestJson(method,
|
|
677
|
-
const url = `${this.baseUrl}${
|
|
676
|
+
async requestJson(method, path15, body, signal) {
|
|
677
|
+
const url = `${this.baseUrl}${path15.startsWith("/") ? path15 : `/${path15}`}`;
|
|
678
678
|
for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
|
|
679
679
|
const outcome = await this.attempt(method, url, body, attempt, signal);
|
|
680
680
|
if (outcome.kind === "value") return outcome.value;
|
|
@@ -786,8 +786,8 @@ var BackendClient = class {
|
|
|
786
786
|
);
|
|
787
787
|
}
|
|
788
788
|
getArtifact(kind, name, version, signal) {
|
|
789
|
-
const
|
|
790
|
-
return this.http.getJson(
|
|
789
|
+
const path15 = version ? `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}/${encodeURIComponent(version)}` : `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}`;
|
|
790
|
+
return this.http.getJson(path15, signal);
|
|
791
791
|
}
|
|
792
792
|
};
|
|
793
793
|
|
|
@@ -1242,6 +1242,21 @@ var MODEL_REGISTRY = {
|
|
|
1242
1242
|
}
|
|
1243
1243
|
}
|
|
1244
1244
|
},
|
|
1245
|
+
audio_voice_convert: {
|
|
1246
|
+
"elevenlabs/eleven_multilingual_sts_v2": {
|
|
1247
|
+
// Speech-to-speech / Voice Changer: re-voice an existing audio clip in the
|
|
1248
|
+
// TARGET voice, preserving timing/prosody. Used to normalize a talking-head
|
|
1249
|
+
// clip's native (generator-chosen) voice into ONE consistent brand voice.
|
|
1250
|
+
label: "ElevenLabs Voice Changer (multilingual STS v2)",
|
|
1251
|
+
inputs: [{ kind: "audio", mimes: FAL_AUDIO_MIMES }],
|
|
1252
|
+
required: ["voice"],
|
|
1253
|
+
params: {
|
|
1254
|
+
voice: { kind: "string" },
|
|
1255
|
+
output_format: { kind: "string", enum: ELEVENLABS_OUTPUT_FORMATS },
|
|
1256
|
+
remove_background_noise: { kind: "boolean" }
|
|
1257
|
+
}
|
|
1258
|
+
}
|
|
1259
|
+
},
|
|
1245
1260
|
music: {
|
|
1246
1261
|
"elevenlabs/music-v1": {
|
|
1247
1262
|
label: "ElevenLabs Music v1 (Compose)",
|
|
@@ -1512,11 +1527,30 @@ var VideoMeta = z.object({
|
|
|
1512
1527
|
speaker: z.string().optional()
|
|
1513
1528
|
})
|
|
1514
1529
|
).default([]),
|
|
1515
|
-
// Scenes with a single on-camera speaker
|
|
1516
|
-
//
|
|
1517
|
-
// `
|
|
1518
|
-
//
|
|
1519
|
-
|
|
1530
|
+
// Scenes with a single on-camera speaker, voiced NATIVELY by the video model
|
|
1531
|
+
// (Seedance generate_audio) then re-voiced to one brand voice. Each entry names
|
|
1532
|
+
// the `audio_voice_convert` node the validator confirms is present — the
|
|
1533
|
+
// native-audio replacement for the retired post-hoc lipsync check. A bare scene
|
|
1534
|
+
// index (legacy) or `{ scene, lipsync_node }` (legacy veed canvases) still parse.
|
|
1535
|
+
talking_scenes: z.array(
|
|
1536
|
+
z.union([
|
|
1537
|
+
z.number(),
|
|
1538
|
+
z.object({
|
|
1539
|
+
scene: z.number(),
|
|
1540
|
+
voice_convert_node: z.string(),
|
|
1541
|
+
// Advisory: the scene's visual length vs the estimated spoken length, so
|
|
1542
|
+
// a reviewer can see a native line that may run past its cut. Not gated.
|
|
1543
|
+
scene_s: z.number().optional(),
|
|
1544
|
+
est_speech_s: z.number().optional()
|
|
1545
|
+
}),
|
|
1546
|
+
z.object({ scene: z.number(), lipsync_node: z.string() })
|
|
1547
|
+
])
|
|
1548
|
+
).default([]),
|
|
1549
|
+
// Advisory, NOT gated by the validator: the reviewable "which graphic fires
|
|
1550
|
+
// on which spoken beat" map emitted by scaffold-video (per-scene window,
|
|
1551
|
+
// spoken line, storyboard frames, scheduled graphics). Free-form rows so the
|
|
1552
|
+
// schema stays decoupled from the scaffold's exact shape.
|
|
1553
|
+
motion_board: z.array(z.unknown()).optional()
|
|
1520
1554
|
}).strict().optional();
|
|
1521
1555
|
var CanvasMetadata = z.object({
|
|
1522
1556
|
name: z.string().optional(),
|
|
@@ -2193,7 +2227,8 @@ function resolveRefKind(ctx, refStr) {
|
|
|
2193
2227
|
if (!targetDef) return null;
|
|
2194
2228
|
const targetParams = targetDef.params.safeParse(target.params ?? {});
|
|
2195
2229
|
const resolvedKinds = resolveOutputKinds(targetDef.outputKinds, targetParams.success ? targetParams.data : {});
|
|
2196
|
-
const
|
|
2230
|
+
const declaredKind = target.params?.outputs?.[expr.output]?.kind;
|
|
2231
|
+
const kind = resolvedKinds[expr.output] ?? declaredKind;
|
|
2197
2232
|
return kind && MODEL_INPUT_KINDS.has(kind) ? kind : null;
|
|
2198
2233
|
}
|
|
2199
2234
|
function checkOneRef(ctx, n, i, refStr, jsonPath, field) {
|
|
@@ -2286,6 +2321,24 @@ function estimateCredits(ctx) {
|
|
|
2286
2321
|
}
|
|
2287
2322
|
return total;
|
|
2288
2323
|
}
|
|
2324
|
+
/**
 * Report whether one `metadata.video.talking_scenes` entry is backed by the
 * canvas nodes it requires.
 *
 * Three entry shapes are handled:
 *  - an object carrying `voice_convert_node`: the scene's `s<scene>_clip` node
 *    must be a `video_generate` node with `params.generate_audio === true`,
 *    and the named node must be of type `audio_voice_convert`;
 *  - any other object: the node named by `entry.lipsync_node` must be of type
 *    `video_lipsync`;
 *  - a non-object entry: some `video_lipsync` node must take its `video` input
 *    from `$ref:s<scene>_trim.video` or `$ref:s<scene>_clip.video`.
 *
 * @param ctx   Validation context; only `ctx.canvas.nodes` is read.
 * @param entry The talking-scene entry as it appears in the metadata.
 * @param scene Scene index used to build the expected node ids / refs.
 * @returns {boolean} true when the required node(s) are present.
 */
function talkingSceneSatisfied(ctx, entry, scene) {
  const { nodes } = ctx.canvas;

  if (typeof entry !== "object") {
    // Bare scene index: look for a lipsync node fed by this scene's video.
    const acceptedRefs = [`$ref:s${scene}_trim.video`, `$ref:s${scene}_clip.video`];
    return nodes.some((node) => {
      if (node.type !== "video_lipsync") return false;
      return acceptedRefs.includes(node.inputs?.video);
    });
  }

  if (!("voice_convert_node" in entry)) {
    // Object entry without a voice-convert node: check the named lipsync node.
    return nodes.some((node) => node.id === entry.lipsync_node && node.type === "video_lipsync");
  }

  // Native-audio path: both the audio-generating clip and the converter must exist.
  const clipId = `s${scene}_clip`;
  const hasNativeAudioClip = nodes.some(
    (node) => node.id === clipId && node.type === "video_generate" && node.params?.generate_audio === true
  );
  const hasConverter = nodes.some(
    (node) => node.id === entry.voice_convert_node && node.type === "audio_voice_convert"
  );
  return hasNativeAudioClip && hasConverter;
}
|
|
2289
2342
|
function checkVideoInvariants(ctx) {
|
|
2290
2343
|
const meta = ctx.canvas.metadata?.video;
|
|
2291
2344
|
if (!meta) return;
|
|
@@ -2312,16 +2365,11 @@ function checkVideoInvariants(ctx) {
|
|
|
2312
2365
|
}
|
|
2313
2366
|
for (const entry of meta.talking_scenes) {
|
|
2314
2367
|
const scene = typeof entry === "number" ? entry : entry.scene;
|
|
2315
|
-
|
|
2316
|
-
if (n.type !== "video_lipsync") return false;
|
|
2317
|
-
const video = n.inputs?.video;
|
|
2318
|
-
return video === `$ref:s${scene}_trim.video` || video === `$ref:s${scene}_clip.video`;
|
|
2319
|
-
}) : ctx.canvas.nodes.some((n) => n.id === entry.lipsync_node && n.type === "video_lipsync");
|
|
2320
|
-
if (!synced) {
|
|
2368
|
+
if (!talkingSceneSatisfied(ctx, entry, scene)) {
|
|
2321
2369
|
ctx.issues.push({
|
|
2322
2370
|
path: "metadata.video.talking_scenes",
|
|
2323
2371
|
code: STAGE_CODES.LIPSYNC_MISSING,
|
|
2324
|
-
message: `scene ${scene}
|
|
2372
|
+
message: `scene ${scene} is a single-on-camera-speaker talking head but its clip lacks native audio (generate_audio) or the audio_voice_convert node is missing \u2014 the voice won't be brand-consistent / lips may drift`
|
|
2325
2373
|
});
|
|
2326
2374
|
}
|
|
2327
2375
|
}
|
|
@@ -2355,9 +2403,9 @@ function checkOutputRef(ctx) {
|
|
|
2355
2403
|
function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
|
|
2356
2404
|
for (const issue of err.issues) {
|
|
2357
2405
|
const tail2 = pathToString(issue.path);
|
|
2358
|
-
const
|
|
2406
|
+
const path15 = pathPrefix ? tail2 ? `${pathPrefix}.${tail2}` : pathPrefix : tail2;
|
|
2359
2407
|
issues.push({
|
|
2360
|
-
path:
|
|
2408
|
+
path: path15,
|
|
2361
2409
|
code,
|
|
2362
2410
|
message: issue.message,
|
|
2363
2411
|
received: issue.code === "invalid_type" ? issue.received : void 0,
|
|
@@ -2366,8 +2414,8 @@ function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
|
|
|
2366
2414
|
});
|
|
2367
2415
|
}
|
|
2368
2416
|
}
|
|
2369
|
-
function pathToString(
|
|
2370
|
-
return
|
|
2417
|
+
function pathToString(path15) {
|
|
2418
|
+
return path15.map((p) => typeof p === "number" ? `[${p}]` : `.${String(p)}`).join("").replace(/^\./, "");
|
|
2371
2419
|
}
|
|
2372
2420
|
function buildDepGraph(canvas) {
|
|
2373
2421
|
const graph = /* @__PURE__ */ new Map();
|
|
@@ -3995,12 +4043,12 @@ var fontSpecimenNode = defineNode({
|
|
|
3995
4043
|
});
|
|
3996
4044
|
|
|
3997
4045
|
// src/engine/nodes/local/hyperframe.ts
|
|
3998
|
-
import { execFile as
|
|
3999
|
-
import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as readFile7, rm as rm4, stat as stat5, writeFile as
|
|
4046
|
+
import { execFile as execFile4 } from "child_process";
|
|
4047
|
+
import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as readFile7, rm as rm4, stat as stat5, writeFile as writeFile5 } from "fs/promises";
|
|
4000
4048
|
import { createRequire as createRequire2 } from "module";
|
|
4001
4049
|
import { cpus, tmpdir as tmpdir4 } from "os";
|
|
4002
|
-
import
|
|
4003
|
-
import { promisify as
|
|
4050
|
+
import path10 from "path";
|
|
4051
|
+
import { promisify as promisify4 } from "util";
|
|
4004
4052
|
import { z as z10 } from "zod";
|
|
4005
4053
|
|
|
4006
4054
|
// src/engine/engine/composition-hash.ts
|
|
@@ -4194,6 +4242,109 @@ function defaultFilenameForInput(key, kind) {
|
|
|
4194
4242
|
return `${key}.png`;
|
|
4195
4243
|
}
|
|
4196
4244
|
|
|
4245
|
+
// src/engine/nodes/local/lib/hyperframe-check.ts
|
|
4246
|
+
import { execFile as execFile3 } from "child_process";
|
|
4247
|
+
import { promisify as promisify3 } from "util";
|
|
4248
|
+
var execFileAsync = promisify3(execFile3);
|
|
4249
|
+
var NEVER_BLOCK = [/contrast/i, /\bwcag\b/i, /missing_local_asset/i, /font[_-]?family/i, /font[_-]?face/i];
|
|
4250
|
+
var UNAVAILABLE = /unknown command|command not found|not found|Did you mean|Unknown argument|ENOENT/i;
|
|
4251
|
+
/**
 * Tell whether a finding matches one of the `NEVER_BLOCK` patterns.
 *
 * The code and message are joined with a single space and every pattern is
 * tried against that combined text.
 *
 * @param code    Finding code.
 * @param message Finding message.
 * @returns {boolean} true as soon as any `NEVER_BLOCK` pattern matches.
 */
function isAdvisory(code, message) {
  const haystack = `${code} ${message}`;
  for (const pattern of NEVER_BLOCK) {
    if (pattern.test(haystack)) return true;
  }
  return false;
}
|
|
4255
|
+
/**
 * Best-effort JSON extraction from the raw text of a check command.
 *
 * The trimmed text is parsed as-is first. If that fails, the span from the
 * first `{` to the last `}` is parsed instead, which tolerates non-JSON text
 * printed before or after the payload.
 *
 * @param raw Raw output text; any falsy value yields `null`.
 * @returns The parsed value, or `null` when nothing parseable was found.
 */
function parseCheckJson(raw) {
  if (!raw) return null;
  const text = raw.trim();

  // Parse without throwing; `ok` tells success apart from a parsed `null`.
  const attempt = (candidate) => {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch {
      return { ok: false, value: null };
    }
  };

  const whole = attempt(text);
  if (whole.ok) return whole.value;

  const open = text.indexOf("{");
  const close = text.lastIndexOf("}");
  if (open < 0 || close <= open) return null;

  return attempt(text.slice(open, close + 1)).value;
}
|
|
4273
|
+
/**
 * Convert the parsed lint output into normalized findings.
 *
 * Reads `json.findings`; anything that is not an array yields an empty list.
 * A finding is `"blocking"` only when its severity is `"error"` and
 * `isAdvisory` does not match its code/message; every other finding becomes a
 * `"warning"`. Missing code/message default to `""`, missing severity to
 * `"info"`.
 *
 * @param json Parsed lint output (may be null/undefined).
 * @returns {Array<{source: string, code: string, message: string, severity: string}>}
 */
function classifyLint(json) {
  const findings = json?.findings;
  if (!Array.isArray(findings)) return [];

  return Array.from(findings, (finding) => {
    const code = String(finding?.code ?? "");
    const message = String(finding?.message ?? "");
    const reported = String(finding?.severity ?? "info");
    const isBlocking = reported === "error" && !isAdvisory(code, message);
    return { source: "lint", code, message, severity: isBlocking ? "blocking" : "warning" };
  });
}
|
|
4287
|
+
/**
 * Convert the parsed inspect output into normalized findings.
 *
 * Reads `json.issues`; anything that is not an array yields an empty list.
 * Per issue:
 *  - code comes from `code`, then `type`, else `"overflow"`;
 *  - message comes from `message`, then `detail`, else the issue serialized
 *    with `JSON.stringify`;
 *  - severity is the issue's own `severity` when truthy; otherwise `"error"`
 *    if the report has `ok === false`, else `"warning"`.
 * Only an effective severity of `"error"` is reported as `"blocking"`.
 *
 * @param json Parsed inspect output (may be null/undefined).
 * @returns {Array<{source: string, code: string, message: string, severity: string}>}
 */
function classifyInspect(json) {
  const issues = json?.issues;
  if (!Array.isArray(issues)) return [];

  return Array.from(issues, (issue) => {
    const code = String(issue?.code ?? issue?.type ?? "overflow");
    const message = String(issue?.message ?? issue?.detail ?? JSON.stringify(issue));

    let effective;
    if (issue?.severity) {
      effective = String(issue.severity);
    } else if (json?.ok === false) {
      effective = "error";
    } else {
      effective = "warning";
    }

    return { source: "inspect", code, message, severity: effective === "error" ? "blocking" : "warning" };
  });
}
|
|
4301
|
+
/**
 * Parse and classify the raw lint and inspect outputs, then split the combined
 * findings by severity.
 *
 * Lint findings come first, inspect findings second, and that order is kept
 * inside each bucket.
 *
 * @param lintRaw    Raw text of the lint command.
 * @param inspectRaw Raw text of the inspect command.
 * @returns {{blocking: Array, warnings: Array}} findings grouped by severity.
 */
function classifyCheckOutput(lintRaw, inspectRaw) {
  const lintFindings = classifyLint(parseCheckJson(lintRaw));
  const inspectFindings = classifyInspect(parseCheckJson(inspectRaw));

  const blocking = [];
  const warnings = [];
  for (const finding of lintFindings.concat(inspectFindings)) {
    if (finding.severity === "blocking") {
      blocking.push(finding);
    } else if (finding.severity === "warning") {
      warnings.push(finding);
    }
  }
  return { blocking, warnings };
}
|
|
4308
|
+
/**
 * Build the argument vector for `hyperframes lint <dir> --json`.
 *
 * @param dir Directory to lint.
 * @returns {string[]} arguments, starting with the `hyperframes` package name.
 */
function buildLintArgs(dir) {
  const args = ["hyperframes", "lint"];
  args.push(dir, "--json");
  return args;
}
|
|
4311
|
+
/**
 * Build the argument vector for
 * `hyperframes inspect <dir> --json --samples <samples>`.
 *
 * @param dir     Directory to inspect.
 * @param samples Sample count; converted with `String()` for the command line.
 * @returns {string[]} arguments, starting with the `hyperframes` package name.
 */
function buildInspectArgs(dir, samples) {
  const sampleFlag = ["--samples", String(samples)];
  return ["hyperframes", "inspect", dir, "--json"].concat(sampleFlag);
}
|
|
4314
|
+
/**
 * Run one `npx <args>` check command and return its stdout.
 *
 * The call never rejects because of the command itself: when the command
 * fails, its stdout is still returned if it contains a `{` (so the caller can
 * try to parse findings out of it); every other failure resolves to `null`.
 *
 * @param args      Argument vector passed to `npx`.
 * @param timeoutMs Timeout handed to the child process, in milliseconds.
 * @returns {Promise<string|null>} stdout text, or `null` when no usable output
 *   was produced.
 */
async function runOne(args, timeoutMs) {
  try {
    const { stdout } = await execFileAsync("npx", args, { timeout: timeoutMs, maxBuffer: 64 * 1024 * 1024 });
    return stdout;
  } catch (e) {
    // The command failed, but stdout looks like it may carry a JSON payload.
    if (e.stdout?.includes("{")) return e.stdout;
    // Every other failure is reported as "no output". The previous version
    // tested stderr/message against the unavailable-command pattern here, but
    // returned null on both arms, so that test was dead code and is removed.
    return null;
  }
}
|
|
4326
|
+
/**
 * Pre-render gate: run the lint and inspect commands on a staged composition
 * directory and fail on blocking findings.
 *
 * Both commands are started together. If neither yields output the check is
 * skipped with a log line. Otherwise every warning is logged, and any blocking
 * finding aborts with an Error listing all of them.
 *
 * @param opts.dir       Composition directory to check.
 * @param opts.nodeId    Label used as the prefix of log and error messages.
 * @param opts.ctx       Context; only `ctx.log` is used.
 * @param opts.timeoutMs Timeout applied to each command, in milliseconds.
 * @param opts.samples   Sample count for inspect (defaults to 5).
 * @throws {Error} when at least one blocking finding is reported.
 */
async function runHyperframesCheck(opts) {
  const { dir, nodeId, ctx, timeoutMs, samples = 5 } = opts;

  const lintRun = runOne(buildLintArgs(dir), timeoutMs);
  const inspectRun = runOne(buildInspectArgs(dir, samples), timeoutMs);
  const [lintRaw, inspectRaw] = await Promise.all([lintRun, inspectRun]);

  const nothingRan = lintRaw === null && inspectRaw === null;
  if (nothingRan) {
    ctx.log(`${nodeId}: hyperframes lint/inspect unavailable \u2014 skipping pre-render check`);
    return;
  }

  const result = classifyCheckOutput(lintRaw ?? "", inspectRaw ?? "");
  const blocking = result.blocking;
  const warnings = result.warnings;

  // Warnings are logged even when the check goes on to fail.
  warnings.forEach((w) => {
    ctx.log(`${nodeId}: hyperframe check warning [${w.source}/${w.code}] ${w.message}`);
  });

  if (blocking.length === 0) {
    const plural = warnings.length === 1 ? "" : "s";
    ctx.log(`${nodeId}: pre-render check passed (${warnings.length} warning${plural})`);
    return;
  }

  const lines = [];
  for (const b of blocking) {
    lines.push(`\u2022 [${b.source}/${b.code}] ${b.message}`);
  }
  const detail = lines.join("\n");
  throw new Error(`${nodeId}: pre-render check failed (${blocking.length} blocking)\n${detail}`);
}
|
|
4347
|
+
|
|
4197
4348
|
// src/engine/nodes/local/lib/hyperframe-errors.ts
|
|
4198
4349
|
var KNOWN_ERROR_PATTERNS = [
|
|
4199
4350
|
{
|
|
@@ -4237,6 +4388,29 @@ ${stderr.slice(0, 1500)}`;
|
|
|
4237
4388
|
return null;
|
|
4238
4389
|
}
|
|
4239
4390
|
|
|
4391
|
+
// src/engine/nodes/local/lib/hyperframe-meta.ts
|
|
4392
|
+
import { writeFile as writeFile4 } from "fs/promises";
|
|
4393
|
+
import path9 from "path";
|
|
4394
|
+
/**
 * Write `meta.json` into the staging directory.
 *
 * The file is pretty-printed JSON (2-space indent, UTF-8) holding `id`, `name`,
 * `duration`, `width`, `height` and `fps`.
 *
 * @param tmp      Staging directory that receives `meta.json`.
 * @param nodeId   Value written as `id`.
 * @param meta     Composition metadata; `id` (written as `name`), `width`,
 *                 `height` and `fps` are read.
 * @param duration Value written as `duration`.
 * @returns {Promise<void>} resolves once the file has been written.
 */
async function ensureHyperframesMetaJson(tmp, nodeId, meta, duration) {
  const target = path9.join(tmp, "meta.json");
  const { id: name, width, height, fps } = meta;
  const payload = { id: nodeId, name, duration, width, height, fps };
  const serialized = JSON.stringify(payload, null, 2);
  await writeFile4(target, serialized, "utf-8");
}
|
|
4413
|
+
|
|
4240
4414
|
// src/engine/nodes/local/lib/templating.ts
|
|
4241
4415
|
var PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
4242
4416
|
function substituteVariables(source, values) {
|
|
@@ -4272,25 +4446,34 @@ function literalize(value) {
|
|
|
4272
4446
|
}
|
|
4273
4447
|
|
|
4274
4448
|
// src/engine/nodes/local/hyperframe.ts
|
|
4275
|
-
var
|
|
4449
|
+
var execFileAsync2 = promisify4(execFile4);
|
|
4276
4450
|
var require_2 = createRequire2(import.meta.url);
|
|
4277
4451
|
var HyperframeParams = z10.object({
|
|
4278
4452
|
composition: z10.string().min(1),
|
|
4453
|
+
// Output container. mp4 (default) for delivery; webm/mov render WITH
|
|
4454
|
+
// transparency (alpha) when the composition background is transparent —
|
|
4455
|
+
// use for motion-graphic overlays dropped into Premiere/AE/Nuke.
|
|
4456
|
+
format: z10.enum(["mp4", "webm", "mov"]).optional().default("mp4"),
|
|
4279
4457
|
timeout_ms: z10.number().int().positive().optional().default(10 * 60 * 1e3)
|
|
4280
4458
|
}).catchall(z10.unknown());
|
|
4281
4459
|
var HyperframeInputs = z10.record(z10.string(), z10.custom()).optional().default({});
|
|
4282
4460
|
var HyperframeOutputs = z10.object({
|
|
4283
4461
|
video: z10.custom()
|
|
4284
4462
|
}).strict();
|
|
4285
|
-
var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "timeout_ms"]);
|
|
4463
|
+
var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "format", "timeout_ms"]);
|
|
4464
|
+
var MIME_BY_FORMAT = {
|
|
4465
|
+
mp4: "video/mp4",
|
|
4466
|
+
webm: "video/webm",
|
|
4467
|
+
mov: "video/quicktime"
|
|
4468
|
+
};
|
|
4286
4469
|
var ENGINE_INJECTED_TOKENS = /* @__PURE__ */ new Set(["duration"]);
|
|
4287
4470
|
var hyperframeRenderNode = defineNode({
|
|
4288
4471
|
id: "hyperframe_render",
|
|
4289
|
-
version: "6.
|
|
4472
|
+
version: "6.1.0",
|
|
4290
4473
|
category: "video",
|
|
4291
4474
|
location: "local",
|
|
4292
4475
|
summary: "Render an mp4 by composing an HTML/CSS/GSAP composition over upstream assets. Point `params.composition` at a directory containing `index.html` + `meta.json`. All variables are passed as primitives in `params` and substituted into the composition before render.",
|
|
4293
|
-
when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality
|
|
4476
|
+
when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality and worker count are fixed by the engine. Set `params.format` to `webm` or `mov` for a transparent (alpha) overlay to composite in another editor; defaults to `mp4`. Runs a pre-render `hyperframes lint`/`inspect` gate (overflow/structural errors block; contrast warns).",
|
|
4294
4477
|
inputs: HyperframeInputs,
|
|
4295
4478
|
params: HyperframeParams,
|
|
4296
4479
|
outputs: HyperframeOutputs,
|
|
@@ -4309,7 +4492,7 @@ var hyperframeRenderNode = defineNode({
|
|
|
4309
4492
|
const compositionDir = await resolveCompositionDir(params.composition);
|
|
4310
4493
|
const meta = await loadCompositionMeta(compositionDir);
|
|
4311
4494
|
const compositionParams = validateAndParseDynamicParams(meta, params);
|
|
4312
|
-
const tmp = await mkdtemp4(
|
|
4495
|
+
const tmp = await mkdtemp4(path10.join(tmpdir4(), "hf-render-"));
|
|
4313
4496
|
try {
|
|
4314
4497
|
await copyComposition(compositionDir, tmp);
|
|
4315
4498
|
await vendorGsap(tmp, ctx);
|
|
@@ -4317,15 +4500,16 @@ var hyperframeRenderNode = defineNode({
|
|
|
4317
4500
|
const duration = stagedDuration ?? meta.default_duration;
|
|
4318
4501
|
const substitutionValues = await buildSubstitutionValues(compositionParams, meta, duration);
|
|
4319
4502
|
await substituteCompositionFiles(tmp, substitutionValues);
|
|
4320
|
-
await
|
|
4321
|
-
|
|
4503
|
+
await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, duration);
|
|
4504
|
+
await runHyperframesCheck({ dir: tmp, nodeId: "hyperframe_render", ctx, timeoutMs: params.timeout_ms });
|
|
4505
|
+
const outputPath = path10.join(tmp, `output.${params.format}`);
|
|
4322
4506
|
await runRender({ tmp, outputPath, params, meta, ctx });
|
|
4323
4507
|
const bytes = await readFile7(outputPath);
|
|
4324
4508
|
ctx.log(`rendered ${bytes.length} bytes`);
|
|
4325
4509
|
const ref = await ctx.assets.ingestBytes({
|
|
4326
4510
|
bytes: Buffer.from(bytes),
|
|
4327
4511
|
kind: "video",
|
|
4328
|
-
mime:
|
|
4512
|
+
mime: MIME_BY_FORMAT[params.format],
|
|
4329
4513
|
metadata: {
|
|
4330
4514
|
width: meta.width,
|
|
4331
4515
|
height: meta.height,
|
|
@@ -4342,10 +4526,10 @@ var hyperframeRenderNode = defineNode({
|
|
|
4342
4526
|
}
|
|
4343
4527
|
});
|
|
4344
4528
|
async function resolveCompositionDir(composition) {
|
|
4345
|
-
const compositionPath =
|
|
4529
|
+
const compositionPath = path10.isAbsolute(composition) ? composition : path10.resolve(process.cwd(), composition);
|
|
4346
4530
|
const s = await stat5(compositionPath);
|
|
4347
4531
|
if (s.isDirectory()) return compositionPath;
|
|
4348
|
-
return
|
|
4532
|
+
return path10.dirname(compositionPath);
|
|
4349
4533
|
}
|
|
4350
4534
|
async function validateCompositionParams(rawParams) {
|
|
4351
4535
|
const issues = [];
|
|
@@ -4412,7 +4596,7 @@ async function copyComposition(srcDir, destDir) {
|
|
|
4412
4596
|
await cp(srcDir, destDir, {
|
|
4413
4597
|
recursive: true,
|
|
4414
4598
|
filter: (src) => {
|
|
4415
|
-
const name =
|
|
4599
|
+
const name = path10.basename(src);
|
|
4416
4600
|
if (name === ".cache" || name === "node_modules" || name === ".git") return false;
|
|
4417
4601
|
return true;
|
|
4418
4602
|
}
|
|
@@ -4421,7 +4605,7 @@ async function copyComposition(srcDir, destDir) {
|
|
|
4421
4605
|
async function vendorGsap(tmp, ctx) {
|
|
4422
4606
|
try {
|
|
4423
4607
|
const gsapMin = require_2.resolve("gsap/dist/gsap.min.js");
|
|
4424
|
-
await copyFile4(gsapMin,
|
|
4608
|
+
await copyFile4(gsapMin, path10.join(tmp, "gsap.min.js"));
|
|
4425
4609
|
} catch (e) {
|
|
4426
4610
|
ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
|
|
4427
4611
|
}
|
|
@@ -4436,7 +4620,7 @@ async function stageInputs2(tmp, inputs, meta, ctx) {
|
|
|
4436
4620
|
await stageAsset(ref, tmp, filename);
|
|
4437
4621
|
ctx.log(`staged ${spec.kind} \u2192 ${filename}`);
|
|
4438
4622
|
if (spec.kind === "video" && primaryDuration === null) {
|
|
4439
|
-
primaryDuration = await probeDurationSeconds(
|
|
4623
|
+
primaryDuration = await probeDurationSeconds(path10.join(tmp, filename));
|
|
4440
4624
|
}
|
|
4441
4625
|
}
|
|
4442
4626
|
return primaryDuration;
|
|
@@ -4482,7 +4666,7 @@ function coerceImageParam(value) {
|
|
|
4482
4666
|
throw new Error("hyperframe_render: image param must be a URL string or AssetRef");
|
|
4483
4667
|
}
|
|
4484
4668
|
async function substituteCompositionFiles(tmp, values) {
|
|
4485
|
-
const entryPath =
|
|
4669
|
+
const entryPath = path10.join(tmp, "index.html");
|
|
4486
4670
|
const original = await readFile7(entryPath, "utf-8");
|
|
4487
4671
|
const { output, missing } = substituteVariables(original, values);
|
|
4488
4672
|
if (missing.length > 0) {
|
|
@@ -4490,26 +4674,7 @@ async function substituteCompositionFiles(tmp, values) {
|
|
|
4490
4674
|
`hyperframe_render: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}. Add to params or to meta.json's params with a default.`
|
|
4491
4675
|
);
|
|
4492
4676
|
}
|
|
4493
|
-
await
|
|
4494
|
-
}
|
|
4495
|
-
async function ensureMetaJson(tmp, nodeId, meta, duration) {
|
|
4496
|
-
const metaPath = path9.join(tmp, "meta.json");
|
|
4497
|
-
await writeFile4(
|
|
4498
|
-
metaPath,
|
|
4499
|
-
JSON.stringify(
|
|
4500
|
-
{
|
|
4501
|
-
id: nodeId,
|
|
4502
|
-
name: meta.id,
|
|
4503
|
-
duration,
|
|
4504
|
-
width: meta.width,
|
|
4505
|
-
height: meta.height,
|
|
4506
|
-
fps: meta.fps
|
|
4507
|
-
},
|
|
4508
|
-
null,
|
|
4509
|
-
2
|
|
4510
|
-
),
|
|
4511
|
-
"utf-8"
|
|
4512
|
-
);
|
|
4677
|
+
await writeFile5(entryPath, output, "utf-8");
|
|
4513
4678
|
}
|
|
4514
4679
|
var MAX_WORKERS = 4;
|
|
4515
4680
|
function workerCount() {
|
|
@@ -4517,10 +4682,10 @@ function workerCount() {
|
|
|
4517
4682
|
}
|
|
4518
4683
|
async function runRender(opts) {
|
|
4519
4684
|
const { tmp, outputPath, params, meta, ctx } = opts;
|
|
4520
|
-
const args = buildRenderArgs(tmp, outputPath, meta);
|
|
4521
|
-
ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps from ${
|
|
4685
|
+
const args = buildRenderArgs(tmp, outputPath, meta, params.format);
|
|
4686
|
+
ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps ${params.format} from ${path10.basename(tmp)}`);
|
|
4522
4687
|
try {
|
|
4523
|
-
await
|
|
4688
|
+
await execFileAsync2("npx", args, { timeout: params.timeout_ms, maxBuffer: 64 * 1024 * 1024 });
|
|
4524
4689
|
} catch (e) {
|
|
4525
4690
|
const stderr = e.stderr ?? "";
|
|
4526
4691
|
const stdout = e.stdout ?? "";
|
|
@@ -4530,7 +4695,7 @@ async function runRender(opts) {
|
|
|
4530
4695
|
${friendly ?? detail.slice(0, 4e3)}`);
|
|
4531
4696
|
}
|
|
4532
4697
|
}
|
|
4533
|
-
function buildRenderArgs(tmp, outputPath, meta) {
|
|
4698
|
+
function buildRenderArgs(tmp, outputPath, meta, format) {
|
|
4534
4699
|
return [
|
|
4535
4700
|
"hyperframes",
|
|
4536
4701
|
"render",
|
|
@@ -4542,13 +4707,13 @@ function buildRenderArgs(tmp, outputPath, meta) {
|
|
|
4542
4707
|
"--quality",
|
|
4543
4708
|
"high",
|
|
4544
4709
|
"--format",
|
|
4545
|
-
|
|
4710
|
+
format,
|
|
4546
4711
|
"--workers",
|
|
4547
4712
|
String(workerCount())
|
|
4548
4713
|
];
|
|
4549
4714
|
}
|
|
4550
4715
|
async function probeDurationSeconds(filePath) {
|
|
4551
|
-
const { stdout } = await
|
|
4716
|
+
const { stdout } = await execFileAsync2(
|
|
4552
4717
|
"ffprobe",
|
|
4553
4718
|
["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", filePath],
|
|
4554
4719
|
{ encoding: "utf-8" }
|
|
@@ -4561,14 +4726,14 @@ async function probeDurationSeconds(filePath) {
|
|
|
4561
4726
|
}
|
|
4562
4727
|
|
|
4563
4728
|
// src/engine/nodes/local/hyperframe-snapshot.ts
|
|
4564
|
-
import { execFile as
|
|
4565
|
-
import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as readFile8, rm as rm5, writeFile as
|
|
4729
|
+
import { execFile as execFile5 } from "child_process";
|
|
4730
|
+
import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as readFile8, rm as rm5, writeFile as writeFile6 } from "fs/promises";
|
|
4566
4731
|
import { createRequire as createRequire3 } from "module";
|
|
4567
4732
|
import { tmpdir as tmpdir5 } from "os";
|
|
4568
|
-
import
|
|
4569
|
-
import { promisify as
|
|
4733
|
+
import path11 from "path";
|
|
4734
|
+
import { promisify as promisify5 } from "util";
|
|
4570
4735
|
import { z as z11 } from "zod";
|
|
4571
|
-
var _execFileAsync =
|
|
4736
|
+
var _execFileAsync = promisify5(execFile5);
|
|
4572
4737
|
var require_3 = createRequire3(import.meta.url);
|
|
4573
4738
|
var WaitForSpec = z11.discriminatedUnion("kind", [
|
|
4574
4739
|
z11.object({ kind: z11.literal("auto") }),
|
|
@@ -4589,7 +4754,7 @@ var NODE_OWNED_PARAM_KEYS2 = /* @__PURE__ */ new Set(["composition", "wait_for",
|
|
|
4589
4754
|
var DEVICE_SCALE_FACTOR2 = 2;
|
|
4590
4755
|
var hyperframeSnapshotNode = defineNode({
|
|
4591
4756
|
id: "hyperframe_snapshot",
|
|
4592
|
-
version: "4.
|
|
4757
|
+
version: "4.1.0",
|
|
4593
4758
|
category: "image",
|
|
4594
4759
|
location: "local",
|
|
4595
4760
|
summary: "Render an HTML/CSS composition to a static PNG via headless Chromium at 2x device-scale (retina). Same composition model as `hyperframe_render` \u2014 point `params.composition` at a directory containing `index.html` + `meta.json`.",
|
|
@@ -4612,14 +4777,22 @@ var hyperframeSnapshotNode = defineNode({
|
|
|
4612
4777
|
const compositionDir = await resolveCompositionDir(params.composition);
|
|
4613
4778
|
const meta = await loadCompositionMeta(compositionDir);
|
|
4614
4779
|
const compositionParams = validateAndParseDynamicParams2(meta, params);
|
|
4615
|
-
const tmp = await mkdtemp5(
|
|
4780
|
+
const tmp = await mkdtemp5(path11.join(tmpdir5(), "hf-snap-"));
|
|
4616
4781
|
try {
|
|
4617
4782
|
await copyComposition2(compositionDir, tmp);
|
|
4618
4783
|
await vendorGsap2(tmp, ctx);
|
|
4619
4784
|
await stageInputs3(tmp, inputs, meta, ctx);
|
|
4620
4785
|
const substitutionValues = await buildSubstitutionValues2(compositionParams, meta);
|
|
4621
4786
|
await substituteCompositionFiles2(tmp, substitutionValues);
|
|
4622
|
-
|
|
4787
|
+
await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, meta.default_duration);
|
|
4788
|
+
await runHyperframesCheck({
|
|
4789
|
+
dir: tmp,
|
|
4790
|
+
nodeId: "hyperframe_snapshot",
|
|
4791
|
+
ctx,
|
|
4792
|
+
timeoutMs: params.timeout_ms,
|
|
4793
|
+
samples: 1
|
|
4794
|
+
});
|
|
4795
|
+
const entryPath = path11.join(tmp, "index.html");
|
|
4623
4796
|
const entryUrl = `file://${entryPath}`;
|
|
4624
4797
|
ctx.log(`snapshotting ${meta.width}x${meta.height}@${DEVICE_SCALE_FACTOR2}x wait=${params.wait_for.kind}`);
|
|
4625
4798
|
const pwSpecifier = ["play", "wright"].join("");
|
|
@@ -4680,7 +4853,7 @@ async function copyComposition2(srcDir, destDir) {
|
|
|
4680
4853
|
await cp(srcDir, destDir, {
|
|
4681
4854
|
recursive: true,
|
|
4682
4855
|
filter: (src) => {
|
|
4683
|
-
const name =
|
|
4856
|
+
const name = path11.basename(src);
|
|
4684
4857
|
if (name === ".cache" || name === "node_modules" || name === ".git") return false;
|
|
4685
4858
|
return true;
|
|
4686
4859
|
}
|
|
@@ -4689,7 +4862,7 @@ async function copyComposition2(srcDir, destDir) {
|
|
|
4689
4862
|
async function vendorGsap2(tmp, ctx) {
|
|
4690
4863
|
try {
|
|
4691
4864
|
const gsapMin = require_3.resolve("gsap/dist/gsap.min.js");
|
|
4692
|
-
await copyFile5(gsapMin,
|
|
4865
|
+
await copyFile5(gsapMin, path11.join(tmp, "gsap.min.js"));
|
|
4693
4866
|
} catch (e) {
|
|
4694
4867
|
ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
|
|
4695
4868
|
}
|
|
@@ -4723,7 +4896,7 @@ function coerceImageParam2(value) {
|
|
|
4723
4896
|
throw new Error("hyperframe_snapshot: image param must be a URL string or AssetRef");
|
|
4724
4897
|
}
|
|
4725
4898
|
async function substituteCompositionFiles2(tmp, values) {
|
|
4726
|
-
const entryPath =
|
|
4899
|
+
const entryPath = path11.join(tmp, "index.html");
|
|
4727
4900
|
const original = await readFile8(entryPath, "utf-8");
|
|
4728
4901
|
const { output, missing } = substituteVariables(original, values);
|
|
4729
4902
|
if (missing.length > 0) {
|
|
@@ -4731,7 +4904,7 @@ async function substituteCompositionFiles2(tmp, values) {
|
|
|
4731
4904
|
`hyperframe_snapshot: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}.`
|
|
4732
4905
|
);
|
|
4733
4906
|
}
|
|
4734
|
-
await
|
|
4907
|
+
await writeFile6(entryPath, output, "utf-8");
|
|
4735
4908
|
}
|
|
4736
4909
|
async function waitForReady(page, waitFor, timeoutMs) {
|
|
4737
4910
|
switch (waitFor.kind) {
|
|
@@ -4765,10 +4938,10 @@ async function waitForReady(page, waitFor, timeoutMs) {
|
|
|
4765
4938
|
}
|
|
4766
4939
|
|
|
4767
4940
|
// src/engine/nodes/local/imagemagick.ts
|
|
4768
|
-
import { execFile as
|
|
4769
|
-
import { promisify as
|
|
4941
|
+
import { execFile as execFile6 } from "child_process";
|
|
4942
|
+
import { promisify as promisify6 } from "util";
|
|
4770
4943
|
import { z as z12 } from "zod";
|
|
4771
|
-
var
|
|
4944
|
+
var execFileAsync3 = promisify6(execFile6);
|
|
4772
4945
|
var OutputDecl2 = z12.object({
|
|
4773
4946
|
kind: z12.enum(["image", "video", "audio"]),
|
|
4774
4947
|
ext: z12.string().min(1).max(8)
|
|
@@ -4784,7 +4957,7 @@ async function resolveBin() {
|
|
|
4784
4957
|
if (resolvedBin) return resolvedBin;
|
|
4785
4958
|
for (const candidate of ["magick", "convert"]) {
|
|
4786
4959
|
try {
|
|
4787
|
-
await
|
|
4960
|
+
await execFileAsync3(candidate, ["-version"], { encoding: "utf-8" });
|
|
4788
4961
|
resolvedBin = candidate;
|
|
4789
4962
|
return candidate;
|
|
4790
4963
|
} catch {
|
|
@@ -4834,34 +5007,60 @@ var textNode = defineNode({
|
|
|
4834
5007
|
execute: ({ params }) => Promise.resolve({ text: params.value })
|
|
4835
5008
|
});
|
|
4836
5009
|
|
|
4837
|
-
// src/engine/nodes/remote/
|
|
5010
|
+
// src/engine/nodes/remote/audioVoiceConvert.ts
|
|
4838
5011
|
import { z as z14 } from "zod";
|
|
4839
|
-
var
|
|
4840
|
-
|
|
4841
|
-
|
|
5012
|
+
var AudioVoiceConvertParams = z14.object({
|
|
5013
|
+
model: z14.literal("elevenlabs/eleven_multilingual_sts_v2"),
|
|
5014
|
+
/** Target voice id. Splice an upstream `voice_select` via `"{{voice_ref}}"`. */
|
|
5015
|
+
voice: z14.string().min(1),
|
|
5016
|
+
output_format: z14.string().optional(),
|
|
5017
|
+
/** Strip the source clip's background noise before re-voicing. */
|
|
5018
|
+
remove_background_noise: z14.boolean().optional()
|
|
5019
|
+
}).strict();
|
|
5020
|
+
var audioVoiceConvertNode = delegated({
|
|
5021
|
+
id: "audio_voice_convert",
|
|
5022
|
+
version: "1.0.0",
|
|
5023
|
+
category: "audio",
|
|
5024
|
+
summary: "Voice Changer / speech-to-speech via ElevenLabs (eleven_multilingual_sts_v2). Re-voices an existing audio clip in a TARGET voice while preserving timing/prosody.",
|
|
5025
|
+
when_to_use: 'Use to normalize a generator-chosen voice (e.g. a Seedance talking-head clip\'s native audio) into ONE consistent brand voice across every scene \u2014 the cadence is preserved so any lip-sync stays valid. Wire `inputs.voice_ref: $ref:<voice_select>.voice_id` and set `params.voice: "{{voice_ref}}"`.',
|
|
5026
|
+
inputs: z14.object({
|
|
5027
|
+
audio: AudioRef,
|
|
5028
|
+
voice_ref: TextRef.optional()
|
|
5029
|
+
}).strict(),
|
|
5030
|
+
params: AudioVoiceConvertParams,
|
|
5031
|
+
outputs: z14.object({ audio: AudioRef }).strict(),
|
|
5032
|
+
outputKinds: { audio: "audio" },
|
|
5033
|
+
cost: () => ({ credits: 1, seconds_estimate: 20 })
|
|
5034
|
+
});
|
|
5035
|
+
|
|
5036
|
+
// src/engine/nodes/remote/dialogue.ts
|
|
5037
|
+
import { z as z15 } from "zod";
|
|
5038
|
+
var DialogueInput = z15.object({
|
|
5039
|
+
text: z15.string().min(1),
|
|
5040
|
+
voice_id: z15.string().min(1)
|
|
4842
5041
|
});
|
|
4843
5042
|
var DIALOGUE_MODELS = ["elevenlabs/eleven_v3"];
|
|
4844
|
-
var DialogueParams =
|
|
4845
|
-
model:
|
|
5043
|
+
var DialogueParams = z15.object({
|
|
5044
|
+
model: z15.enum(DIALOGUE_MODELS),
|
|
4846
5045
|
/**
|
|
4847
5046
|
* Ordered list of lines, each tagged with the voice that should speak it.
|
|
4848
5047
|
* Up to 10 unique voice_ids; total text across all lines should stay under
|
|
4849
5048
|
* ~2000 characters for best quality (ElevenLabs guidance).
|
|
4850
5049
|
*/
|
|
4851
|
-
inputs:
|
|
4852
|
-
language_code:
|
|
5050
|
+
inputs: z15.array(DialogueInput).min(1).max(50),
|
|
5051
|
+
language_code: z15.string().optional(),
|
|
4853
5052
|
/** ElevenLabs voice/model settings passthrough (e.g. `{ stability: 0.5 }`). */
|
|
4854
|
-
settings:
|
|
4855
|
-
seed:
|
|
4856
|
-
apply_text_normalization:
|
|
5053
|
+
settings: z15.record(z15.string(), z15.unknown()).optional(),
|
|
5054
|
+
seed: z15.number().int().min(0).max(4294967295).optional(),
|
|
5055
|
+
apply_text_normalization: z15.enum(["auto", "on", "off"]).optional(),
|
|
4857
5056
|
/**
|
|
4858
5057
|
* When true, hits `/v1/text-to-dialogue/with-timestamps` and emits a
|
|
4859
5058
|
* separate `timestamps` output — character-level alignment plus
|
|
4860
5059
|
* per-voice segment markers usable for captions, lipsync, or
|
|
4861
5060
|
* beat-matched cuts in ad creatives.
|
|
4862
5061
|
*/
|
|
4863
|
-
with_timestamps:
|
|
4864
|
-
output_format:
|
|
5062
|
+
with_timestamps: z15.boolean().optional(),
|
|
5063
|
+
output_format: z15.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
|
|
4865
5064
|
}).strict().refine((p) => p.inputs.reduce((sum, line) => sum + line.text.length, 0) <= ELEVENLABS_MAX_TEXT_CHARS, {
|
|
4866
5065
|
message: `total dialogue text exceeds ${ELEVENLABS_MAX_TEXT_CHARS} characters`,
|
|
4867
5066
|
path: ["inputs"]
|
|
@@ -4872,9 +5071,9 @@ var dialogueNode = delegated({
|
|
|
4872
5071
|
category: "audio",
|
|
4873
5072
|
summary: "Multi-voice dialogue / VO with ElevenLabs Eleven v3. Each line is tagged with a `voice_id`, so you can render two-character scripts (e.g. ad VO + customer testimonial reaction) in a single call. Setting `with_timestamps: true` adds character-level alignment for caption rendering and lipsync-friendly cuts.",
|
|
4874
5073
|
when_to_use: "Use for any ad creative or website video VO that needs more than narration \u2014 interviews, two-actor scripts, character ads, testimonial reads. For single-voice flat reads the existing `tts` node is cheaper and simpler; reach for `dialogue` when you need multiple speakers in one stitched track or word-level timing for downstream lipsync / captions.",
|
|
4875
|
-
inputs:
|
|
5074
|
+
inputs: z15.object({}).loose(),
|
|
4876
5075
|
params: DialogueParams,
|
|
4877
|
-
outputs:
|
|
5076
|
+
outputs: z15.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
|
|
4878
5077
|
outputKinds: { audio: "audio", timestamps: "json" },
|
|
4879
5078
|
cost: ({ params }) => {
|
|
4880
5079
|
const chars = params.inputs.reduce((sum, line) => sum + line.text.length, 0);
|
|
@@ -4883,7 +5082,7 @@ var dialogueNode = delegated({
|
|
|
4883
5082
|
});
|
|
4884
5083
|
|
|
4885
5084
|
// src/engine/nodes/remote/image.ts
|
|
4886
|
-
import { z as
|
|
5085
|
+
import { z as z16 } from "zod";
|
|
4887
5086
|
var IMAGE_GENERATE_MODELS2 = [
|
|
4888
5087
|
"openai/gpt-5.4-image-2",
|
|
4889
5088
|
"google/gemini-3.5-flash",
|
|
@@ -4891,16 +5090,16 @@ var IMAGE_GENERATE_MODELS2 = [
|
|
|
4891
5090
|
"google/gemini-3-pro-image-preview",
|
|
4892
5091
|
"recraft/recraft-v4.1-pro-vector"
|
|
4893
5092
|
];
|
|
4894
|
-
var ImageGenerateParams =
|
|
4895
|
-
model:
|
|
4896
|
-
prompt:
|
|
4897
|
-
aspect_ratio:
|
|
4898
|
-
image_size:
|
|
5093
|
+
var ImageGenerateParams = z16.object({
|
|
5094
|
+
model: z16.enum(IMAGE_GENERATE_MODELS2),
|
|
5095
|
+
prompt: z16.string().min(1),
|
|
5096
|
+
aspect_ratio: z16.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
|
|
5097
|
+
image_size: z16.enum(["0.5K", "1K", "2K", "4K"]).optional(),
|
|
4899
5098
|
// Recraft v4 vector controls — forwarded into `image_config`. Registry
|
|
4900
5099
|
// rejects them on non-Recraft models.
|
|
4901
|
-
strength:
|
|
4902
|
-
rgb_colors:
|
|
4903
|
-
background_rgb_color:
|
|
5100
|
+
strength: z16.number().min(0).max(1).optional(),
|
|
5101
|
+
rgb_colors: z16.array(z16.array(z16.number().int().min(0).max(255))).optional(),
|
|
5102
|
+
background_rgb_color: z16.array(z16.number().int().min(0).max(255)).optional()
|
|
4904
5103
|
}).strict();
|
|
4905
5104
|
var imageGenerateNode = delegated({
|
|
4906
5105
|
id: "image_generate",
|
|
@@ -4910,22 +5109,22 @@ var imageGenerateNode = delegated({
|
|
|
4910
5109
|
when_to_use: "Use for hero shots, product photography, illustrations, and vector logos. `recraft/recraft-v4.1-pro-vector` for crisp vector / logo work; `openai/gpt-5.4-image-2` for photorealistic; Gemini variants for fast iteration and editing via the `reference` input. `reference` accepts ONE image or an ARRAY of images \u2014 wire several to combine references in a single generation (e.g. a subject sheet + a font specimen + the original ad). Every reference is forwarded to the model in array order.",
|
|
4911
5110
|
// `reference` is one image or an ordered array of images. The backend forwards
|
|
4912
5111
|
// each as a separate `image_url` to the provider (OpenRouter accepts many).
|
|
4913
|
-
inputs:
|
|
5112
|
+
inputs: z16.object({ reference: z16.union([ImageRef, z16.array(ImageRef).min(1)]).optional() }).loose(),
|
|
4914
5113
|
params: ImageGenerateParams,
|
|
4915
|
-
outputs:
|
|
5114
|
+
outputs: z16.object({ images: z16.array(ImageRef).min(1) }).strict(),
|
|
4916
5115
|
outputKinds: { images: "image" },
|
|
4917
5116
|
cost: () => ({ credits: 5, seconds_estimate: 10 })
|
|
4918
5117
|
});
|
|
4919
5118
|
|
|
4920
5119
|
// src/engine/nodes/remote/imageAspectAdapt.ts
|
|
4921
|
-
import { z as
|
|
5120
|
+
import { z as z17 } from "zod";
|
|
4922
5121
|
var ASPECT_ADAPT_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
|
|
4923
5122
|
var ASPECT_ADAPT_FORMATS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"];
|
|
4924
|
-
var ImageAspectAdaptParams =
|
|
4925
|
-
model:
|
|
4926
|
-
formats:
|
|
4927
|
-
guidance:
|
|
4928
|
-
image_size:
|
|
5123
|
+
var ImageAspectAdaptParams = z17.object({
|
|
5124
|
+
model: z17.enum(ASPECT_ADAPT_MODELS),
|
|
5125
|
+
formats: z17.array(z17.enum(ASPECT_ADAPT_FORMATS)).min(1).max(6).refine((formats) => new Set(formats).size === formats.length, { message: "formats must be unique" }),
|
|
5126
|
+
guidance: z17.string().min(1).optional(),
|
|
5127
|
+
image_size: z17.enum(["0.5K", "1K", "2K", "4K"]).optional()
|
|
4929
5128
|
}).strict();
|
|
4930
5129
|
var imageAspectAdaptNode = delegated({
|
|
4931
5130
|
id: "image_aspect_adapt",
|
|
@@ -4933,9 +5132,9 @@ var imageAspectAdaptNode = delegated({
|
|
|
4933
5132
|
category: "image",
|
|
4934
5133
|
summary: "Adapt ONE creative into multiple aspect ratios (Meta: 9:16 stories, 1:1 feed, 4:5, 16:9\u2026) in a single step. AI recomposes the layout per format \u2014 identical subject, text, logos, colors, and style; the scene is extended/restructured, never stretched or cropped. Formats that already match the source ratio pass through unchanged at zero cost. Outputs are ordered exactly as `formats`.",
|
|
4935
5134
|
when_to_use: "Use after a hero creative exists (image_generate, ingest, image_search) to fan it out to every placement format \u2014 wire the creative into `source` and list the target ratios in `formats`. Cost is estimated per format; formats matching the source ratio are free pass-throughs. Pick `google/gemini-3.1-flash-image-preview` (Nano Banana flash) while iterating, `google/gemini-3-pro-image-preview` (Nano Banana Pro) for final-quality adaptation.",
|
|
4936
|
-
inputs:
|
|
5135
|
+
inputs: z17.object({ source: ImageRef }).loose(),
|
|
4937
5136
|
params: ImageAspectAdaptParams,
|
|
4938
|
-
outputs:
|
|
5137
|
+
outputs: z17.object({ images: z17.array(ImageRef).min(1) }).strict(),
|
|
4939
5138
|
outputKinds: { images: "image" },
|
|
4940
5139
|
cost: ({ params }) => {
|
|
4941
5140
|
const p = params;
|
|
@@ -4948,12 +5147,12 @@ var imageAspectAdaptNode = delegated({
|
|
|
4948
5147
|
});
|
|
4949
5148
|
|
|
4950
5149
|
// src/engine/nodes/remote/imageBackgroundRemove.ts
|
|
4951
|
-
import { z as
|
|
4952
|
-
var ImageBackgroundRemoveParams =
|
|
4953
|
-
model:
|
|
4954
|
-
model_variant:
|
|
4955
|
-
operating_resolution:
|
|
4956
|
-
mask_only:
|
|
5150
|
+
import { z as z18 } from "zod";
|
|
5151
|
+
var ImageBackgroundRemoveParams = z18.object({
|
|
5152
|
+
model: z18.literal("fal/birefnet-v2").optional().default("fal/birefnet-v2"),
|
|
5153
|
+
model_variant: z18.enum(["General Use (Light)", "General Use (Heavy)", "Matting", "Portrait", "DIS", "HRSOD", "COD"]).optional().default("General Use (Light)"),
|
|
5154
|
+
operating_resolution: z18.enum(["1024x1024", "2048x2048", "2304x2304"]).optional(),
|
|
5155
|
+
mask_only: z18.boolean().optional().default(false)
|
|
4957
5156
|
}).strict();
|
|
4958
5157
|
var imageBackgroundRemoveNode = delegated({
|
|
4959
5158
|
id: "image_background_remove",
|
|
@@ -4961,11 +5160,11 @@ var imageBackgroundRemoveNode = delegated({
|
|
|
4961
5160
|
category: "image",
|
|
4962
5161
|
summary: "Remove the background from an image and return a transparent PNG (or the segmentation mask). Powered by fal.ai `fal-ai/birefnet/v2`.",
|
|
4963
5162
|
when_to_use: "Use to extract subjects from photos for use as overlays in hyperframe compositions, product shots, or compositing pipelines. Set `mask_only:true` to return the binary mask instead of the alpha-cut image.",
|
|
4964
|
-
inputs:
|
|
5163
|
+
inputs: z18.object({
|
|
4965
5164
|
image: ImageRef
|
|
4966
5165
|
}).strict(),
|
|
4967
5166
|
params: ImageBackgroundRemoveParams,
|
|
4968
|
-
outputs:
|
|
5167
|
+
outputs: z18.object({
|
|
4969
5168
|
image: ImageRef,
|
|
4970
5169
|
mask: ImageRef.optional()
|
|
4971
5170
|
}).strict(),
|
|
@@ -4974,7 +5173,7 @@ var imageBackgroundRemoveNode = delegated({
|
|
|
4974
5173
|
});
|
|
4975
5174
|
|
|
4976
5175
|
// src/engine/nodes/remote/imageDescribe.ts
|
|
4977
|
-
import { z as
|
|
5176
|
+
import { z as z19 } from "zod";
|
|
4978
5177
|
var IMAGE_DESCRIBE_MODELS = ["~google/gemini-pro-latest", "~google/gemini-flash-latest"];
|
|
4979
5178
|
var imageDescribeNode = delegated({
|
|
4980
5179
|
id: "image_describe",
|
|
@@ -4982,31 +5181,31 @@ var imageDescribeNode = delegated({
|
|
|
4982
5181
|
category: "vision",
|
|
4983
5182
|
summary: "Reverse-engineer an image into an exhaustive, replication-grade JSON description: who the advertiser is and what they sell (source_context), composition, non-person subjects with expression/treatment, deeply detailed people, brand-identified logos (named by brand, not appearance), camera optics, lighting, color palette WITH per-color brand-ownership (brand vs borrowed-functional) and purpose, materials, visible text, ad signals (proof badges/CTA/price), the persuasion engine (ad_intent), style, post-processing.",
|
|
4984
5183
|
when_to_use: 'Use to turn a reference image into a structured blueprint you can inject into downstream prompts via `{{slot}}` \u2014 e.g. restyle a competitor ad onto your own product, lock a look across a series, or feed exact palette/lighting into image_generate. Purpose-built for market adaptation: logos are identified by brand ("Trustpilot", never "green star"), people and animals carry expression/emotion/intent detail, and each color is tagged brand vs borrowed-functional so a recolor can keep the reds/yellows that do a job. The extraction prompt is baked in; use `focus` to emphasise aspects and `context` to pass known provenance (advertiser, category, market) so source_context and color ownership are grounded. Pick `~google/gemini-pro-latest` for the densest extraction (recommended for ad / market-adaptation passes), `~google/gemini-flash-latest` for cheap/fast passes. The output is rich \u2014 raise `max_tokens` (e.g. 8000+) for dense ads so the JSON isn\'t truncated.',
|
|
4985
|
-
inputs:
|
|
4986
|
-
params:
|
|
4987
|
-
model:
|
|
4988
|
-
focus:
|
|
4989
|
-
context:
|
|
4990
|
-
temperature:
|
|
4991
|
-
max_tokens:
|
|
5184
|
+
inputs: z19.object({ image: ImageRef }).loose(),
|
|
5185
|
+
params: z19.object({
|
|
5186
|
+
model: z19.enum(IMAGE_DESCRIBE_MODELS),
|
|
5187
|
+
focus: z19.string().optional(),
|
|
5188
|
+
context: z19.string().optional(),
|
|
5189
|
+
temperature: z19.number().min(0).max(2).optional(),
|
|
5190
|
+
max_tokens: z19.number().int().positive().optional()
|
|
4992
5191
|
}).strict(),
|
|
4993
|
-
outputs:
|
|
5192
|
+
outputs: z19.object({ description: JsonRef }).strict(),
|
|
4994
5193
|
outputKinds: { description: "json" },
|
|
4995
5194
|
cost: () => ({ credits: 2, seconds_estimate: 10 })
|
|
4996
5195
|
});
|
|
4997
5196
|
|
|
4998
5197
|
// src/engine/nodes/remote/imageReferenceSheet.ts
|
|
4999
|
-
import { z as
|
|
5198
|
+
import { z as z20 } from "zod";
|
|
5000
5199
|
var REFERENCE_SHEET_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
|
|
5001
|
-
var ImageReferenceSheetParams =
|
|
5002
|
-
model:
|
|
5003
|
-
subject_description:
|
|
5004
|
-
subject_type:
|
|
5005
|
-
views:
|
|
5006
|
-
style:
|
|
5007
|
-
prompt_override:
|
|
5008
|
-
aspect_ratio:
|
|
5009
|
-
image_size:
|
|
5200
|
+
var ImageReferenceSheetParams = z20.object({
|
|
5201
|
+
model: z20.enum(REFERENCE_SHEET_MODELS),
|
|
5202
|
+
subject_description: z20.string().min(1),
|
|
5203
|
+
subject_type: z20.enum(["character", "person", "product"]),
|
|
5204
|
+
views: z20.array(z20.string().min(1)).min(2).max(6).optional(),
|
|
5205
|
+
style: z20.string().optional(),
|
|
5206
|
+
prompt_override: z20.string().min(1).optional(),
|
|
5207
|
+
aspect_ratio: z20.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
|
|
5208
|
+
image_size: z20.enum(["0.5K", "1K", "2K", "4K"]).optional()
|
|
5010
5209
|
}).strict();
|
|
5011
5210
|
var imageReferenceSheetNode = delegated({
|
|
5012
5211
|
id: "image_reference_sheet",
|
|
@@ -5014,9 +5213,9 @@ var imageReferenceSheetNode = delegated({
|
|
|
5014
5213
|
category: "image",
|
|
5015
5214
|
summary: "Fuse 1\u20136 images of a single subject (person, character, or product) into ONE multi-view reference sheet \u2014 a labeled turnaround grid (FRONT / SIDE / BACK\u2026) in consistent style and lighting. Curated models: Gemini 3 Pro Image (best fusion + labels), Gemini 3.1 Flash Image (cheap iteration).",
|
|
5016
5215
|
when_to_use: "Use before image_generate / video_generate when a subject must stay consistent across many creatives \u2014 wire the `sheet` output into their `reference` input instead of re-describing the subject per prompt. `subject_description` should be the exact wording you reuse downstream. Pick `google/gemini-3-pro-image-preview` for final 6-view sheets at 2K+, `google/gemini-3.1-flash-image-preview` while iterating.",
|
|
5017
|
-
inputs:
|
|
5216
|
+
inputs: z20.object({ references: z20.array(ImageRef).min(1).max(6) }).loose(),
|
|
5018
5217
|
params: ImageReferenceSheetParams,
|
|
5019
|
-
outputs:
|
|
5218
|
+
outputs: z20.object({ sheet: ImageRef }).strict(),
|
|
5020
5219
|
outputKinds: { sheet: "image" },
|
|
5021
5220
|
cost: ({ params }) => ({
|
|
5022
5221
|
credits: params?.model === "google/gemini-3-pro-image-preview" ? 20 : 5,
|
|
@@ -5025,10 +5224,10 @@ var imageReferenceSheetNode = delegated({
|
|
|
5025
5224
|
});
|
|
5026
5225
|
|
|
5027
5226
|
// src/engine/nodes/remote/imageSearch.ts
|
|
5028
|
-
import { z as
|
|
5029
|
-
var ImageSearchParams =
|
|
5030
|
-
prompt:
|
|
5031
|
-
count:
|
|
5227
|
+
import { z as z21 } from "zod";
|
|
5228
|
+
var ImageSearchParams = z21.object({
|
|
5229
|
+
prompt: z21.string().min(1),
|
|
5230
|
+
count: z21.number().int().min(1).max(20).default(5)
|
|
5032
5231
|
}).strict();
|
|
5033
5232
|
var imageSearchNode = delegated({
|
|
5034
5233
|
id: "image_search",
|
|
@@ -5036,15 +5235,15 @@ var imageSearchNode = delegated({
|
|
|
5036
5235
|
category: "image",
|
|
5037
5236
|
summary: "Agentic image search across Google Images, stock photography (Freepik), and Pinterest. An LLM agent picks the search tools and queries, selects the best matches, and the results are downloaded into canvas assets.",
|
|
5038
5237
|
when_to_use: "Use to gather real-world reference or inspiration images for a prompt (e.g. several photos of an australian shepherd) so a later step or the user can pick the best one. Not for creating new imagery \u2014 use image_generate for that.",
|
|
5039
|
-
inputs:
|
|
5238
|
+
inputs: z21.object({}).loose(),
|
|
5040
5239
|
params: ImageSearchParams,
|
|
5041
|
-
outputs:
|
|
5240
|
+
outputs: z21.object({ images: z21.array(ImageRef).min(1) }).strict(),
|
|
5042
5241
|
outputKinds: { images: "image" },
|
|
5043
5242
|
cost: ({ params }) => ({ credits: Math.ceil(2 + params.count / 2), seconds_estimate: 30 })
|
|
5044
5243
|
});
|
|
5045
5244
|
|
|
5046
5245
|
// src/engine/nodes/remote/imageSelect.ts
|
|
5047
|
-
import { z as
|
|
5246
|
+
import { z as z22 } from "zod";
|
|
5048
5247
|
var IMAGE_SELECT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
|
|
5049
5248
|
var imageSelectNode = delegated({
|
|
5050
5249
|
id: "image_select",
|
|
@@ -5052,15 +5251,15 @@ var imageSelectNode = delegated({
|
|
|
5052
5251
|
category: "vision",
|
|
5053
5252
|
summary: "Pick the best `count` images out of 2+ candidates with a vision LLM, judged against a prompt. Outputs a passthrough subset of the input refs (no new pixels) plus the model's comparative reasoning.",
|
|
5054
5253
|
when_to_use: "Use after fanning out several image_generate variants (or any pool of 2+ images) to keep only the strongest before expensive downstream steps \u2014 video generation, reference sheets, final delivery. `count` fixes the output size, so `images#0`\u2026`images#count-1` are always safe to wire. Pick `~google/gemini-flash-latest` for cheap/fast picks and `~google/gemini-pro-latest` for harder aesthetic judgement.",
|
|
5055
|
-
inputs:
|
|
5056
|
-
params:
|
|
5057
|
-
model:
|
|
5058
|
-
prompt:
|
|
5059
|
-
count:
|
|
5060
|
-
temperature:
|
|
5061
|
-
max_tokens:
|
|
5254
|
+
inputs: z22.object({ images: z22.array(ImageRef).min(2) }).loose(),
|
|
5255
|
+
params: z22.object({
|
|
5256
|
+
model: z22.enum(IMAGE_SELECT_MODELS),
|
|
5257
|
+
prompt: z22.string().min(1),
|
|
5258
|
+
count: z22.number().int().min(1).default(1),
|
|
5259
|
+
temperature: z22.number().min(0).max(2).optional(),
|
|
5260
|
+
max_tokens: z22.number().int().positive().optional()
|
|
5062
5261
|
}).strict(),
|
|
5063
|
-
outputs:
|
|
5262
|
+
outputs: z22.object({ images: z22.array(ImageRef).min(1), reasoning: TextRef }).strict(),
|
|
5064
5263
|
outputKinds: { images: "image", reasoning: "text" },
|
|
5065
5264
|
cost: () => ({ credits: 1, seconds_estimate: 5 }),
|
|
5066
5265
|
// Arity is only knowable at validate time when `images` is a literal array
|
|
@@ -5085,34 +5284,34 @@ var imageSelectNode = delegated({
|
|
|
5085
5284
|
});
|
|
5086
5285
|
|
|
5087
5286
|
// src/engine/nodes/remote/music.ts
|
|
5088
|
-
import { z as
|
|
5287
|
+
import { z as z23 } from "zod";
|
|
5089
5288
|
var MUSIC_MODELS = ["elevenlabs/music-v1", "elevenlabs/video-background-music-v1"];
|
|
5090
|
-
var MusicParams =
|
|
5091
|
-
model:
|
|
5289
|
+
var MusicParams = z23.object({
|
|
5290
|
+
model: z23.enum(MUSIC_MODELS),
|
|
5092
5291
|
/** Free-form prompt. Used by `elevenlabs/music-v1` (compose-detailed). */
|
|
5093
|
-
prompt:
|
|
5292
|
+
prompt: z23.string().optional(),
|
|
5094
5293
|
/**
|
|
5095
5294
|
* Structured composition plan (intro / hook / verse / outro sections with
|
|
5096
5295
|
* per-section styles + durations). Mutually exclusive with `prompt`.
|
|
5097
5296
|
*/
|
|
5098
|
-
composition_plan:
|
|
5297
|
+
composition_plan: z23.record(z23.string(), z23.unknown()).optional(),
|
|
5099
5298
|
/** Target length when using `prompt`. 3000–454545ms (capped by the $10 per-node cost limit). */
|
|
5100
|
-
music_length_ms:
|
|
5101
|
-
seed:
|
|
5299
|
+
music_length_ms: z23.number().int().min(3e3).max(ELEVENLABS_MAX_MUSIC_LENGTH_MS).optional(),
|
|
5300
|
+
seed: z23.number().int().optional(),
|
|
5102
5301
|
/** Prompt mode only — forces an instrumental (no vocals) track. */
|
|
5103
|
-
force_instrumental:
|
|
5302
|
+
force_instrumental: z23.boolean().optional(),
|
|
5104
5303
|
/** composition_plan only — honor exact section durations. */
|
|
5105
|
-
respect_sections_durations:
|
|
5304
|
+
respect_sections_durations: z23.boolean().optional(),
|
|
5106
5305
|
/** Emit word-level timestamps alongside the audio. */
|
|
5107
|
-
with_timestamps:
|
|
5306
|
+
with_timestamps: z23.boolean().optional(),
|
|
5108
5307
|
/**
|
|
5109
5308
|
* video-to-music only — short description of the desired score
|
|
5110
5309
|
* ("upbeat synth, fast cuts, 80s") used to bias the model.
|
|
5111
5310
|
*/
|
|
5112
|
-
description:
|
|
5311
|
+
description: z23.string().max(1e3).optional(),
|
|
5113
5312
|
/** video-to-music only — up to 10 style tags. */
|
|
5114
|
-
tags:
|
|
5115
|
-
output_format:
|
|
5313
|
+
tags: z23.array(z23.string()).max(10).optional(),
|
|
5314
|
+
output_format: z23.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
|
|
5116
5315
|
}).strict();
|
|
5117
5316
|
var musicNode = delegated({
|
|
5118
5317
|
id: "music",
|
|
@@ -5120,9 +5319,9 @@ var musicNode = delegated({
|
|
|
5120
5319
|
category: "audio",
|
|
5121
5320
|
summary: "Generate music for ad creatives and website video content. `elevenlabs/music-v1` composes from a text prompt or structured composition plan; `elevenlabs/video-background-music-v1` scores an existing video clip provided via `inputs.video`.",
|
|
5122
5321
|
when_to_use: "Use to produce background music or a full score for video ads, hero-section reels, or any motion content. Prefer the video-to-music model when you already have a cut and want music timed to it; use compose-detailed when you have only a brief or want section-level control (intro / hook / outro). Pair the resulting audio with `video_generate` or `video_lipsync` at compose time.",
|
|
5123
|
-
inputs:
|
|
5322
|
+
inputs: z23.object({ video: VideoRef.optional() }).loose(),
|
|
5124
5323
|
params: MusicParams,
|
|
5125
|
-
outputs:
|
|
5324
|
+
outputs: z23.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
|
|
5126
5325
|
outputKinds: { audio: "audio", timestamps: "json" },
|
|
5127
5326
|
cost: ({ params }) => {
|
|
5128
5327
|
const seconds = params.music_length_ms ? Math.ceil(params.music_length_ms / 1e3) : 30;
|
|
@@ -5153,25 +5352,25 @@ var musicNode = delegated({
|
|
|
5153
5352
|
});
|
|
5154
5353
|
|
|
5155
5354
|
// src/engine/nodes/remote/soundEffect.ts
|
|
5156
|
-
import { z as
|
|
5355
|
+
import { z as z24 } from "zod";
|
|
5157
5356
|
var SOUND_EFFECT_MODELS = ["elevenlabs/eleven_text_to_sound_v2"];
|
|
5158
|
-
var SoundEffectParams =
|
|
5159
|
-
model:
|
|
5357
|
+
var SoundEffectParams = z24.object({
|
|
5358
|
+
model: z24.enum(SOUND_EFFECT_MODELS),
|
|
5160
5359
|
/** Prompt describing the SFX ("metal door slam", "soft UI tap", "ocean waves"). */
|
|
5161
|
-
text:
|
|
5360
|
+
text: z24.string().min(1),
|
|
5162
5361
|
/**
|
|
5163
5362
|
* Target length in seconds. 0.5–30. Leave unset to let the model pick the
|
|
5164
5363
|
* natural length for the described effect.
|
|
5165
5364
|
*/
|
|
5166
|
-
duration_seconds:
|
|
5365
|
+
duration_seconds: z24.number().min(0.5).max(30).optional(),
|
|
5167
5366
|
/**
|
|
5168
5367
|
* 0–1. Higher = stick closer to the prompt at the cost of variety; lower
|
|
5169
5368
|
* = let the model interpret more freely. Defaults to 0.3 on the provider.
|
|
5170
5369
|
*/
|
|
5171
|
-
prompt_influence:
|
|
5370
|
+
prompt_influence: z24.number().min(0).max(1).optional(),
|
|
5172
5371
|
/** Only valid on `eleven_text_to_sound_v2` — produce a seamless loop. */
|
|
5173
|
-
loop:
|
|
5174
|
-
output_format:
|
|
5372
|
+
loop: z24.boolean().optional(),
|
|
5373
|
+
output_format: z24.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
|
|
5175
5374
|
}).strict();
|
|
5176
5375
|
var soundEffectNode = delegated({
|
|
5177
5376
|
id: "sound_effect",
|
|
@@ -5179,9 +5378,9 @@ var soundEffectNode = delegated({
|
|
|
5179
5378
|
category: "audio",
|
|
5180
5379
|
summary: "Generate short sound effects from a text prompt via ElevenLabs Text-to-Sound. Use for whooshes, impacts, UI clicks, ambient beds, or signature stingers in ad creatives and product videos.",
|
|
5181
5380
|
when_to_use: "Reach for this when you need a punch-in SFX layered against `video_generate` or `hyperframe_render` output \u2014 e.g. a logo whoosh on a hero shot, a click on a CTA cut, a swelling ambient bed under VO. Set `loop: true` for atmospheric beds that need to tile under longer footage; leave `duration_seconds` unset and the model picks a natural length.",
|
|
5182
|
-
inputs:
|
|
5381
|
+
inputs: z24.object({}).loose(),
|
|
5183
5382
|
params: SoundEffectParams,
|
|
5184
|
-
outputs:
|
|
5383
|
+
outputs: z24.object({ audio: AudioRef }).strict(),
|
|
5185
5384
|
outputKinds: { audio: "audio" },
|
|
5186
5385
|
cost: ({ params }) => {
|
|
5187
5386
|
const seconds = params.duration_seconds ?? 5;
|
|
@@ -5190,7 +5389,7 @@ var soundEffectNode = delegated({
|
|
|
5190
5389
|
});
|
|
5191
5390
|
|
|
5192
5391
|
// src/engine/nodes/remote/textGenerate.ts
|
|
5193
|
-
import { z as
|
|
5392
|
+
import { z as z25 } from "zod";
|
|
5194
5393
|
var TEXT_GENERATE_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
|
|
5195
5394
|
var textGenerateNode = delegated({
|
|
5196
5395
|
id: "text_generate",
|
|
@@ -5198,58 +5397,58 @@ var textGenerateNode = delegated({
|
|
|
5198
5397
|
category: "language",
|
|
5199
5398
|
summary: "Single-turn LLM text generation via OpenRouter. Returns a text response.",
|
|
5200
5399
|
when_to_use: 'Use for any short text generation step in a canvas \u2014 ad copy, hooks, headlines, JSON outputs for downstream nodes. Pick `~google/gemini-flash-latest` for cheap/fast work and `~google/gemini-pro-latest` for harder reasoning. When the output must be JSON for a downstream `{{slot}}` (e.g. the ad-blueprint transform), set `response_format: "json_object"` so the model returns clean JSON with no markdown fences or prose. Set `web_search: true` to let the model search the live web first (OpenRouter `:online`) \u2014 useful when the transform must adapt copy to the target brand\'s real facts (current pricing, the trust signals it actually has) rather than guess.',
|
|
5201
|
-
inputs:
|
|
5202
|
-
params:
|
|
5203
|
-
model:
|
|
5204
|
-
prompt:
|
|
5205
|
-
system:
|
|
5206
|
-
response_format:
|
|
5207
|
-
web_search:
|
|
5208
|
-
temperature:
|
|
5209
|
-
max_tokens:
|
|
5400
|
+
inputs: z25.object({}).loose(),
|
|
5401
|
+
params: z25.object({
|
|
5402
|
+
model: z25.enum(TEXT_GENERATE_MODELS),
|
|
5403
|
+
prompt: z25.string().min(1),
|
|
5404
|
+
system: z25.string().optional(),
|
|
5405
|
+
response_format: z25.enum(["text", "json_object"]).optional(),
|
|
5406
|
+
web_search: z25.boolean().optional(),
|
|
5407
|
+
temperature: z25.number().min(0).max(2).optional(),
|
|
5408
|
+
max_tokens: z25.number().int().positive().optional()
|
|
5210
5409
|
}).strict(),
|
|
5211
|
-
outputs:
|
|
5410
|
+
outputs: z25.object({ text: TextRef }).strict(),
|
|
5212
5411
|
outputKinds: { text: "text" },
|
|
5213
5412
|
cost: () => ({ credits: 1, seconds_estimate: 3 })
|
|
5214
5413
|
});
|
|
5215
5414
|
|
|
5216
5415
|
// src/engine/nodes/remote/tts.ts
|
|
5217
|
-
import { z as
|
|
5416
|
+
import { z as z26 } from "zod";
|
|
5218
5417
|
var TTS_MODELS = ["elevenlabs/eleven_v3"];
|
|
5219
|
-
var TtsVoiceSettings =
|
|
5220
|
-
stability:
|
|
5221
|
-
similarity_boost:
|
|
5222
|
-
style:
|
|
5223
|
-
use_speaker_boost:
|
|
5224
|
-
speed:
|
|
5418
|
+
var TtsVoiceSettings = z26.object({
|
|
5419
|
+
stability: z26.number().min(0).max(1).optional(),
|
|
5420
|
+
similarity_boost: z26.number().min(0).max(1).optional(),
|
|
5421
|
+
style: z26.number().min(0).max(1).optional(),
|
|
5422
|
+
use_speaker_boost: z26.boolean().optional(),
|
|
5423
|
+
speed: z26.number().min(0.25).max(4).optional()
|
|
5225
5424
|
}).strict();
|
|
5226
|
-
var TtsPronunciationLocator =
|
|
5227
|
-
pronunciation_dictionary_id:
|
|
5228
|
-
version_id:
|
|
5425
|
+
var TtsPronunciationLocator = z26.object({
|
|
5426
|
+
pronunciation_dictionary_id: z26.string().min(1),
|
|
5427
|
+
version_id: z26.string().nullable().optional()
|
|
5229
5428
|
}).strict();
|
|
5230
|
-
var TtsParams =
|
|
5231
|
-
model:
|
|
5232
|
-
text:
|
|
5233
|
-
voice:
|
|
5429
|
+
var TtsParams = z26.object({
|
|
5430
|
+
model: z26.enum(TTS_MODELS),
|
|
5431
|
+
text: z26.string().min(1).max(ELEVENLABS_MAX_TEXT_CHARS),
|
|
5432
|
+
voice: z26.string().min(1),
|
|
5234
5433
|
/** Provider output_format (mp3 family only — assets are stored as audio/mpeg). */
|
|
5235
|
-
output_format:
|
|
5236
|
-
seed:
|
|
5434
|
+
output_format: z26.enum(ELEVENLABS_OUTPUT_FORMATS).optional(),
|
|
5435
|
+
seed: z26.number().int().min(0).max(4294967295).optional(),
|
|
5237
5436
|
// Top-level shortcuts; structured form is `voice_settings`.
|
|
5238
|
-
stability:
|
|
5239
|
-
similarity_boost:
|
|
5437
|
+
stability: z26.number().min(0).max(1).optional(),
|
|
5438
|
+
similarity_boost: z26.number().min(0).max(1).optional(),
|
|
5240
5439
|
voice_settings: TtsVoiceSettings.optional(),
|
|
5241
5440
|
/** ISO 639-1 language code. eleven_v3 supports language hints. */
|
|
5242
|
-
language_code:
|
|
5243
|
-
pronunciation_dictionary_locators:
|
|
5244
|
-
apply_text_normalization:
|
|
5441
|
+
language_code: z26.string().optional(),
|
|
5442
|
+
pronunciation_dictionary_locators: z26.array(TtsPronunciationLocator).max(3).optional(),
|
|
5443
|
+
apply_text_normalization: z26.enum(["auto", "on", "off"]).optional(),
|
|
5245
5444
|
/** Currently Japanese-only. Adds latency. */
|
|
5246
|
-
apply_language_text_normalization:
|
|
5445
|
+
apply_language_text_normalization: z26.boolean().optional(),
|
|
5247
5446
|
/**
|
|
5248
5447
|
* When true, hits `/v1/text-to-speech/{voice_id}/with-timestamps` and
|
|
5249
5448
|
* adds a `timestamps` output (character-level alignment) for caption
|
|
5250
5449
|
* rendering, lipsync, and beat-matched cuts.
|
|
5251
5450
|
*/
|
|
5252
|
-
with_timestamps:
|
|
5451
|
+
with_timestamps: z26.boolean().optional()
|
|
5253
5452
|
}).strict();
|
|
5254
5453
|
var ttsNode = delegated({
|
|
5255
5454
|
id: "tts",
|
|
@@ -5257,9 +5456,9 @@ var ttsNode = delegated({
|
|
|
5257
5456
|
category: "audio",
|
|
5258
5457
|
summary: "Single-voice text-to-speech via ElevenLabs Eleven v3. Optional character-level timestamps for caption rendering and beat-matched cuts.",
|
|
5259
5458
|
when_to_use: "Use for single-speaker VO \u2014 ad reads, hero-section narration, product walkthroughs. Reach for `dialogue` when you need multiple voices in one stitched track. Set `with_timestamps: true` when downstream needs character-level alignment (captions, lipsync).",
|
|
5260
|
-
inputs:
|
|
5459
|
+
inputs: z26.object({}).loose(),
|
|
5261
5460
|
params: TtsParams,
|
|
5262
|
-
outputs:
|
|
5461
|
+
outputs: z26.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
|
|
5263
5462
|
outputKinds: { audio: "audio", timestamps: "json" },
|
|
5264
5463
|
cost: ({ params }) => ({
|
|
5265
5464
|
credits: Math.max(1, Math.ceil(params.text.length * 15e-4)),
|
|
@@ -5268,23 +5467,23 @@ var ttsNode = delegated({
|
|
|
5268
5467
|
});
|
|
5269
5468
|
|
|
5270
5469
|
// src/engine/nodes/remote/video.ts
|
|
5271
|
-
import { z as
|
|
5470
|
+
import { z as z27 } from "zod";
|
|
5272
5471
|
var VIDEO_GENERATE_MODELS = ["bytedance/seedance-2.0", "google/veo-3.1-fast"];
|
|
5273
|
-
var VideoGenerateParams =
|
|
5274
|
-
model:
|
|
5275
|
-
prompt:
|
|
5276
|
-
duration:
|
|
5277
|
-
resolution:
|
|
5472
|
+
var VideoGenerateParams = z27.object({
|
|
5473
|
+
model: z27.enum(VIDEO_GENERATE_MODELS),
|
|
5474
|
+
prompt: z27.string().min(1),
|
|
5475
|
+
duration: z27.number().int().positive().optional(),
|
|
5476
|
+
resolution: z27.string().optional(),
|
|
5278
5477
|
// Union of ratios accepted by at least one curated model (registry gates
|
|
5279
5478
|
// per-model). 3:2/2:3 are deliberately absent: no registered model takes them.
|
|
5280
|
-
aspect_ratio:
|
|
5281
|
-
generate_audio:
|
|
5282
|
-
seed:
|
|
5479
|
+
aspect_ratio: z27.enum(["16:9", "9:16", "1:1", "4:3", "3:4", "21:9", "9:21"]).optional(),
|
|
5480
|
+
generate_audio: z27.boolean().optional(),
|
|
5481
|
+
seed: z27.number().int().nonnegative().optional(),
|
|
5283
5482
|
// Veo-only passthroughs (routed via `provider.options.google-vertex.parameters`).
|
|
5284
|
-
negative_prompt:
|
|
5285
|
-
person_generation:
|
|
5286
|
-
enhance_prompt:
|
|
5287
|
-
conditioning_scale:
|
|
5483
|
+
negative_prompt: z27.string().optional(),
|
|
5484
|
+
person_generation: z27.string().optional(),
|
|
5485
|
+
enhance_prompt: z27.boolean().optional(),
|
|
5486
|
+
conditioning_scale: z27.number().optional()
|
|
5288
5487
|
}).strict();
|
|
5289
5488
|
var videoGenerateNode = delegated({
|
|
5290
5489
|
id: "video_generate",
|
|
@@ -5292,23 +5491,23 @@ var videoGenerateNode = delegated({
|
|
|
5292
5491
|
category: "video",
|
|
5293
5492
|
summary: "Generate video for ad creatives. Two curated models: `bytedance/seedance-2.0` (production quality, photorealistic humans via fal.ai) and `google/veo-3.1-fast` (cheap/fast for iteration and tests). Async with polling.",
|
|
5294
5493
|
when_to_use: "Use `bytedance/seedance-2.0` for final ad output (photoreal subjects, image-to-video with first/last frames). Use `google/veo-3.1-fast` while iterating to keep cost low. Each model has different supported durations, resolutions, and aspect ratios \u2014 see the README per-model section.",
|
|
5295
|
-
inputs:
|
|
5494
|
+
inputs: z27.object({
|
|
5296
5495
|
first_frame: ImageRef.optional(),
|
|
5297
5496
|
last_frame: ImageRef.optional(),
|
|
5298
5497
|
reference: ImageRef.optional()
|
|
5299
5498
|
}).loose(),
|
|
5300
5499
|
params: VideoGenerateParams,
|
|
5301
|
-
outputs:
|
|
5500
|
+
outputs: z27.object({ video: VideoRef }).strict(),
|
|
5302
5501
|
outputKinds: { video: "video" },
|
|
5303
5502
|
cost: () => ({ credits: 50, seconds_estimate: 120 })
|
|
5304
5503
|
});
|
|
5305
5504
|
|
|
5306
5505
|
// src/engine/nodes/remote/videoBackgroundRemove.ts
|
|
5307
|
-
import { z as
|
|
5308
|
-
var VideoBackgroundRemoveParams =
|
|
5309
|
-
model:
|
|
5310
|
-
edge_refinement:
|
|
5311
|
-
output_codec:
|
|
5506
|
+
import { z as z28 } from "zod";
|
|
5507
|
+
var VideoBackgroundRemoveParams = z28.object({
|
|
5508
|
+
model: z28.literal("fal/veed-video-background-removal").optional().default("fal/veed-video-background-removal"),
|
|
5509
|
+
edge_refinement: z28.boolean().optional().default(true),
|
|
5510
|
+
output_codec: z28.enum(["vp9", "h264"]).optional().default("vp9")
|
|
5312
5511
|
}).strict();
|
|
5313
5512
|
var videoBackgroundRemoveNode = delegated({
|
|
5314
5513
|
id: "video_background_remove",
|
|
@@ -5316,18 +5515,18 @@ var videoBackgroundRemoveNode = delegated({
|
|
|
5316
5515
|
category: "video",
|
|
5317
5516
|
summary: "Remove the background from a video and return a transparent VP9-with-alpha WebM (or H264 RGB+alpha pair). Drops directly into a hyperframe composition as `<video src='...'>` for chroma-keyed picture-in-picture overlays. Powered by fal.ai `veed/video-background-removal/fast`.",
|
|
5318
5517
|
when_to_use: "Use when you need a talking-head or subject to float over a custom background in a hyperframe composition. Pair with hyperframe_render(composition: screencast-with-talker) for screencast-with-narrator videos. Output is `video/webm` with alpha \u2014 feed straight into `<video src>` in a composition.",
|
|
5319
|
-
inputs:
|
|
5518
|
+
inputs: z28.object({
|
|
5320
5519
|
video: VideoRef
|
|
5321
5520
|
}).strict(),
|
|
5322
5521
|
params: VideoBackgroundRemoveParams,
|
|
5323
|
-
outputs:
|
|
5522
|
+
outputs: z28.object({ video: VideoRef }).strict(),
|
|
5324
5523
|
outputKinds: { video: "video" },
|
|
5325
5524
|
// $0.012 per 30 frames (edge refinement on) — assume ~30fps; refine via fal dashboard.
|
|
5326
5525
|
cost: () => ({ credits: 50, seconds_estimate: 60 })
|
|
5327
5526
|
});
|
|
5328
5527
|
|
|
5329
5528
|
// src/engine/nodes/remote/videoDeconstruct.ts
|
|
5330
|
-
import { z as
|
|
5529
|
+
import { z as z29 } from "zod";
|
|
5331
5530
|
var VIDEO_DECONSTRUCT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
|
|
5332
5531
|
var videoDeconstructNode = delegated({
|
|
5333
5532
|
id: "video_deconstruct",
|
|
@@ -5335,24 +5534,24 @@ var videoDeconstructNode = delegated({
|
|
|
5335
5534
|
category: "video",
|
|
5336
5535
|
summary: 'Deconstruct a video into a replication-grade blueprint: scene boundaries, the real start/end frame of every scene (extracted from the video as images), and an exhaustive JSON analysis \u2014 per-scene action detail, camera motion, generation-ready frame/motion prompts, overlay text with full typographic style, floating elements, deeply detailed cast (perceived demographics, ethnicity/skin-tone, styling, market-recasting notes), brand-identified logos (named by brand and what they signal, not by appearance, with on-screen timestamps), dialogue with voice descriptions, music spec, SFX list, plus a word-level transcript. `mode:"index"` is the cheap structure-first pass: scene boundaries + global blueprint only (one LLM call, no frames).',
|
|
5337
5536
|
when_to_use: 'Use to reverse-engineer a reference video (e.g. a competitor ad) so a new canvas can reproduce or remix it scene by scene. Agent loop: (1) optionally run `mode:"index"` to see the structure cheaply (scene count, boundaries, transcript) before planning; (2) run the full deconstruct; (3) read `analysis` and author the reproduction canvas. The blueprint maps 1:1 onto generation nodes: `analysis.scenes[i]` aligns positionally with `start_frames#i`/`end_frames#i`; per scene, `start_frame_prompt`/`end_frame_prompt` feed image_generate (overlay text is excluded from them by contract \u2014 recomposite it from `overlays`), `motion_prompt` + the two frames feed video_generate (first_frame/last_frame), `dialogue[].voice_description` casts tts/dialogue voices, `global.music.music_prompt` feeds music, `sfx[].sound_effect_prompt` feeds sound_effect, and `overlays`/`floating_elements` drive an ffmpeg/hyperframe overlay pass. Long videos (over ~8 min single-shot): run `mode:"index"` first, then several full nodes IN PARALLEL each with a `start_s`/`end_s` window (\u2264480s, snap edges to index scene boundaries), and merge by concatenating `analysis.scenes`; over-length errors include suggested windows. Inject fields into downstream prompts via `{{slot}}`. Pick `~google/gemini-pro-latest` for the densest extraction, `~google/gemini-flash-latest` for cheap/fast passes.',
|
|
5338
|
-
inputs:
|
|
5339
|
-
params:
|
|
5340
|
-
model:
|
|
5341
|
-
mode:
|
|
5342
|
-
language:
|
|
5343
|
-
max_scenes:
|
|
5344
|
-
focus:
|
|
5345
|
-
start_s:
|
|
5346
|
-
end_s:
|
|
5537
|
+
inputs: z29.object({ video: VideoRef }).loose(),
|
|
5538
|
+
params: z29.object({
|
|
5539
|
+
model: z29.enum(VIDEO_DECONSTRUCT_MODELS),
|
|
5540
|
+
mode: z29.enum(["full", "index"]).optional(),
|
|
5541
|
+
language: z29.string().min(2).max(8).optional(),
|
|
5542
|
+
max_scenes: z29.number().int().min(1).max(60).optional(),
|
|
5543
|
+
focus: z29.string().optional(),
|
|
5544
|
+
start_s: z29.number().min(0).optional(),
|
|
5545
|
+
end_s: z29.number().positive().optional(),
|
|
5347
5546
|
// Transcript provider for the blueprint's dialogue/transcript. Default
|
|
5348
5547
|
// Groq Whisper; "deepgram" routes to Nova-3 so words carry punctuation.
|
|
5349
|
-
transcriber:
|
|
5548
|
+
transcriber: z29.enum(["groq", "deepgram"]).optional()
|
|
5350
5549
|
}).strict(),
|
|
5351
|
-
outputs:
|
|
5550
|
+
outputs: z29.object({
|
|
5352
5551
|
analysis: JsonRef,
|
|
5353
5552
|
// Absent in mode:"index" (structure only, no Mux frame extraction).
|
|
5354
|
-
start_frames:
|
|
5355
|
-
end_frames:
|
|
5553
|
+
start_frames: z29.array(ImageRef).min(1).optional(),
|
|
5554
|
+
end_frames: z29.array(ImageRef).min(1).optional(),
|
|
5356
5555
|
transcript: JsonRef
|
|
5357
5556
|
}).strict(),
|
|
5358
5557
|
outputKinds: { analysis: "json", start_frames: "image", end_frames: "image", transcript: "json" },
|
|
@@ -5360,22 +5559,22 @@ var videoDeconstructNode = delegated({
|
|
|
5360
5559
|
});
|
|
5361
5560
|
|
|
5362
5561
|
// src/engine/nodes/remote/videoLipsync.ts
|
|
5363
|
-
import { z as
|
|
5364
|
-
var FalLipsyncParams =
|
|
5365
|
-
model:
|
|
5562
|
+
import { z as z30 } from "zod";
|
|
5563
|
+
var FalLipsyncParams = z30.object({
|
|
5564
|
+
model: z30.literal("fal/veed-lipsync")
|
|
5366
5565
|
}).strict();
|
|
5367
|
-
var VideoLipsyncParams =
|
|
5566
|
+
var VideoLipsyncParams = z30.discriminatedUnion("model", [FalLipsyncParams]);
|
|
5368
5567
|
var videoLipsyncNode = delegated({
|
|
5369
5568
|
id: "video_lipsync",
|
|
5370
5569
|
version: "1.0.0",
|
|
5371
5570
|
category: "video",
|
|
5372
5571
|
summary: "Lip-sync a video to an audio track. Currently backed by VEED via fal.ai (`fal/veed-lipsync`). $0.40/min of output.",
|
|
5373
|
-
inputs:
|
|
5572
|
+
inputs: z30.object({
|
|
5374
5573
|
video: VideoRef,
|
|
5375
5574
|
audio: AudioRef
|
|
5376
5575
|
}).strict(),
|
|
5377
5576
|
params: VideoLipsyncParams,
|
|
5378
|
-
outputs:
|
|
5577
|
+
outputs: z30.object({ video: VideoRef }).strict(),
|
|
5379
5578
|
outputKinds: { video: "video" },
|
|
5380
5579
|
cost: () => ({ credits: 20, seconds_estimate: 120 })
|
|
5381
5580
|
});
|
|
@@ -5383,15 +5582,15 @@ var videoLipsyncNode = delegated({
|
|
|
5383
5582
|
// src/engine/nodes/remote/videoTranscribe.ts
|
|
5384
5583
|
import { mkdtemp as mkdtemp6, readFile as readFile9, rm as rm6 } from "fs/promises";
|
|
5385
5584
|
import { tmpdir as tmpdir6 } from "os";
|
|
5386
|
-
import
|
|
5387
|
-
import { z as
|
|
5585
|
+
import path12 from "path";
|
|
5586
|
+
import { z as z31 } from "zod";
|
|
5388
5587
|
|
|
5389
5588
|
// src/engine/nodes/local/lib/ffmpeg.ts
|
|
5390
|
-
import { execFile as
|
|
5391
|
-
import { promisify as
|
|
5392
|
-
var
|
|
5589
|
+
import { execFile as execFile7 } from "child_process";
|
|
5590
|
+
import { promisify as promisify7 } from "util";
|
|
5591
|
+
var execFileAsync4 = promisify7(execFile7);
|
|
5393
5592
|
async function probeVideo(filePath) {
|
|
5394
|
-
const { stdout } = await
|
|
5593
|
+
const { stdout } = await execFileAsync4(
|
|
5395
5594
|
"ffprobe",
|
|
5396
5595
|
["-v", "error", "-show_streams", "-show_format", "-of", "json", filePath],
|
|
5397
5596
|
{ encoding: "utf-8", maxBuffer: 8 * 1024 * 1024 }
|
|
@@ -5449,7 +5648,7 @@ function parseFrameRate(rate) {
|
|
|
5449
5648
|
}
|
|
5450
5649
|
async function runFfmpeg(args, opts) {
|
|
5451
5650
|
try {
|
|
5452
|
-
await
|
|
5651
|
+
await execFileAsync4("ffmpeg", args, {
|
|
5453
5652
|
timeout: opts.timeout_ms,
|
|
5454
5653
|
maxBuffer: 64 * 1024 * 1024
|
|
5455
5654
|
});
|
|
@@ -5463,21 +5662,21 @@ ${detail.slice(-4e3)}`);
|
|
|
5463
5662
|
}
|
|
5464
5663
|
|
|
5465
5664
|
// src/engine/nodes/remote/videoTranscribe.ts
|
|
5466
|
-
var VideoTranscribeParams =
|
|
5467
|
-
language:
|
|
5665
|
+
var VideoTranscribeParams = z31.object({
|
|
5666
|
+
language: z31.string().min(2).max(8).optional(),
|
|
5468
5667
|
// Provider choice is explicit (no env-based silent branching). Default Groq
|
|
5469
5668
|
// Whisper; "deepgram" routes to Deepgram Nova-3, which additionally emits a
|
|
5470
5669
|
// `rich` JSON output with punctuated words + paragraph/sentence grouping.
|
|
5471
|
-
transcriber:
|
|
5670
|
+
transcriber: z31.enum(["groq", "deepgram"]).optional()
|
|
5472
5671
|
}).strict();
|
|
5473
|
-
var VideoTranscribeInputs =
|
|
5672
|
+
var VideoTranscribeInputs = z31.object({
|
|
5474
5673
|
video: VideoRef
|
|
5475
5674
|
}).strict();
|
|
5476
|
-
var VideoTranscribeOutputs =
|
|
5477
|
-
transcript:
|
|
5675
|
+
var VideoTranscribeOutputs = z31.object({
|
|
5676
|
+
transcript: z31.custom(),
|
|
5478
5677
|
// Only emitted by the Deepgram path: full punctuated words + paragraph /
|
|
5479
5678
|
// sentence grouping with speaker indices. Absent for the default Groq path.
|
|
5480
|
-
rich:
|
|
5679
|
+
rich: z31.custom().optional()
|
|
5481
5680
|
}).strict();
|
|
5482
5681
|
var AUDIO_EXTRACT_TIMEOUT_MS = 6e4;
|
|
5483
5682
|
var videoTranscribeNode = defineNode({
|
|
@@ -5515,8 +5714,8 @@ async function tryExtractAudio(inputs, ctx) {
|
|
|
5515
5714
|
ctx.log("video_transcribe: no audio track detected, sending full video");
|
|
5516
5715
|
return null;
|
|
5517
5716
|
}
|
|
5518
|
-
tmpDir = await mkdtemp6(
|
|
5519
|
-
const audioPath =
|
|
5717
|
+
tmpDir = await mkdtemp6(path12.join(tmpdir6(), "vtx-"));
|
|
5718
|
+
const audioPath = path12.join(tmpDir, "audio.mp3");
|
|
5520
5719
|
ctx.log("video_transcribe: extracting audio (mono 16kHz mp3)");
|
|
5521
5720
|
await runFfmpeg(
|
|
5522
5721
|
["-i", video.path, "-vn", "-ac", "1", "-ar", "16000", "-b:a", "64k", "-f", "mp3", "-y", audioPath],
|
|
@@ -5562,29 +5761,29 @@ async function tryExtractAudio(inputs, ctx) {
|
|
|
5562
5761
|
}
|
|
5563
5762
|
|
|
5564
5763
|
// src/engine/nodes/remote/voiceSelect.ts
|
|
5565
|
-
import { z as
|
|
5764
|
+
import { z as z32 } from "zod";
|
|
5566
5765
|
var voiceSelectNode = delegated({
|
|
5567
5766
|
id: "voice_select",
|
|
5568
5767
|
version: "1.0.0",
|
|
5569
5768
|
category: "audio",
|
|
5570
5769
|
summary: 'Cast an ElevenLabs voice from a natural-language description (e.g. "warm, authoritative female narrator, American accent"). Lists the account\'s voices and ranks them against the brief, emitting the best `voice_id` as a bare-string text asset plus a ranked `candidates` JSON.',
|
|
5571
5770
|
when_to_use: 'Use to turn a voice description (e.g. from a `video_deconstruct` blueprint\'s `voice_description`) into a usable ElevenLabs voice id, then feed it into a `tts` node by wiring `inputs.voice_ref: $ref:<this>.voice_id` and setting `params.voice: "{{voice_ref}}"` \u2014 the engine splices the id in at run time. Review `candidates` (json) to pick a different voice. Optional `gender`/`age`/`accent`/`language` hints sharpen the ranking.',
|
|
5572
|
-
inputs:
|
|
5573
|
-
params:
|
|
5574
|
-
description:
|
|
5575
|
-
gender:
|
|
5576
|
-
age:
|
|
5577
|
-
accent:
|
|
5578
|
-
language:
|
|
5579
|
-
limit:
|
|
5771
|
+
inputs: z32.object({}).loose(),
|
|
5772
|
+
params: z32.object({
|
|
5773
|
+
description: z32.string().min(1),
|
|
5774
|
+
gender: z32.string().optional(),
|
|
5775
|
+
age: z32.string().optional(),
|
|
5776
|
+
accent: z32.string().optional(),
|
|
5777
|
+
language: z32.string().optional(),
|
|
5778
|
+
limit: z32.number().int().min(1).max(20).optional()
|
|
5580
5779
|
}).strict(),
|
|
5581
|
-
outputs:
|
|
5780
|
+
outputs: z32.object({ voice_id: TextRef, candidates: JsonRef }).strict(),
|
|
5582
5781
|
outputKinds: { voice_id: "text", candidates: "json" },
|
|
5583
5782
|
cost: () => ({ credits: 0, seconds_estimate: 5 })
|
|
5584
5783
|
});
|
|
5585
5784
|
|
|
5586
5785
|
// src/engine/schema/catalog.ts
|
|
5587
|
-
import { z as
|
|
5786
|
+
import { z as z33 } from "zod";
|
|
5588
5787
|
function generateCatalog(registry, opts = {}) {
|
|
5589
5788
|
const entries = registry.all().map((def) => {
|
|
5590
5789
|
const cost = def.cost ? safeCost(def) : void 0;
|
|
@@ -5595,9 +5794,9 @@ function generateCatalog(registry, opts = {}) {
|
|
|
5595
5794
|
summary: def.summary,
|
|
5596
5795
|
when_to_use: def.when_to_use,
|
|
5597
5796
|
location: def.location,
|
|
5598
|
-
inputs:
|
|
5599
|
-
params:
|
|
5600
|
-
outputs:
|
|
5797
|
+
inputs: z33.toJSONSchema(def.inputs, { unrepresentable: "any" }),
|
|
5798
|
+
params: z33.toJSONSchema(def.params, { unrepresentable: "any" }),
|
|
5799
|
+
outputs: z33.toJSONSchema(def.outputs, { unrepresentable: "any" }),
|
|
5601
5800
|
cost_estimate_credits: cost?.credits,
|
|
5602
5801
|
runtime_estimate_seconds: cost?.seconds_estimate
|
|
5603
5802
|
};
|
|
@@ -5629,15 +5828,15 @@ function safeCost(def) {
|
|
|
5629
5828
|
|
|
5630
5829
|
// src/engine/storage/cache-store.ts
|
|
5631
5830
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
5632
|
-
import { mkdir as mkdir3, readFile as readFile10, rename as rename2, writeFile as
|
|
5633
|
-
import
|
|
5831
|
+
import { mkdir as mkdir3, readFile as readFile10, rename as rename2, writeFile as writeFile7 } from "fs/promises";
|
|
5832
|
+
import path13 from "path";
|
|
5634
5833
|
var LocalCacheStore = class {
|
|
5635
5834
|
rootDir;
|
|
5636
5835
|
constructor(rootDir) {
|
|
5637
5836
|
this.rootDir = rootDir;
|
|
5638
5837
|
}
|
|
5639
5838
|
filePath(cacheKey) {
|
|
5640
|
-
return
|
|
5839
|
+
return path13.join(this.rootDir, `${cacheKey}.json`);
|
|
5641
5840
|
}
|
|
5642
5841
|
async get(cacheKey) {
|
|
5643
5842
|
try {
|
|
@@ -5650,9 +5849,9 @@ var LocalCacheStore = class {
|
|
|
5650
5849
|
}
|
|
5651
5850
|
async put(entry) {
|
|
5652
5851
|
const dest = this.filePath(entry.cacheKey);
|
|
5653
|
-
await mkdir3(
|
|
5852
|
+
await mkdir3(path13.dirname(dest), { recursive: true });
|
|
5654
5853
|
const tmp = `${dest}.tmp-${process.pid}-${randomUUID2()}`;
|
|
5655
|
-
await
|
|
5854
|
+
await writeFile7(tmp, JSON.stringify(entry, null, 0));
|
|
5656
5855
|
await rename2(tmp, dest);
|
|
5657
5856
|
}
|
|
5658
5857
|
};
|
|
@@ -5686,6 +5885,7 @@ var REMOTE_NODES = [
|
|
|
5686
5885
|
imageSelectNode,
|
|
5687
5886
|
videoGenerateNode,
|
|
5688
5887
|
ttsNode,
|
|
5888
|
+
audioVoiceConvertNode,
|
|
5689
5889
|
musicNode,
|
|
5690
5890
|
dialogueNode,
|
|
5691
5891
|
soundEffectNode,
|
|
@@ -5703,14 +5903,14 @@ function defaultRegistry() {
|
|
|
5703
5903
|
}
|
|
5704
5904
|
function createEngineFromEnv(opts = {}) {
|
|
5705
5905
|
const cwd = opts.cwd ?? process.cwd();
|
|
5706
|
-
const cacheDir = opts.cacheDir ??
|
|
5707
|
-
const outputsDir = opts.outputsDir ??
|
|
5906
|
+
const cacheDir = opts.cacheDir ?? path14.join(cwd, "canvas", ".cache");
|
|
5907
|
+
const outputsDir = opts.outputsDir ?? path14.join(cwd, "canvas");
|
|
5708
5908
|
const creds = requireCredentialsFromEnv();
|
|
5709
5909
|
return new Engine({
|
|
5710
5910
|
registry: defaultRegistry(),
|
|
5711
5911
|
client: new BackendClient({ baseUrl: creds.url, apiKey: creds.apiKey }),
|
|
5712
|
-
assets: new LocalAssetStore(
|
|
5713
|
-
cache: new LocalCacheStore(
|
|
5912
|
+
assets: new LocalAssetStore(path14.join(cacheDir, "assets")),
|
|
5913
|
+
cache: new LocalCacheStore(path14.join(cacheDir, "index")),
|
|
5714
5914
|
outputsDir,
|
|
5715
5915
|
log: opts.log
|
|
5716
5916
|
});
|
|
@@ -5731,4 +5931,4 @@ export {
|
|
|
5731
5931
|
defaultRegistry,
|
|
5732
5932
|
createEngineFromEnv
|
|
5733
5933
|
};
|
|
5734
|
-
//# sourceMappingURL=chunk-
|
|
5934
|
+
//# sourceMappingURL=chunk-CCO34ACK.js.map
|