@koda-sl/baker-cli 0.74.0 → 0.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -621,7 +621,7 @@ ${originalIndentation}`;
621
621
  });
622
622
 
623
623
  // src/engine/index.ts
624
- import path13 from "path";
624
+ import path15 from "path";
625
625
 
626
626
  // src/engine/client/http.ts
627
627
  var BackendHttpError = class extends Error {
@@ -667,14 +667,14 @@ var HttpClient = class {
667
667
  this.fetchFn = opts.fetchFn ?? fetch;
668
668
  this.sleepFn = opts.sleepFn ?? ((ms) => new Promise((r) => setTimeout(r, ms)));
669
669
  }
670
- async postJson(path14, body, signal) {
671
- return await this.requestJson("POST", path14, body, signal);
670
+ async postJson(path16, body, signal) {
671
+ return await this.requestJson("POST", path16, body, signal);
672
672
  }
673
- async getJson(path14, signal) {
674
- return await this.requestJson("GET", path14, void 0, signal);
673
+ async getJson(path16, signal) {
674
+ return await this.requestJson("GET", path16, void 0, signal);
675
675
  }
676
- async requestJson(method, path14, body, signal) {
677
- const url = `${this.baseUrl}${path14.startsWith("/") ? path14 : `/${path14}`}`;
676
+ async requestJson(method, path16, body, signal) {
677
+ const url = `${this.baseUrl}${path16.startsWith("/") ? path16 : `/${path16}`}`;
678
678
  for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
679
679
  const outcome = await this.attempt(method, url, body, attempt, signal);
680
680
  if (outcome.kind === "value") return outcome.value;
@@ -786,8 +786,8 @@ var BackendClient = class {
786
786
  );
787
787
  }
788
788
  getArtifact(kind, name, version, signal) {
789
- const path14 = version ? `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}/${encodeURIComponent(version)}` : `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}`;
790
- return this.http.getJson(path14, signal);
789
+ const path16 = version ? `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}/${encodeURIComponent(version)}` : `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}`;
790
+ return this.http.getJson(path16, signal);
791
791
  }
792
792
  };
793
793
 
@@ -1242,6 +1242,21 @@ var MODEL_REGISTRY = {
1242
1242
  }
1243
1243
  }
1244
1244
  },
1245
+ audio_voice_convert: {
1246
+ "elevenlabs/eleven_multilingual_sts_v2": {
1247
+ // Speech-to-speech / Voice Changer: re-voice an existing audio clip in the
1248
+ // TARGET voice, preserving timing/prosody. Used to normalize a talking-head
1249
+ // clip's native (generator-chosen) voice into ONE consistent brand voice.
1250
+ label: "ElevenLabs Voice Changer (multilingual STS v2)",
1251
+ inputs: [{ kind: "audio", mimes: FAL_AUDIO_MIMES }],
1252
+ required: ["voice"],
1253
+ params: {
1254
+ voice: { kind: "string" },
1255
+ output_format: { kind: "string", enum: ELEVENLABS_OUTPUT_FORMATS },
1256
+ remove_background_noise: { kind: "boolean" }
1257
+ }
1258
+ }
1259
+ },
1245
1260
  music: {
1246
1261
  "elevenlabs/music-v1": {
1247
1262
  label: "ElevenLabs Music v1 (Compose)",
@@ -1512,11 +1527,30 @@ var VideoMeta = z.object({
1512
1527
  speaker: z.string().optional()
1513
1528
  })
1514
1529
  ).default([]),
1515
- // Scenes with a single on-camera speaker each MUST be lip-synced. Either a
1516
- // bare scene index (validator falls back to the scaffold naming convention) or
1517
- // `{ scene, lipsync_node }`, which names the exact node to look for so a
1518
- // hand-authored canvas with differently-named clip nodes isn't a false miss.
1519
- talking_scenes: z.array(z.union([z.number(), z.object({ scene: z.number(), lipsync_node: z.string() })])).default([])
1530
+ // Scenes with a single on-camera speaker, voiced NATIVELY by the video model
1531
+ // (Seedance generate_audio) then re-voiced to one brand voice. Each entry names
1532
+ // the `audio_voice_convert` node the validator confirms is present — the
1533
+ // native-audio replacement for the retired post-hoc lipsync check. A bare scene
1534
+ // index (legacy) or `{ scene, lipsync_node }` (legacy veed canvases) still parse.
1535
+ talking_scenes: z.array(
1536
+ z.union([
1537
+ z.number(),
1538
+ z.object({
1539
+ scene: z.number(),
1540
+ voice_convert_node: z.string(),
1541
+ // Advisory: the scene's visual length vs the estimated spoken length, so
1542
+ // a reviewer can see a native line that may run past its cut. Not gated.
1543
+ scene_s: z.number().optional(),
1544
+ est_speech_s: z.number().optional()
1545
+ }),
1546
+ z.object({ scene: z.number(), lipsync_node: z.string() })
1547
+ ])
1548
+ ).default([]),
1549
+ // Advisory, NOT gated by the validator: the reviewable "which graphic fires
1550
+ // on which spoken beat" map emitted by scaffold-video (per-scene window,
1551
+ // spoken line, storyboard frames, scheduled graphics). Free-form rows so the
1552
+ // schema stays decoupled from the scaffold's exact shape.
1553
+ motion_board: z.array(z.unknown()).optional()
1520
1554
  }).strict().optional();
1521
1555
  var CanvasMetadata = z.object({
1522
1556
  name: z.string().optional(),
@@ -2193,7 +2227,8 @@ function resolveRefKind(ctx, refStr) {
2193
2227
  if (!targetDef) return null;
2194
2228
  const targetParams = targetDef.params.safeParse(target.params ?? {});
2195
2229
  const resolvedKinds = resolveOutputKinds(targetDef.outputKinds, targetParams.success ? targetParams.data : {});
2196
- const kind = resolvedKinds[expr.output];
2230
+ const declaredKind = target.params?.outputs?.[expr.output]?.kind;
2231
+ const kind = resolvedKinds[expr.output] ?? declaredKind;
2197
2232
  return kind && MODEL_INPUT_KINDS.has(kind) ? kind : null;
2198
2233
  }
2199
2234
  function checkOneRef(ctx, n, i, refStr, jsonPath, field) {
@@ -2286,6 +2321,24 @@ function estimateCredits(ctx) {
2286
2321
  }
2287
2322
  return total;
2288
2323
  }
2324
+ function talkingSceneSatisfied(ctx, entry, scene) {
2325
+ const nodes = ctx.canvas.nodes;
2326
+ if (typeof entry === "object" && "voice_convert_node" in entry) {
2327
+ const clipNativeAudio = nodes.some(
2328
+ (n) => n.id === `s${scene}_clip` && n.type === "video_generate" && n.params?.generate_audio === true
2329
+ );
2330
+ const converted = nodes.some((n) => n.id === entry.voice_convert_node && n.type === "audio_voice_convert");
2331
+ return clipNativeAudio && converted;
2332
+ }
2333
+ if (typeof entry === "object") {
2334
+ return nodes.some((n) => n.id === entry.lipsync_node && n.type === "video_lipsync");
2335
+ }
2336
+ return nodes.some((n) => {
2337
+ if (n.type !== "video_lipsync") return false;
2338
+ const video = n.inputs?.video;
2339
+ return video === `$ref:s${scene}_trim.video` || video === `$ref:s${scene}_clip.video`;
2340
+ });
2341
+ }
2289
2342
  function checkVideoInvariants(ctx) {
2290
2343
  const meta = ctx.canvas.metadata?.video;
2291
2344
  if (!meta) return;
@@ -2312,16 +2365,11 @@ function checkVideoInvariants(ctx) {
2312
2365
  }
2313
2366
  for (const entry of meta.talking_scenes) {
2314
2367
  const scene = typeof entry === "number" ? entry : entry.scene;
2315
- const synced = typeof entry === "number" ? ctx.canvas.nodes.some((n) => {
2316
- if (n.type !== "video_lipsync") return false;
2317
- const video = n.inputs?.video;
2318
- return video === `$ref:s${scene}_trim.video` || video === `$ref:s${scene}_clip.video`;
2319
- }) : ctx.canvas.nodes.some((n) => n.id === entry.lipsync_node && n.type === "video_lipsync");
2320
- if (!synced) {
2368
+ if (!talkingSceneSatisfied(ctx, entry, scene)) {
2321
2369
  ctx.issues.push({
2322
2370
  path: "metadata.video.talking_scenes",
2323
2371
  code: STAGE_CODES.LIPSYNC_MISSING,
2324
- message: `scene ${scene} has a single on-camera speaker but no video_lipsync on s${scene}_clip \u2014 its mouth will drift out of sync with the voiceover`
2372
+ message: `scene ${scene} is a single-on-camera-speaker talking head but its clip lacks native audio (generate_audio) or the audio_voice_convert node is missing \u2014 the voice won't be brand-consistent / lips may drift`
2325
2373
  });
2326
2374
  }
2327
2375
  }
@@ -2355,9 +2403,9 @@ function checkOutputRef(ctx) {
2355
2403
  function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
2356
2404
  for (const issue of err.issues) {
2357
2405
  const tail2 = pathToString(issue.path);
2358
- const path14 = pathPrefix ? tail2 ? `${pathPrefix}.${tail2}` : pathPrefix : tail2;
2406
+ const path16 = pathPrefix ? tail2 ? `${pathPrefix}.${tail2}` : pathPrefix : tail2;
2359
2407
  issues.push({
2360
- path: path14,
2408
+ path: path16,
2361
2409
  code,
2362
2410
  message: issue.message,
2363
2411
  received: issue.code === "invalid_type" ? issue.received : void 0,
@@ -2366,8 +2414,8 @@ function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
2366
2414
  });
2367
2415
  }
2368
2416
  }
2369
- function pathToString(path14) {
2370
- return path14.map((p) => typeof p === "number" ? `[${p}]` : `.${String(p)}`).join("").replace(/^\./, "");
2417
+ function pathToString(path16) {
2418
+ return path16.map((p) => typeof p === "number" ? `[${p}]` : `.${String(p)}`).join("").replace(/^\./, "");
2371
2419
  }
2372
2420
  function buildDepGraph(canvas) {
2373
2421
  const graph = /* @__PURE__ */ new Map();
@@ -3995,12 +4043,12 @@ var fontSpecimenNode = defineNode({
3995
4043
  });
3996
4044
 
3997
4045
  // src/engine/nodes/local/hyperframe.ts
3998
- import { execFile as execFile3 } from "child_process";
3999
- import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as readFile7, rm as rm4, stat as stat5, writeFile as writeFile4 } from "fs/promises";
4046
+ import { execFile as execFile4 } from "child_process";
4047
+ import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as readFile8, rm as rm4, stat as stat5, writeFile as writeFile5 } from "fs/promises";
4000
4048
  import { createRequire as createRequire2 } from "module";
4001
4049
  import { cpus, tmpdir as tmpdir4 } from "os";
4002
- import path9 from "path";
4003
- import { promisify as promisify3 } from "util";
4050
+ import path11 from "path";
4051
+ import { promisify as promisify4 } from "util";
4004
4052
  import { z as z10 } from "zod";
4005
4053
 
4006
4054
  // src/engine/engine/composition-hash.ts
@@ -4194,6 +4242,148 @@ function defaultFilenameForInput(key, kind) {
4194
4242
  return `${key}.png`;
4195
4243
  }
4196
4244
 
4245
+ // src/engine/nodes/local/lib/hyperframe-check.ts
4246
+ import { execFile as execFile3 } from "child_process";
4247
+ import { readFile as readFile7 } from "fs/promises";
4248
+ import path9 from "path";
4249
+ import { promisify as promisify3 } from "util";
4250
+ var execFileAsync = promisify3(execFile3);
4251
+ var NEVER_BLOCK = [/contrast/i, /\bwcag\b/i, /missing_local_asset/i, /font[_-]?family/i, /font[_-]?face/i];
4252
+ var UNAVAILABLE = /unknown command|command not found|not found|Did you mean|Unknown argument|ENOENT/i;
4253
+ function isAdvisory(code, message) {
4254
+ const hay = `${code} ${message}`;
4255
+ return NEVER_BLOCK.some((re) => re.test(hay));
4256
+ }
4257
+ function parseCheckJson(raw) {
4258
+ if (!raw) return null;
4259
+ const trimmed = raw.trim();
4260
+ try {
4261
+ return JSON.parse(trimmed);
4262
+ } catch {
4263
+ }
4264
+ const start = trimmed.indexOf("{");
4265
+ const end = trimmed.lastIndexOf("}");
4266
+ if (start >= 0 && end > start) {
4267
+ try {
4268
+ return JSON.parse(trimmed.slice(start, end + 1));
4269
+ } catch {
4270
+ return null;
4271
+ }
4272
+ }
4273
+ return null;
4274
+ }
4275
+ function classifyLint(json) {
4276
+ const out = [];
4277
+ const findings = json?.findings;
4278
+ if (!Array.isArray(findings)) return out;
4279
+ for (const f of findings) {
4280
+ const rec = f;
4281
+ const code = String(rec?.code ?? "");
4282
+ const message = String(rec?.message ?? "");
4283
+ const severity = String(rec?.severity ?? "info");
4284
+ const blocking = severity === "error" && !isAdvisory(code, message);
4285
+ out.push({ source: "lint", code, message, severity: blocking ? "blocking" : "warning" });
4286
+ }
4287
+ return out;
4288
+ }
4289
+ function classifyInspect(json) {
4290
+ const out = [];
4291
+ const obj = json;
4292
+ const issues = obj?.issues;
4293
+ if (!Array.isArray(issues)) return out;
4294
+ for (const iss of issues) {
4295
+ const rec = iss;
4296
+ const code = String(rec?.code ?? rec?.type ?? "overflow");
4297
+ const message = String(rec?.message ?? rec?.detail ?? JSON.stringify(iss));
4298
+ const severity = rec?.severity ? String(rec.severity) : obj?.ok === false ? "error" : "warning";
4299
+ out.push({ source: "inspect", code, message, severity: severity === "error" ? "blocking" : "warning" });
4300
+ }
4301
+ return out;
4302
+ }
4303
+ function classifyCheckOutput(lintRaw, inspectRaw) {
4304
+ const findings = [...classifyLint(parseCheckJson(lintRaw)), ...classifyInspect(parseCheckJson(inspectRaw))];
4305
+ return {
4306
+ blocking: findings.filter((f) => f.severity === "blocking"),
4307
+ warnings: findings.filter((f) => f.severity === "warning")
4308
+ };
4309
+ }
4310
+ function buildLintArgs(dir) {
4311
+ return ["hyperframes", "lint", dir, "--json"];
4312
+ }
4313
+ function buildInspectArgs(dir, samples) {
4314
+ return ["hyperframes", "inspect", dir, "--json", "--samples", String(samples)];
4315
+ }
4316
+ function buildSnapshotArgs(dir, frames) {
4317
+ return ["hyperframes", "snapshot", dir, "--frames", String(frames), "--describe", "false"];
4318
+ }
4319
+ function usesNestedCompositions(indexHtml) {
4320
+ const withoutComments = indexHtml.replace(/<!--[\s\S]*?-->/g, "");
4321
+ return /data-composition-src\s*=/.test(withoutComments);
4322
+ }
4323
+ async function runOne(args, timeoutMs) {
4324
+ try {
4325
+ const { stdout } = await execFileAsync("npx", args, { timeout: timeoutMs, maxBuffer: 64 * 1024 * 1024 });
4326
+ return stdout;
4327
+ } catch (e) {
4328
+ const err = e;
4329
+ if (err.stdout?.includes("{")) return err.stdout;
4330
+ const blob = `${err.stderr ?? ""} ${err.message ?? ""}`;
4331
+ if (UNAVAILABLE.test(blob)) return null;
4332
+ return null;
4333
+ }
4334
+ }
4335
+ async function runSnapshotSmoke(args, timeoutMs) {
4336
+ try {
4337
+ await execFileAsync("npx", args, { timeout: timeoutMs, maxBuffer: 64 * 1024 * 1024 });
4338
+ return { ok: true, unavailable: false, message: "" };
4339
+ } catch (e) {
4340
+ const err = e;
4341
+ const blob = `${err.stderr ?? ""} ${err.message ?? ""}`;
4342
+ if (UNAVAILABLE.test(blob)) return { ok: false, unavailable: true, message: blob };
4343
+ return { ok: false, unavailable: false, message: (err.stderr || err.message || "snapshot failed").slice(0, 800) };
4344
+ }
4345
+ }
4346
+ async function runHyperframesCheck(opts) {
4347
+ const { dir, nodeId, ctx, timeoutMs, samples = 5 } = opts;
4348
+ const [lintRaw, inspectRaw] = await Promise.all([
4349
+ runOne(buildLintArgs(dir), timeoutMs),
4350
+ runOne(buildInspectArgs(dir, samples), timeoutMs)
4351
+ ]);
4352
+ if (lintRaw === null && inspectRaw === null) {
4353
+ ctx.log(`${nodeId}: hyperframes lint/inspect unavailable \u2014 skipping pre-render check`);
4354
+ return;
4355
+ }
4356
+ const { blocking, warnings } = classifyCheckOutput(lintRaw ?? "", inspectRaw ?? "");
4357
+ for (const w of warnings) {
4358
+ ctx.log(`${nodeId}: hyperframe check warning [${w.source}/${w.code}] ${w.message}`);
4359
+ }
4360
+ if (blocking.length > 0) {
4361
+ const detail = blocking.map((b) => `\u2022 [${b.source}/${b.code}] ${b.message}`).join("\n");
4362
+ throw new Error(`${nodeId}: pre-render check failed (${blocking.length} blocking)
4363
+ ${detail}`);
4364
+ }
4365
+ let indexHtml = "";
4366
+ try {
4367
+ indexHtml = await readFile7(path9.join(dir, "index.html"), "utf-8");
4368
+ } catch {
4369
+ indexHtml = "";
4370
+ }
4371
+ if (indexHtml && usesNestedCompositions(indexHtml)) {
4372
+ const snap = await runSnapshotSmoke(buildSnapshotArgs(dir, Math.min(samples, 3)), Math.max(timeoutMs, 12e4));
4373
+ if (snap.unavailable) {
4374
+ ctx.log(`${nodeId}: hyperframes snapshot unavailable \u2014 skipping nested-composition smoke test`);
4375
+ } else if (!snap.ok) {
4376
+ throw new Error(
4377
+ `${nodeId}: nested-composition smoke test failed \u2014 an embedded block did not render. Check the host\u2194block id match, that the block's <style>/<script> live inside its <template>, and that it styles #root (not a class).
4378
+ ${snap.message}`
4379
+ );
4380
+ } else {
4381
+ ctx.log(`${nodeId}: nested-composition smoke test passed`);
4382
+ }
4383
+ }
4384
+ ctx.log(`${nodeId}: pre-render check passed (${warnings.length} warning${warnings.length === 1 ? "" : "s"})`);
4385
+ }
4386
+
4197
4387
  // src/engine/nodes/local/lib/hyperframe-errors.ts
4198
4388
  var KNOWN_ERROR_PATTERNS = [
4199
4389
  {
@@ -4237,6 +4427,29 @@ ${stderr.slice(0, 1500)}`;
4237
4427
  return null;
4238
4428
  }
4239
4429
 
4430
+ // src/engine/nodes/local/lib/hyperframe-meta.ts
4431
+ import { writeFile as writeFile4 } from "fs/promises";
4432
+ import path10 from "path";
4433
+ async function ensureHyperframesMetaJson(tmp, nodeId, meta, duration) {
4434
+ const metaPath = path10.join(tmp, "meta.json");
4435
+ await writeFile4(
4436
+ metaPath,
4437
+ JSON.stringify(
4438
+ {
4439
+ id: nodeId,
4440
+ name: meta.id,
4441
+ duration,
4442
+ width: meta.width,
4443
+ height: meta.height,
4444
+ fps: meta.fps
4445
+ },
4446
+ null,
4447
+ 2
4448
+ ),
4449
+ "utf-8"
4450
+ );
4451
+ }
4452
+
4240
4453
  // src/engine/nodes/local/lib/templating.ts
4241
4454
  var PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
4242
4455
  function substituteVariables(source, values) {
@@ -4272,25 +4485,34 @@ function literalize(value) {
4272
4485
  }
4273
4486
 
4274
4487
  // src/engine/nodes/local/hyperframe.ts
4275
- var execFileAsync = promisify3(execFile3);
4488
+ var execFileAsync2 = promisify4(execFile4);
4276
4489
  var require_2 = createRequire2(import.meta.url);
4277
4490
  var HyperframeParams = z10.object({
4278
4491
  composition: z10.string().min(1),
4492
+ // Output container. mp4 (default) for delivery; webm/mov render WITH
4493
+ // transparency (alpha) when the composition background is transparent —
4494
+ // use for motion-graphic overlays dropped into Premiere/AE/Nuke.
4495
+ format: z10.enum(["mp4", "webm", "mov"]).optional().default("mp4"),
4279
4496
  timeout_ms: z10.number().int().positive().optional().default(10 * 60 * 1e3)
4280
4497
  }).catchall(z10.unknown());
4281
4498
  var HyperframeInputs = z10.record(z10.string(), z10.custom()).optional().default({});
4282
4499
  var HyperframeOutputs = z10.object({
4283
4500
  video: z10.custom()
4284
4501
  }).strict();
4285
- var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "timeout_ms"]);
4502
+ var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "format", "timeout_ms"]);
4503
+ var MIME_BY_FORMAT = {
4504
+ mp4: "video/mp4",
4505
+ webm: "video/webm",
4506
+ mov: "video/quicktime"
4507
+ };
4286
4508
  var ENGINE_INJECTED_TOKENS = /* @__PURE__ */ new Set(["duration"]);
4287
4509
  var hyperframeRenderNode = defineNode({
4288
4510
  id: "hyperframe_render",
4289
- version: "6.0.0",
4511
+ version: "6.1.0",
4290
4512
  category: "video",
4291
4513
  location: "local",
4292
4514
  summary: "Render an mp4 by composing an HTML/CSS/GSAP composition over upstream assets. Point `params.composition` at a directory containing `index.html` + `meta.json`. All variables are passed as primitives in `params` and substituted into the composition before render.",
4293
- when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality, format, and worker count are fixed by the engine for ad-creative delivery.",
4515
+ when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality and worker count are fixed by the engine. Set `params.format` to `webm` or `mov` for a transparent (alpha) overlay to composite in another editor; defaults to `mp4`. Runs a pre-render `hyperframes lint`/`inspect` gate (overflow/structural errors block; contrast warns).",
4294
4516
  inputs: HyperframeInputs,
4295
4517
  params: HyperframeParams,
4296
4518
  outputs: HyperframeOutputs,
@@ -4309,7 +4531,7 @@ var hyperframeRenderNode = defineNode({
4309
4531
  const compositionDir = await resolveCompositionDir(params.composition);
4310
4532
  const meta = await loadCompositionMeta(compositionDir);
4311
4533
  const compositionParams = validateAndParseDynamicParams(meta, params);
4312
- const tmp = await mkdtemp4(path9.join(tmpdir4(), "hf-render-"));
4534
+ const tmp = await mkdtemp4(path11.join(tmpdir4(), "hf-render-"));
4313
4535
  try {
4314
4536
  await copyComposition(compositionDir, tmp);
4315
4537
  await vendorGsap(tmp, ctx);
@@ -4317,15 +4539,16 @@ var hyperframeRenderNode = defineNode({
4317
4539
  const duration = stagedDuration ?? meta.default_duration;
4318
4540
  const substitutionValues = await buildSubstitutionValues(compositionParams, meta, duration);
4319
4541
  await substituteCompositionFiles(tmp, substitutionValues);
4320
- await ensureMetaJson(tmp, ctx.nodeId, meta, duration);
4321
- const outputPath = path9.join(tmp, "output.mp4");
4542
+ await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, duration);
4543
+ await runHyperframesCheck({ dir: tmp, nodeId: "hyperframe_render", ctx, timeoutMs: params.timeout_ms });
4544
+ const outputPath = path11.join(tmp, `output.${params.format}`);
4322
4545
  await runRender({ tmp, outputPath, params, meta, ctx });
4323
- const bytes = await readFile7(outputPath);
4546
+ const bytes = await readFile8(outputPath);
4324
4547
  ctx.log(`rendered ${bytes.length} bytes`);
4325
4548
  const ref = await ctx.assets.ingestBytes({
4326
4549
  bytes: Buffer.from(bytes),
4327
4550
  kind: "video",
4328
- mime: "video/mp4",
4551
+ mime: MIME_BY_FORMAT[params.format],
4329
4552
  metadata: {
4330
4553
  width: meta.width,
4331
4554
  height: meta.height,
@@ -4342,10 +4565,10 @@ var hyperframeRenderNode = defineNode({
4342
4565
  }
4343
4566
  });
4344
4567
  async function resolveCompositionDir(composition) {
4345
- const compositionPath = path9.isAbsolute(composition) ? composition : path9.resolve(process.cwd(), composition);
4568
+ const compositionPath = path11.isAbsolute(composition) ? composition : path11.resolve(process.cwd(), composition);
4346
4569
  const s = await stat5(compositionPath);
4347
4570
  if (s.isDirectory()) return compositionPath;
4348
- return path9.dirname(compositionPath);
4571
+ return path11.dirname(compositionPath);
4349
4572
  }
4350
4573
  async function validateCompositionParams(rawParams) {
4351
4574
  const issues = [];
@@ -4412,7 +4635,7 @@ async function copyComposition(srcDir, destDir) {
4412
4635
  await cp(srcDir, destDir, {
4413
4636
  recursive: true,
4414
4637
  filter: (src) => {
4415
- const name = path9.basename(src);
4638
+ const name = path11.basename(src);
4416
4639
  if (name === ".cache" || name === "node_modules" || name === ".git") return false;
4417
4640
  return true;
4418
4641
  }
@@ -4421,7 +4644,7 @@ async function copyComposition(srcDir, destDir) {
4421
4644
  async function vendorGsap(tmp, ctx) {
4422
4645
  try {
4423
4646
  const gsapMin = require_2.resolve("gsap/dist/gsap.min.js");
4424
- await copyFile4(gsapMin, path9.join(tmp, "gsap.min.js"));
4647
+ await copyFile4(gsapMin, path11.join(tmp, "gsap.min.js"));
4425
4648
  } catch (e) {
4426
4649
  ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
4427
4650
  }
@@ -4436,7 +4659,7 @@ async function stageInputs2(tmp, inputs, meta, ctx) {
4436
4659
  await stageAsset(ref, tmp, filename);
4437
4660
  ctx.log(`staged ${spec.kind} \u2192 ${filename}`);
4438
4661
  if (spec.kind === "video" && primaryDuration === null) {
4439
- primaryDuration = await probeDurationSeconds(path9.join(tmp, filename));
4662
+ primaryDuration = await probeDurationSeconds(path11.join(tmp, filename));
4440
4663
  }
4441
4664
  }
4442
4665
  return primaryDuration;
@@ -4482,34 +4705,15 @@ function coerceImageParam(value) {
4482
4705
  throw new Error("hyperframe_render: image param must be a URL string or AssetRef");
4483
4706
  }
4484
4707
  async function substituteCompositionFiles(tmp, values) {
4485
- const entryPath = path9.join(tmp, "index.html");
4486
- const original = await readFile7(entryPath, "utf-8");
4708
+ const entryPath = path11.join(tmp, "index.html");
4709
+ const original = await readFile8(entryPath, "utf-8");
4487
4710
  const { output, missing } = substituteVariables(original, values);
4488
4711
  if (missing.length > 0) {
4489
4712
  throw new Error(
4490
4713
  `hyperframe_render: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}. Add to params or to meta.json's params with a default.`
4491
4714
  );
4492
4715
  }
4493
- await writeFile4(entryPath, output, "utf-8");
4494
- }
4495
- async function ensureMetaJson(tmp, nodeId, meta, duration) {
4496
- const metaPath = path9.join(tmp, "meta.json");
4497
- await writeFile4(
4498
- metaPath,
4499
- JSON.stringify(
4500
- {
4501
- id: nodeId,
4502
- name: meta.id,
4503
- duration,
4504
- width: meta.width,
4505
- height: meta.height,
4506
- fps: meta.fps
4507
- },
4508
- null,
4509
- 2
4510
- ),
4511
- "utf-8"
4512
- );
4716
+ await writeFile5(entryPath, output, "utf-8");
4513
4717
  }
4514
4718
  var MAX_WORKERS = 4;
4515
4719
  function workerCount() {
@@ -4517,10 +4721,10 @@ function workerCount() {
4517
4721
  }
4518
4722
  async function runRender(opts) {
4519
4723
  const { tmp, outputPath, params, meta, ctx } = opts;
4520
- const args = buildRenderArgs(tmp, outputPath, meta);
4521
- ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps from ${path9.basename(tmp)}`);
4724
+ const args = buildRenderArgs(tmp, outputPath, meta, params.format);
4725
+ ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps ${params.format} from ${path11.basename(tmp)}`);
4522
4726
  try {
4523
- await execFileAsync("npx", args, { timeout: params.timeout_ms, maxBuffer: 64 * 1024 * 1024 });
4727
+ await execFileAsync2("npx", args, { timeout: params.timeout_ms, maxBuffer: 64 * 1024 * 1024 });
4524
4728
  } catch (e) {
4525
4729
  const stderr = e.stderr ?? "";
4526
4730
  const stdout = e.stdout ?? "";
@@ -4530,7 +4734,7 @@ async function runRender(opts) {
4530
4734
  ${friendly ?? detail.slice(0, 4e3)}`);
4531
4735
  }
4532
4736
  }
4533
- function buildRenderArgs(tmp, outputPath, meta) {
4737
+ function buildRenderArgs(tmp, outputPath, meta, format) {
4534
4738
  return [
4535
4739
  "hyperframes",
4536
4740
  "render",
@@ -4542,13 +4746,13 @@ function buildRenderArgs(tmp, outputPath, meta) {
4542
4746
  "--quality",
4543
4747
  "high",
4544
4748
  "--format",
4545
- "mp4",
4749
+ format,
4546
4750
  "--workers",
4547
4751
  String(workerCount())
4548
4752
  ];
4549
4753
  }
4550
4754
  async function probeDurationSeconds(filePath) {
4551
- const { stdout } = await execFileAsync(
4755
+ const { stdout } = await execFileAsync2(
4552
4756
  "ffprobe",
4553
4757
  ["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", filePath],
4554
4758
  { encoding: "utf-8" }
@@ -4561,14 +4765,14 @@ async function probeDurationSeconds(filePath) {
4561
4765
  }
4562
4766
 
4563
4767
  // src/engine/nodes/local/hyperframe-snapshot.ts
4564
- import { execFile as execFile4 } from "child_process";
4565
- import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as readFile8, rm as rm5, writeFile as writeFile5 } from "fs/promises";
4768
+ import { execFile as execFile5 } from "child_process";
4769
+ import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as readFile9, rm as rm5, writeFile as writeFile6 } from "fs/promises";
4566
4770
  import { createRequire as createRequire3 } from "module";
4567
4771
  import { tmpdir as tmpdir5 } from "os";
4568
- import path10 from "path";
4569
- import { promisify as promisify4 } from "util";
4772
+ import path12 from "path";
4773
+ import { promisify as promisify5 } from "util";
4570
4774
  import { z as z11 } from "zod";
4571
- var _execFileAsync = promisify4(execFile4);
4775
+ var _execFileAsync = promisify5(execFile5);
4572
4776
  var require_3 = createRequire3(import.meta.url);
4573
4777
  var WaitForSpec = z11.discriminatedUnion("kind", [
4574
4778
  z11.object({ kind: z11.literal("auto") }),
@@ -4589,7 +4793,7 @@ var NODE_OWNED_PARAM_KEYS2 = /* @__PURE__ */ new Set(["composition", "wait_for",
4589
4793
  var DEVICE_SCALE_FACTOR2 = 2;
4590
4794
  var hyperframeSnapshotNode = defineNode({
4591
4795
  id: "hyperframe_snapshot",
4592
- version: "4.0.0",
4796
+ version: "4.1.0",
4593
4797
  category: "image",
4594
4798
  location: "local",
4595
4799
  summary: "Render an HTML/CSS composition to a static PNG via headless Chromium at 2x device-scale (retina). Same composition model as `hyperframe_render` \u2014 point `params.composition` at a directory containing `index.html` + `meta.json`.",
@@ -4612,14 +4816,22 @@ var hyperframeSnapshotNode = defineNode({
4612
4816
  const compositionDir = await resolveCompositionDir(params.composition);
4613
4817
  const meta = await loadCompositionMeta(compositionDir);
4614
4818
  const compositionParams = validateAndParseDynamicParams2(meta, params);
4615
- const tmp = await mkdtemp5(path10.join(tmpdir5(), "hf-snap-"));
4819
+ const tmp = await mkdtemp5(path12.join(tmpdir5(), "hf-snap-"));
4616
4820
  try {
4617
4821
  await copyComposition2(compositionDir, tmp);
4618
4822
  await vendorGsap2(tmp, ctx);
4619
4823
  await stageInputs3(tmp, inputs, meta, ctx);
4620
4824
  const substitutionValues = await buildSubstitutionValues2(compositionParams, meta);
4621
4825
  await substituteCompositionFiles2(tmp, substitutionValues);
4622
- const entryPath = path10.join(tmp, "index.html");
4826
+ await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, meta.default_duration);
4827
+ await runHyperframesCheck({
4828
+ dir: tmp,
4829
+ nodeId: "hyperframe_snapshot",
4830
+ ctx,
4831
+ timeoutMs: params.timeout_ms,
4832
+ samples: 1
4833
+ });
4834
+ const entryPath = path12.join(tmp, "index.html");
4623
4835
  const entryUrl = `file://${entryPath}`;
4624
4836
  ctx.log(`snapshotting ${meta.width}x${meta.height}@${DEVICE_SCALE_FACTOR2}x wait=${params.wait_for.kind}`);
4625
4837
  const pwSpecifier = ["play", "wright"].join("");
@@ -4680,7 +4892,7 @@ async function copyComposition2(srcDir, destDir) {
4680
4892
  await cp(srcDir, destDir, {
4681
4893
  recursive: true,
4682
4894
  filter: (src) => {
4683
- const name = path10.basename(src);
4895
+ const name = path12.basename(src);
4684
4896
  if (name === ".cache" || name === "node_modules" || name === ".git") return false;
4685
4897
  return true;
4686
4898
  }
@@ -4689,7 +4901,7 @@ async function copyComposition2(srcDir, destDir) {
4689
4901
  async function vendorGsap2(tmp, ctx) {
4690
4902
  try {
4691
4903
  const gsapMin = require_3.resolve("gsap/dist/gsap.min.js");
4692
- await copyFile5(gsapMin, path10.join(tmp, "gsap.min.js"));
4904
+ await copyFile5(gsapMin, path12.join(tmp, "gsap.min.js"));
4693
4905
  } catch (e) {
4694
4906
  ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
4695
4907
  }
@@ -4723,15 +4935,15 @@ function coerceImageParam2(value) {
4723
4935
  throw new Error("hyperframe_snapshot: image param must be a URL string or AssetRef");
4724
4936
  }
4725
4937
  async function substituteCompositionFiles2(tmp, values) {
4726
- const entryPath = path10.join(tmp, "index.html");
4727
- const original = await readFile8(entryPath, "utf-8");
4938
+ const entryPath = path12.join(tmp, "index.html");
4939
+ const original = await readFile9(entryPath, "utf-8");
4728
4940
  const { output, missing } = substituteVariables(original, values);
4729
4941
  if (missing.length > 0) {
4730
4942
  throw new Error(
4731
4943
  `hyperframe_snapshot: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}.`
4732
4944
  );
4733
4945
  }
4734
- await writeFile5(entryPath, output, "utf-8");
4946
+ await writeFile6(entryPath, output, "utf-8");
4735
4947
  }
4736
4948
  async function waitForReady(page, waitFor, timeoutMs) {
4737
4949
  switch (waitFor.kind) {
@@ -4765,10 +4977,10 @@ async function waitForReady(page, waitFor, timeoutMs) {
4765
4977
  }
4766
4978
 
4767
4979
  // src/engine/nodes/local/imagemagick.ts
4768
- import { execFile as execFile5 } from "child_process";
4769
- import { promisify as promisify5 } from "util";
4980
+ import { execFile as execFile6 } from "child_process";
4981
+ import { promisify as promisify6 } from "util";
4770
4982
  import { z as z12 } from "zod";
4771
- var execFileAsync2 = promisify5(execFile5);
4983
+ var execFileAsync3 = promisify6(execFile6);
4772
4984
  var OutputDecl2 = z12.object({
4773
4985
  kind: z12.enum(["image", "video", "audio"]),
4774
4986
  ext: z12.string().min(1).max(8)
@@ -4784,7 +4996,7 @@ async function resolveBin() {
4784
4996
  if (resolvedBin) return resolvedBin;
4785
4997
  for (const candidate of ["magick", "convert"]) {
4786
4998
  try {
4787
- await execFileAsync2(candidate, ["-version"], { encoding: "utf-8" });
4999
+ await execFileAsync3(candidate, ["-version"], { encoding: "utf-8" });
4788
5000
  resolvedBin = candidate;
4789
5001
  return candidate;
4790
5002
  } catch {
@@ -4834,34 +5046,60 @@ var textNode = defineNode({
4834
5046
  execute: ({ params }) => Promise.resolve({ text: params.value })
4835
5047
  });
4836
5048
 
4837
- // src/engine/nodes/remote/dialogue.ts
5049
+ // src/engine/nodes/remote/audioVoiceConvert.ts
4838
5050
  import { z as z14 } from "zod";
4839
- var DialogueInput = z14.object({
4840
- text: z14.string().min(1),
4841
- voice_id: z14.string().min(1)
5051
+ var AudioVoiceConvertParams = z14.object({
5052
+ model: z14.literal("elevenlabs/eleven_multilingual_sts_v2"),
5053
+ /** Target voice id. Splice an upstream `voice_select` via `"{{voice_ref}}"`. */
5054
+ voice: z14.string().min(1),
5055
+ output_format: z14.string().optional(),
5056
+ /** Strip the source clip's background noise before re-voicing. */
5057
+ remove_background_noise: z14.boolean().optional()
5058
+ }).strict();
5059
+ var audioVoiceConvertNode = delegated({
5060
+ id: "audio_voice_convert",
5061
+ version: "1.0.0",
5062
+ category: "audio",
5063
+ summary: "Voice Changer / speech-to-speech via ElevenLabs (eleven_multilingual_sts_v2). Re-voices an existing audio clip in a TARGET voice while preserving timing/prosody.",
5064
+ when_to_use: 'Use to normalize a generator-chosen voice (e.g. a Seedance talking-head clip\'s native audio) into ONE consistent brand voice across every scene \u2014 the cadence is preserved so any lip-sync stays valid. Wire `inputs.voice_ref: $ref:<voice_select>.voice_id` and set `params.voice: "{{voice_ref}}"`.',
5065
+ inputs: z14.object({
5066
+ audio: AudioRef,
5067
+ voice_ref: TextRef.optional()
5068
+ }).strict(),
5069
+ params: AudioVoiceConvertParams,
5070
+ outputs: z14.object({ audio: AudioRef }).strict(),
5071
+ outputKinds: { audio: "audio" },
5072
+ cost: () => ({ credits: 1, seconds_estimate: 20 })
5073
+ });
5074
+
5075
+ // src/engine/nodes/remote/dialogue.ts
5076
+ import { z as z15 } from "zod";
5077
+ var DialogueInput = z15.object({
5078
+ text: z15.string().min(1),
5079
+ voice_id: z15.string().min(1)
4842
5080
  });
4843
5081
  var DIALOGUE_MODELS = ["elevenlabs/eleven_v3"];
4844
- var DialogueParams = z14.object({
4845
- model: z14.enum(DIALOGUE_MODELS),
5082
+ var DialogueParams = z15.object({
5083
+ model: z15.enum(DIALOGUE_MODELS),
4846
5084
  /**
4847
5085
  * Ordered list of lines, each tagged with the voice that should speak it.
4848
5086
  * Up to 10 unique voice_ids; total text across all lines should stay under
4849
5087
  * ~2000 characters for best quality (ElevenLabs guidance).
4850
5088
  */
4851
- inputs: z14.array(DialogueInput).min(1).max(50),
4852
- language_code: z14.string().optional(),
5089
+ inputs: z15.array(DialogueInput).min(1).max(50),
5090
+ language_code: z15.string().optional(),
4853
5091
  /** ElevenLabs voice/model settings passthrough (e.g. `{ stability: 0.5 }`). */
4854
- settings: z14.record(z14.string(), z14.unknown()).optional(),
4855
- seed: z14.number().int().min(0).max(4294967295).optional(),
4856
- apply_text_normalization: z14.enum(["auto", "on", "off"]).optional(),
5092
+ settings: z15.record(z15.string(), z15.unknown()).optional(),
5093
+ seed: z15.number().int().min(0).max(4294967295).optional(),
5094
+ apply_text_normalization: z15.enum(["auto", "on", "off"]).optional(),
4857
5095
  /**
4858
5096
  * When true, hits `/v1/text-to-dialogue/with-timestamps` and emits a
4859
5097
  * separate `timestamps` output — character-level alignment plus
4860
5098
  * per-voice segment markers usable for captions, lipsync, or
4861
5099
  * beat-matched cuts in ad creatives.
4862
5100
  */
4863
- with_timestamps: z14.boolean().optional(),
4864
- output_format: z14.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5101
+ with_timestamps: z15.boolean().optional(),
5102
+ output_format: z15.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
4865
5103
  }).strict().refine((p) => p.inputs.reduce((sum, line) => sum + line.text.length, 0) <= ELEVENLABS_MAX_TEXT_CHARS, {
4866
5104
  message: `total dialogue text exceeds ${ELEVENLABS_MAX_TEXT_CHARS} characters`,
4867
5105
  path: ["inputs"]
@@ -4872,9 +5110,9 @@ var dialogueNode = delegated({
4872
5110
  category: "audio",
4873
5111
  summary: "Multi-voice dialogue / VO with ElevenLabs Eleven v3. Each line is tagged with a `voice_id`, so you can render two-character scripts (e.g. ad VO + customer testimonial reaction) in a single call. Setting `with_timestamps: true` adds character-level alignment for caption rendering and lipsync-friendly cuts.",
4874
5112
  when_to_use: "Use for any ad creative or website video VO that needs more than narration \u2014 interviews, two-actor scripts, character ads, testimonial reads. For single-voice flat reads the existing `tts` node is cheaper and simpler; reach for `dialogue` when you need multiple speakers in one stitched track or word-level timing for downstream lipsync / captions.",
4875
- inputs: z14.object({}).loose(),
5113
+ inputs: z15.object({}).loose(),
4876
5114
  params: DialogueParams,
4877
- outputs: z14.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5115
+ outputs: z15.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
4878
5116
  outputKinds: { audio: "audio", timestamps: "json" },
4879
5117
  cost: ({ params }) => {
4880
5118
  const chars = params.inputs.reduce((sum, line) => sum + line.text.length, 0);
@@ -4883,7 +5121,7 @@ var dialogueNode = delegated({
4883
5121
  });
4884
5122
 
4885
5123
  // src/engine/nodes/remote/image.ts
4886
- import { z as z15 } from "zod";
5124
+ import { z as z16 } from "zod";
4887
5125
  var IMAGE_GENERATE_MODELS2 = [
4888
5126
  "openai/gpt-5.4-image-2",
4889
5127
  "google/gemini-3.5-flash",
@@ -4891,16 +5129,16 @@ var IMAGE_GENERATE_MODELS2 = [
4891
5129
  "google/gemini-3-pro-image-preview",
4892
5130
  "recraft/recraft-v4.1-pro-vector"
4893
5131
  ];
4894
- var ImageGenerateParams = z15.object({
4895
- model: z15.enum(IMAGE_GENERATE_MODELS2),
4896
- prompt: z15.string().min(1),
4897
- aspect_ratio: z15.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
4898
- image_size: z15.enum(["0.5K", "1K", "2K", "4K"]).optional(),
5132
+ var ImageGenerateParams = z16.object({
5133
+ model: z16.enum(IMAGE_GENERATE_MODELS2),
5134
+ prompt: z16.string().min(1),
5135
+ aspect_ratio: z16.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
5136
+ image_size: z16.enum(["0.5K", "1K", "2K", "4K"]).optional(),
4899
5137
  // Recraft v4 vector controls — forwarded into `image_config`. Registry
4900
5138
  // rejects them on non-Recraft models.
4901
- strength: z15.number().min(0).max(1).optional(),
4902
- rgb_colors: z15.array(z15.array(z15.number().int().min(0).max(255))).optional(),
4903
- background_rgb_color: z15.array(z15.number().int().min(0).max(255)).optional()
5139
+ strength: z16.number().min(0).max(1).optional(),
5140
+ rgb_colors: z16.array(z16.array(z16.number().int().min(0).max(255))).optional(),
5141
+ background_rgb_color: z16.array(z16.number().int().min(0).max(255)).optional()
4904
5142
  }).strict();
4905
5143
  var imageGenerateNode = delegated({
4906
5144
  id: "image_generate",
@@ -4910,22 +5148,22 @@ var imageGenerateNode = delegated({
4910
5148
  when_to_use: "Use for hero shots, product photography, illustrations, and vector logos. `recraft/recraft-v4.1-pro-vector` for crisp vector / logo work; `openai/gpt-5.4-image-2` for photorealistic; Gemini variants for fast iteration and editing via the `reference` input. `reference` accepts ONE image or an ARRAY of images \u2014 wire several to combine references in a single generation (e.g. a subject sheet + a font specimen + the original ad). Every reference is forwarded to the model in array order.",
4911
5149
  // `reference` is one image or an ordered array of images. The backend forwards
4912
5150
  // each as a separate `image_url` to the provider (OpenRouter accepts many).
4913
- inputs: z15.object({ reference: z15.union([ImageRef, z15.array(ImageRef).min(1)]).optional() }).loose(),
5151
+ inputs: z16.object({ reference: z16.union([ImageRef, z16.array(ImageRef).min(1)]).optional() }).loose(),
4914
5152
  params: ImageGenerateParams,
4915
- outputs: z15.object({ images: z15.array(ImageRef).min(1) }).strict(),
5153
+ outputs: z16.object({ images: z16.array(ImageRef).min(1) }).strict(),
4916
5154
  outputKinds: { images: "image" },
4917
5155
  cost: () => ({ credits: 5, seconds_estimate: 10 })
4918
5156
  });
4919
5157
 
4920
5158
  // src/engine/nodes/remote/imageAspectAdapt.ts
4921
- import { z as z16 } from "zod";
5159
+ import { z as z17 } from "zod";
4922
5160
  var ASPECT_ADAPT_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
4923
5161
  var ASPECT_ADAPT_FORMATS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"];
4924
- var ImageAspectAdaptParams = z16.object({
4925
- model: z16.enum(ASPECT_ADAPT_MODELS),
4926
- formats: z16.array(z16.enum(ASPECT_ADAPT_FORMATS)).min(1).max(6).refine((formats) => new Set(formats).size === formats.length, { message: "formats must be unique" }),
4927
- guidance: z16.string().min(1).optional(),
4928
- image_size: z16.enum(["0.5K", "1K", "2K", "4K"]).optional()
5162
+ var ImageAspectAdaptParams = z17.object({
5163
+ model: z17.enum(ASPECT_ADAPT_MODELS),
5164
+ formats: z17.array(z17.enum(ASPECT_ADAPT_FORMATS)).min(1).max(6).refine((formats) => new Set(formats).size === formats.length, { message: "formats must be unique" }),
5165
+ guidance: z17.string().min(1).optional(),
5166
+ image_size: z17.enum(["0.5K", "1K", "2K", "4K"]).optional()
4929
5167
  }).strict();
4930
5168
  var imageAspectAdaptNode = delegated({
4931
5169
  id: "image_aspect_adapt",
@@ -4933,9 +5171,9 @@ var imageAspectAdaptNode = delegated({
4933
5171
  category: "image",
4934
5172
  summary: "Adapt ONE creative into multiple aspect ratios (Meta: 9:16 stories, 1:1 feed, 4:5, 16:9\u2026) in a single step. AI recomposes the layout per format \u2014 identical subject, text, logos, colors, and style; the scene is extended/restructured, never stretched or cropped. Formats that already match the source ratio pass through unchanged at zero cost. Outputs are ordered exactly as `formats`.",
4935
5173
  when_to_use: "Use after a hero creative exists (image_generate, ingest, image_search) to fan it out to every placement format \u2014 wire the creative into `source` and list the target ratios in `formats`. Cost is estimated per format; formats matching the source ratio are free pass-throughs. Pick `google/gemini-3.1-flash-image-preview` (Nano Banana flash) while iterating, `google/gemini-3-pro-image-preview` (Nano Banana Pro) for final-quality adaptation.",
4936
- inputs: z16.object({ source: ImageRef }).loose(),
5174
+ inputs: z17.object({ source: ImageRef }).loose(),
4937
5175
  params: ImageAspectAdaptParams,
4938
- outputs: z16.object({ images: z16.array(ImageRef).min(1) }).strict(),
5176
+ outputs: z17.object({ images: z17.array(ImageRef).min(1) }).strict(),
4939
5177
  outputKinds: { images: "image" },
4940
5178
  cost: ({ params }) => {
4941
5179
  const p = params;
@@ -4948,12 +5186,12 @@ var imageAspectAdaptNode = delegated({
4948
5186
  });
4949
5187
 
4950
5188
  // src/engine/nodes/remote/imageBackgroundRemove.ts
4951
- import { z as z17 } from "zod";
4952
- var ImageBackgroundRemoveParams = z17.object({
4953
- model: z17.literal("fal/birefnet-v2").optional().default("fal/birefnet-v2"),
4954
- model_variant: z17.enum(["General Use (Light)", "General Use (Heavy)", "Matting", "Portrait", "DIS", "HRSOD", "COD"]).optional().default("General Use (Light)"),
4955
- operating_resolution: z17.enum(["1024x1024", "2048x2048", "2304x2304"]).optional(),
4956
- mask_only: z17.boolean().optional().default(false)
5189
+ import { z as z18 } from "zod";
5190
+ var ImageBackgroundRemoveParams = z18.object({
5191
+ model: z18.literal("fal/birefnet-v2").optional().default("fal/birefnet-v2"),
5192
+ model_variant: z18.enum(["General Use (Light)", "General Use (Heavy)", "Matting", "Portrait", "DIS", "HRSOD", "COD"]).optional().default("General Use (Light)"),
5193
+ operating_resolution: z18.enum(["1024x1024", "2048x2048", "2304x2304"]).optional(),
5194
+ mask_only: z18.boolean().optional().default(false)
4957
5195
  }).strict();
4958
5196
  var imageBackgroundRemoveNode = delegated({
4959
5197
  id: "image_background_remove",
@@ -4961,11 +5199,11 @@ var imageBackgroundRemoveNode = delegated({
4961
5199
  category: "image",
4962
5200
  summary: "Remove the background from an image and return a transparent PNG (or the segmentation mask). Powered by fal.ai `fal-ai/birefnet/v2`.",
4963
5201
  when_to_use: "Use to extract subjects from photos for use as overlays in hyperframe compositions, product shots, or compositing pipelines. Set `mask_only:true` to return the binary mask instead of the alpha-cut image.",
4964
- inputs: z17.object({
5202
+ inputs: z18.object({
4965
5203
  image: ImageRef
4966
5204
  }).strict(),
4967
5205
  params: ImageBackgroundRemoveParams,
4968
- outputs: z17.object({
5206
+ outputs: z18.object({
4969
5207
  image: ImageRef,
4970
5208
  mask: ImageRef.optional()
4971
5209
  }).strict(),
@@ -4974,7 +5212,7 @@ var imageBackgroundRemoveNode = delegated({
4974
5212
  });
4975
5213
 
4976
5214
  // src/engine/nodes/remote/imageDescribe.ts
4977
- import { z as z18 } from "zod";
5215
+ import { z as z19 } from "zod";
4978
5216
  var IMAGE_DESCRIBE_MODELS = ["~google/gemini-pro-latest", "~google/gemini-flash-latest"];
4979
5217
  var imageDescribeNode = delegated({
4980
5218
  id: "image_describe",
@@ -4982,31 +5220,31 @@ var imageDescribeNode = delegated({
4982
5220
  category: "vision",
4983
5221
  summary: "Reverse-engineer an image into an exhaustive, replication-grade JSON description: who the advertiser is and what they sell (source_context), composition, non-person subjects with expression/treatment, deeply detailed people, brand-identified logos (named by brand, not appearance), camera optics, lighting, color palette WITH per-color brand-ownership (brand vs borrowed-functional) and purpose, materials, visible text, ad signals (proof badges/CTA/price), the persuasion engine (ad_intent), style, post-processing.",
4984
5222
  when_to_use: 'Use to turn a reference image into a structured blueprint you can inject into downstream prompts via `{{slot}}` \u2014 e.g. restyle a competitor ad onto your own product, lock a look across a series, or feed exact palette/lighting into image_generate. Purpose-built for market adaptation: logos are identified by brand ("Trustpilot", never "green star"), people and animals carry expression/emotion/intent detail, and each color is tagged brand vs borrowed-functional so a recolor can keep the reds/yellows that do a job. The extraction prompt is baked in; use `focus` to emphasise aspects and `context` to pass known provenance (advertiser, category, market) so source_context and color ownership are grounded. Pick `~google/gemini-pro-latest` for the densest extraction (recommended for ad / market-adaptation passes), `~google/gemini-flash-latest` for cheap/fast passes. The output is rich \u2014 raise `max_tokens` (e.g. 8000+) for dense ads so the JSON isn\'t truncated.',
4985
- inputs: z18.object({ image: ImageRef }).loose(),
4986
- params: z18.object({
4987
- model: z18.enum(IMAGE_DESCRIBE_MODELS),
4988
- focus: z18.string().optional(),
4989
- context: z18.string().optional(),
4990
- temperature: z18.number().min(0).max(2).optional(),
4991
- max_tokens: z18.number().int().positive().optional()
5223
+ inputs: z19.object({ image: ImageRef }).loose(),
5224
+ params: z19.object({
5225
+ model: z19.enum(IMAGE_DESCRIBE_MODELS),
5226
+ focus: z19.string().optional(),
5227
+ context: z19.string().optional(),
5228
+ temperature: z19.number().min(0).max(2).optional(),
5229
+ max_tokens: z19.number().int().positive().optional()
4992
5230
  }).strict(),
4993
- outputs: z18.object({ description: JsonRef }).strict(),
5231
+ outputs: z19.object({ description: JsonRef }).strict(),
4994
5232
  outputKinds: { description: "json" },
4995
5233
  cost: () => ({ credits: 2, seconds_estimate: 10 })
4996
5234
  });
4997
5235
 
4998
5236
  // src/engine/nodes/remote/imageReferenceSheet.ts
4999
- import { z as z19 } from "zod";
5237
+ import { z as z20 } from "zod";
5000
5238
  var REFERENCE_SHEET_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
5001
- var ImageReferenceSheetParams = z19.object({
5002
- model: z19.enum(REFERENCE_SHEET_MODELS),
5003
- subject_description: z19.string().min(1),
5004
- subject_type: z19.enum(["character", "person", "product"]),
5005
- views: z19.array(z19.string().min(1)).min(2).max(6).optional(),
5006
- style: z19.string().optional(),
5007
- prompt_override: z19.string().min(1).optional(),
5008
- aspect_ratio: z19.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
5009
- image_size: z19.enum(["0.5K", "1K", "2K", "4K"]).optional()
5239
+ var ImageReferenceSheetParams = z20.object({
5240
+ model: z20.enum(REFERENCE_SHEET_MODELS),
5241
+ subject_description: z20.string().min(1),
5242
+ subject_type: z20.enum(["character", "person", "product"]),
5243
+ views: z20.array(z20.string().min(1)).min(2).max(6).optional(),
5244
+ style: z20.string().optional(),
5245
+ prompt_override: z20.string().min(1).optional(),
5246
+ aspect_ratio: z20.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
5247
+ image_size: z20.enum(["0.5K", "1K", "2K", "4K"]).optional()
5010
5248
  }).strict();
5011
5249
  var imageReferenceSheetNode = delegated({
5012
5250
  id: "image_reference_sheet",
@@ -5014,9 +5252,9 @@ var imageReferenceSheetNode = delegated({
5014
5252
  category: "image",
5015
5253
  summary: "Fuse 1\u20136 images of a single subject (person, character, or product) into ONE multi-view reference sheet \u2014 a labeled turnaround grid (FRONT / SIDE / BACK\u2026) in consistent style and lighting. Curated models: Gemini 3 Pro Image (best fusion + labels), Gemini 3.1 Flash Image (cheap iteration).",
5016
5254
  when_to_use: "Use before image_generate / video_generate when a subject must stay consistent across many creatives \u2014 wire the `sheet` output into their `reference` input instead of re-describing the subject per prompt. `subject_description` should be the exact wording you reuse downstream. Pick `google/gemini-3-pro-image-preview` for final 6-view sheets at 2K+, `google/gemini-3.1-flash-image-preview` while iterating.",
5017
- inputs: z19.object({ references: z19.array(ImageRef).min(1).max(6) }).loose(),
5255
+ inputs: z20.object({ references: z20.array(ImageRef).min(1).max(6) }).loose(),
5018
5256
  params: ImageReferenceSheetParams,
5019
- outputs: z19.object({ sheet: ImageRef }).strict(),
5257
+ outputs: z20.object({ sheet: ImageRef }).strict(),
5020
5258
  outputKinds: { sheet: "image" },
5021
5259
  cost: ({ params }) => ({
5022
5260
  credits: params?.model === "google/gemini-3-pro-image-preview" ? 20 : 5,
@@ -5025,10 +5263,10 @@ var imageReferenceSheetNode = delegated({
5025
5263
  });
5026
5264
 
5027
5265
  // src/engine/nodes/remote/imageSearch.ts
5028
- import { z as z20 } from "zod";
5029
- var ImageSearchParams = z20.object({
5030
- prompt: z20.string().min(1),
5031
- count: z20.number().int().min(1).max(20).default(5)
5266
+ import { z as z21 } from "zod";
5267
+ var ImageSearchParams = z21.object({
5268
+ prompt: z21.string().min(1),
5269
+ count: z21.number().int().min(1).max(20).default(5)
5032
5270
  }).strict();
5033
5271
  var imageSearchNode = delegated({
5034
5272
  id: "image_search",
@@ -5036,15 +5274,15 @@ var imageSearchNode = delegated({
5036
5274
  category: "image",
5037
5275
  summary: "Agentic image search across Google Images, stock photography (Freepik), and Pinterest. An LLM agent picks the search tools and queries, selects the best matches, and the results are downloaded into canvas assets.",
5038
5276
  when_to_use: "Use to gather real-world reference or inspiration images for a prompt (e.g. several photos of an australian shepherd) so a later step or the user can pick the best one. Not for creating new imagery \u2014 use image_generate for that.",
5039
- inputs: z20.object({}).loose(),
5277
+ inputs: z21.object({}).loose(),
5040
5278
  params: ImageSearchParams,
5041
- outputs: z20.object({ images: z20.array(ImageRef).min(1) }).strict(),
5279
+ outputs: z21.object({ images: z21.array(ImageRef).min(1) }).strict(),
5042
5280
  outputKinds: { images: "image" },
5043
5281
  cost: ({ params }) => ({ credits: Math.ceil(2 + params.count / 2), seconds_estimate: 30 })
5044
5282
  });
5045
5283
 
5046
5284
  // src/engine/nodes/remote/imageSelect.ts
5047
- import { z as z21 } from "zod";
5285
+ import { z as z22 } from "zod";
5048
5286
  var IMAGE_SELECT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
5049
5287
  var imageSelectNode = delegated({
5050
5288
  id: "image_select",
@@ -5052,15 +5290,15 @@ var imageSelectNode = delegated({
5052
5290
  category: "vision",
5053
5291
  summary: "Pick the best `count` images out of 2+ candidates with a vision LLM, judged against a prompt. Outputs a passthrough subset of the input refs (no new pixels) plus the model's comparative reasoning.",
5054
5292
  when_to_use: "Use after fanning out several image_generate variants (or any pool of 2+ images) to keep only the strongest before expensive downstream steps \u2014 video generation, reference sheets, final delivery. `count` fixes the output size, so `images#0`\u2026`images#count-1` are always safe to wire. Pick `~google/gemini-flash-latest` for cheap/fast picks and `~google/gemini-pro-latest` for harder aesthetic judgement.",
5055
- inputs: z21.object({ images: z21.array(ImageRef).min(2) }).loose(),
5056
- params: z21.object({
5057
- model: z21.enum(IMAGE_SELECT_MODELS),
5058
- prompt: z21.string().min(1),
5059
- count: z21.number().int().min(1).default(1),
5060
- temperature: z21.number().min(0).max(2).optional(),
5061
- max_tokens: z21.number().int().positive().optional()
5293
+ inputs: z22.object({ images: z22.array(ImageRef).min(2) }).loose(),
5294
+ params: z22.object({
5295
+ model: z22.enum(IMAGE_SELECT_MODELS),
5296
+ prompt: z22.string().min(1),
5297
+ count: z22.number().int().min(1).default(1),
5298
+ temperature: z22.number().min(0).max(2).optional(),
5299
+ max_tokens: z22.number().int().positive().optional()
5062
5300
  }).strict(),
5063
- outputs: z21.object({ images: z21.array(ImageRef).min(1), reasoning: TextRef }).strict(),
5301
+ outputs: z22.object({ images: z22.array(ImageRef).min(1), reasoning: TextRef }).strict(),
5064
5302
  outputKinds: { images: "image", reasoning: "text" },
5065
5303
  cost: () => ({ credits: 1, seconds_estimate: 5 }),
5066
5304
  // Arity is only knowable at validate time when `images` is a literal array
@@ -5085,34 +5323,34 @@ var imageSelectNode = delegated({
5085
5323
  });
5086
5324
 
5087
5325
  // src/engine/nodes/remote/music.ts
5088
- import { z as z22 } from "zod";
5326
+ import { z as z23 } from "zod";
5089
5327
  var MUSIC_MODELS = ["elevenlabs/music-v1", "elevenlabs/video-background-music-v1"];
5090
- var MusicParams = z22.object({
5091
- model: z22.enum(MUSIC_MODELS),
5328
+ var MusicParams = z23.object({
5329
+ model: z23.enum(MUSIC_MODELS),
5092
5330
  /** Free-form prompt. Used by `elevenlabs/music-v1` (compose-detailed). */
5093
- prompt: z22.string().optional(),
5331
+ prompt: z23.string().optional(),
5094
5332
  /**
5095
5333
  * Structured composition plan (intro / hook / verse / outro sections with
5096
5334
  * per-section styles + durations). Mutually exclusive with `prompt`.
5097
5335
  */
5098
- composition_plan: z22.record(z22.string(), z22.unknown()).optional(),
5336
+ composition_plan: z23.record(z23.string(), z23.unknown()).optional(),
5099
5337
  /** Target length when using `prompt`. 3000–454545ms (capped by the $10 per-node cost limit). */
5100
- music_length_ms: z22.number().int().min(3e3).max(ELEVENLABS_MAX_MUSIC_LENGTH_MS).optional(),
5101
- seed: z22.number().int().optional(),
5338
+ music_length_ms: z23.number().int().min(3e3).max(ELEVENLABS_MAX_MUSIC_LENGTH_MS).optional(),
5339
+ seed: z23.number().int().optional(),
5102
5340
  /** Prompt mode only — forces an instrumental (no vocals) track. */
5103
- force_instrumental: z22.boolean().optional(),
5341
+ force_instrumental: z23.boolean().optional(),
5104
5342
  /** composition_plan only — honor exact section durations. */
5105
- respect_sections_durations: z22.boolean().optional(),
5343
+ respect_sections_durations: z23.boolean().optional(),
5106
5344
  /** Emit word-level timestamps alongside the audio. */
5107
- with_timestamps: z22.boolean().optional(),
5345
+ with_timestamps: z23.boolean().optional(),
5108
5346
  /**
5109
5347
  * video-to-music only — short description of the desired score
5110
5348
  * ("upbeat synth, fast cuts, 80s") used to bias the model.
5111
5349
  */
5112
- description: z22.string().max(1e3).optional(),
5350
+ description: z23.string().max(1e3).optional(),
5113
5351
  /** video-to-music only — up to 10 style tags. */
5114
- tags: z22.array(z22.string()).max(10).optional(),
5115
- output_format: z22.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5352
+ tags: z23.array(z23.string()).max(10).optional(),
5353
+ output_format: z23.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5116
5354
  }).strict();
5117
5355
  var musicNode = delegated({
5118
5356
  id: "music",
@@ -5120,9 +5358,9 @@ var musicNode = delegated({
5120
5358
  category: "audio",
5121
5359
  summary: "Generate music for ad creatives and website video content. `elevenlabs/music-v1` composes from a text prompt or structured composition plan; `elevenlabs/video-background-music-v1` scores an existing video clip provided via `inputs.video`.",
5122
5360
  when_to_use: "Use to produce background music or a full score for video ads, hero-section reels, or any motion content. Prefer the video-to-music model when you already have a cut and want music timed to it; use compose-detailed when you have only a brief or want section-level control (intro / hook / outro). Pair the resulting audio with `video_generate` or `video_lipsync` at compose time.",
5123
- inputs: z22.object({ video: VideoRef.optional() }).loose(),
5361
+ inputs: z23.object({ video: VideoRef.optional() }).loose(),
5124
5362
  params: MusicParams,
5125
- outputs: z22.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5363
+ outputs: z23.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5126
5364
  outputKinds: { audio: "audio", timestamps: "json" },
5127
5365
  cost: ({ params }) => {
5128
5366
  const seconds = params.music_length_ms ? Math.ceil(params.music_length_ms / 1e3) : 30;
@@ -5153,25 +5391,25 @@ var musicNode = delegated({
5153
5391
  });
5154
5392
 
5155
5393
  // src/engine/nodes/remote/soundEffect.ts
5156
- import { z as z23 } from "zod";
5394
+ import { z as z24 } from "zod";
5157
5395
  var SOUND_EFFECT_MODELS = ["elevenlabs/eleven_text_to_sound_v2"];
5158
- var SoundEffectParams = z23.object({
5159
- model: z23.enum(SOUND_EFFECT_MODELS),
5396
+ var SoundEffectParams = z24.object({
5397
+ model: z24.enum(SOUND_EFFECT_MODELS),
5160
5398
  /** Prompt describing the SFX ("metal door slam", "soft UI tap", "ocean waves"). */
5161
- text: z23.string().min(1),
5399
+ text: z24.string().min(1),
5162
5400
  /**
5163
5401
  * Target length in seconds. 0.5–30. Leave unset to let the model pick the
5164
5402
  * natural length for the described effect.
5165
5403
  */
5166
- duration_seconds: z23.number().min(0.5).max(30).optional(),
5404
+ duration_seconds: z24.number().min(0.5).max(30).optional(),
5167
5405
  /**
5168
5406
  * 0–1. Higher = stick closer to the prompt at the cost of variety; lower
5169
5407
  * = let the model interpret more freely. Defaults to 0.3 on the provider.
5170
5408
  */
5171
- prompt_influence: z23.number().min(0).max(1).optional(),
5409
+ prompt_influence: z24.number().min(0).max(1).optional(),
5172
5410
  /** Only valid on `eleven_text_to_sound_v2` — produce a seamless loop. */
5173
- loop: z23.boolean().optional(),
5174
- output_format: z23.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5411
+ loop: z24.boolean().optional(),
5412
+ output_format: z24.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5175
5413
  }).strict();
5176
5414
  var soundEffectNode = delegated({
5177
5415
  id: "sound_effect",
@@ -5179,9 +5417,9 @@ var soundEffectNode = delegated({
5179
5417
  category: "audio",
5180
5418
  summary: "Generate short sound effects from a text prompt via ElevenLabs Text-to-Sound. Use for whooshes, impacts, UI clicks, ambient beds, or signature stingers in ad creatives and product videos.",
5181
5419
  when_to_use: "Reach for this when you need a punch-in SFX layered against `video_generate` or `hyperframe_render` output \u2014 e.g. a logo whoosh on a hero shot, a click on a CTA cut, a swelling ambient bed under VO. Set `loop: true` for atmospheric beds that need to tile under longer footage; leave `duration_seconds` unset and the model picks a natural length.",
5182
- inputs: z23.object({}).loose(),
5420
+ inputs: z24.object({}).loose(),
5183
5421
  params: SoundEffectParams,
5184
- outputs: z23.object({ audio: AudioRef }).strict(),
5422
+ outputs: z24.object({ audio: AudioRef }).strict(),
5185
5423
  outputKinds: { audio: "audio" },
5186
5424
  cost: ({ params }) => {
5187
5425
  const seconds = params.duration_seconds ?? 5;
@@ -5190,7 +5428,7 @@ var soundEffectNode = delegated({
5190
5428
  });
5191
5429
 
5192
5430
  // src/engine/nodes/remote/textGenerate.ts
5193
- import { z as z24 } from "zod";
5431
+ import { z as z25 } from "zod";
5194
5432
  var TEXT_GENERATE_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
5195
5433
  var textGenerateNode = delegated({
5196
5434
  id: "text_generate",
@@ -5198,58 +5436,58 @@ var textGenerateNode = delegated({
5198
5436
  category: "language",
5199
5437
  summary: "Single-turn LLM text generation via OpenRouter. Returns a text response.",
5200
5438
  when_to_use: 'Use for any short text generation step in a canvas \u2014 ad copy, hooks, headlines, JSON outputs for downstream nodes. Pick `~google/gemini-flash-latest` for cheap/fast work and `~google/gemini-pro-latest` for harder reasoning. When the output must be JSON for a downstream `{{slot}}` (e.g. the ad-blueprint transform), set `response_format: "json_object"` so the model returns clean JSON with no markdown fences or prose. Set `web_search: true` to let the model search the live web first (OpenRouter `:online`) \u2014 useful when the transform must adapt copy to the target brand\'s real facts (current pricing, the trust signals it actually has) rather than guess.',
5201
- inputs: z24.object({}).loose(),
5202
- params: z24.object({
5203
- model: z24.enum(TEXT_GENERATE_MODELS),
5204
- prompt: z24.string().min(1),
5205
- system: z24.string().optional(),
5206
- response_format: z24.enum(["text", "json_object"]).optional(),
5207
- web_search: z24.boolean().optional(),
5208
- temperature: z24.number().min(0).max(2).optional(),
5209
- max_tokens: z24.number().int().positive().optional()
5439
+ inputs: z25.object({}).loose(),
5440
+ params: z25.object({
5441
+ model: z25.enum(TEXT_GENERATE_MODELS),
5442
+ prompt: z25.string().min(1),
5443
+ system: z25.string().optional(),
5444
+ response_format: z25.enum(["text", "json_object"]).optional(),
5445
+ web_search: z25.boolean().optional(),
5446
+ temperature: z25.number().min(0).max(2).optional(),
5447
+ max_tokens: z25.number().int().positive().optional()
5210
5448
  }).strict(),
5211
- outputs: z24.object({ text: TextRef }).strict(),
5449
+ outputs: z25.object({ text: TextRef }).strict(),
5212
5450
  outputKinds: { text: "text" },
5213
5451
  cost: () => ({ credits: 1, seconds_estimate: 3 })
5214
5452
  });
5215
5453
 
5216
5454
  // src/engine/nodes/remote/tts.ts
5217
- import { z as z25 } from "zod";
5455
+ import { z as z26 } from "zod";
5218
5456
  var TTS_MODELS = ["elevenlabs/eleven_v3"];
5219
- var TtsVoiceSettings = z25.object({
5220
- stability: z25.number().min(0).max(1).optional(),
5221
- similarity_boost: z25.number().min(0).max(1).optional(),
5222
- style: z25.number().min(0).max(1).optional(),
5223
- use_speaker_boost: z25.boolean().optional(),
5224
- speed: z25.number().min(0.25).max(4).optional()
5457
+ var TtsVoiceSettings = z26.object({
5458
+ stability: z26.number().min(0).max(1).optional(),
5459
+ similarity_boost: z26.number().min(0).max(1).optional(),
5460
+ style: z26.number().min(0).max(1).optional(),
5461
+ use_speaker_boost: z26.boolean().optional(),
5462
+ speed: z26.number().min(0.25).max(4).optional()
5225
5463
  }).strict();
5226
- var TtsPronunciationLocator = z25.object({
5227
- pronunciation_dictionary_id: z25.string().min(1),
5228
- version_id: z25.string().nullable().optional()
5464
+ var TtsPronunciationLocator = z26.object({
5465
+ pronunciation_dictionary_id: z26.string().min(1),
5466
+ version_id: z26.string().nullable().optional()
5229
5467
  }).strict();
5230
- var TtsParams = z25.object({
5231
- model: z25.enum(TTS_MODELS),
5232
- text: z25.string().min(1).max(ELEVENLABS_MAX_TEXT_CHARS),
5233
- voice: z25.string().min(1),
5468
+ var TtsParams = z26.object({
5469
+ model: z26.enum(TTS_MODELS),
5470
+ text: z26.string().min(1).max(ELEVENLABS_MAX_TEXT_CHARS),
5471
+ voice: z26.string().min(1),
5234
5472
  /** Provider output_format (mp3 family only — assets are stored as audio/mpeg). */
5235
- output_format: z25.enum(ELEVENLABS_OUTPUT_FORMATS).optional(),
5236
- seed: z25.number().int().min(0).max(4294967295).optional(),
5473
+ output_format: z26.enum(ELEVENLABS_OUTPUT_FORMATS).optional(),
5474
+ seed: z26.number().int().min(0).max(4294967295).optional(),
5237
5475
  // Top-level shortcuts; structured form is `voice_settings`.
5238
- stability: z25.number().min(0).max(1).optional(),
5239
- similarity_boost: z25.number().min(0).max(1).optional(),
5476
+ stability: z26.number().min(0).max(1).optional(),
5477
+ similarity_boost: z26.number().min(0).max(1).optional(),
5240
5478
  voice_settings: TtsVoiceSettings.optional(),
5241
5479
  /** ISO 639-1 language code. eleven_v3 supports language hints. */
5242
- language_code: z25.string().optional(),
5243
- pronunciation_dictionary_locators: z25.array(TtsPronunciationLocator).max(3).optional(),
5244
- apply_text_normalization: z25.enum(["auto", "on", "off"]).optional(),
5480
+ language_code: z26.string().optional(),
5481
+ pronunciation_dictionary_locators: z26.array(TtsPronunciationLocator).max(3).optional(),
5482
+ apply_text_normalization: z26.enum(["auto", "on", "off"]).optional(),
5245
5483
  /** Currently Japanese-only. Adds latency. */
5246
- apply_language_text_normalization: z25.boolean().optional(),
5484
+ apply_language_text_normalization: z26.boolean().optional(),
5247
5485
  /**
5248
5486
  * When true, hits `/v1/text-to-speech/{voice_id}/with-timestamps` and
5249
5487
  * adds a `timestamps` output (character-level alignment) for caption
5250
5488
  * rendering, lipsync, and beat-matched cuts.
5251
5489
  */
5252
- with_timestamps: z25.boolean().optional()
5490
+ with_timestamps: z26.boolean().optional()
5253
5491
  }).strict();
5254
5492
  var ttsNode = delegated({
5255
5493
  id: "tts",
@@ -5257,9 +5495,9 @@ var ttsNode = delegated({
5257
5495
  category: "audio",
5258
5496
  summary: "Single-voice text-to-speech via ElevenLabs Eleven v3. Optional character-level timestamps for caption rendering and beat-matched cuts.",
5259
5497
  when_to_use: "Use for single-speaker VO \u2014 ad reads, hero-section narration, product walkthroughs. Reach for `dialogue` when you need multiple voices in one stitched track. Set `with_timestamps: true` when downstream needs character-level alignment (captions, lipsync).",
5260
- inputs: z25.object({}).loose(),
5498
+ inputs: z26.object({}).loose(),
5261
5499
  params: TtsParams,
5262
- outputs: z25.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5500
+ outputs: z26.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5263
5501
  outputKinds: { audio: "audio", timestamps: "json" },
5264
5502
  cost: ({ params }) => ({
5265
5503
  credits: Math.max(1, Math.ceil(params.text.length * 15e-4)),
@@ -5268,23 +5506,23 @@ var ttsNode = delegated({
5268
5506
  });
5269
5507
 
5270
5508
  // src/engine/nodes/remote/video.ts
5271
- import { z as z26 } from "zod";
5509
+ import { z as z27 } from "zod";
5272
5510
  var VIDEO_GENERATE_MODELS = ["bytedance/seedance-2.0", "google/veo-3.1-fast"];
5273
- var VideoGenerateParams = z26.object({
5274
- model: z26.enum(VIDEO_GENERATE_MODELS),
5275
- prompt: z26.string().min(1),
5276
- duration: z26.number().int().positive().optional(),
5277
- resolution: z26.string().optional(),
5511
+ var VideoGenerateParams = z27.object({
5512
+ model: z27.enum(VIDEO_GENERATE_MODELS),
5513
+ prompt: z27.string().min(1),
5514
+ duration: z27.number().int().positive().optional(),
5515
+ resolution: z27.string().optional(),
5278
5516
  // Union of ratios accepted by at least one curated model (registry gates
5279
5517
  // per-model). 3:2/2:3 are deliberately absent: no registered model takes them.
5280
- aspect_ratio: z26.enum(["16:9", "9:16", "1:1", "4:3", "3:4", "21:9", "9:21"]).optional(),
5281
- generate_audio: z26.boolean().optional(),
5282
- seed: z26.number().int().nonnegative().optional(),
5518
+ aspect_ratio: z27.enum(["16:9", "9:16", "1:1", "4:3", "3:4", "21:9", "9:21"]).optional(),
5519
+ generate_audio: z27.boolean().optional(),
5520
+ seed: z27.number().int().nonnegative().optional(),
5283
5521
  // Veo-only passthroughs (routed via `provider.options.google-vertex.parameters`).
5284
- negative_prompt: z26.string().optional(),
5285
- person_generation: z26.string().optional(),
5286
- enhance_prompt: z26.boolean().optional(),
5287
- conditioning_scale: z26.number().optional()
5522
+ negative_prompt: z27.string().optional(),
5523
+ person_generation: z27.string().optional(),
5524
+ enhance_prompt: z27.boolean().optional(),
5525
+ conditioning_scale: z27.number().optional()
5288
5526
  }).strict();
5289
5527
  var videoGenerateNode = delegated({
5290
5528
  id: "video_generate",
@@ -5292,23 +5530,23 @@ var videoGenerateNode = delegated({
5292
5530
  category: "video",
5293
5531
  summary: "Generate video for ad creatives. Two curated models: `bytedance/seedance-2.0` (production quality, photorealistic humans via fal.ai) and `google/veo-3.1-fast` (cheap/fast for iteration and tests). Async with polling.",
5294
5532
  when_to_use: "Use `bytedance/seedance-2.0` for final ad output (photoreal subjects, image-to-video with first/last frames). Use `google/veo-3.1-fast` while iterating to keep cost low. Each model has different supported durations, resolutions, and aspect ratios \u2014 see the README per-model section.",
5295
- inputs: z26.object({
5533
+ inputs: z27.object({
5296
5534
  first_frame: ImageRef.optional(),
5297
5535
  last_frame: ImageRef.optional(),
5298
5536
  reference: ImageRef.optional()
5299
5537
  }).loose(),
5300
5538
  params: VideoGenerateParams,
5301
- outputs: z26.object({ video: VideoRef }).strict(),
5539
+ outputs: z27.object({ video: VideoRef }).strict(),
5302
5540
  outputKinds: { video: "video" },
5303
5541
  cost: () => ({ credits: 50, seconds_estimate: 120 })
5304
5542
  });
5305
5543
 
5306
5544
  // src/engine/nodes/remote/videoBackgroundRemove.ts
5307
- import { z as z27 } from "zod";
5308
- var VideoBackgroundRemoveParams = z27.object({
5309
- model: z27.literal("fal/veed-video-background-removal").optional().default("fal/veed-video-background-removal"),
5310
- edge_refinement: z27.boolean().optional().default(true),
5311
- output_codec: z27.enum(["vp9", "h264"]).optional().default("vp9")
5545
+ import { z as z28 } from "zod";
5546
+ var VideoBackgroundRemoveParams = z28.object({
5547
+ model: z28.literal("fal/veed-video-background-removal").optional().default("fal/veed-video-background-removal"),
5548
+ edge_refinement: z28.boolean().optional().default(true),
5549
+ output_codec: z28.enum(["vp9", "h264"]).optional().default("vp9")
5312
5550
  }).strict();
5313
5551
  var videoBackgroundRemoveNode = delegated({
5314
5552
  id: "video_background_remove",
@@ -5316,18 +5554,18 @@ var videoBackgroundRemoveNode = delegated({
5316
5554
  category: "video",
5317
5555
  summary: "Remove the background from a video and return a transparent VP9-with-alpha WebM (or H264 RGB+alpha pair). Drops directly into a hyperframe composition as `<video src='...'>` for chroma-keyed picture-in-picture overlays. Powered by fal.ai `veed/video-background-removal/fast`.",
5318
5556
  when_to_use: "Use when you need a talking-head or subject to float over a custom background in a hyperframe composition. Pair with hyperframe_render(composition: screencast-with-talker) for screencast-with-narrator videos. Output is `video/webm` with alpha \u2014 feed straight into `<video src>` in a composition.",
5319
- inputs: z27.object({
5557
+ inputs: z28.object({
5320
5558
  video: VideoRef
5321
5559
  }).strict(),
5322
5560
  params: VideoBackgroundRemoveParams,
5323
- outputs: z27.object({ video: VideoRef }).strict(),
5561
+ outputs: z28.object({ video: VideoRef }).strict(),
5324
5562
  outputKinds: { video: "video" },
5325
5563
  // $0.012 per 30 frames (edge refinement on) — assume ~30fps; refine via fal dashboard.
5326
5564
  cost: () => ({ credits: 50, seconds_estimate: 60 })
5327
5565
  });
5328
5566
 
5329
5567
  // src/engine/nodes/remote/videoDeconstruct.ts
5330
- import { z as z28 } from "zod";
5568
+ import { z as z29 } from "zod";
5331
5569
  var VIDEO_DECONSTRUCT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
5332
5570
  var videoDeconstructNode = delegated({
5333
5571
  id: "video_deconstruct",
@@ -5335,24 +5573,24 @@ var videoDeconstructNode = delegated({
5335
5573
  category: "video",
5336
5574
  summary: 'Deconstruct a video into a replication-grade blueprint: scene boundaries, the real start/end frame of every scene (extracted from the video as images), and an exhaustive JSON analysis \u2014 per-scene action detail, camera motion, generation-ready frame/motion prompts, overlay text with full typographic style, floating elements, deeply detailed cast (perceived demographics, ethnicity/skin-tone, styling, market-recasting notes), brand-identified logos (named by brand and what they signal, not by appearance, with on-screen timestamps), dialogue with voice descriptions, music spec, SFX list, plus a word-level transcript. `mode:"index"` is the cheap structure-first pass: scene boundaries + global blueprint only (one LLM call, no frames).',
5337
5575
  when_to_use: 'Use to reverse-engineer a reference video (e.g. a competitor ad) so a new canvas can reproduce or remix it scene by scene. Agent loop: (1) optionally run `mode:"index"` to see the structure cheaply (scene count, boundaries, transcript) before planning; (2) run the full deconstruct; (3) read `analysis` and author the reproduction canvas. The blueprint maps 1:1 onto generation nodes: `analysis.scenes[i]` aligns positionally with `start_frames#i`/`end_frames#i`; per scene, `start_frame_prompt`/`end_frame_prompt` feed image_generate (overlay text is excluded from them by contract \u2014 recomposite it from `overlays`), `motion_prompt` + the two frames feed video_generate (first_frame/last_frame), `dialogue[].voice_description` casts tts/dialogue voices, `global.music.music_prompt` feeds music, `sfx[].sound_effect_prompt` feeds sound_effect, and `overlays`/`floating_elements` drive an ffmpeg/hyperframe overlay pass. Long videos (over ~8 min single-shot): run `mode:"index"` first, then several full nodes IN PARALLEL each with a `start_s`/`end_s` window (\u2264480s, snap edges to index scene boundaries), and merge by concatenating `analysis.scenes`; over-length errors include suggested windows. Inject fields into downstream prompts via `{{slot}}`. Pick `~google/gemini-pro-latest` for the densest extraction, `~google/gemini-flash-latest` for cheap/fast passes.',
5338
- inputs: z28.object({ video: VideoRef }).loose(),
5339
- params: z28.object({
5340
- model: z28.enum(VIDEO_DECONSTRUCT_MODELS),
5341
- mode: z28.enum(["full", "index"]).optional(),
5342
- language: z28.string().min(2).max(8).optional(),
5343
- max_scenes: z28.number().int().min(1).max(60).optional(),
5344
- focus: z28.string().optional(),
5345
- start_s: z28.number().min(0).optional(),
5346
- end_s: z28.number().positive().optional(),
5576
+ inputs: z29.object({ video: VideoRef }).loose(),
5577
+ params: z29.object({
5578
+ model: z29.enum(VIDEO_DECONSTRUCT_MODELS),
5579
+ mode: z29.enum(["full", "index"]).optional(),
5580
+ language: z29.string().min(2).max(8).optional(),
5581
+ max_scenes: z29.number().int().min(1).max(60).optional(),
5582
+ focus: z29.string().optional(),
5583
+ start_s: z29.number().min(0).optional(),
5584
+ end_s: z29.number().positive().optional(),
5347
5585
  // Transcript provider for the blueprint's dialogue/transcript. Default
5348
5586
  // Groq Whisper; "deepgram" routes to Nova-3 so words carry punctuation.
5349
- transcriber: z28.enum(["groq", "deepgram"]).optional()
5587
+ transcriber: z29.enum(["groq", "deepgram"]).optional()
5350
5588
  }).strict(),
5351
- outputs: z28.object({
5589
+ outputs: z29.object({
5352
5590
  analysis: JsonRef,
5353
5591
  // Absent in mode:"index" (structure only, no Mux frame extraction).
5354
- start_frames: z28.array(ImageRef).min(1).optional(),
5355
- end_frames: z28.array(ImageRef).min(1).optional(),
5592
+ start_frames: z29.array(ImageRef).min(1).optional(),
5593
+ end_frames: z29.array(ImageRef).min(1).optional(),
5356
5594
  transcript: JsonRef
5357
5595
  }).strict(),
5358
5596
  outputKinds: { analysis: "json", start_frames: "image", end_frames: "image", transcript: "json" },
@@ -5360,38 +5598,38 @@ var videoDeconstructNode = delegated({
5360
5598
  });
5361
5599
 
5362
5600
  // src/engine/nodes/remote/videoLipsync.ts
5363
- import { z as z29 } from "zod";
5364
- var FalLipsyncParams = z29.object({
5365
- model: z29.literal("fal/veed-lipsync")
5601
+ import { z as z30 } from "zod";
5602
+ var FalLipsyncParams = z30.object({
5603
+ model: z30.literal("fal/veed-lipsync")
5366
5604
  }).strict();
5367
- var VideoLipsyncParams = z29.discriminatedUnion("model", [FalLipsyncParams]);
5605
+ var VideoLipsyncParams = z30.discriminatedUnion("model", [FalLipsyncParams]);
5368
5606
  var videoLipsyncNode = delegated({
5369
5607
  id: "video_lipsync",
5370
5608
  version: "1.0.0",
5371
5609
  category: "video",
5372
5610
  summary: "Lip-sync a video to an audio track. Currently backed by VEED via fal.ai (`fal/veed-lipsync`). $0.40/min of output.",
5373
- inputs: z29.object({
5611
+ inputs: z30.object({
5374
5612
  video: VideoRef,
5375
5613
  audio: AudioRef
5376
5614
  }).strict(),
5377
5615
  params: VideoLipsyncParams,
5378
- outputs: z29.object({ video: VideoRef }).strict(),
5616
+ outputs: z30.object({ video: VideoRef }).strict(),
5379
5617
  outputKinds: { video: "video" },
5380
5618
  cost: () => ({ credits: 20, seconds_estimate: 120 })
5381
5619
  });
5382
5620
 
5383
5621
  // src/engine/nodes/remote/videoTranscribe.ts
5384
- import { mkdtemp as mkdtemp6, readFile as readFile9, rm as rm6 } from "fs/promises";
5622
+ import { mkdtemp as mkdtemp6, readFile as readFile10, rm as rm6 } from "fs/promises";
5385
5623
  import { tmpdir as tmpdir6 } from "os";
5386
- import path11 from "path";
5387
- import { z as z30 } from "zod";
5624
+ import path13 from "path";
5625
+ import { z as z31 } from "zod";
5388
5626
 
5389
5627
  // src/engine/nodes/local/lib/ffmpeg.ts
5390
- import { execFile as execFile6 } from "child_process";
5391
- import { promisify as promisify6 } from "util";
5392
- var execFileAsync3 = promisify6(execFile6);
5628
+ import { execFile as execFile7 } from "child_process";
5629
+ import { promisify as promisify7 } from "util";
5630
+ var execFileAsync4 = promisify7(execFile7);
5393
5631
  async function probeVideo(filePath) {
5394
- const { stdout } = await execFileAsync3(
5632
+ const { stdout } = await execFileAsync4(
5395
5633
  "ffprobe",
5396
5634
  ["-v", "error", "-show_streams", "-show_format", "-of", "json", filePath],
5397
5635
  { encoding: "utf-8", maxBuffer: 8 * 1024 * 1024 }
@@ -5449,7 +5687,7 @@ function parseFrameRate(rate) {
5449
5687
  }
5450
5688
  async function runFfmpeg(args, opts) {
5451
5689
  try {
5452
- await execFileAsync3("ffmpeg", args, {
5690
+ await execFileAsync4("ffmpeg", args, {
5453
5691
  timeout: opts.timeout_ms,
5454
5692
  maxBuffer: 64 * 1024 * 1024
5455
5693
  });
@@ -5463,21 +5701,21 @@ ${detail.slice(-4e3)}`);
5463
5701
  }
5464
5702
 
5465
5703
  // src/engine/nodes/remote/videoTranscribe.ts
5466
- var VideoTranscribeParams = z30.object({
5467
- language: z30.string().min(2).max(8).optional(),
5704
+ var VideoTranscribeParams = z31.object({
5705
+ language: z31.string().min(2).max(8).optional(),
5468
5706
  // Provider choice is explicit (no env-based silent branching). Default Groq
5469
5707
  // Whisper; "deepgram" routes to Deepgram Nova-3, which additionally emits a
5470
5708
  // `rich` JSON output with punctuated words + paragraph/sentence grouping.
5471
- transcriber: z30.enum(["groq", "deepgram"]).optional()
5709
+ transcriber: z31.enum(["groq", "deepgram"]).optional()
5472
5710
  }).strict();
5473
- var VideoTranscribeInputs = z30.object({
5711
+ var VideoTranscribeInputs = z31.object({
5474
5712
  video: VideoRef
5475
5713
  }).strict();
5476
- var VideoTranscribeOutputs = z30.object({
5477
- transcript: z30.custom(),
5714
+ var VideoTranscribeOutputs = z31.object({
5715
+ transcript: z31.custom(),
5478
5716
  // Only emitted by the Deepgram path: full punctuated words + paragraph /
5479
5717
  // sentence grouping with speaker indices. Absent for the default Groq path.
5480
- rich: z30.custom().optional()
5718
+ rich: z31.custom().optional()
5481
5719
  }).strict();
5482
5720
  var AUDIO_EXTRACT_TIMEOUT_MS = 6e4;
5483
5721
  var videoTranscribeNode = defineNode({
@@ -5515,14 +5753,14 @@ async function tryExtractAudio(inputs, ctx) {
5515
5753
  ctx.log("video_transcribe: no audio track detected, sending full video");
5516
5754
  return null;
5517
5755
  }
5518
- tmpDir = await mkdtemp6(path11.join(tmpdir6(), "vtx-"));
5519
- const audioPath = path11.join(tmpDir, "audio.mp3");
5756
+ tmpDir = await mkdtemp6(path13.join(tmpdir6(), "vtx-"));
5757
+ const audioPath = path13.join(tmpDir, "audio.mp3");
5520
5758
  ctx.log("video_transcribe: extracting audio (mono 16kHz mp3)");
5521
5759
  await runFfmpeg(
5522
5760
  ["-i", video.path, "-vn", "-ac", "1", "-ar", "16000", "-b:a", "64k", "-f", "mp3", "-y", audioPath],
5523
5761
  { timeout_ms: AUDIO_EXTRACT_TIMEOUT_MS }
5524
5762
  );
5525
- const bytes = await readFile9(audioPath);
5763
+ const bytes = await readFile10(audioPath);
5526
5764
  if (bytes.byteLength === 0) {
5527
5765
  ctx.log("video_transcribe: extracted audio is empty, sending full video");
5528
5766
  return null;
@@ -5562,29 +5800,29 @@ async function tryExtractAudio(inputs, ctx) {
5562
5800
  }
5563
5801
 
5564
5802
  // src/engine/nodes/remote/voiceSelect.ts
5565
- import { z as z31 } from "zod";
5803
+ import { z as z32 } from "zod";
5566
5804
  var voiceSelectNode = delegated({
5567
5805
  id: "voice_select",
5568
5806
  version: "1.0.0",
5569
5807
  category: "audio",
5570
5808
  summary: 'Cast an ElevenLabs voice from a natural-language description (e.g. "warm, authoritative female narrator, American accent"). Lists the account\'s voices and ranks them against the brief, emitting the best `voice_id` as a bare-string text asset plus a ranked `candidates` JSON.',
5571
5809
  when_to_use: 'Use to turn a voice description (e.g. from a `video_deconstruct` blueprint\'s `voice_description`) into a usable ElevenLabs voice id, then feed it into a `tts` node by wiring `inputs.voice_ref: $ref:<this>.voice_id` and setting `params.voice: "{{voice_ref}}"` \u2014 the engine splices the id in at run time. Review `candidates` (json) to pick a different voice. Optional `gender`/`age`/`accent`/`language` hints sharpen the ranking.',
5572
- inputs: z31.object({}).loose(),
5573
- params: z31.object({
5574
- description: z31.string().min(1),
5575
- gender: z31.string().optional(),
5576
- age: z31.string().optional(),
5577
- accent: z31.string().optional(),
5578
- language: z31.string().optional(),
5579
- limit: z31.number().int().min(1).max(20).optional()
5810
+ inputs: z32.object({}).loose(),
5811
+ params: z32.object({
5812
+ description: z32.string().min(1),
5813
+ gender: z32.string().optional(),
5814
+ age: z32.string().optional(),
5815
+ accent: z32.string().optional(),
5816
+ language: z32.string().optional(),
5817
+ limit: z32.number().int().min(1).max(20).optional()
5580
5818
  }).strict(),
5581
- outputs: z31.object({ voice_id: TextRef, candidates: JsonRef }).strict(),
5819
+ outputs: z32.object({ voice_id: TextRef, candidates: JsonRef }).strict(),
5582
5820
  outputKinds: { voice_id: "text", candidates: "json" },
5583
5821
  cost: () => ({ credits: 0, seconds_estimate: 5 })
5584
5822
  });
5585
5823
 
5586
5824
  // src/engine/schema/catalog.ts
5587
- import { z as z32 } from "zod";
5825
+ import { z as z33 } from "zod";
5588
5826
  function generateCatalog(registry, opts = {}) {
5589
5827
  const entries = registry.all().map((def) => {
5590
5828
  const cost = def.cost ? safeCost(def) : void 0;
@@ -5595,9 +5833,9 @@ function generateCatalog(registry, opts = {}) {
5595
5833
  summary: def.summary,
5596
5834
  when_to_use: def.when_to_use,
5597
5835
  location: def.location,
5598
- inputs: z32.toJSONSchema(def.inputs, { unrepresentable: "any" }),
5599
- params: z32.toJSONSchema(def.params, { unrepresentable: "any" }),
5600
- outputs: z32.toJSONSchema(def.outputs, { unrepresentable: "any" }),
5836
+ inputs: z33.toJSONSchema(def.inputs, { unrepresentable: "any" }),
5837
+ params: z33.toJSONSchema(def.params, { unrepresentable: "any" }),
5838
+ outputs: z33.toJSONSchema(def.outputs, { unrepresentable: "any" }),
5601
5839
  cost_estimate_credits: cost?.credits,
5602
5840
  runtime_estimate_seconds: cost?.seconds_estimate
5603
5841
  };
@@ -5629,19 +5867,19 @@ function safeCost(def) {
5629
5867
 
5630
5868
  // src/engine/storage/cache-store.ts
5631
5869
  import { randomUUID as randomUUID2 } from "crypto";
5632
- import { mkdir as mkdir3, readFile as readFile10, rename as rename2, writeFile as writeFile6 } from "fs/promises";
5633
- import path12 from "path";
5870
+ import { mkdir as mkdir3, readFile as readFile11, rename as rename2, writeFile as writeFile7 } from "fs/promises";
5871
+ import path14 from "path";
5634
5872
  var LocalCacheStore = class {
5635
5873
  rootDir;
5636
5874
  constructor(rootDir) {
5637
5875
  this.rootDir = rootDir;
5638
5876
  }
5639
5877
  filePath(cacheKey) {
5640
- return path12.join(this.rootDir, `${cacheKey}.json`);
5878
+ return path14.join(this.rootDir, `${cacheKey}.json`);
5641
5879
  }
5642
5880
  async get(cacheKey) {
5643
5881
  try {
5644
- const buf = await readFile10(this.filePath(cacheKey), "utf8");
5882
+ const buf = await readFile11(this.filePath(cacheKey), "utf8");
5645
5883
  return JSON.parse(buf);
5646
5884
  } catch (e) {
5647
5885
  if (e.code === "ENOENT") return null;
@@ -5650,9 +5888,9 @@ var LocalCacheStore = class {
5650
5888
  }
5651
5889
  async put(entry) {
5652
5890
  const dest = this.filePath(entry.cacheKey);
5653
- await mkdir3(path12.dirname(dest), { recursive: true });
5891
+ await mkdir3(path14.dirname(dest), { recursive: true });
5654
5892
  const tmp = `${dest}.tmp-${process.pid}-${randomUUID2()}`;
5655
- await writeFile6(tmp, JSON.stringify(entry, null, 0));
5893
+ await writeFile7(tmp, JSON.stringify(entry, null, 0));
5656
5894
  await rename2(tmp, dest);
5657
5895
  }
5658
5896
  };
@@ -5686,6 +5924,7 @@ var REMOTE_NODES = [
5686
5924
  imageSelectNode,
5687
5925
  videoGenerateNode,
5688
5926
  ttsNode,
5927
+ audioVoiceConvertNode,
5689
5928
  musicNode,
5690
5929
  dialogueNode,
5691
5930
  soundEffectNode,
@@ -5703,14 +5942,14 @@ function defaultRegistry() {
5703
5942
  }
5704
5943
  function createEngineFromEnv(opts = {}) {
5705
5944
  const cwd = opts.cwd ?? process.cwd();
5706
- const cacheDir = opts.cacheDir ?? path13.join(cwd, "canvas", ".cache");
5707
- const outputsDir = opts.outputsDir ?? path13.join(cwd, "canvas");
5945
+ const cacheDir = opts.cacheDir ?? path15.join(cwd, "canvas", ".cache");
5946
+ const outputsDir = opts.outputsDir ?? path15.join(cwd, "canvas");
5708
5947
  const creds = requireCredentialsFromEnv();
5709
5948
  return new Engine({
5710
5949
  registry: defaultRegistry(),
5711
5950
  client: new BackendClient({ baseUrl: creds.url, apiKey: creds.apiKey }),
5712
- assets: new LocalAssetStore(path13.join(cacheDir, "assets")),
5713
- cache: new LocalCacheStore(path13.join(cacheDir, "index")),
5951
+ assets: new LocalAssetStore(path15.join(cacheDir, "assets")),
5952
+ cache: new LocalCacheStore(path15.join(cacheDir, "index")),
5714
5953
  outputsDir,
5715
5954
  log: opts.log
5716
5955
  });
@@ -5731,4 +5970,4 @@ export {
5731
5970
  defaultRegistry,
5732
5971
  createEngineFromEnv
5733
5972
  };
5734
- //# sourceMappingURL=chunk-JIDZ37KG.js.map
5973
+ //# sourceMappingURL=chunk-NBNUNCY7.js.map