@koda-sl/baker-cli 0.74.0 → 0.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -621,7 +621,7 @@ ${originalIndentation}`;
621
621
  });
622
622
 
623
623
  // src/engine/index.ts
624
- import path13 from "path";
624
+ import path14 from "path";
625
625
 
626
626
  // src/engine/client/http.ts
627
627
  var BackendHttpError = class extends Error {
@@ -667,14 +667,14 @@ var HttpClient = class {
667
667
  this.fetchFn = opts.fetchFn ?? fetch;
668
668
  this.sleepFn = opts.sleepFn ?? ((ms) => new Promise((r) => setTimeout(r, ms)));
669
669
  }
670
- async postJson(path14, body, signal) {
671
- return await this.requestJson("POST", path14, body, signal);
670
+ async postJson(path15, body, signal) {
671
+ return await this.requestJson("POST", path15, body, signal);
672
672
  }
673
- async getJson(path14, signal) {
674
- return await this.requestJson("GET", path14, void 0, signal);
673
+ async getJson(path15, signal) {
674
+ return await this.requestJson("GET", path15, void 0, signal);
675
675
  }
676
- async requestJson(method, path14, body, signal) {
677
- const url = `${this.baseUrl}${path14.startsWith("/") ? path14 : `/${path14}`}`;
676
+ async requestJson(method, path15, body, signal) {
677
+ const url = `${this.baseUrl}${path15.startsWith("/") ? path15 : `/${path15}`}`;
678
678
  for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
679
679
  const outcome = await this.attempt(method, url, body, attempt, signal);
680
680
  if (outcome.kind === "value") return outcome.value;
@@ -786,8 +786,8 @@ var BackendClient = class {
786
786
  );
787
787
  }
788
788
  getArtifact(kind, name, version, signal) {
789
- const path14 = version ? `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}/${encodeURIComponent(version)}` : `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}`;
790
- return this.http.getJson(path14, signal);
789
+ const path15 = version ? `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}/${encodeURIComponent(version)}` : `/api/canvas/artifacts/${encodeURIComponent(kind)}/${encodeURIComponent(name)}`;
790
+ return this.http.getJson(path15, signal);
791
791
  }
792
792
  };
793
793
 
@@ -1242,6 +1242,21 @@ var MODEL_REGISTRY = {
1242
1242
  }
1243
1243
  }
1244
1244
  },
1245
+ audio_voice_convert: {
1246
+ "elevenlabs/eleven_multilingual_sts_v2": {
1247
+ // Speech-to-speech / Voice Changer: re-voice an existing audio clip in the
1248
+ // TARGET voice, preserving timing/prosody. Used to normalize a talking-head
1249
+ // clip's native (generator-chosen) voice into ONE consistent brand voice.
1250
+ label: "ElevenLabs Voice Changer (multilingual STS v2)",
1251
+ inputs: [{ kind: "audio", mimes: FAL_AUDIO_MIMES }],
1252
+ required: ["voice"],
1253
+ params: {
1254
+ voice: { kind: "string" },
1255
+ output_format: { kind: "string", enum: ELEVENLABS_OUTPUT_FORMATS },
1256
+ remove_background_noise: { kind: "boolean" }
1257
+ }
1258
+ }
1259
+ },
1245
1260
  music: {
1246
1261
  "elevenlabs/music-v1": {
1247
1262
  label: "ElevenLabs Music v1 (Compose)",
@@ -1512,11 +1527,30 @@ var VideoMeta = z.object({
1512
1527
  speaker: z.string().optional()
1513
1528
  })
1514
1529
  ).default([]),
1515
- // Scenes with a single on-camera speaker each MUST be lip-synced. Either a
1516
- // bare scene index (validator falls back to the scaffold naming convention) or
1517
- // `{ scene, lipsync_node }`, which names the exact node to look for so a
1518
- // hand-authored canvas with differently-named clip nodes isn't a false miss.
1519
- talking_scenes: z.array(z.union([z.number(), z.object({ scene: z.number(), lipsync_node: z.string() })])).default([])
1530
+ // Scenes with a single on-camera speaker, voiced NATIVELY by the video model
1531
+ // (Seedance generate_audio) then re-voiced to one brand voice. Each entry names
1532
+ // the `audio_voice_convert` node the validator confirms is present — the
1533
+ // native-audio replacement for the retired post-hoc lipsync check. A bare scene
1534
+ // index (legacy) or `{ scene, lipsync_node }` (legacy veed canvases) still parse.
1535
+ talking_scenes: z.array(
1536
+ z.union([
1537
+ z.number(),
1538
+ z.object({
1539
+ scene: z.number(),
1540
+ voice_convert_node: z.string(),
1541
+ // Advisory: the scene's visual length vs the estimated spoken length, so
1542
+ // a reviewer can see a native line that may run past its cut. Not gated.
1543
+ scene_s: z.number().optional(),
1544
+ est_speech_s: z.number().optional()
1545
+ }),
1546
+ z.object({ scene: z.number(), lipsync_node: z.string() })
1547
+ ])
1548
+ ).default([]),
1549
+ // Advisory, NOT gated by the validator: the reviewable "which graphic fires
1550
+ // on which spoken beat" map emitted by scaffold-video (per-scene window,
1551
+ // spoken line, storyboard frames, scheduled graphics). Free-form rows so the
1552
+ // schema stays decoupled from the scaffold's exact shape.
1553
+ motion_board: z.array(z.unknown()).optional()
1520
1554
  }).strict().optional();
1521
1555
  var CanvasMetadata = z.object({
1522
1556
  name: z.string().optional(),
@@ -2193,7 +2227,8 @@ function resolveRefKind(ctx, refStr) {
2193
2227
  if (!targetDef) return null;
2194
2228
  const targetParams = targetDef.params.safeParse(target.params ?? {});
2195
2229
  const resolvedKinds = resolveOutputKinds(targetDef.outputKinds, targetParams.success ? targetParams.data : {});
2196
- const kind = resolvedKinds[expr.output];
2230
+ const declaredKind = target.params?.outputs?.[expr.output]?.kind;
2231
+ const kind = resolvedKinds[expr.output] ?? declaredKind;
2197
2232
  return kind && MODEL_INPUT_KINDS.has(kind) ? kind : null;
2198
2233
  }
2199
2234
  function checkOneRef(ctx, n, i, refStr, jsonPath, field) {
@@ -2286,6 +2321,24 @@ function estimateCredits(ctx) {
2286
2321
  }
2287
2322
  return total;
2288
2323
  }
2324
+ function talkingSceneSatisfied(ctx, entry, scene) {
2325
+ const nodes = ctx.canvas.nodes;
2326
+ if (typeof entry === "object" && "voice_convert_node" in entry) {
2327
+ const clipNativeAudio = nodes.some(
2328
+ (n) => n.id === `s${scene}_clip` && n.type === "video_generate" && n.params?.generate_audio === true
2329
+ );
2330
+ const converted = nodes.some((n) => n.id === entry.voice_convert_node && n.type === "audio_voice_convert");
2331
+ return clipNativeAudio && converted;
2332
+ }
2333
+ if (typeof entry === "object") {
2334
+ return nodes.some((n) => n.id === entry.lipsync_node && n.type === "video_lipsync");
2335
+ }
2336
+ return nodes.some((n) => {
2337
+ if (n.type !== "video_lipsync") return false;
2338
+ const video = n.inputs?.video;
2339
+ return video === `$ref:s${scene}_trim.video` || video === `$ref:s${scene}_clip.video`;
2340
+ });
2341
+ }
2289
2342
  function checkVideoInvariants(ctx) {
2290
2343
  const meta = ctx.canvas.metadata?.video;
2291
2344
  if (!meta) return;
@@ -2312,16 +2365,11 @@ function checkVideoInvariants(ctx) {
2312
2365
  }
2313
2366
  for (const entry of meta.talking_scenes) {
2314
2367
  const scene = typeof entry === "number" ? entry : entry.scene;
2315
- const synced = typeof entry === "number" ? ctx.canvas.nodes.some((n) => {
2316
- if (n.type !== "video_lipsync") return false;
2317
- const video = n.inputs?.video;
2318
- return video === `$ref:s${scene}_trim.video` || video === `$ref:s${scene}_clip.video`;
2319
- }) : ctx.canvas.nodes.some((n) => n.id === entry.lipsync_node && n.type === "video_lipsync");
2320
- if (!synced) {
2368
+ if (!talkingSceneSatisfied(ctx, entry, scene)) {
2321
2369
  ctx.issues.push({
2322
2370
  path: "metadata.video.talking_scenes",
2323
2371
  code: STAGE_CODES.LIPSYNC_MISSING,
2324
- message: `scene ${scene} has a single on-camera speaker but no video_lipsync on s${scene}_clip \u2014 its mouth will drift out of sync with the voiceover`
2372
+ message: `scene ${scene} is a single-on-camera-speaker talking head but its clip lacks native audio (generate_audio) or the audio_voice_convert node is missing \u2014 the voice won't be brand-consistent / lips may drift`
2325
2373
  });
2326
2374
  }
2327
2375
  }
@@ -2355,9 +2403,9 @@ function checkOutputRef(ctx) {
2355
2403
  function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
2356
2404
  for (const issue of err.issues) {
2357
2405
  const tail2 = pathToString(issue.path);
2358
- const path14 = pathPrefix ? tail2 ? `${pathPrefix}.${tail2}` : pathPrefix : tail2;
2406
+ const path15 = pathPrefix ? tail2 ? `${pathPrefix}.${tail2}` : pathPrefix : tail2;
2359
2407
  issues.push({
2360
- path: path14,
2408
+ path: path15,
2361
2409
  code,
2362
2410
  message: issue.message,
2363
2411
  received: issue.code === "invalid_type" ? issue.received : void 0,
@@ -2366,8 +2414,8 @@ function pushZodIssues(issues, err, pathPrefix, code, nodeId, nodeType) {
2366
2414
  });
2367
2415
  }
2368
2416
  }
2369
- function pathToString(path14) {
2370
- return path14.map((p) => typeof p === "number" ? `[${p}]` : `.${String(p)}`).join("").replace(/^\./, "");
2417
+ function pathToString(path15) {
2418
+ return path15.map((p) => typeof p === "number" ? `[${p}]` : `.${String(p)}`).join("").replace(/^\./, "");
2371
2419
  }
2372
2420
  function buildDepGraph(canvas) {
2373
2421
  const graph = /* @__PURE__ */ new Map();
@@ -3995,12 +4043,12 @@ var fontSpecimenNode = defineNode({
3995
4043
  });
3996
4044
 
3997
4045
  // src/engine/nodes/local/hyperframe.ts
3998
- import { execFile as execFile3 } from "child_process";
3999
- import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as readFile7, rm as rm4, stat as stat5, writeFile as writeFile4 } from "fs/promises";
4046
+ import { execFile as execFile4 } from "child_process";
4047
+ import { copyFile as copyFile4, mkdtemp as mkdtemp4, readFile as readFile7, rm as rm4, stat as stat5, writeFile as writeFile5 } from "fs/promises";
4000
4048
  import { createRequire as createRequire2 } from "module";
4001
4049
  import { cpus, tmpdir as tmpdir4 } from "os";
4002
- import path9 from "path";
4003
- import { promisify as promisify3 } from "util";
4050
+ import path10 from "path";
4051
+ import { promisify as promisify4 } from "util";
4004
4052
  import { z as z10 } from "zod";
4005
4053
 
4006
4054
  // src/engine/engine/composition-hash.ts
@@ -4194,6 +4242,109 @@ function defaultFilenameForInput(key, kind) {
4194
4242
  return `${key}.png`;
4195
4243
  }
4196
4244
 
4245
+ // src/engine/nodes/local/lib/hyperframe-check.ts
4246
+ import { execFile as execFile3 } from "child_process";
4247
+ import { promisify as promisify3 } from "util";
4248
+ var execFileAsync = promisify3(execFile3);
4249
+ var NEVER_BLOCK = [/contrast/i, /\bwcag\b/i, /missing_local_asset/i, /font[_-]?family/i, /font[_-]?face/i];
4250
+ var UNAVAILABLE = /unknown command|command not found|not found|Did you mean|Unknown argument|ENOENT/i;
4251
+ function isAdvisory(code, message) {
4252
+ const hay = `${code} ${message}`;
4253
+ return NEVER_BLOCK.some((re) => re.test(hay));
4254
+ }
4255
+ function parseCheckJson(raw) {
4256
+ if (!raw) return null;
4257
+ const trimmed = raw.trim();
4258
+ try {
4259
+ return JSON.parse(trimmed);
4260
+ } catch {
4261
+ }
4262
+ const start = trimmed.indexOf("{");
4263
+ const end = trimmed.lastIndexOf("}");
4264
+ if (start >= 0 && end > start) {
4265
+ try {
4266
+ return JSON.parse(trimmed.slice(start, end + 1));
4267
+ } catch {
4268
+ return null;
4269
+ }
4270
+ }
4271
+ return null;
4272
+ }
4273
+ function classifyLint(json) {
4274
+ const out = [];
4275
+ const findings = json?.findings;
4276
+ if (!Array.isArray(findings)) return out;
4277
+ for (const f of findings) {
4278
+ const rec = f;
4279
+ const code = String(rec?.code ?? "");
4280
+ const message = String(rec?.message ?? "");
4281
+ const severity = String(rec?.severity ?? "info");
4282
+ const blocking = severity === "error" && !isAdvisory(code, message);
4283
+ out.push({ source: "lint", code, message, severity: blocking ? "blocking" : "warning" });
4284
+ }
4285
+ return out;
4286
+ }
4287
+ function classifyInspect(json) {
4288
+ const out = [];
4289
+ const obj = json;
4290
+ const issues = obj?.issues;
4291
+ if (!Array.isArray(issues)) return out;
4292
+ for (const iss of issues) {
4293
+ const rec = iss;
4294
+ const code = String(rec?.code ?? rec?.type ?? "overflow");
4295
+ const message = String(rec?.message ?? rec?.detail ?? JSON.stringify(iss));
4296
+ const severity = rec?.severity ? String(rec.severity) : obj?.ok === false ? "error" : "warning";
4297
+ out.push({ source: "inspect", code, message, severity: severity === "error" ? "blocking" : "warning" });
4298
+ }
4299
+ return out;
4300
+ }
4301
+ function classifyCheckOutput(lintRaw, inspectRaw) {
4302
+ const findings = [...classifyLint(parseCheckJson(lintRaw)), ...classifyInspect(parseCheckJson(inspectRaw))];
4303
+ return {
4304
+ blocking: findings.filter((f) => f.severity === "blocking"),
4305
+ warnings: findings.filter((f) => f.severity === "warning")
4306
+ };
4307
+ }
4308
+ function buildLintArgs(dir) {
4309
+ return ["hyperframes", "lint", dir, "--json"];
4310
+ }
4311
+ function buildInspectArgs(dir, samples) {
4312
+ return ["hyperframes", "inspect", dir, "--json", "--samples", String(samples)];
4313
+ }
4314
+ async function runOne(args, timeoutMs) {
4315
+ try {
4316
+ const { stdout } = await execFileAsync("npx", args, { timeout: timeoutMs, maxBuffer: 64 * 1024 * 1024 });
4317
+ return stdout;
4318
+ } catch (e) {
4319
+ const err = e;
4320
+ if (err.stdout?.includes("{")) return err.stdout;
4321
+ const blob = `${err.stderr ?? ""} ${err.message ?? ""}`;
4322
+ if (UNAVAILABLE.test(blob)) return null;
4323
+ return null;
4324
+ }
4325
+ }
4326
+ async function runHyperframesCheck(opts) {
4327
+ const { dir, nodeId, ctx, timeoutMs, samples = 5 } = opts;
4328
+ const [lintRaw, inspectRaw] = await Promise.all([
4329
+ runOne(buildLintArgs(dir), timeoutMs),
4330
+ runOne(buildInspectArgs(dir, samples), timeoutMs)
4331
+ ]);
4332
+ if (lintRaw === null && inspectRaw === null) {
4333
+ ctx.log(`${nodeId}: hyperframes lint/inspect unavailable \u2014 skipping pre-render check`);
4334
+ return;
4335
+ }
4336
+ const { blocking, warnings } = classifyCheckOutput(lintRaw ?? "", inspectRaw ?? "");
4337
+ for (const w of warnings) {
4338
+ ctx.log(`${nodeId}: hyperframe check warning [${w.source}/${w.code}] ${w.message}`);
4339
+ }
4340
+ if (blocking.length > 0) {
4341
+ const detail = blocking.map((b) => `\u2022 [${b.source}/${b.code}] ${b.message}`).join("\n");
4342
+ throw new Error(`${nodeId}: pre-render check failed (${blocking.length} blocking)
4343
+ ${detail}`);
4344
+ }
4345
+ ctx.log(`${nodeId}: pre-render check passed (${warnings.length} warning${warnings.length === 1 ? "" : "s"})`);
4346
+ }
4347
+
4197
4348
  // src/engine/nodes/local/lib/hyperframe-errors.ts
4198
4349
  var KNOWN_ERROR_PATTERNS = [
4199
4350
  {
@@ -4237,6 +4388,29 @@ ${stderr.slice(0, 1500)}`;
4237
4388
  return null;
4238
4389
  }
4239
4390
 
4391
+ // src/engine/nodes/local/lib/hyperframe-meta.ts
4392
+ import { writeFile as writeFile4 } from "fs/promises";
4393
+ import path9 from "path";
4394
+ async function ensureHyperframesMetaJson(tmp, nodeId, meta, duration) {
4395
+ const metaPath = path9.join(tmp, "meta.json");
4396
+ await writeFile4(
4397
+ metaPath,
4398
+ JSON.stringify(
4399
+ {
4400
+ id: nodeId,
4401
+ name: meta.id,
4402
+ duration,
4403
+ width: meta.width,
4404
+ height: meta.height,
4405
+ fps: meta.fps
4406
+ },
4407
+ null,
4408
+ 2
4409
+ ),
4410
+ "utf-8"
4411
+ );
4412
+ }
4413
+
4240
4414
  // src/engine/nodes/local/lib/templating.ts
4241
4415
  var PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
4242
4416
  function substituteVariables(source, values) {
@@ -4272,25 +4446,34 @@ function literalize(value) {
4272
4446
  }
4273
4447
 
4274
4448
  // src/engine/nodes/local/hyperframe.ts
4275
- var execFileAsync = promisify3(execFile3);
4449
+ var execFileAsync2 = promisify4(execFile4);
4276
4450
  var require_2 = createRequire2(import.meta.url);
4277
4451
  var HyperframeParams = z10.object({
4278
4452
  composition: z10.string().min(1),
4453
+ // Output container. mp4 (default) for delivery; webm/mov render WITH
4454
+ // transparency (alpha) when the composition background is transparent —
4455
+ // use for motion-graphic overlays dropped into Premiere/AE/Nuke.
4456
+ format: z10.enum(["mp4", "webm", "mov"]).optional().default("mp4"),
4279
4457
  timeout_ms: z10.number().int().positive().optional().default(10 * 60 * 1e3)
4280
4458
  }).catchall(z10.unknown());
4281
4459
  var HyperframeInputs = z10.record(z10.string(), z10.custom()).optional().default({});
4282
4460
  var HyperframeOutputs = z10.object({
4283
4461
  video: z10.custom()
4284
4462
  }).strict();
4285
- var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "timeout_ms"]);
4463
+ var NODE_OWNED_PARAM_KEYS = /* @__PURE__ */ new Set(["composition", "format", "timeout_ms"]);
4464
+ var MIME_BY_FORMAT = {
4465
+ mp4: "video/mp4",
4466
+ webm: "video/webm",
4467
+ mov: "video/quicktime"
4468
+ };
4286
4469
  var ENGINE_INJECTED_TOKENS = /* @__PURE__ */ new Set(["duration"]);
4287
4470
  var hyperframeRenderNode = defineNode({
4288
4471
  id: "hyperframe_render",
4289
- version: "6.0.0",
4472
+ version: "6.1.0",
4290
4473
  category: "video",
4291
4474
  location: "local",
4292
4475
  summary: "Render an mp4 by composing an HTML/CSS/GSAP composition over upstream assets. Point `params.composition` at a directory containing `index.html` + `meta.json`. All variables are passed as primitives in `params` and substituted into the composition before render.",
4293
- when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality, format, and worker count are fixed by the engine for ad-creative delivery.",
4476
+ when_to_use: "Use to add captions, lower-thirds, branded overlays, title cards, or any HTML-driven graphic over a video. Point `params.composition` at a directory containing `index.html` + `meta.json`. Inputs are keyed by the composition's `meta.json.inputs` map \u2014 wire `inputs.<key> = $ref:<node>.<output>`. Output resolution/fps come from the composition's `meta.json`; quality and worker count are fixed by the engine. Set `params.format` to `webm` or `mov` for a transparent (alpha) overlay to composite in another editor; defaults to `mp4`. Runs a pre-render `hyperframes lint`/`inspect` gate (overflow/structural errors block; contrast warns).",
4294
4477
  inputs: HyperframeInputs,
4295
4478
  params: HyperframeParams,
4296
4479
  outputs: HyperframeOutputs,
@@ -4309,7 +4492,7 @@ var hyperframeRenderNode = defineNode({
4309
4492
  const compositionDir = await resolveCompositionDir(params.composition);
4310
4493
  const meta = await loadCompositionMeta(compositionDir);
4311
4494
  const compositionParams = validateAndParseDynamicParams(meta, params);
4312
- const tmp = await mkdtemp4(path9.join(tmpdir4(), "hf-render-"));
4495
+ const tmp = await mkdtemp4(path10.join(tmpdir4(), "hf-render-"));
4313
4496
  try {
4314
4497
  await copyComposition(compositionDir, tmp);
4315
4498
  await vendorGsap(tmp, ctx);
@@ -4317,15 +4500,16 @@ var hyperframeRenderNode = defineNode({
4317
4500
  const duration = stagedDuration ?? meta.default_duration;
4318
4501
  const substitutionValues = await buildSubstitutionValues(compositionParams, meta, duration);
4319
4502
  await substituteCompositionFiles(tmp, substitutionValues);
4320
- await ensureMetaJson(tmp, ctx.nodeId, meta, duration);
4321
- const outputPath = path9.join(tmp, "output.mp4");
4503
+ await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, duration);
4504
+ await runHyperframesCheck({ dir: tmp, nodeId: "hyperframe_render", ctx, timeoutMs: params.timeout_ms });
4505
+ const outputPath = path10.join(tmp, `output.${params.format}`);
4322
4506
  await runRender({ tmp, outputPath, params, meta, ctx });
4323
4507
  const bytes = await readFile7(outputPath);
4324
4508
  ctx.log(`rendered ${bytes.length} bytes`);
4325
4509
  const ref = await ctx.assets.ingestBytes({
4326
4510
  bytes: Buffer.from(bytes),
4327
4511
  kind: "video",
4328
- mime: "video/mp4",
4512
+ mime: MIME_BY_FORMAT[params.format],
4329
4513
  metadata: {
4330
4514
  width: meta.width,
4331
4515
  height: meta.height,
@@ -4342,10 +4526,10 @@ var hyperframeRenderNode = defineNode({
4342
4526
  }
4343
4527
  });
4344
4528
  async function resolveCompositionDir(composition) {
4345
- const compositionPath = path9.isAbsolute(composition) ? composition : path9.resolve(process.cwd(), composition);
4529
+ const compositionPath = path10.isAbsolute(composition) ? composition : path10.resolve(process.cwd(), composition);
4346
4530
  const s = await stat5(compositionPath);
4347
4531
  if (s.isDirectory()) return compositionPath;
4348
- return path9.dirname(compositionPath);
4532
+ return path10.dirname(compositionPath);
4349
4533
  }
4350
4534
  async function validateCompositionParams(rawParams) {
4351
4535
  const issues = [];
@@ -4412,7 +4596,7 @@ async function copyComposition(srcDir, destDir) {
4412
4596
  await cp(srcDir, destDir, {
4413
4597
  recursive: true,
4414
4598
  filter: (src) => {
4415
- const name = path9.basename(src);
4599
+ const name = path10.basename(src);
4416
4600
  if (name === ".cache" || name === "node_modules" || name === ".git") return false;
4417
4601
  return true;
4418
4602
  }
@@ -4421,7 +4605,7 @@ async function copyComposition(srcDir, destDir) {
4421
4605
  async function vendorGsap(tmp, ctx) {
4422
4606
  try {
4423
4607
  const gsapMin = require_2.resolve("gsap/dist/gsap.min.js");
4424
- await copyFile4(gsapMin, path9.join(tmp, "gsap.min.js"));
4608
+ await copyFile4(gsapMin, path10.join(tmp, "gsap.min.js"));
4425
4609
  } catch (e) {
4426
4610
  ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
4427
4611
  }
@@ -4436,7 +4620,7 @@ async function stageInputs2(tmp, inputs, meta, ctx) {
4436
4620
  await stageAsset(ref, tmp, filename);
4437
4621
  ctx.log(`staged ${spec.kind} \u2192 ${filename}`);
4438
4622
  if (spec.kind === "video" && primaryDuration === null) {
4439
- primaryDuration = await probeDurationSeconds(path9.join(tmp, filename));
4623
+ primaryDuration = await probeDurationSeconds(path10.join(tmp, filename));
4440
4624
  }
4441
4625
  }
4442
4626
  return primaryDuration;
@@ -4482,7 +4666,7 @@ function coerceImageParam(value) {
4482
4666
  throw new Error("hyperframe_render: image param must be a URL string or AssetRef");
4483
4667
  }
4484
4668
  async function substituteCompositionFiles(tmp, values) {
4485
- const entryPath = path9.join(tmp, "index.html");
4669
+ const entryPath = path10.join(tmp, "index.html");
4486
4670
  const original = await readFile7(entryPath, "utf-8");
4487
4671
  const { output, missing } = substituteVariables(original, values);
4488
4672
  if (missing.length > 0) {
@@ -4490,26 +4674,7 @@ async function substituteCompositionFiles(tmp, values) {
4490
4674
  `hyperframe_render: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}. Add to params or to meta.json's params with a default.`
4491
4675
  );
4492
4676
  }
4493
- await writeFile4(entryPath, output, "utf-8");
4494
- }
4495
- async function ensureMetaJson(tmp, nodeId, meta, duration) {
4496
- const metaPath = path9.join(tmp, "meta.json");
4497
- await writeFile4(
4498
- metaPath,
4499
- JSON.stringify(
4500
- {
4501
- id: nodeId,
4502
- name: meta.id,
4503
- duration,
4504
- width: meta.width,
4505
- height: meta.height,
4506
- fps: meta.fps
4507
- },
4508
- null,
4509
- 2
4510
- ),
4511
- "utf-8"
4512
- );
4677
+ await writeFile5(entryPath, output, "utf-8");
4513
4678
  }
4514
4679
  var MAX_WORKERS = 4;
4515
4680
  function workerCount() {
@@ -4517,10 +4682,10 @@ function workerCount() {
4517
4682
  }
4518
4683
  async function runRender(opts) {
4519
4684
  const { tmp, outputPath, params, meta, ctx } = opts;
4520
- const args = buildRenderArgs(tmp, outputPath, meta);
4521
- ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps from ${path9.basename(tmp)}`);
4685
+ const args = buildRenderArgs(tmp, outputPath, meta, params.format);
4686
+ ctx.log(`rendering ${meta.width}x${meta.height}@${meta.fps}fps ${params.format} from ${path10.basename(tmp)}`);
4522
4687
  try {
4523
- await execFileAsync("npx", args, { timeout: params.timeout_ms, maxBuffer: 64 * 1024 * 1024 });
4688
+ await execFileAsync2("npx", args, { timeout: params.timeout_ms, maxBuffer: 64 * 1024 * 1024 });
4524
4689
  } catch (e) {
4525
4690
  const stderr = e.stderr ?? "";
4526
4691
  const stdout = e.stdout ?? "";
@@ -4530,7 +4695,7 @@ async function runRender(opts) {
4530
4695
  ${friendly ?? detail.slice(0, 4e3)}`);
4531
4696
  }
4532
4697
  }
4533
- function buildRenderArgs(tmp, outputPath, meta) {
4698
+ function buildRenderArgs(tmp, outputPath, meta, format) {
4534
4699
  return [
4535
4700
  "hyperframes",
4536
4701
  "render",
@@ -4542,13 +4707,13 @@ function buildRenderArgs(tmp, outputPath, meta) {
4542
4707
  "--quality",
4543
4708
  "high",
4544
4709
  "--format",
4545
- "mp4",
4710
+ format,
4546
4711
  "--workers",
4547
4712
  String(workerCount())
4548
4713
  ];
4549
4714
  }
4550
4715
  async function probeDurationSeconds(filePath) {
4551
- const { stdout } = await execFileAsync(
4716
+ const { stdout } = await execFileAsync2(
4552
4717
  "ffprobe",
4553
4718
  ["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", filePath],
4554
4719
  { encoding: "utf-8" }
@@ -4561,14 +4726,14 @@ async function probeDurationSeconds(filePath) {
4561
4726
  }
4562
4727
 
4563
4728
  // src/engine/nodes/local/hyperframe-snapshot.ts
4564
- import { execFile as execFile4 } from "child_process";
4565
- import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as readFile8, rm as rm5, writeFile as writeFile5 } from "fs/promises";
4729
+ import { execFile as execFile5 } from "child_process";
4730
+ import { copyFile as copyFile5, mkdtemp as mkdtemp5, readFile as readFile8, rm as rm5, writeFile as writeFile6 } from "fs/promises";
4566
4731
  import { createRequire as createRequire3 } from "module";
4567
4732
  import { tmpdir as tmpdir5 } from "os";
4568
- import path10 from "path";
4569
- import { promisify as promisify4 } from "util";
4733
+ import path11 from "path";
4734
+ import { promisify as promisify5 } from "util";
4570
4735
  import { z as z11 } from "zod";
4571
- var _execFileAsync = promisify4(execFile4);
4736
+ var _execFileAsync = promisify5(execFile5);
4572
4737
  var require_3 = createRequire3(import.meta.url);
4573
4738
  var WaitForSpec = z11.discriminatedUnion("kind", [
4574
4739
  z11.object({ kind: z11.literal("auto") }),
@@ -4589,7 +4754,7 @@ var NODE_OWNED_PARAM_KEYS2 = /* @__PURE__ */ new Set(["composition", "wait_for",
4589
4754
  var DEVICE_SCALE_FACTOR2 = 2;
4590
4755
  var hyperframeSnapshotNode = defineNode({
4591
4756
  id: "hyperframe_snapshot",
4592
- version: "4.0.0",
4757
+ version: "4.1.0",
4593
4758
  category: "image",
4594
4759
  location: "local",
4595
4760
  summary: "Render an HTML/CSS composition to a static PNG via headless Chromium at 2x device-scale (retina). Same composition model as `hyperframe_render` \u2014 point `params.composition` at a directory containing `index.html` + `meta.json`.",
@@ -4612,14 +4777,22 @@ var hyperframeSnapshotNode = defineNode({
4612
4777
  const compositionDir = await resolveCompositionDir(params.composition);
4613
4778
  const meta = await loadCompositionMeta(compositionDir);
4614
4779
  const compositionParams = validateAndParseDynamicParams2(meta, params);
4615
- const tmp = await mkdtemp5(path10.join(tmpdir5(), "hf-snap-"));
4780
+ const tmp = await mkdtemp5(path11.join(tmpdir5(), "hf-snap-"));
4616
4781
  try {
4617
4782
  await copyComposition2(compositionDir, tmp);
4618
4783
  await vendorGsap2(tmp, ctx);
4619
4784
  await stageInputs3(tmp, inputs, meta, ctx);
4620
4785
  const substitutionValues = await buildSubstitutionValues2(compositionParams, meta);
4621
4786
  await substituteCompositionFiles2(tmp, substitutionValues);
4622
- const entryPath = path10.join(tmp, "index.html");
4787
+ await ensureHyperframesMetaJson(tmp, ctx.nodeId, meta, meta.default_duration);
4788
+ await runHyperframesCheck({
4789
+ dir: tmp,
4790
+ nodeId: "hyperframe_snapshot",
4791
+ ctx,
4792
+ timeoutMs: params.timeout_ms,
4793
+ samples: 1
4794
+ });
4795
+ const entryPath = path11.join(tmp, "index.html");
4623
4796
  const entryUrl = `file://${entryPath}`;
4624
4797
  ctx.log(`snapshotting ${meta.width}x${meta.height}@${DEVICE_SCALE_FACTOR2}x wait=${params.wait_for.kind}`);
4625
4798
  const pwSpecifier = ["play", "wright"].join("");
@@ -4680,7 +4853,7 @@ async function copyComposition2(srcDir, destDir) {
4680
4853
  await cp(srcDir, destDir, {
4681
4854
  recursive: true,
4682
4855
  filter: (src) => {
4683
- const name = path10.basename(src);
4856
+ const name = path11.basename(src);
4684
4857
  if (name === ".cache" || name === "node_modules" || name === ".git") return false;
4685
4858
  return true;
4686
4859
  }
@@ -4689,7 +4862,7 @@ async function copyComposition2(srcDir, destDir) {
4689
4862
  async function vendorGsap2(tmp, ctx) {
4690
4863
  try {
4691
4864
  const gsapMin = require_3.resolve("gsap/dist/gsap.min.js");
4692
- await copyFile5(gsapMin, path10.join(tmp, "gsap.min.js"));
4865
+ await copyFile5(gsapMin, path11.join(tmp, "gsap.min.js"));
4693
4866
  } catch (e) {
4694
4867
  ctx.log(`warning: could not vendor gsap.min.js (${e.message}); compositions must self-supply`);
4695
4868
  }
@@ -4723,7 +4896,7 @@ function coerceImageParam2(value) {
4723
4896
  throw new Error("hyperframe_snapshot: image param must be a URL string or AssetRef");
4724
4897
  }
4725
4898
  async function substituteCompositionFiles2(tmp, values) {
4726
- const entryPath = path10.join(tmp, "index.html");
4899
+ const entryPath = path11.join(tmp, "index.html");
4727
4900
  const original = await readFile8(entryPath, "utf-8");
4728
4901
  const { output, missing } = substituteVariables(original, values);
4729
4902
  if (missing.length > 0) {
@@ -4731,7 +4904,7 @@ async function substituteCompositionFiles2(tmp, values) {
4731
4904
  `hyperframe_snapshot: composition references undefined variables: ${missing.map((m) => `{{${m}}}`).join(", ")}.`
4732
4905
  );
4733
4906
  }
4734
- await writeFile5(entryPath, output, "utf-8");
4907
+ await writeFile6(entryPath, output, "utf-8");
4735
4908
  }
4736
4909
  async function waitForReady(page, waitFor, timeoutMs) {
4737
4910
  switch (waitFor.kind) {
@@ -4765,10 +4938,10 @@ async function waitForReady(page, waitFor, timeoutMs) {
4765
4938
  }
4766
4939
 
4767
4940
  // src/engine/nodes/local/imagemagick.ts
4768
- import { execFile as execFile5 } from "child_process";
4769
- import { promisify as promisify5 } from "util";
4941
+ import { execFile as execFile6 } from "child_process";
4942
+ import { promisify as promisify6 } from "util";
4770
4943
  import { z as z12 } from "zod";
4771
- var execFileAsync2 = promisify5(execFile5);
4944
+ var execFileAsync3 = promisify6(execFile6);
4772
4945
  var OutputDecl2 = z12.object({
4773
4946
  kind: z12.enum(["image", "video", "audio"]),
4774
4947
  ext: z12.string().min(1).max(8)
@@ -4784,7 +4957,7 @@ async function resolveBin() {
4784
4957
  if (resolvedBin) return resolvedBin;
4785
4958
  for (const candidate of ["magick", "convert"]) {
4786
4959
  try {
4787
- await execFileAsync2(candidate, ["-version"], { encoding: "utf-8" });
4960
+ await execFileAsync3(candidate, ["-version"], { encoding: "utf-8" });
4788
4961
  resolvedBin = candidate;
4789
4962
  return candidate;
4790
4963
  } catch {
@@ -4834,34 +5007,60 @@ var textNode = defineNode({
4834
5007
  execute: ({ params }) => Promise.resolve({ text: params.value })
4835
5008
  });
4836
5009
 
4837
- // src/engine/nodes/remote/dialogue.ts
5010
+ // src/engine/nodes/remote/audioVoiceConvert.ts
4838
5011
  import { z as z14 } from "zod";
4839
- var DialogueInput = z14.object({
4840
- text: z14.string().min(1),
4841
- voice_id: z14.string().min(1)
5012
+ var AudioVoiceConvertParams = z14.object({
5013
+ model: z14.literal("elevenlabs/eleven_multilingual_sts_v2"),
5014
+ /** Target voice id. Splice an upstream `voice_select` via `"{{voice_ref}}"`. */
5015
+ voice: z14.string().min(1),
5016
+ output_format: z14.string().optional(),
5017
+ /** Strip the source clip's background noise before re-voicing. */
5018
+ remove_background_noise: z14.boolean().optional()
5019
+ }).strict();
5020
+ var audioVoiceConvertNode = delegated({
5021
+ id: "audio_voice_convert",
5022
+ version: "1.0.0",
5023
+ category: "audio",
5024
+ summary: "Voice Changer / speech-to-speech via ElevenLabs (eleven_multilingual_sts_v2). Re-voices an existing audio clip in a TARGET voice while preserving timing/prosody.",
5025
+ when_to_use: 'Use to normalize a generator-chosen voice (e.g. a Seedance talking-head clip\'s native audio) into ONE consistent brand voice across every scene \u2014 the cadence is preserved so any lip-sync stays valid. Wire `inputs.voice_ref: $ref:<voice_select>.voice_id` and set `params.voice: "{{voice_ref}}"`.',
5026
+ inputs: z14.object({
5027
+ audio: AudioRef,
5028
+ voice_ref: TextRef.optional()
5029
+ }).strict(),
5030
+ params: AudioVoiceConvertParams,
5031
+ outputs: z14.object({ audio: AudioRef }).strict(),
5032
+ outputKinds: { audio: "audio" },
5033
+ cost: () => ({ credits: 1, seconds_estimate: 20 })
5034
+ });
5035
+
5036
+ // src/engine/nodes/remote/dialogue.ts
5037
+ import { z as z15 } from "zod";
5038
+ var DialogueInput = z15.object({
5039
+ text: z15.string().min(1),
5040
+ voice_id: z15.string().min(1)
4842
5041
  });
4843
5042
  var DIALOGUE_MODELS = ["elevenlabs/eleven_v3"];
4844
- var DialogueParams = z14.object({
4845
- model: z14.enum(DIALOGUE_MODELS),
5043
+ var DialogueParams = z15.object({
5044
+ model: z15.enum(DIALOGUE_MODELS),
4846
5045
  /**
4847
5046
  * Ordered list of lines, each tagged with the voice that should speak it.
4848
5047
  * Up to 10 unique voice_ids; total text across all lines should stay under
4849
5048
  * ~2000 characters for best quality (ElevenLabs guidance).
4850
5049
  */
4851
- inputs: z14.array(DialogueInput).min(1).max(50),
4852
- language_code: z14.string().optional(),
5050
+ inputs: z15.array(DialogueInput).min(1).max(50),
5051
+ language_code: z15.string().optional(),
4853
5052
  /** ElevenLabs voice/model settings passthrough (e.g. `{ stability: 0.5 }`). */
4854
- settings: z14.record(z14.string(), z14.unknown()).optional(),
4855
- seed: z14.number().int().min(0).max(4294967295).optional(),
4856
- apply_text_normalization: z14.enum(["auto", "on", "off"]).optional(),
5053
+ settings: z15.record(z15.string(), z15.unknown()).optional(),
5054
+ seed: z15.number().int().min(0).max(4294967295).optional(),
5055
+ apply_text_normalization: z15.enum(["auto", "on", "off"]).optional(),
4857
5056
  /**
4858
5057
  * When true, hits `/v1/text-to-dialogue/with-timestamps` and emits a
4859
5058
  * separate `timestamps` output — character-level alignment plus
4860
5059
  * per-voice segment markers usable for captions, lipsync, or
4861
5060
  * beat-matched cuts in ad creatives.
4862
5061
  */
4863
- with_timestamps: z14.boolean().optional(),
4864
- output_format: z14.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5062
+ with_timestamps: z15.boolean().optional(),
5063
+ output_format: z15.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
4865
5064
  }).strict().refine((p) => p.inputs.reduce((sum, line) => sum + line.text.length, 0) <= ELEVENLABS_MAX_TEXT_CHARS, {
4866
5065
  message: `total dialogue text exceeds ${ELEVENLABS_MAX_TEXT_CHARS} characters`,
4867
5066
  path: ["inputs"]
@@ -4872,9 +5071,9 @@ var dialogueNode = delegated({
4872
5071
  category: "audio",
4873
5072
  summary: "Multi-voice dialogue / VO with ElevenLabs Eleven v3. Each line is tagged with a `voice_id`, so you can render two-character scripts (e.g. ad VO + customer testimonial reaction) in a single call. Setting `with_timestamps: true` adds character-level alignment for caption rendering and lipsync-friendly cuts.",
4874
5073
  when_to_use: "Use for any ad creative or website video VO that needs more than narration \u2014 interviews, two-actor scripts, character ads, testimonial reads. For single-voice flat reads the existing `tts` node is cheaper and simpler; reach for `dialogue` when you need multiple speakers in one stitched track or word-level timing for downstream lipsync / captions.",
4875
- inputs: z14.object({}).loose(),
5074
+ inputs: z15.object({}).loose(),
4876
5075
  params: DialogueParams,
4877
- outputs: z14.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5076
+ outputs: z15.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
4878
5077
  outputKinds: { audio: "audio", timestamps: "json" },
4879
5078
  cost: ({ params }) => {
4880
5079
  const chars = params.inputs.reduce((sum, line) => sum + line.text.length, 0);
@@ -4883,7 +5082,7 @@ var dialogueNode = delegated({
4883
5082
  });
4884
5083
 
4885
5084
  // src/engine/nodes/remote/image.ts
4886
- import { z as z15 } from "zod";
5085
+ import { z as z16 } from "zod";
4887
5086
  var IMAGE_GENERATE_MODELS2 = [
4888
5087
  "openai/gpt-5.4-image-2",
4889
5088
  "google/gemini-3.5-flash",
@@ -4891,16 +5090,16 @@ var IMAGE_GENERATE_MODELS2 = [
4891
5090
  "google/gemini-3-pro-image-preview",
4892
5091
  "recraft/recraft-v4.1-pro-vector"
4893
5092
  ];
4894
- var ImageGenerateParams = z15.object({
4895
- model: z15.enum(IMAGE_GENERATE_MODELS2),
4896
- prompt: z15.string().min(1),
4897
- aspect_ratio: z15.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
4898
- image_size: z15.enum(["0.5K", "1K", "2K", "4K"]).optional(),
5093
+ var ImageGenerateParams = z16.object({
5094
+ model: z16.enum(IMAGE_GENERATE_MODELS2),
5095
+ prompt: z16.string().min(1),
5096
+ aspect_ratio: z16.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
5097
+ image_size: z16.enum(["0.5K", "1K", "2K", "4K"]).optional(),
4899
5098
  // Recraft v4 vector controls — forwarded into `image_config`. Registry
4900
5099
  // rejects them on non-Recraft models.
4901
- strength: z15.number().min(0).max(1).optional(),
4902
- rgb_colors: z15.array(z15.array(z15.number().int().min(0).max(255))).optional(),
4903
- background_rgb_color: z15.array(z15.number().int().min(0).max(255)).optional()
5100
+ strength: z16.number().min(0).max(1).optional(),
5101
+ rgb_colors: z16.array(z16.array(z16.number().int().min(0).max(255))).optional(),
5102
+ background_rgb_color: z16.array(z16.number().int().min(0).max(255)).optional()
4904
5103
  }).strict();
4905
5104
  var imageGenerateNode = delegated({
4906
5105
  id: "image_generate",
@@ -4910,22 +5109,22 @@ var imageGenerateNode = delegated({
4910
5109
  when_to_use: "Use for hero shots, product photography, illustrations, and vector logos. `recraft/recraft-v4.1-pro-vector` for crisp vector / logo work; `openai/gpt-5.4-image-2` for photorealistic; Gemini variants for fast iteration and editing via the `reference` input. `reference` accepts ONE image or an ARRAY of images \u2014 wire several to combine references in a single generation (e.g. a subject sheet + a font specimen + the original ad). Every reference is forwarded to the model in array order.",
4911
5110
  // `reference` is one image or an ordered array of images. The backend forwards
4912
5111
  // each as a separate `image_url` to the provider (OpenRouter accepts many).
4913
- inputs: z15.object({ reference: z15.union([ImageRef, z15.array(ImageRef).min(1)]).optional() }).loose(),
5112
+ inputs: z16.object({ reference: z16.union([ImageRef, z16.array(ImageRef).min(1)]).optional() }).loose(),
4914
5113
  params: ImageGenerateParams,
4915
- outputs: z15.object({ images: z15.array(ImageRef).min(1) }).strict(),
5114
+ outputs: z16.object({ images: z16.array(ImageRef).min(1) }).strict(),
4916
5115
  outputKinds: { images: "image" },
4917
5116
  cost: () => ({ credits: 5, seconds_estimate: 10 })
4918
5117
  });
4919
5118
 
4920
5119
  // src/engine/nodes/remote/imageAspectAdapt.ts
4921
- import { z as z16 } from "zod";
5120
+ import { z as z17 } from "zod";
4922
5121
  var ASPECT_ADAPT_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
4923
5122
  var ASPECT_ADAPT_FORMATS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"];
4924
- var ImageAspectAdaptParams = z16.object({
4925
- model: z16.enum(ASPECT_ADAPT_MODELS),
4926
- formats: z16.array(z16.enum(ASPECT_ADAPT_FORMATS)).min(1).max(6).refine((formats) => new Set(formats).size === formats.length, { message: "formats must be unique" }),
4927
- guidance: z16.string().min(1).optional(),
4928
- image_size: z16.enum(["0.5K", "1K", "2K", "4K"]).optional()
5123
+ var ImageAspectAdaptParams = z17.object({
5124
+ model: z17.enum(ASPECT_ADAPT_MODELS),
5125
+ formats: z17.array(z17.enum(ASPECT_ADAPT_FORMATS)).min(1).max(6).refine((formats) => new Set(formats).size === formats.length, { message: "formats must be unique" }),
5126
+ guidance: z17.string().min(1).optional(),
5127
+ image_size: z17.enum(["0.5K", "1K", "2K", "4K"]).optional()
4929
5128
  }).strict();
4930
5129
  var imageAspectAdaptNode = delegated({
4931
5130
  id: "image_aspect_adapt",
@@ -4933,9 +5132,9 @@ var imageAspectAdaptNode = delegated({
4933
5132
  category: "image",
4934
5133
  summary: "Adapt ONE creative into multiple aspect ratios (Meta: 9:16 stories, 1:1 feed, 4:5, 16:9\u2026) in a single step. AI recomposes the layout per format \u2014 identical subject, text, logos, colors, and style; the scene is extended/restructured, never stretched or cropped. Formats that already match the source ratio pass through unchanged at zero cost. Outputs are ordered exactly as `formats`.",
4935
5134
  when_to_use: "Use after a hero creative exists (image_generate, ingest, image_search) to fan it out to every placement format \u2014 wire the creative into `source` and list the target ratios in `formats`. Cost is estimated per format; formats matching the source ratio are free pass-throughs. Pick `google/gemini-3.1-flash-image-preview` (Nano Banana flash) while iterating, `google/gemini-3-pro-image-preview` (Nano Banana Pro) for final-quality adaptation.",
4936
- inputs: z16.object({ source: ImageRef }).loose(),
5135
+ inputs: z17.object({ source: ImageRef }).loose(),
4937
5136
  params: ImageAspectAdaptParams,
4938
- outputs: z16.object({ images: z16.array(ImageRef).min(1) }).strict(),
5137
+ outputs: z17.object({ images: z17.array(ImageRef).min(1) }).strict(),
4939
5138
  outputKinds: { images: "image" },
4940
5139
  cost: ({ params }) => {
4941
5140
  const p = params;
@@ -4948,12 +5147,12 @@ var imageAspectAdaptNode = delegated({
4948
5147
  });
4949
5148
 
4950
5149
  // src/engine/nodes/remote/imageBackgroundRemove.ts
4951
- import { z as z17 } from "zod";
4952
- var ImageBackgroundRemoveParams = z17.object({
4953
- model: z17.literal("fal/birefnet-v2").optional().default("fal/birefnet-v2"),
4954
- model_variant: z17.enum(["General Use (Light)", "General Use (Heavy)", "Matting", "Portrait", "DIS", "HRSOD", "COD"]).optional().default("General Use (Light)"),
4955
- operating_resolution: z17.enum(["1024x1024", "2048x2048", "2304x2304"]).optional(),
4956
- mask_only: z17.boolean().optional().default(false)
5150
+ import { z as z18 } from "zod";
5151
+ var ImageBackgroundRemoveParams = z18.object({
5152
+ model: z18.literal("fal/birefnet-v2").optional().default("fal/birefnet-v2"),
5153
+ model_variant: z18.enum(["General Use (Light)", "General Use (Heavy)", "Matting", "Portrait", "DIS", "HRSOD", "COD"]).optional().default("General Use (Light)"),
5154
+ operating_resolution: z18.enum(["1024x1024", "2048x2048", "2304x2304"]).optional(),
5155
+ mask_only: z18.boolean().optional().default(false)
4957
5156
  }).strict();
4958
5157
  var imageBackgroundRemoveNode = delegated({
4959
5158
  id: "image_background_remove",
@@ -4961,11 +5160,11 @@ var imageBackgroundRemoveNode = delegated({
4961
5160
  category: "image",
4962
5161
  summary: "Remove the background from an image and return a transparent PNG (or the segmentation mask). Powered by fal.ai `fal-ai/birefnet/v2`.",
4963
5162
  when_to_use: "Use to extract subjects from photos for use as overlays in hyperframe compositions, product shots, or compositing pipelines. Set `mask_only:true` to return the binary mask instead of the alpha-cut image.",
4964
- inputs: z17.object({
5163
+ inputs: z18.object({
4965
5164
  image: ImageRef
4966
5165
  }).strict(),
4967
5166
  params: ImageBackgroundRemoveParams,
4968
- outputs: z17.object({
5167
+ outputs: z18.object({
4969
5168
  image: ImageRef,
4970
5169
  mask: ImageRef.optional()
4971
5170
  }).strict(),
@@ -4974,7 +5173,7 @@ var imageBackgroundRemoveNode = delegated({
4974
5173
  });
4975
5174
 
4976
5175
  // src/engine/nodes/remote/imageDescribe.ts
4977
- import { z as z18 } from "zod";
5176
+ import { z as z19 } from "zod";
4978
5177
  var IMAGE_DESCRIBE_MODELS = ["~google/gemini-pro-latest", "~google/gemini-flash-latest"];
4979
5178
  var imageDescribeNode = delegated({
4980
5179
  id: "image_describe",
@@ -4982,31 +5181,31 @@ var imageDescribeNode = delegated({
4982
5181
  category: "vision",
4983
5182
  summary: "Reverse-engineer an image into an exhaustive, replication-grade JSON description: who the advertiser is and what they sell (source_context), composition, non-person subjects with expression/treatment, deeply detailed people, brand-identified logos (named by brand, not appearance), camera optics, lighting, color palette WITH per-color brand-ownership (brand vs borrowed-functional) and purpose, materials, visible text, ad signals (proof badges/CTA/price), the persuasion engine (ad_intent), style, post-processing.",
4984
5183
  when_to_use: 'Use to turn a reference image into a structured blueprint you can inject into downstream prompts via `{{slot}}` \u2014 e.g. restyle a competitor ad onto your own product, lock a look across a series, or feed exact palette/lighting into image_generate. Purpose-built for market adaptation: logos are identified by brand ("Trustpilot", never "green star"), people and animals carry expression/emotion/intent detail, and each color is tagged brand vs borrowed-functional so a recolor can keep the reds/yellows that do a job. The extraction prompt is baked in; use `focus` to emphasise aspects and `context` to pass known provenance (advertiser, category, market) so source_context and color ownership are grounded. Pick `~google/gemini-pro-latest` for the densest extraction (recommended for ad / market-adaptation passes), `~google/gemini-flash-latest` for cheap/fast passes. The output is rich \u2014 raise `max_tokens` (e.g. 8000+) for dense ads so the JSON isn\'t truncated.',
4985
- inputs: z18.object({ image: ImageRef }).loose(),
4986
- params: z18.object({
4987
- model: z18.enum(IMAGE_DESCRIBE_MODELS),
4988
- focus: z18.string().optional(),
4989
- context: z18.string().optional(),
4990
- temperature: z18.number().min(0).max(2).optional(),
4991
- max_tokens: z18.number().int().positive().optional()
5184
+ inputs: z19.object({ image: ImageRef }).loose(),
5185
+ params: z19.object({
5186
+ model: z19.enum(IMAGE_DESCRIBE_MODELS),
5187
+ focus: z19.string().optional(),
5188
+ context: z19.string().optional(),
5189
+ temperature: z19.number().min(0).max(2).optional(),
5190
+ max_tokens: z19.number().int().positive().optional()
4992
5191
  }).strict(),
4993
- outputs: z18.object({ description: JsonRef }).strict(),
5192
+ outputs: z19.object({ description: JsonRef }).strict(),
4994
5193
  outputKinds: { description: "json" },
4995
5194
  cost: () => ({ credits: 2, seconds_estimate: 10 })
4996
5195
  });
4997
5196
 
4998
5197
  // src/engine/nodes/remote/imageReferenceSheet.ts
4999
- import { z as z19 } from "zod";
5198
+ import { z as z20 } from "zod";
5000
5199
  var REFERENCE_SHEET_MODELS = ["google/gemini-3-pro-image-preview", "google/gemini-3.1-flash-image-preview"];
5001
- var ImageReferenceSheetParams = z19.object({
5002
- model: z19.enum(REFERENCE_SHEET_MODELS),
5003
- subject_description: z19.string().min(1),
5004
- subject_type: z19.enum(["character", "person", "product"]),
5005
- views: z19.array(z19.string().min(1)).min(2).max(6).optional(),
5006
- style: z19.string().optional(),
5007
- prompt_override: z19.string().min(1).optional(),
5008
- aspect_ratio: z19.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
5009
- image_size: z19.enum(["0.5K", "1K", "2K", "4K"]).optional()
5200
+ var ImageReferenceSheetParams = z20.object({
5201
+ model: z20.enum(REFERENCE_SHEET_MODELS),
5202
+ subject_description: z20.string().min(1),
5203
+ subject_type: z20.enum(["character", "person", "product"]),
5204
+ views: z20.array(z20.string().min(1)).min(2).max(6).optional(),
5205
+ style: z20.string().optional(),
5206
+ prompt_override: z20.string().min(1).optional(),
5207
+ aspect_ratio: z20.enum(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "4:5", "5:4", "21:9", "1:4", "4:1", "1:8", "8:1"]).optional(),
5208
+ image_size: z20.enum(["0.5K", "1K", "2K", "4K"]).optional()
5010
5209
  }).strict();
5011
5210
  var imageReferenceSheetNode = delegated({
5012
5211
  id: "image_reference_sheet",
@@ -5014,9 +5213,9 @@ var imageReferenceSheetNode = delegated({
5014
5213
  category: "image",
5015
5214
  summary: "Fuse 1\u20136 images of a single subject (person, character, or product) into ONE multi-view reference sheet \u2014 a labeled turnaround grid (FRONT / SIDE / BACK\u2026) in consistent style and lighting. Curated models: Gemini 3 Pro Image (best fusion + labels), Gemini 3.1 Flash Image (cheap iteration).",
5016
5215
  when_to_use: "Use before image_generate / video_generate when a subject must stay consistent across many creatives \u2014 wire the `sheet` output into their `reference` input instead of re-describing the subject per prompt. `subject_description` should be the exact wording you reuse downstream. Pick `google/gemini-3-pro-image-preview` for final 6-view sheets at 2K+, `google/gemini-3.1-flash-image-preview` while iterating.",
5017
- inputs: z19.object({ references: z19.array(ImageRef).min(1).max(6) }).loose(),
5216
+ inputs: z20.object({ references: z20.array(ImageRef).min(1).max(6) }).loose(),
5018
5217
  params: ImageReferenceSheetParams,
5019
- outputs: z19.object({ sheet: ImageRef }).strict(),
5218
+ outputs: z20.object({ sheet: ImageRef }).strict(),
5020
5219
  outputKinds: { sheet: "image" },
5021
5220
  cost: ({ params }) => ({
5022
5221
  credits: params?.model === "google/gemini-3-pro-image-preview" ? 20 : 5,
@@ -5025,10 +5224,10 @@ var imageReferenceSheetNode = delegated({
5025
5224
  });
5026
5225
 
5027
5226
  // src/engine/nodes/remote/imageSearch.ts
5028
- import { z as z20 } from "zod";
5029
- var ImageSearchParams = z20.object({
5030
- prompt: z20.string().min(1),
5031
- count: z20.number().int().min(1).max(20).default(5)
5227
+ import { z as z21 } from "zod";
5228
+ var ImageSearchParams = z21.object({
5229
+ prompt: z21.string().min(1),
5230
+ count: z21.number().int().min(1).max(20).default(5)
5032
5231
  }).strict();
5033
5232
  var imageSearchNode = delegated({
5034
5233
  id: "image_search",
@@ -5036,15 +5235,15 @@ var imageSearchNode = delegated({
5036
5235
  category: "image",
5037
5236
  summary: "Agentic image search across Google Images, stock photography (Freepik), and Pinterest. An LLM agent picks the search tools and queries, selects the best matches, and the results are downloaded into canvas assets.",
5038
5237
  when_to_use: "Use to gather real-world reference or inspiration images for a prompt (e.g. several photos of an australian shepherd) so a later step or the user can pick the best one. Not for creating new imagery \u2014 use image_generate for that.",
5039
- inputs: z20.object({}).loose(),
5238
+ inputs: z21.object({}).loose(),
5040
5239
  params: ImageSearchParams,
5041
- outputs: z20.object({ images: z20.array(ImageRef).min(1) }).strict(),
5240
+ outputs: z21.object({ images: z21.array(ImageRef).min(1) }).strict(),
5042
5241
  outputKinds: { images: "image" },
5043
5242
  cost: ({ params }) => ({ credits: Math.ceil(2 + params.count / 2), seconds_estimate: 30 })
5044
5243
  });
5045
5244
 
5046
5245
  // src/engine/nodes/remote/imageSelect.ts
5047
- import { z as z21 } from "zod";
5246
+ import { z as z22 } from "zod";
5048
5247
  var IMAGE_SELECT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
5049
5248
  var imageSelectNode = delegated({
5050
5249
  id: "image_select",
@@ -5052,15 +5251,15 @@ var imageSelectNode = delegated({
5052
5251
  category: "vision",
5053
5252
  summary: "Pick the best `count` images out of 2+ candidates with a vision LLM, judged against a prompt. Outputs a passthrough subset of the input refs (no new pixels) plus the model's comparative reasoning.",
5054
5253
  when_to_use: "Use after fanning out several image_generate variants (or any pool of 2+ images) to keep only the strongest before expensive downstream steps \u2014 video generation, reference sheets, final delivery. `count` fixes the output size, so `images#0`\u2026`images#count-1` are always safe to wire. Pick `~google/gemini-flash-latest` for cheap/fast picks and `~google/gemini-pro-latest` for harder aesthetic judgement.",
5055
- inputs: z21.object({ images: z21.array(ImageRef).min(2) }).loose(),
5056
- params: z21.object({
5057
- model: z21.enum(IMAGE_SELECT_MODELS),
5058
- prompt: z21.string().min(1),
5059
- count: z21.number().int().min(1).default(1),
5060
- temperature: z21.number().min(0).max(2).optional(),
5061
- max_tokens: z21.number().int().positive().optional()
5254
+ inputs: z22.object({ images: z22.array(ImageRef).min(2) }).loose(),
5255
+ params: z22.object({
5256
+ model: z22.enum(IMAGE_SELECT_MODELS),
5257
+ prompt: z22.string().min(1),
5258
+ count: z22.number().int().min(1).default(1),
5259
+ temperature: z22.number().min(0).max(2).optional(),
5260
+ max_tokens: z22.number().int().positive().optional()
5062
5261
  }).strict(),
5063
- outputs: z21.object({ images: z21.array(ImageRef).min(1), reasoning: TextRef }).strict(),
5262
+ outputs: z22.object({ images: z22.array(ImageRef).min(1), reasoning: TextRef }).strict(),
5064
5263
  outputKinds: { images: "image", reasoning: "text" },
5065
5264
  cost: () => ({ credits: 1, seconds_estimate: 5 }),
5066
5265
  // Arity is only knowable at validate time when `images` is a literal array
@@ -5085,34 +5284,34 @@ var imageSelectNode = delegated({
5085
5284
  });
5086
5285
 
5087
5286
  // src/engine/nodes/remote/music.ts
5088
- import { z as z22 } from "zod";
5287
+ import { z as z23 } from "zod";
5089
5288
  var MUSIC_MODELS = ["elevenlabs/music-v1", "elevenlabs/video-background-music-v1"];
5090
- var MusicParams = z22.object({
5091
- model: z22.enum(MUSIC_MODELS),
5289
+ var MusicParams = z23.object({
5290
+ model: z23.enum(MUSIC_MODELS),
5092
5291
  /** Free-form prompt. Used by `elevenlabs/music-v1` (compose-detailed). */
5093
- prompt: z22.string().optional(),
5292
+ prompt: z23.string().optional(),
5094
5293
  /**
5095
5294
  * Structured composition plan (intro / hook / verse / outro sections with
5096
5295
  * per-section styles + durations). Mutually exclusive with `prompt`.
5097
5296
  */
5098
- composition_plan: z22.record(z22.string(), z22.unknown()).optional(),
5297
+ composition_plan: z23.record(z23.string(), z23.unknown()).optional(),
5099
5298
  /** Target length when using `prompt`. 3000–454545ms (capped by the $10 per-node cost limit). */
5100
- music_length_ms: z22.number().int().min(3e3).max(ELEVENLABS_MAX_MUSIC_LENGTH_MS).optional(),
5101
- seed: z22.number().int().optional(),
5299
+ music_length_ms: z23.number().int().min(3e3).max(ELEVENLABS_MAX_MUSIC_LENGTH_MS).optional(),
5300
+ seed: z23.number().int().optional(),
5102
5301
  /** Prompt mode only — forces an instrumental (no vocals) track. */
5103
- force_instrumental: z22.boolean().optional(),
5302
+ force_instrumental: z23.boolean().optional(),
5104
5303
  /** composition_plan only — honor exact section durations. */
5105
- respect_sections_durations: z22.boolean().optional(),
5304
+ respect_sections_durations: z23.boolean().optional(),
5106
5305
  /** Emit word-level timestamps alongside the audio. */
5107
- with_timestamps: z22.boolean().optional(),
5306
+ with_timestamps: z23.boolean().optional(),
5108
5307
  /**
5109
5308
  * video-to-music only — short description of the desired score
5110
5309
  * ("upbeat synth, fast cuts, 80s") used to bias the model.
5111
5310
  */
5112
- description: z22.string().max(1e3).optional(),
5311
+ description: z23.string().max(1e3).optional(),
5113
5312
  /** video-to-music only — up to 10 style tags. */
5114
- tags: z22.array(z22.string()).max(10).optional(),
5115
- output_format: z22.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5313
+ tags: z23.array(z23.string()).max(10).optional(),
5314
+ output_format: z23.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5116
5315
  }).strict();
5117
5316
  var musicNode = delegated({
5118
5317
  id: "music",
@@ -5120,9 +5319,9 @@ var musicNode = delegated({
5120
5319
  category: "audio",
5121
5320
  summary: "Generate music for ad creatives and website video content. `elevenlabs/music-v1` composes from a text prompt or structured composition plan; `elevenlabs/video-background-music-v1` scores an existing video clip provided via `inputs.video`.",
5122
5321
  when_to_use: "Use to produce background music or a full score for video ads, hero-section reels, or any motion content. Prefer the video-to-music model when you already have a cut and want music timed to it; use compose-detailed when you have only a brief or want section-level control (intro / hook / outro). Pair the resulting audio with `video_generate` or `video_lipsync` at compose time.",
5123
- inputs: z22.object({ video: VideoRef.optional() }).loose(),
5322
+ inputs: z23.object({ video: VideoRef.optional() }).loose(),
5124
5323
  params: MusicParams,
5125
- outputs: z22.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5324
+ outputs: z23.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5126
5325
  outputKinds: { audio: "audio", timestamps: "json" },
5127
5326
  cost: ({ params }) => {
5128
5327
  const seconds = params.music_length_ms ? Math.ceil(params.music_length_ms / 1e3) : 30;
@@ -5153,25 +5352,25 @@ var musicNode = delegated({
5153
5352
  });
5154
5353
 
5155
5354
  // src/engine/nodes/remote/soundEffect.ts
5156
- import { z as z23 } from "zod";
5355
+ import { z as z24 } from "zod";
5157
5356
  var SOUND_EFFECT_MODELS = ["elevenlabs/eleven_text_to_sound_v2"];
5158
- var SoundEffectParams = z23.object({
5159
- model: z23.enum(SOUND_EFFECT_MODELS),
5357
+ var SoundEffectParams = z24.object({
5358
+ model: z24.enum(SOUND_EFFECT_MODELS),
5160
5359
  /** Prompt describing the SFX ("metal door slam", "soft UI tap", "ocean waves"). */
5161
- text: z23.string().min(1),
5360
+ text: z24.string().min(1),
5162
5361
  /**
5163
5362
  * Target length in seconds. 0.5–30. Leave unset to let the model pick the
5164
5363
  * natural length for the described effect.
5165
5364
  */
5166
- duration_seconds: z23.number().min(0.5).max(30).optional(),
5365
+ duration_seconds: z24.number().min(0.5).max(30).optional(),
5167
5366
  /**
5168
5367
  * 0–1. Higher = stick closer to the prompt at the cost of variety; lower
5169
5368
  * = let the model interpret more freely. Defaults to 0.3 on the provider.
5170
5369
  */
5171
- prompt_influence: z23.number().min(0).max(1).optional(),
5370
+ prompt_influence: z24.number().min(0).max(1).optional(),
5172
5371
  /** Only valid on `eleven_text_to_sound_v2` — produce a seamless loop. */
5173
- loop: z23.boolean().optional(),
5174
- output_format: z23.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5372
+ loop: z24.boolean().optional(),
5373
+ output_format: z24.enum(ELEVENLABS_OUTPUT_FORMATS).optional()
5175
5374
  }).strict();
5176
5375
  var soundEffectNode = delegated({
5177
5376
  id: "sound_effect",
@@ -5179,9 +5378,9 @@ var soundEffectNode = delegated({
5179
5378
  category: "audio",
5180
5379
  summary: "Generate short sound effects from a text prompt via ElevenLabs Text-to-Sound. Use for whooshes, impacts, UI clicks, ambient beds, or signature stingers in ad creatives and product videos.",
5181
5380
  when_to_use: "Reach for this when you need a punch-in SFX layered against `video_generate` or `hyperframe_render` output \u2014 e.g. a logo whoosh on a hero shot, a click on a CTA cut, a swelling ambient bed under VO. Set `loop: true` for atmospheric beds that need to tile under longer footage; leave `duration_seconds` unset and the model picks a natural length.",
5182
- inputs: z23.object({}).loose(),
5381
+ inputs: z24.object({}).loose(),
5183
5382
  params: SoundEffectParams,
5184
- outputs: z23.object({ audio: AudioRef }).strict(),
5383
+ outputs: z24.object({ audio: AudioRef }).strict(),
5185
5384
  outputKinds: { audio: "audio" },
5186
5385
  cost: ({ params }) => {
5187
5386
  const seconds = params.duration_seconds ?? 5;
@@ -5190,7 +5389,7 @@ var soundEffectNode = delegated({
5190
5389
  });
5191
5390
 
5192
5391
  // src/engine/nodes/remote/textGenerate.ts
5193
- import { z as z24 } from "zod";
5392
+ import { z as z25 } from "zod";
5194
5393
  var TEXT_GENERATE_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
5195
5394
  var textGenerateNode = delegated({
5196
5395
  id: "text_generate",
@@ -5198,58 +5397,58 @@ var textGenerateNode = delegated({
5198
5397
  category: "language",
5199
5398
  summary: "Single-turn LLM text generation via OpenRouter. Returns a text response.",
5200
5399
  when_to_use: 'Use for any short text generation step in a canvas \u2014 ad copy, hooks, headlines, JSON outputs for downstream nodes. Pick `~google/gemini-flash-latest` for cheap/fast work and `~google/gemini-pro-latest` for harder reasoning. When the output must be JSON for a downstream `{{slot}}` (e.g. the ad-blueprint transform), set `response_format: "json_object"` so the model returns clean JSON with no markdown fences or prose. Set `web_search: true` to let the model search the live web first (OpenRouter `:online`) \u2014 useful when the transform must adapt copy to the target brand\'s real facts (current pricing, the trust signals it actually has) rather than guess.',
5201
- inputs: z24.object({}).loose(),
5202
- params: z24.object({
5203
- model: z24.enum(TEXT_GENERATE_MODELS),
5204
- prompt: z24.string().min(1),
5205
- system: z24.string().optional(),
5206
- response_format: z24.enum(["text", "json_object"]).optional(),
5207
- web_search: z24.boolean().optional(),
5208
- temperature: z24.number().min(0).max(2).optional(),
5209
- max_tokens: z24.number().int().positive().optional()
5400
+ inputs: z25.object({}).loose(),
5401
+ params: z25.object({
5402
+ model: z25.enum(TEXT_GENERATE_MODELS),
5403
+ prompt: z25.string().min(1),
5404
+ system: z25.string().optional(),
5405
+ response_format: z25.enum(["text", "json_object"]).optional(),
5406
+ web_search: z25.boolean().optional(),
5407
+ temperature: z25.number().min(0).max(2).optional(),
5408
+ max_tokens: z25.number().int().positive().optional()
5210
5409
  }).strict(),
5211
- outputs: z24.object({ text: TextRef }).strict(),
5410
+ outputs: z25.object({ text: TextRef }).strict(),
5212
5411
  outputKinds: { text: "text" },
5213
5412
  cost: () => ({ credits: 1, seconds_estimate: 3 })
5214
5413
  });
5215
5414
 
5216
5415
  // src/engine/nodes/remote/tts.ts
5217
- import { z as z25 } from "zod";
5416
+ import { z as z26 } from "zod";
5218
5417
  var TTS_MODELS = ["elevenlabs/eleven_v3"];
5219
- var TtsVoiceSettings = z25.object({
5220
- stability: z25.number().min(0).max(1).optional(),
5221
- similarity_boost: z25.number().min(0).max(1).optional(),
5222
- style: z25.number().min(0).max(1).optional(),
5223
- use_speaker_boost: z25.boolean().optional(),
5224
- speed: z25.number().min(0.25).max(4).optional()
5418
+ var TtsVoiceSettings = z26.object({
5419
+ stability: z26.number().min(0).max(1).optional(),
5420
+ similarity_boost: z26.number().min(0).max(1).optional(),
5421
+ style: z26.number().min(0).max(1).optional(),
5422
+ use_speaker_boost: z26.boolean().optional(),
5423
+ speed: z26.number().min(0.25).max(4).optional()
5225
5424
  }).strict();
5226
- var TtsPronunciationLocator = z25.object({
5227
- pronunciation_dictionary_id: z25.string().min(1),
5228
- version_id: z25.string().nullable().optional()
5425
+ var TtsPronunciationLocator = z26.object({
5426
+ pronunciation_dictionary_id: z26.string().min(1),
5427
+ version_id: z26.string().nullable().optional()
5229
5428
  }).strict();
5230
- var TtsParams = z25.object({
5231
- model: z25.enum(TTS_MODELS),
5232
- text: z25.string().min(1).max(ELEVENLABS_MAX_TEXT_CHARS),
5233
- voice: z25.string().min(1),
5429
+ var TtsParams = z26.object({
5430
+ model: z26.enum(TTS_MODELS),
5431
+ text: z26.string().min(1).max(ELEVENLABS_MAX_TEXT_CHARS),
5432
+ voice: z26.string().min(1),
5234
5433
  /** Provider output_format (mp3 family only — assets are stored as audio/mpeg). */
5235
- output_format: z25.enum(ELEVENLABS_OUTPUT_FORMATS).optional(),
5236
- seed: z25.number().int().min(0).max(4294967295).optional(),
5434
+ output_format: z26.enum(ELEVENLABS_OUTPUT_FORMATS).optional(),
5435
+ seed: z26.number().int().min(0).max(4294967295).optional(),
5237
5436
  // Top-level shortcuts; structured form is `voice_settings`.
5238
- stability: z25.number().min(0).max(1).optional(),
5239
- similarity_boost: z25.number().min(0).max(1).optional(),
5437
+ stability: z26.number().min(0).max(1).optional(),
5438
+ similarity_boost: z26.number().min(0).max(1).optional(),
5240
5439
  voice_settings: TtsVoiceSettings.optional(),
5241
5440
  /** ISO 639-1 language code. eleven_v3 supports language hints. */
5242
- language_code: z25.string().optional(),
5243
- pronunciation_dictionary_locators: z25.array(TtsPronunciationLocator).max(3).optional(),
5244
- apply_text_normalization: z25.enum(["auto", "on", "off"]).optional(),
5441
+ language_code: z26.string().optional(),
5442
+ pronunciation_dictionary_locators: z26.array(TtsPronunciationLocator).max(3).optional(),
5443
+ apply_text_normalization: z26.enum(["auto", "on", "off"]).optional(),
5245
5444
  /** Currently Japanese-only. Adds latency. */
5246
- apply_language_text_normalization: z25.boolean().optional(),
5445
+ apply_language_text_normalization: z26.boolean().optional(),
5247
5446
  /**
5248
5447
  * When true, hits `/v1/text-to-speech/{voice_id}/with-timestamps` and
5249
5448
  * adds a `timestamps` output (character-level alignment) for caption
5250
5449
  * rendering, lipsync, and beat-matched cuts.
5251
5450
  */
5252
- with_timestamps: z25.boolean().optional()
5451
+ with_timestamps: z26.boolean().optional()
5253
5452
  }).strict();
5254
5453
  var ttsNode = delegated({
5255
5454
  id: "tts",
@@ -5257,9 +5456,9 @@ var ttsNode = delegated({
5257
5456
  category: "audio",
5258
5457
  summary: "Single-voice text-to-speech via ElevenLabs Eleven v3. Optional character-level timestamps for caption rendering and beat-matched cuts.",
5259
5458
  when_to_use: "Use for single-speaker VO \u2014 ad reads, hero-section narration, product walkthroughs. Reach for `dialogue` when you need multiple voices in one stitched track. Set `with_timestamps: true` when downstream needs character-level alignment (captions, lipsync).",
5260
- inputs: z25.object({}).loose(),
5459
+ inputs: z26.object({}).loose(),
5261
5460
  params: TtsParams,
5262
- outputs: z25.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5461
+ outputs: z26.object({ audio: AudioRef, timestamps: JsonRef.optional() }).strict(),
5263
5462
  outputKinds: { audio: "audio", timestamps: "json" },
5264
5463
  cost: ({ params }) => ({
5265
5464
  credits: Math.max(1, Math.ceil(params.text.length * 15e-4)),
@@ -5268,23 +5467,23 @@ var ttsNode = delegated({
5268
5467
  });
5269
5468
 
5270
5469
  // src/engine/nodes/remote/video.ts
5271
- import { z as z26 } from "zod";
5470
+ import { z as z27 } from "zod";
5272
5471
  var VIDEO_GENERATE_MODELS = ["bytedance/seedance-2.0", "google/veo-3.1-fast"];
5273
- var VideoGenerateParams = z26.object({
5274
- model: z26.enum(VIDEO_GENERATE_MODELS),
5275
- prompt: z26.string().min(1),
5276
- duration: z26.number().int().positive().optional(),
5277
- resolution: z26.string().optional(),
5472
+ var VideoGenerateParams = z27.object({
5473
+ model: z27.enum(VIDEO_GENERATE_MODELS),
5474
+ prompt: z27.string().min(1),
5475
+ duration: z27.number().int().positive().optional(),
5476
+ resolution: z27.string().optional(),
5278
5477
  // Union of ratios accepted by at least one curated model (registry gates
5279
5478
  // per-model). 3:2/2:3 are deliberately absent: no registered model takes them.
5280
- aspect_ratio: z26.enum(["16:9", "9:16", "1:1", "4:3", "3:4", "21:9", "9:21"]).optional(),
5281
- generate_audio: z26.boolean().optional(),
5282
- seed: z26.number().int().nonnegative().optional(),
5479
+ aspect_ratio: z27.enum(["16:9", "9:16", "1:1", "4:3", "3:4", "21:9", "9:21"]).optional(),
5480
+ generate_audio: z27.boolean().optional(),
5481
+ seed: z27.number().int().nonnegative().optional(),
5283
5482
  // Veo-only passthroughs (routed via `provider.options.google-vertex.parameters`).
5284
- negative_prompt: z26.string().optional(),
5285
- person_generation: z26.string().optional(),
5286
- enhance_prompt: z26.boolean().optional(),
5287
- conditioning_scale: z26.number().optional()
5483
+ negative_prompt: z27.string().optional(),
5484
+ person_generation: z27.string().optional(),
5485
+ enhance_prompt: z27.boolean().optional(),
5486
+ conditioning_scale: z27.number().optional()
5288
5487
  }).strict();
5289
5488
  var videoGenerateNode = delegated({
5290
5489
  id: "video_generate",
@@ -5292,23 +5491,23 @@ var videoGenerateNode = delegated({
5292
5491
  category: "video",
5293
5492
  summary: "Generate video for ad creatives. Two curated models: `bytedance/seedance-2.0` (production quality, photorealistic humans via fal.ai) and `google/veo-3.1-fast` (cheap/fast for iteration and tests). Async with polling.",
5294
5493
  when_to_use: "Use `bytedance/seedance-2.0` for final ad output (photoreal subjects, image-to-video with first/last frames). Use `google/veo-3.1-fast` while iterating to keep cost low. Each model has different supported durations, resolutions, and aspect ratios \u2014 see the README per-model section.",
5295
- inputs: z26.object({
5494
+ inputs: z27.object({
5296
5495
  first_frame: ImageRef.optional(),
5297
5496
  last_frame: ImageRef.optional(),
5298
5497
  reference: ImageRef.optional()
5299
5498
  }).loose(),
5300
5499
  params: VideoGenerateParams,
5301
- outputs: z26.object({ video: VideoRef }).strict(),
5500
+ outputs: z27.object({ video: VideoRef }).strict(),
5302
5501
  outputKinds: { video: "video" },
5303
5502
  cost: () => ({ credits: 50, seconds_estimate: 120 })
5304
5503
  });
5305
5504
 
5306
5505
  // src/engine/nodes/remote/videoBackgroundRemove.ts
5307
- import { z as z27 } from "zod";
5308
- var VideoBackgroundRemoveParams = z27.object({
5309
- model: z27.literal("fal/veed-video-background-removal").optional().default("fal/veed-video-background-removal"),
5310
- edge_refinement: z27.boolean().optional().default(true),
5311
- output_codec: z27.enum(["vp9", "h264"]).optional().default("vp9")
5506
+ import { z as z28 } from "zod";
5507
+ var VideoBackgroundRemoveParams = z28.object({
5508
+ model: z28.literal("fal/veed-video-background-removal").optional().default("fal/veed-video-background-removal"),
5509
+ edge_refinement: z28.boolean().optional().default(true),
5510
+ output_codec: z28.enum(["vp9", "h264"]).optional().default("vp9")
5312
5511
  }).strict();
5313
5512
  var videoBackgroundRemoveNode = delegated({
5314
5513
  id: "video_background_remove",
@@ -5316,18 +5515,18 @@ var videoBackgroundRemoveNode = delegated({
5316
5515
  category: "video",
5317
5516
  summary: "Remove the background from a video and return a transparent VP9-with-alpha WebM (or H264 RGB+alpha pair). Drops directly into a hyperframe composition as `<video src='...'>` for chroma-keyed picture-in-picture overlays. Powered by fal.ai `veed/video-background-removal/fast`.",
5318
5517
  when_to_use: "Use when you need a talking-head or subject to float over a custom background in a hyperframe composition. Pair with hyperframe_render(composition: screencast-with-talker) for screencast-with-narrator videos. Output is `video/webm` with alpha \u2014 feed straight into `<video src>` in a composition.",
5319
- inputs: z27.object({
5518
+ inputs: z28.object({
5320
5519
  video: VideoRef
5321
5520
  }).strict(),
5322
5521
  params: VideoBackgroundRemoveParams,
5323
- outputs: z27.object({ video: VideoRef }).strict(),
5522
+ outputs: z28.object({ video: VideoRef }).strict(),
5324
5523
  outputKinds: { video: "video" },
5325
5524
  // $0.012 per 30 frames (edge refinement on) — assume ~30fps; refine via fal dashboard.
5326
5525
  cost: () => ({ credits: 50, seconds_estimate: 60 })
5327
5526
  });
5328
5527
 
5329
5528
  // src/engine/nodes/remote/videoDeconstruct.ts
5330
- import { z as z28 } from "zod";
5529
+ import { z as z29 } from "zod";
5331
5530
  var VIDEO_DECONSTRUCT_MODELS = ["~google/gemini-flash-latest", "~google/gemini-pro-latest"];
5332
5531
  var videoDeconstructNode = delegated({
5333
5532
  id: "video_deconstruct",
@@ -5335,24 +5534,24 @@ var videoDeconstructNode = delegated({
5335
5534
  category: "video",
5336
5535
  summary: 'Deconstruct a video into a replication-grade blueprint: scene boundaries, the real start/end frame of every scene (extracted from the video as images), and an exhaustive JSON analysis \u2014 per-scene action detail, camera motion, generation-ready frame/motion prompts, overlay text with full typographic style, floating elements, deeply detailed cast (perceived demographics, ethnicity/skin-tone, styling, market-recasting notes), brand-identified logos (named by brand and what they signal, not by appearance, with on-screen timestamps), dialogue with voice descriptions, music spec, SFX list, plus a word-level transcript. `mode:"index"` is the cheap structure-first pass: scene boundaries + global blueprint only (one LLM call, no frames).',
5337
5536
  when_to_use: 'Use to reverse-engineer a reference video (e.g. a competitor ad) so a new canvas can reproduce or remix it scene by scene. Agent loop: (1) optionally run `mode:"index"` to see the structure cheaply (scene count, boundaries, transcript) before planning; (2) run the full deconstruct; (3) read `analysis` and author the reproduction canvas. The blueprint maps 1:1 onto generation nodes: `analysis.scenes[i]` aligns positionally with `start_frames#i`/`end_frames#i`; per scene, `start_frame_prompt`/`end_frame_prompt` feed image_generate (overlay text is excluded from them by contract \u2014 recomposite it from `overlays`), `motion_prompt` + the two frames feed video_generate (first_frame/last_frame), `dialogue[].voice_description` casts tts/dialogue voices, `global.music.music_prompt` feeds music, `sfx[].sound_effect_prompt` feeds sound_effect, and `overlays`/`floating_elements` drive an ffmpeg/hyperframe overlay pass. Long videos (over ~8 min single-shot): run `mode:"index"` first, then several full nodes IN PARALLEL each with a `start_s`/`end_s` window (\u2264480s, snap edges to index scene boundaries), and merge by concatenating `analysis.scenes`; over-length errors include suggested windows. Inject fields into downstream prompts via `{{slot}}`. Pick `~google/gemini-pro-latest` for the densest extraction, `~google/gemini-flash-latest` for cheap/fast passes.',
5338
- inputs: z28.object({ video: VideoRef }).loose(),
5339
- params: z28.object({
5340
- model: z28.enum(VIDEO_DECONSTRUCT_MODELS),
5341
- mode: z28.enum(["full", "index"]).optional(),
5342
- language: z28.string().min(2).max(8).optional(),
5343
- max_scenes: z28.number().int().min(1).max(60).optional(),
5344
- focus: z28.string().optional(),
5345
- start_s: z28.number().min(0).optional(),
5346
- end_s: z28.number().positive().optional(),
5537
+ inputs: z29.object({ video: VideoRef }).loose(),
5538
+ params: z29.object({
5539
+ model: z29.enum(VIDEO_DECONSTRUCT_MODELS),
5540
+ mode: z29.enum(["full", "index"]).optional(),
5541
+ language: z29.string().min(2).max(8).optional(),
5542
+ max_scenes: z29.number().int().min(1).max(60).optional(),
5543
+ focus: z29.string().optional(),
5544
+ start_s: z29.number().min(0).optional(),
5545
+ end_s: z29.number().positive().optional(),
5347
5546
  // Transcript provider for the blueprint's dialogue/transcript. Default
5348
5547
  // Groq Whisper; "deepgram" routes to Nova-3 so words carry punctuation.
5349
- transcriber: z28.enum(["groq", "deepgram"]).optional()
5548
+ transcriber: z29.enum(["groq", "deepgram"]).optional()
5350
5549
  }).strict(),
5351
- outputs: z28.object({
5550
+ outputs: z29.object({
5352
5551
  analysis: JsonRef,
5353
5552
  // Absent in mode:"index" (structure only, no Mux frame extraction).
5354
- start_frames: z28.array(ImageRef).min(1).optional(),
5355
- end_frames: z28.array(ImageRef).min(1).optional(),
5553
+ start_frames: z29.array(ImageRef).min(1).optional(),
5554
+ end_frames: z29.array(ImageRef).min(1).optional(),
5356
5555
  transcript: JsonRef
5357
5556
  }).strict(),
5358
5557
  outputKinds: { analysis: "json", start_frames: "image", end_frames: "image", transcript: "json" },
@@ -5360,22 +5559,22 @@ var videoDeconstructNode = delegated({
5360
5559
  });
5361
5560
 
5362
5561
  // src/engine/nodes/remote/videoLipsync.ts
5363
- import { z as z29 } from "zod";
5364
- var FalLipsyncParams = z29.object({
5365
- model: z29.literal("fal/veed-lipsync")
5562
+ import { z as z30 } from "zod";
5563
+ var FalLipsyncParams = z30.object({
5564
+ model: z30.literal("fal/veed-lipsync")
5366
5565
  }).strict();
5367
- var VideoLipsyncParams = z29.discriminatedUnion("model", [FalLipsyncParams]);
5566
+ var VideoLipsyncParams = z30.discriminatedUnion("model", [FalLipsyncParams]);
5368
5567
  var videoLipsyncNode = delegated({
5369
5568
  id: "video_lipsync",
5370
5569
  version: "1.0.0",
5371
5570
  category: "video",
5372
5571
  summary: "Lip-sync a video to an audio track. Currently backed by VEED via fal.ai (`fal/veed-lipsync`). $0.40/min of output.",
5373
- inputs: z29.object({
5572
+ inputs: z30.object({
5374
5573
  video: VideoRef,
5375
5574
  audio: AudioRef
5376
5575
  }).strict(),
5377
5576
  params: VideoLipsyncParams,
5378
- outputs: z29.object({ video: VideoRef }).strict(),
5577
+ outputs: z30.object({ video: VideoRef }).strict(),
5379
5578
  outputKinds: { video: "video" },
5380
5579
  cost: () => ({ credits: 20, seconds_estimate: 120 })
5381
5580
  });
@@ -5383,15 +5582,15 @@ var videoLipsyncNode = delegated({
5383
5582
  // src/engine/nodes/remote/videoTranscribe.ts
5384
5583
  import { mkdtemp as mkdtemp6, readFile as readFile9, rm as rm6 } from "fs/promises";
5385
5584
  import { tmpdir as tmpdir6 } from "os";
5386
- import path11 from "path";
5387
- import { z as z30 } from "zod";
5585
+ import path12 from "path";
5586
+ import { z as z31 } from "zod";
5388
5587
 
5389
5588
  // src/engine/nodes/local/lib/ffmpeg.ts
5390
- import { execFile as execFile6 } from "child_process";
5391
- import { promisify as promisify6 } from "util";
5392
- var execFileAsync3 = promisify6(execFile6);
5589
+ import { execFile as execFile7 } from "child_process";
5590
+ import { promisify as promisify7 } from "util";
5591
+ var execFileAsync4 = promisify7(execFile7);
5393
5592
  async function probeVideo(filePath) {
5394
- const { stdout } = await execFileAsync3(
5593
+ const { stdout } = await execFileAsync4(
5395
5594
  "ffprobe",
5396
5595
  ["-v", "error", "-show_streams", "-show_format", "-of", "json", filePath],
5397
5596
  { encoding: "utf-8", maxBuffer: 8 * 1024 * 1024 }
@@ -5449,7 +5648,7 @@ function parseFrameRate(rate) {
5449
5648
  }
5450
5649
  async function runFfmpeg(args, opts) {
5451
5650
  try {
5452
- await execFileAsync3("ffmpeg", args, {
5651
+ await execFileAsync4("ffmpeg", args, {
5453
5652
  timeout: opts.timeout_ms,
5454
5653
  maxBuffer: 64 * 1024 * 1024
5455
5654
  });
@@ -5463,21 +5662,21 @@ ${detail.slice(-4e3)}`);
5463
5662
  }
5464
5663
 
5465
5664
  // src/engine/nodes/remote/videoTranscribe.ts
5466
- var VideoTranscribeParams = z30.object({
5467
- language: z30.string().min(2).max(8).optional(),
5665
+ var VideoTranscribeParams = z31.object({
5666
+ language: z31.string().min(2).max(8).optional(),
5468
5667
  // Provider choice is explicit (no env-based silent branching). Default Groq
5469
5668
  // Whisper; "deepgram" routes to Deepgram Nova-3, which additionally emits a
5470
5669
  // `rich` JSON output with punctuated words + paragraph/sentence grouping.
5471
- transcriber: z30.enum(["groq", "deepgram"]).optional()
5670
+ transcriber: z31.enum(["groq", "deepgram"]).optional()
5472
5671
  }).strict();
5473
- var VideoTranscribeInputs = z30.object({
5672
+ var VideoTranscribeInputs = z31.object({
5474
5673
  video: VideoRef
5475
5674
  }).strict();
5476
- var VideoTranscribeOutputs = z30.object({
5477
- transcript: z30.custom(),
5675
+ var VideoTranscribeOutputs = z31.object({
5676
+ transcript: z31.custom(),
5478
5677
  // Only emitted by the Deepgram path: full punctuated words + paragraph /
5479
5678
  // sentence grouping with speaker indices. Absent for the default Groq path.
5480
- rich: z30.custom().optional()
5679
+ rich: z31.custom().optional()
5481
5680
  }).strict();
5482
5681
  var AUDIO_EXTRACT_TIMEOUT_MS = 6e4;
5483
5682
  var videoTranscribeNode = defineNode({
@@ -5515,8 +5714,8 @@ async function tryExtractAudio(inputs, ctx) {
5515
5714
  ctx.log("video_transcribe: no audio track detected, sending full video");
5516
5715
  return null;
5517
5716
  }
5518
- tmpDir = await mkdtemp6(path11.join(tmpdir6(), "vtx-"));
5519
- const audioPath = path11.join(tmpDir, "audio.mp3");
5717
+ tmpDir = await mkdtemp6(path12.join(tmpdir6(), "vtx-"));
5718
+ const audioPath = path12.join(tmpDir, "audio.mp3");
5520
5719
  ctx.log("video_transcribe: extracting audio (mono 16kHz mp3)");
5521
5720
  await runFfmpeg(
5522
5721
  ["-i", video.path, "-vn", "-ac", "1", "-ar", "16000", "-b:a", "64k", "-f", "mp3", "-y", audioPath],
@@ -5562,29 +5761,29 @@ async function tryExtractAudio(inputs, ctx) {
5562
5761
  }
5563
5762
 
5564
5763
  // src/engine/nodes/remote/voiceSelect.ts
5565
- import { z as z31 } from "zod";
5764
+ import { z as z32 } from "zod";
5566
5765
  var voiceSelectNode = delegated({
5567
5766
  id: "voice_select",
5568
5767
  version: "1.0.0",
5569
5768
  category: "audio",
5570
5769
  summary: 'Cast an ElevenLabs voice from a natural-language description (e.g. "warm, authoritative female narrator, American accent"). Lists the account\'s voices and ranks them against the brief, emitting the best `voice_id` as a bare-string text asset plus a ranked `candidates` JSON.',
5571
5770
  when_to_use: 'Use to turn a voice description (e.g. from a `video_deconstruct` blueprint\'s `voice_description`) into a usable ElevenLabs voice id, then feed it into a `tts` node by wiring `inputs.voice_ref: $ref:<this>.voice_id` and setting `params.voice: "{{voice_ref}}"` \u2014 the engine splices the id in at run time. Review `candidates` (json) to pick a different voice. Optional `gender`/`age`/`accent`/`language` hints sharpen the ranking.',
5572
- inputs: z31.object({}).loose(),
5573
- params: z31.object({
5574
- description: z31.string().min(1),
5575
- gender: z31.string().optional(),
5576
- age: z31.string().optional(),
5577
- accent: z31.string().optional(),
5578
- language: z31.string().optional(),
5579
- limit: z31.number().int().min(1).max(20).optional()
5771
+ inputs: z32.object({}).loose(),
5772
+ params: z32.object({
5773
+ description: z32.string().min(1),
5774
+ gender: z32.string().optional(),
5775
+ age: z32.string().optional(),
5776
+ accent: z32.string().optional(),
5777
+ language: z32.string().optional(),
5778
+ limit: z32.number().int().min(1).max(20).optional()
5580
5779
  }).strict(),
5581
- outputs: z31.object({ voice_id: TextRef, candidates: JsonRef }).strict(),
5780
+ outputs: z32.object({ voice_id: TextRef, candidates: JsonRef }).strict(),
5582
5781
  outputKinds: { voice_id: "text", candidates: "json" },
5583
5782
  cost: () => ({ credits: 0, seconds_estimate: 5 })
5584
5783
  });
5585
5784
 
5586
5785
  // src/engine/schema/catalog.ts
5587
- import { z as z32 } from "zod";
5786
+ import { z as z33 } from "zod";
5588
5787
  function generateCatalog(registry, opts = {}) {
5589
5788
  const entries = registry.all().map((def) => {
5590
5789
  const cost = def.cost ? safeCost(def) : void 0;
@@ -5595,9 +5794,9 @@ function generateCatalog(registry, opts = {}) {
5595
5794
  summary: def.summary,
5596
5795
  when_to_use: def.when_to_use,
5597
5796
  location: def.location,
5598
- inputs: z32.toJSONSchema(def.inputs, { unrepresentable: "any" }),
5599
- params: z32.toJSONSchema(def.params, { unrepresentable: "any" }),
5600
- outputs: z32.toJSONSchema(def.outputs, { unrepresentable: "any" }),
5797
+ inputs: z33.toJSONSchema(def.inputs, { unrepresentable: "any" }),
5798
+ params: z33.toJSONSchema(def.params, { unrepresentable: "any" }),
5799
+ outputs: z33.toJSONSchema(def.outputs, { unrepresentable: "any" }),
5601
5800
  cost_estimate_credits: cost?.credits,
5602
5801
  runtime_estimate_seconds: cost?.seconds_estimate
5603
5802
  };
@@ -5629,15 +5828,15 @@ function safeCost(def) {
5629
5828
 
5630
5829
  // src/engine/storage/cache-store.ts
5631
5830
  import { randomUUID as randomUUID2 } from "crypto";
5632
- import { mkdir as mkdir3, readFile as readFile10, rename as rename2, writeFile as writeFile6 } from "fs/promises";
5633
- import path12 from "path";
5831
+ import { mkdir as mkdir3, readFile as readFile10, rename as rename2, writeFile as writeFile7 } from "fs/promises";
5832
+ import path13 from "path";
5634
5833
  var LocalCacheStore = class {
5635
5834
  rootDir;
5636
5835
  constructor(rootDir) {
5637
5836
  this.rootDir = rootDir;
5638
5837
  }
5639
5838
  filePath(cacheKey) {
5640
- return path12.join(this.rootDir, `${cacheKey}.json`);
5839
+ return path13.join(this.rootDir, `${cacheKey}.json`);
5641
5840
  }
5642
5841
  async get(cacheKey) {
5643
5842
  try {
@@ -5650,9 +5849,9 @@ var LocalCacheStore = class {
5650
5849
  }
5651
5850
  async put(entry) {
5652
5851
  const dest = this.filePath(entry.cacheKey);
5653
- await mkdir3(path12.dirname(dest), { recursive: true });
5852
+ await mkdir3(path13.dirname(dest), { recursive: true });
5654
5853
  const tmp = `${dest}.tmp-${process.pid}-${randomUUID2()}`;
5655
- await writeFile6(tmp, JSON.stringify(entry, null, 0));
5854
+ await writeFile7(tmp, JSON.stringify(entry, null, 0));
5656
5855
  await rename2(tmp, dest);
5657
5856
  }
5658
5857
  };
@@ -5686,6 +5885,7 @@ var REMOTE_NODES = [
5686
5885
  imageSelectNode,
5687
5886
  videoGenerateNode,
5688
5887
  ttsNode,
5888
+ audioVoiceConvertNode,
5689
5889
  musicNode,
5690
5890
  dialogueNode,
5691
5891
  soundEffectNode,
@@ -5703,14 +5903,14 @@ function defaultRegistry() {
5703
5903
  }
5704
5904
  function createEngineFromEnv(opts = {}) {
5705
5905
  const cwd = opts.cwd ?? process.cwd();
5706
- const cacheDir = opts.cacheDir ?? path13.join(cwd, "canvas", ".cache");
5707
- const outputsDir = opts.outputsDir ?? path13.join(cwd, "canvas");
5906
+ const cacheDir = opts.cacheDir ?? path14.join(cwd, "canvas", ".cache");
5907
+ const outputsDir = opts.outputsDir ?? path14.join(cwd, "canvas");
5708
5908
  const creds = requireCredentialsFromEnv();
5709
5909
  return new Engine({
5710
5910
  registry: defaultRegistry(),
5711
5911
  client: new BackendClient({ baseUrl: creds.url, apiKey: creds.apiKey }),
5712
- assets: new LocalAssetStore(path13.join(cacheDir, "assets")),
5713
- cache: new LocalCacheStore(path13.join(cacheDir, "index")),
5912
+ assets: new LocalAssetStore(path14.join(cacheDir, "assets")),
5913
+ cache: new LocalCacheStore(path14.join(cacheDir, "index")),
5714
5914
  outputsDir,
5715
5915
  log: opts.log
5716
5916
  });
@@ -5731,4 +5931,4 @@ export {
5731
5931
  defaultRegistry,
5732
5932
  createEngineFromEnv
5733
5933
  };
5734
- //# sourceMappingURL=chunk-JIDZ37KG.js.map
5934
+ //# sourceMappingURL=chunk-CCO34ACK.js.map