npm - reelforge - Versions diffs - 0.5.5 → 0.7.0 - Mend

reelforge 0.5.5 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md +240 -222
package/dist/commands/audio.js +73 -0
package/dist/commands/content.js +50 -96
package/dist/commands/create.js +179 -213
package/dist/commands/pipelines.js +52 -34
package/dist/commands/subtitles.js +40 -0
package/dist/index.js +5 -1
package/package.json +51 -51

package/dist/commands/create.js (changed)

@@ -8,17 +8,13 @@ import { downloadTo } from "../utils/download.js";
 import { info, print, success, warn } from "../utils/output.js";
 const LAST_CREATE_PATH = path.join(os.homedir(), ".reelforge", "last-create.json");
 // ── Cost estimation (mirrors server src/lib/billing.ts) ──────────
-const IMAGE_UNITS = 3; // matches ATOMIC_UNITS["images.generate"] in src/lib/billing.ts
-const TTS_RELAYX_UNITS = 1; // matches ATOMIC_UNITS["tts.relayx"]
+const PLAN_UNITS = 1;
+const TTS_UNITS = 1;
+const ASR_UNITS = 1;
+const IMAGE_UNITS = 3;
+const CHARS_PER_SEC_ZH = 5;
+const TARGET_SEC_PER_SCENE = 8;
 function estimateUnits(body) {
-    const mode = body.mode || "generate";
-    const titleExplicit = !!body.title;
-    const N = body.n_scenes ?? 5;
-    // Template type resolution mirrors the server (src/lib/billing.ts):
-    //   inline HTML → explicit body.frame_template_type
-    //               → <meta name="template:type" content="..."> in the HTML
-    //               → default "image"
-    //   preset key  → parsed from the filename prefix (static_/asset_/image_)
     let tplType;
     if (body.frame_template_inline) {
         if (body.frame_template_type) {
@@ -31,7 +27,7 @@ function estimateUnits(body) {
         }
     }
     else {
-        const tplKey = body.frame_template || "1080x1920/static_default.html";
+        const tplKey = body.frame_template || "1080x1920/image_default.html";
         const tplBase = (tplKey.split("/").pop() || "").toLowerCase();
         tplType = tplBase.startsWith("static_")
             ? "static"
@@ -39,22 +35,20 @@ function estimateUnits(body) {
                 ? "asset"
                 : "image";
     }
-    const mediaPerFrame = tplType === "image" ? IMAGE_UNITS : 0;
-    const ttsMode = body.tts_inference_mode || "edge";
-    const ttsPerFrame = ttsMode === "relayx" ? TTS_RELAYX_UNITS : 0;
-    const narrations = mode === "generate" ? 1 : 0;
-    const title = titleExplicit ? 0 : 1;
-    const imagePrompts = tplType === "static" ? 0 : 1;
-    return narrations + title + imagePrompts + N * (ttsPerFrame + mediaPerFrame);
+    // Estimated scene count: from script length (fixed) or from duration (generate).
+    let estimatedScenes;
+    if (body.script) {
+        const estSec = body.script.length / CHARS_PER_SEC_ZH;
+        estimatedScenes = Math.max(2, Math.round(estSec / TARGET_SEC_PER_SCENE));
+    }
+    else {
+        const dur = body.duration ?? 45;
+        estimatedScenes = Math.max(2, Math.round(dur / TARGET_SEC_PER_SCENE));
+    }
+    const imageUnits = tplType === "image" ? estimatedScenes * IMAGE_UNITS : 0;
+    return PLAN_UNITS + TTS_UNITS + ASR_UNITS + imageUnits;
 }
 // ── Helpers ─────────────────────────────────────────────────────
-/**
- * Distinguish a local HTML file path from a preset template key.
- * Preset keys look like `"<size>/<file>.html"` (one slash, no dots/slashes
- * outside that pattern). Anything starting with `./`, `../`, `/`, `~`, or
- * containing a backslash, or that ends with `.html` and exists on disk, is
- * treated as a local path.
- */
 function looksLikeLocalHtmlPath(value) {
     if (/^[.~]|^\//.test(value))
         return true;
@@ -64,13 +58,45 @@ function looksLikeLocalHtmlPath(value) {
         return true;
     return false;
 }
-async function resolveText(input) {
+/** `@file` prefix → load file contents; raw text → return as-is. */
+async function resolveTextOrFile(input) {
     if (input.startsWith("@")) {
         const file = input.slice(1);
         return (await fs.readFile(file, "utf-8")).trim();
     }
     return input;
 }
+/**
+ * Reference-image resolver. Accepts a public URL, a data: URI, or a local
+ * file path. Local files are base64-encoded into a data: URI so RelayX can
+ * receive them in a pure-JSON body (no upload endpoint needed on our side).
+ *
+ * Returns undefined when input is missing/blank so the caller can branch on
+ * "user actually provided this knob".
+ */
+async function resolveRefImage(input, flagName) {
+    if (input === undefined)
+        return undefined;
+    const t = input.trim();
+    if (!t)
+        return undefined;
+    if (/^https?:\/\//i.test(t) || t.startsWith("data:"))
+        return t;
+    const abs = path.resolve(t);
+    if (!fsSync.existsSync(abs)) {
+        throw new Error(`${flagName}: local file not found: ${abs}`);
+    }
+    const ext = path.extname(abs).toLowerCase();
+    const mime = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" :
+        ext === ".webp" ? "image/webp" :
+            ext === ".png" ? "image/png" :
+                null;
+    if (!mime) {
+        throw new Error(`${flagName}: unsupported extension ${ext} (use png/jpg/jpeg/webp)`);
+    }
+    const buf = await fs.readFile(abs);
+    return `data:${mime};base64,${buf.toString("base64")}`;
+}
 async function loadRecipe(recipePath) {
     const raw = await fs.readFile(recipePath, "utf-8");
     const parsed = JSON.parse(raw);
@@ -93,14 +119,6 @@ async function saveLastCreate(body) {
     await fs.writeFile(LAST_CREATE_PATH, JSON.stringify(body, null, 2) + "\n", "utf-8");
 }
 // ── Filename derivation ─────────────────────────────────────────
-//
-// Cascade (highest → lowest):
-//   1. result.title              — server's actual video title (LLM or explicit)
-//   2. body.title                — user-supplied --title (pre-task fallback)
-//   3. raw topic (mode=generate, length ≤ 60, no @-prefix)
-//   4. @file stem                — when text was loaded from @./script.txt
-//   5. "reelforge" literal
-// Always suffixed with "-<task_id[:8]>" to avoid collisions.
 const FILENAME_MAX_CHARS = 40;
 function sanitizeFilename(name) {
     const cleaned = name
@@ -120,14 +138,8 @@ function computeDefaultFilename(args) {
     if (args.resultTitle && args.resultTitle.trim()) {
         base = sanitizeFilename(args.resultTitle);
     }
-    else if (args.bodyTitle && args.bodyTitle.trim()) {
-        base = sanitizeFilename(args.bodyTitle);
-    }
-    else if (args.mode === "generate" &&
-        args.rawTextInput &&
-        !args.rawTextInput.startsWith("@") &&
-        Array.from(args.rawTextInput).length <= 60) {
-        base = sanitizeFilename(args.rawTextInput);
+    else if (args.topic && Array.from(args.topic).length <= 60) {
+        base = sanitizeFilename(args.topic);
     }
     else if (args.fileStemFromAt) {
         base = sanitizeFilename(args.fileStemFromAt);
@@ -152,28 +164,35 @@ async function validateOutputPath(out) {
 /** Camel-case CLI options → snake_case body, only including provided fields */
 function optsToBody(opts) {
     const out = {};
-    if (opts.text !== undefined)
-        out.text = opts.text;
-    if (opts.mode !== undefined)
-        out.mode = opts.mode;
-    if (opts.title !== undefined)
-        out.title = opts.title;
-    if (opts.nScenes !== undefined)
-        out.n_scenes = opts.nScenes;
-    if (opts.splitMode !== undefined)
-        out.split_mode = opts.splitMode;
-    if (opts.ttsInferenceMode !== undefined)
-        out.tts_inference_mode = opts.ttsInferenceMode;
-    if (opts.ttsVoice !== undefined)
-        out.tts_voice = opts.ttsVoice;
+    if (opts.topic !== undefined)
+        out.topic = opts.topic;
+    if (opts.script !== undefined)
+        out.script = opts.script;
+    if (opts.duration !== undefined)
+        out.duration = opts.duration;
+    if (opts.pace !== undefined)
+        out.pace = opts.pace;
+    if (opts.llmModel !== undefined)
+        out.llm_model = opts.llmModel;
+    if (opts.ttsModel !== undefined)
+        out.tts_model = opts.ttsModel;
+    if (opts.asrModel !== undefined)
+        out.asr_model = opts.asrModel;
+    if (opts.imageModel !== undefined)
+        out.image_model = opts.imageModel;
+    if (opts.promptPrefix !== undefined)
+        out.prompt_prefix = opts.promptPrefix;
+    if (opts.characterRef !== undefined)
+        out.character_ref = opts.characterRef;
+    if (opts.styleRef !== undefined)
+        out.style_ref = opts.styleRef;
     if (opts.voiceId !== undefined)
         out.voice_id = opts.voiceId;
     if (opts.ttsSpeed !== undefined)
         out.tts_speed = opts.ttsSpeed;
-    if (opts.imageModel !== undefined)
-        out.image_model = opts.imageModel;
+    if (opts.videoFps !== undefined)
+        out.video_fps = opts.videoFps;
     if (opts.frameTemplate !== undefined) {
-        // Local .html path → read and send as inline; preset key → send as-is.
         if (looksLikeLocalHtmlPath(opts.frameTemplate)) {
             const abs = path.resolve(opts.frameTemplate);
             if (!fsSync.existsSync(abs)) {
@@ -189,35 +208,14 @@ function optsToBody(opts) {
         out.frame_template_size = opts.frameTemplateSize;
     if (opts.frameTemplateType !== undefined)
         out.frame_template_type = opts.frameTemplateType;
-    if (opts.promptPrefix !== undefined)
-        out.prompt_prefix = opts.promptPrefix;
-    if (opts.bgm !== undefined)
-        out.bgm_path = opts.bgm;
-    if (opts.bgmVolume !== undefined)
-        out.bgm_volume = opts.bgmVolume;
-    if (opts.bgmMode !== undefined)
-        out.bgm_mode = opts.bgmMode;
-    if (opts.minNarrationWords !== undefined)
-        out.min_narration_words = opts.minNarrationWords;
-    if (opts.maxNarrationWords !== undefined)
-        out.max_narration_words = opts.maxNarrationWords;
-    if (opts.minImagePromptWords !== undefined)
-        out.min_image_prompt_words = opts.minImagePromptWords;
-    if (opts.maxImagePromptWords !== undefined)
-        out.max_image_prompt_words = opts.maxImagePromptWords;
-    if (opts.videoFps !== undefined)
-        out.video_fps = opts.videoFps;
     if (opts.templateParams !== undefined)
         out.template_params = opts.templateParams;
+    if (opts.subtitleMinChars !== undefined)
+        out.subtitle_min_chars = opts.subtitleMinChars;
+    if (opts.subtitleHardMax !== undefined)
+        out.subtitle_hard_max = opts.subtitleHardMax;
     return out;
 }
-const DEFAULTS = {
-    mode: "generate",
-    n_scenes: 5,
-    frame_template: "1080x1920/image_default.html",
-    tts_voice: "zh-CN-YunjianNeural",
-    tts_speed: 1.2,
-};
 const STYLE_PRESETS = {
     matchstick: {
         prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style",
@@ -305,7 +303,6 @@ const STYLE_PRESETS = {
         scene: "奢华品牌 / 复古优雅",
     },
 };
-// CJK chars take 2 display columns in monospace terminals; pad accordingly.
 function displayWidth(s) {
     let w = 0;
     for (const c of s)
@@ -331,36 +328,34 @@ function formatStylePresetsList() {
 export function registerCreate(program) {
     program
         .command("create [topic]")
-        .description("One-click: topic → fully-generated MP4. 23 tunable params + recipe files.")
+        .description("One-click: topic (or your own script) → fully-generated MP4. Audio-first pipeline.")
         .helpOption("-h, --help", "show help")
-        // --- Content ---
-        .option("-t, --text <text>", "topic (mode=generate) or fixed script (mode=fixed). Prefix with @ to read from a file (e.g. @script.txt).")
-        .option("--mode <mode>", "generate | fixed (default: generate)")
-        .option("--title <text>", "explicit video title (default: LLM-generated from topic)")
-        .option("-n, --n-scenes <N>", "number of scenes", (v) => parseInt(v, 10))
-        .option("--split-mode <mode>", "paragraph | line | sentence (mode=fixed only)")
-        .option("--min-narration-words <N>", "narration min words per scene", (v) => parseInt(v, 10))
-        .option("--max-narration-words <N>", "narration max words per scene", (v) => parseInt(v, 10))
-        .option("--min-image-prompt-words <N>", "image prompt min words", (v) => parseInt(v, 10))
-        .option("--max-image-prompt-words <N>", "image prompt max words", (v) => parseInt(v, 10))
+        // --- Content (exactly one of --topic / --script) ---
+        .option("-t, --topic <text>", "video topic; AI writes the script (mode=generate). Prefix with @file to read from disk.")
+        .option("--script <text>", "your own master script text; AI just plans scenes + visuals (mode=fixed). Prefix with @file to read from disk.")
+        .option("-d, --duration <sec>", "target video duration in seconds (generate mode only; default 45). LLM aims for ~duration × 5 chars of narration.", (v) => parseInt(v, 10))
+        .option("-p, --pace <pace>", "visual rhythm hint passed to the LLM: slow | normal | fast (default normal). LLM still decides the actual scene count from semantic structure.")
         // --- Visual ---
         .option("--frame-template <keyOrPath>", "HTML frame template: preset key (e.g. 1080x1920/image_default.html) OR path to a local .html (auto-sent inline)")
         .option("--frame-template-size <wxh>", "size for inline HTML when the file lacks <meta template:width|height>, e.g. 1080x1920")
-        .option("--frame-template-type <type>", "inline template type: image (default) | static | asset. Controls whether AI image generation runs per frame. Can also be set via <meta name=\"template:type\" content=\"...\"> in the HTML.")
-        .option("--image-model <id>", "RelayX image model (rx-image-z | rx-image-flux | rx-image-qwen)")
+        .option("--frame-template-type <type>", "inline template type: image (default) | static | asset. Controls whether AI image generation runs per scene.")
+        .option("--image-model <id>", "RelayX image model (rx-image-z | rx-image-flux | rx-image-qwen | rx-image-qwen-edit). Auto-switches to rx-image-qwen-edit when --character-ref or --style-ref is set.")
         .option("--prompt-prefix <text>", "raw style prefix prepended to every image prompt (overrides --style)")
-        .option("--style <preset>", "image style preset — shortcut for --prompt-prefix; see 'Style presets' below for the full list")
+        .option("--style <preset>", "image style preset — shortcut for --prompt-prefix; see 'Style presets' below")
+        .option("--character-ref <urlOrPath>", "reference image of the main character — locks identity across scenes. URL, data: URI, or local png/jpg/webp path (auto-encoded). Auto-enables rx-image-qwen-edit.")
+        .option("--style-ref <urlOrPath>", "reference image of the overall visual style — locks palette/composition/mood across scenes. Same input formats as --character-ref.")
         // --- Audio (TTS) ---
-        .option("--tts-voice <id>", "TTS voice id; for edge use e.g. zh-CN-YunjianNeural / en-US-AriaNeural; for relayx use vox voice ids (default: 专业解说)")
-        .option("--tts-speed <n>", "speech speed 0.5..2", parseFloat)
-        .option("--tts-inference-mode <mode>", "edge (default, local Microsoft Edge TTS) | relayx (vox/index-tts-2 via RelayX)")
-        .option("--voice-id <id>", "alias of --tts-voice (legacy compat)")
-        // --- Audio (BGM) ---
-        .option("--bgm <path>", "background music file path (server-side relative to bgm/)")
-        .option("--bgm-volume <n>", "BGM volume 0..1", parseFloat)
-        .option("--bgm-mode <mode>", "loop | once")
+        .option("--voice-id <id>", "RelayX TTS voice id (default 专业解说); see `rf tts voices`")
+        .option("--tts-speed <n>", "speech speed 0.5..2 (default 1.0)", parseFloat)
+        // --- Service overrides ---
+        .option("--llm-model <id>", "override the LLM model used for scene-plan")
+        .option("--tts-model <id>", "override the TTS model (default vox/index-tts-2)")
+        .option("--asr-model <id>", "override the ASR model (default alibaba/paraformer-v2)")
+        // --- Subtitle splitter knobs (advanced) ---
+        .option("--subtitle-min-chars <N>", "subtitle line min chars (default 10)", (v) => parseInt(v, 10))
+        .option("--subtitle-hard-max <N>", "subtitle line absolute max chars (default 24)", (v) => parseInt(v, 10))
         // --- Output / extra ---
-        .option("--video-fps <n>", "output video fps", (v) => parseInt(v, 10))
+        .option("--video-fps <n>", "output video fps (default 30)", (v) => parseInt(v, 10))
         .option("--template-params <json>", "extra template placeholders as JSON string", (v) => {
         try {
             return JSON.parse(v);
@@ -374,110 +369,73 @@ export function registerCreate(program) {
         .option("--redo", "replay last successful create from ~/.reelforge/last-create.json")
         .option("--dry-run", "print the final request body + estimated units; do NOT submit")
         .option("--no-wait", "submit and return task_id immediately (do not poll)")
-        .option("-o, --output <file>", "save the final video to this exact path (must include filename, e.g. ./out/space.mp4). Default: auto-named file in current directory.")
-        .option("--no-download", "do not save the video locally — just print the JSON result with video_url")
+        .option("-o, --output <file>", "save the final video to this exact path (must include filename, e.g. ./out/space.mp4).")
+        .option("--no-download", "do not save the video locally — just print JSON with video_url")
         .option("--poll-ms <ms>", "poll interval while waiting", (v) => parseInt(v, 10), 1500)
         .option("--timeout-ms <ms>", "max wait time before aborting (default unlimited)", (v) => parseInt(v, 10))
         .addHelpText("after", [
         "",
-        "Defaults match the /create web page:",
-        "  mode=generate · n-scenes=5 · frame-template=1080x1920/image_default.html",
-        "  tts-voice=zh-CN-YunjianNeural · tts-speed=1.2",
+        "Two content modes (one is required):",
+        "  generate    AI writes the script.   --topic / -t <text>  + optional --duration -d",
+        "  fixed       You supply the script.  --script <text-or-@file>",
+        "",
+        "Pace (visual rhythm hint to the LLM):",
+        "  slow    fewer scenes, glued to semantic boundaries",
+        "  normal  balance semantic edges with visual variety (default)",
+        "  fast    split long semantic chunks into multiple shots for variety",
         "",
-        "Param groups:",
-        "  Content : --mode --title -n --split-mode --min/max-narration-words --min/max-image-prompt-words",
-        "  Visual  : --frame-template --image-model --style --prompt-prefix",
-        "  TTS     : --tts-voice --tts-speed --tts-inference-mode --voice-id",
-        "  BGM     : --bgm --bgm-volume --bgm-mode",
-        "  Output  : --video-fps --template-params -o --no-download --no-wait --poll-ms --timeout-ms",
-        "  Workflow: --recipe --redo --dry-run",
+        "Defaults:",
+        "  duration=45s · pace=normal · frame-template=1080x1920/image_default.html · tts-speed=1.0",
         "",
         "Style presets (--style <preset>) — quick shortcut for --prompt-prefix:",
         formatStylePresetsList(),
         "  · Pass --prompt-prefix to override (raw string always wins).",
-        "  · Omit both to use the server's configured default style.",
+        "  · Omit both to use the server's configured default style (if any).",
         "",
         "Output behavior:",
-        "  No flag     → saves to ./<title>-<task_id>.mp4 in current directory, prints the path",
-        "  -o <path>   → saves to that exact path (must include filename, not just a directory)",
+        "  No flag       → saves to ./<title>-<task_id>.mp4 in current directory, prints the path",
+        "  -o <path>     → saves to that exact path (must include filename)",
         "  --no-download → skips local save, just prints JSON result with video_url",
         "  (when stdout is piped, --no-download is implied automatically)",
         "",
-        "Explore available resources (separate commands):",
-        "  reelforge templates list                  # all HTML templates",
-        "  reelforge tts voices --locale zh          # Edge TTS voice ids",
-        "  reelforge bgm list                        # built-in BGM files",
-        "",
-        "Examples (`rf` is a short alias for `reelforge`):",
-        "  # Minimum — saves to ./<title>-<short_id>.mp4 in cwd",
+        "Examples (`rf` is the short alias):",
+        "  # Minimum — AI writes a 45s script",
         '  rf create "为什么我们还没找到外星文明？"',
         "",
-        "  # Pick the exact output path",
-        '  rf create "..." -o ./videos/space.mp4',
-        "",
-        "  # Long script from a file, fixed mode (no LLM scriptwriting)",
-        "  rf create @./script.txt --mode fixed --split-mode paragraph",
-        "",
-        "  # Landscape (1920x1080)",
-        '  rf create "..." --frame-template 1920x1080/image_default.html',
+        "  # 60-second video with slow visual pace",
+        '  rf create "..." -d 60 -p slow',
         "",
-        "  # Add BGM",
-        '  rf create "..." --bgm bgm/Echoes.mp3 --bgm-volume 0.3 --bgm-mode loop',
+        "  # Your own script, you decide the wording",
+        "  rf create --script @./script.txt",
+        '  rf create --script "整段文案文本..."',
         "",
-        "  # Change voice + speed",
-        '  rf create "..." --tts-voice zh-CN-XiaoxiaoNeural --tts-speed 1.0',
+        "  # Custom HTML template (auto-detected when --frame-template is a local path)",
+        "  rf create '...' --frame-template ./my-brand.html",
         "",
         "  # Pick a built-in style preset",
         '  rf create "..." --style cinematic',
-        '  rf create "美食教程" --style photorealistic',
         "",
-        "  # Free-form style — write your own prefix from scratch",
-        '  rf create "..." --prompt-prefix "Studio Ghibli, pastel, dreamy"',
+        "  # Cross-scene character consistency (auto-switches image model)",
+        '  rf create "主角小女孩的一天" --character-ref ./hero.png',
+        "  rf create '...' --character-ref ./hero.png --style-ref ./mood.jpg",
+        '  rf create "..." --style-ref https://example.com/style.png',
         "",
-        "  # Custom HTML template (auto-detected when --frame-template is a local path)",
-        "  rf create '...' --frame-template ./my-brand.html",
-        "  #   ↳ default type=image (best-practice; AI image generated per scene).",
-        "  #   ↳ pure-text template? declare `--frame-template-type static`",
-        "  #     OR add `<meta name=\"template:type\" content=\"static\">` inside the HTML.",
-        "",
-        "  # Full recipe in one file",
+        "  # Recipe + replay last",
         "  rf create --recipe ./space.recipe.json",
-        "",
-        "  # Override a field on top of a recipe",
-        '  rf create --recipe ./space.recipe.json --text "新主题" -n 8',
-        "",
-        "  # Replay last successful create",
-        "  rf create --redo",
-        "",
-        "  # Replay last but tweak one knob",
-        "  rf create --redo --tts-speed 1.0",
+        "  rf create --redo                       # replay last successful create",
+        "  rf create --redo -p fast               # replay with one knob tweaked",
         "",
         "  # See exactly what would be sent (no submission)",
-        '  rf create "..." -n 7 --bgm bgm/Echoes.mp3 --dry-run',
+        '  rf create "..." -d 60 --dry-run',
         "",
-        "  # Pipe-friendly: skip local download, take video_url for downstream",
+        "  # Pipe-friendly",
         '  rf create "..." --no-download --json | jq -r .video_url',
-        "",
-        "Recipe file format (every field is optional; all keys match the REST API body):",
-        "  {",
-        '    "text": "为什么我们还没找到外星文明？",',
-        '    "n_scenes": 7,',
-        '    "frame_template": "1080x1920/image_default.html",',
-        '    "image_model": "rx-image-flux",',
-        '    "prompt_prefix": "Minimalist matchstick figure style",',
-        '    "tts_voice": "zh-CN-YunjianNeural",',
-        '    "tts_speed": 1.2,',
-        '    "bgm_path": "bgm/Echoes.mp3",',
-        '    "bgm_volume": 0.2',
-        "  }",
     ].join("\n"))
         .action(async (topicArg, opts) => {
-        // Validate -o early so we fail before submitting a paid task
         if (opts.output) {
             await validateOutputPath(opts.output);
         }
-        // Expand --style preset to --prompt-prefix unless an explicit
-        // --prompt-prefix is also given (the raw string always wins).
+        // Expand --style preset to --prompt-prefix unless --prompt-prefix is given.
         if (opts.style) {
             const preset = STYLE_PRESETS[opts.style];
             if (!preset) {
@@ -487,6 +445,9 @@ export function registerCreate(program) {
                 opts.promptPrefix = preset.prefix;
             }
         }
+        if (opts.pace && !["slow", "normal", "fast"].includes(opts.pace)) {
+            throw new Error(`--pace must be one of slow|normal|fast (got: ${opts.pace})`);
+        }
         // 1. Layer defaults: --redo → --recipe → CLI opts → positional topic
         let body = {};
         if (opts.redo) {
@@ -502,51 +463,62 @@ export function registerCreate(program) {
             body = { ...body, ...recipe };
             info(`Loaded recipe from ${opts.recipe}`);
         }
-        // CLI options layer
         const fromOpts = optsToBody(opts);
         body = { ...body, ...fromOpts };
-        // Capture the raw text input (with potential @-prefix) for filename derivation.
-        // After `resolveText` we lose the @path → file stem mapping.
-        const rawTextInput = topicArg ?? (typeof body.text === "string" ? body.text : undefined);
-        const fileStemFromAt = rawTextInput?.startsWith("@")
-            ? path.parse(rawTextInput.slice(1)).name
-            : undefined;
-        // Positional topic wins for `text` (with @file support)
+        // Positional arg always wins for `topic`.
+        // Resolve @file prefix on whichever of topic/script is set.
+        const rawTopicInput = topicArg ?? (typeof body.topic === "string" ? body.topic : undefined);
+        const fileStemFromAt = rawTopicInput?.startsWith("@") ? path.parse(rawTopicInput.slice(1)).name :
+            body.script?.startsWith("@") ? path.parse(body.script.slice(1)).name :
+                undefined;
         if (topicArg) {
-            body.text = await resolveText(topicArg);
+            body.topic = await resolveTextOrFile(topicArg);
+        }
+        else if (typeof body.topic === "string") {
+            body.topic = await resolveTextOrFile(body.topic);
+        }
+        if (typeof body.script === "string") {
+            body.script = await resolveTextOrFile(body.script);
         }
-        else if (typeof body.text === "string") {
-            body.text = await resolveText(body.text);
+        // Resolve refs: local file paths → data: URIs (RelayX accepts both
+        // https:// and data: in image_urls). Done after layering so a recipe
+        // can carry refs by path too.
+        const resolvedChar = await resolveRefImage(body.character_ref, "--character-ref");
+        const resolvedStyle = await resolveRefImage(body.style_ref, "--style-ref");
+        if (resolvedChar !== undefined)
+            body.character_ref = resolvedChar;
+        else
+            delete body.character_ref;
+        if (resolvedStyle !== undefined)
+            body.style_ref = resolvedStyle;
+        else
+            delete body.style_ref;
+        // Validate content mode
+        const hasTopic = typeof body.topic === "string" && body.topic.trim().length > 0;
+        const hasScript = typeof body.script === "string" && body.script.trim().length > 0;
+        if (!hasTopic && !hasScript) {
+            throw new Error("either --topic (or positional arg) or --script is required.");
         }
-        if (!body.text) {
-            throw new Error("text is required — pass it as the positional arg, or via --text / --recipe / --redo.");
+        if (hasTopic && hasScript) {
+            throw new Error("--topic and --script are mutually exclusive (pick one mode).");
         }
-        // 2. Apply defaults for fields still unset
-        const finalBody = {
-            ...DEFAULTS,
-            ...body,
-            text: body.text,
-        };
-        // When the user supplied inline HTML, the DEFAULTS' `frame_template`
-        // key is irrelevant — drop it so the server-side request body stays
-        // clean and the dry-run output isn't misleading.
+        // 3. Final body — drop empty / null fields
+        const finalBody = { ...body };
         if (finalBody.frame_template_inline && finalBody.frame_template) {
             delete finalBody.frame_template;
         }
-        // 3. Estimate cost
+        // 4. Estimate cost
         const estimate = estimateUnits(finalBody);
-        // 4. Dry-run: print & exit
         if (opts.dryRun) {
             info("--- DRY RUN ---");
             info("Final request body:");
             print(finalBody);
-            info(`Estimated cost: ${estimate} units`);
+            info(`Estimated cost: ≈ ${estimate} units`);
             info("(use without --dry-run to actually submit)");
             return;
         }
         info(`Submitting create task (≈ ${estimate} units)...`);
         const submitted = await post("/api/v1/pipelines/standard", finalBody);
-        // 5. Save as last (post-submit, before wait — so even cancelled tasks can be replayed)
         await saveLastCreate(finalBody).catch((e) => {
             warn(`Could not save last-create.json: ${e.message}`);
         });
@@ -563,11 +535,6 @@ export function registerCreate(program) {
             throw new Error(t.error || `Task ended with status ${t.status}`);
         }
         const result = t.result;
-        // Decide where (or whether) to save locally.
-        //   -o            → that exact path
-        //   --no-download → skip
-        //   stdout piped  → skip (clig.dev: don't dump binary-touching side effects into a script)
-        //   otherwise     → auto-named in cwd
         if (result?.video_url) {
             const stdoutIsPipe = !process.stdout.isTTY;
             const skipDownload = !!opts.noDownload || (stdoutIsPipe && !opts.output);
@@ -576,11 +543,10 @@ export function registerCreate(program) {
                 savedPath = opts.output;
             }
             else if (!skipDownload) {
+                const topicForFilename = hasTopic && finalBody.topic ? finalBody.topic : undefined;
                 savedPath = computeDefaultFilename({
                     resultTitle: result.title,
-                    bodyTitle: finalBody.title,
-                    mode: finalBody.mode,
-                    rawTextInput,
+                    topic: topicForFilename,
                     fileStemFromAt,
                     taskId: t.id,
                     ext: "mp4",