reelforge 0.5.5 → 0.7.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -8,17 +8,13 @@ import { downloadTo } from "../utils/download.js";
8
8
  import { info, print, success, warn } from "../utils/output.js";
9
9
  const LAST_CREATE_PATH = path.join(os.homedir(), ".reelforge", "last-create.json");
10
10
  // ── Cost estimation (mirrors server src/lib/billing.ts) ──────────
11
- const IMAGE_UNITS = 3; // matches ATOMIC_UNITS["images.generate"] in src/lib/billing.ts
12
- const TTS_RELAYX_UNITS = 1; // matches ATOMIC_UNITS["tts.relayx"]
11
+ const PLAN_UNITS = 1;
12
+ const TTS_UNITS = 1;
13
+ const ASR_UNITS = 1;
14
+ const IMAGE_UNITS = 3;
15
+ const CHARS_PER_SEC_ZH = 5;
16
+ const TARGET_SEC_PER_SCENE = 8;
13
17
  function estimateUnits(body) {
14
- const mode = body.mode || "generate";
15
- const titleExplicit = !!body.title;
16
- const N = body.n_scenes ?? 5;
17
- // Template type resolution mirrors the server (src/lib/billing.ts):
18
- // inline HTML → explicit body.frame_template_type
19
- // → <meta name="template:type" content="..."> in the HTML
20
- // → default "image"
21
- // preset key → parsed from the filename prefix (static_/asset_/image_)
22
18
  let tplType;
23
19
  if (body.frame_template_inline) {
24
20
  if (body.frame_template_type) {
@@ -31,7 +27,7 @@ function estimateUnits(body) {
31
27
  }
32
28
  }
33
29
  else {
34
- const tplKey = body.frame_template || "1080x1920/static_default.html";
30
+ const tplKey = body.frame_template || "1080x1920/image_default.html";
35
31
  const tplBase = (tplKey.split("/").pop() || "").toLowerCase();
36
32
  tplType = tplBase.startsWith("static_")
37
33
  ? "static"
@@ -39,22 +35,20 @@ function estimateUnits(body) {
39
35
  ? "asset"
40
36
  : "image";
41
37
  }
42
- const mediaPerFrame = tplType === "image" ? IMAGE_UNITS : 0;
43
- const ttsMode = body.tts_inference_mode || "edge";
44
- const ttsPerFrame = ttsMode === "relayx" ? TTS_RELAYX_UNITS : 0;
45
- const narrations = mode === "generate" ? 1 : 0;
46
- const title = titleExplicit ? 0 : 1;
47
- const imagePrompts = tplType === "static" ? 0 : 1;
48
- return narrations + title + imagePrompts + N * (ttsPerFrame + mediaPerFrame);
38
+ // Estimated scene count: from script length (fixed) or from duration (generate).
39
+ let estimatedScenes;
40
+ if (body.script) {
41
+ const estSec = body.script.length / CHARS_PER_SEC_ZH;
42
+ estimatedScenes = Math.max(2, Math.round(estSec / TARGET_SEC_PER_SCENE));
43
+ }
44
+ else {
45
+ const dur = body.duration ?? 45;
46
+ estimatedScenes = Math.max(2, Math.round(dur / TARGET_SEC_PER_SCENE));
47
+ }
48
+ const imageUnits = tplType === "image" ? estimatedScenes * IMAGE_UNITS : 0;
49
+ return PLAN_UNITS + TTS_UNITS + ASR_UNITS + imageUnits;
49
50
  }
50
51
  // ── Helpers ─────────────────────────────────────────────────────
51
- /**
52
- * Distinguish a local HTML file path from a preset template key.
53
- * Preset keys look like `"<size>/<file>.html"` (one slash, no dots/slashes
54
- * outside that pattern). Anything starting with `./`, `../`, `/`, `~`, or
55
- * containing a backslash, or that ends with `.html` and exists on disk, is
56
- * treated as a local path.
57
- */
58
52
  function looksLikeLocalHtmlPath(value) {
59
53
  if (/^[.~]|^\//.test(value))
60
54
  return true;
@@ -64,13 +58,45 @@ function looksLikeLocalHtmlPath(value) {
64
58
  return true;
65
59
  return false;
66
60
  }
67
- async function resolveText(input) {
61
/**
 * Resolve a text argument that may point at a file.
 *
 * A leading `@` means "treat the rest as a file path": the file is read as
 * UTF-8 and surrounding whitespace is trimmed. Any other input is returned
 * exactly as given (no trimming).
 *
 * @param {string} input  Raw text, or `@<path>`.
 * @returns {Promise<string>} The file contents (trimmed) or the raw text.
 * @throws {Error} When `@` is not followed by a path. File-system errors
 *   raised while reading the file propagate unchanged.
 */
async function resolveTextOrFile(input) {
    if (!input.startsWith("@"))
        return input;
    const file = input.slice(1);
    // A bare "@" would otherwise reach readFile("") and surface as a
    // confusing ENOENT on an empty path.
    if (!file) {
        throw new Error('"@" must be followed by a file path (e.g. @./script.txt)');
    }
    return (await fs.readFile(file, "utf-8")).trim();
}
69
/**
 * Resolve a reference-image argument into a value that can travel inside a
 * pure-JSON request body.
 *
 * Accepted inputs:
 *   - an http(s):// URL  → returned as-is (after trimming)
 *   - a data: URI        → returned as-is (after trimming)
 *   - a local file path  → read from disk and returned as a base64 data: URI
 *
 * @param {string | null | undefined} input  Raw value from the CLI flag or a
 *   recipe file.
 * @param {string} flagName  Flag name used as the prefix of error messages
 *   (e.g. "--character-ref").
 * @returns {Promise<string | undefined>} The URL / data: URI, or undefined
 *   when the input is missing or blank so the caller can branch on "user
 *   actually provided this knob".
 * @throws {Error} When the value is not a string, the local file does not
 *   exist, or its extension is not png/jpg/jpeg/webp.
 */
async function resolveRefImage(input, flagName) {
    // Recipes are parsed from JSON, so a ref may arrive as null or another
    // non-string type. Treat null like "not provided" and reject other types
    // with a readable message instead of crashing on `.trim()`.
    if (input === undefined || input === null)
        return undefined;
    if (typeof input !== "string") {
        throw new Error(`${flagName}: expected a URL, data: URI, or file path (got ${typeof input})`);
    }
    const t = input.trim();
    if (!t)
        return undefined;
    // Remote URLs and ready-made data: URIs are passed through untouched.
    if (/^https?:\/\//i.test(t) || t.startsWith("data:"))
        return t;
    const abs = path.resolve(t);
    if (!fsSync.existsSync(abs)) {
        throw new Error(`${flagName}: local file not found: ${abs}`);
    }
    const ext = path.extname(abs).toLowerCase();
    const mime = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" :
        ext === ".webp" ? "image/webp" :
            ext === ".png" ? "image/png" :
                null;
    if (!mime) {
        // `ext` is "" for extension-less files; say so rather than leaving a gap.
        throw new Error(`${flagName}: unsupported extension ${ext || "(none)"} (use png/jpg/jpeg/webp)`);
    }
    const buf = await fs.readFile(abs);
    return `data:${mime};base64,${buf.toString("base64")}`;
}
74
100
  async function loadRecipe(recipePath) {
75
101
  const raw = await fs.readFile(recipePath, "utf-8");
76
102
  const parsed = JSON.parse(raw);
@@ -93,14 +119,6 @@ async function saveLastCreate(body) {
93
119
  await fs.writeFile(LAST_CREATE_PATH, JSON.stringify(body, null, 2) + "\n", "utf-8");
94
120
  }
95
121
  // ── Filename derivation ─────────────────────────────────────────
96
- //
97
- // Cascade (highest → lowest):
98
- // 1. result.title — server's actual video title (LLM or explicit)
99
- // 2. body.title — user-supplied --title (pre-task fallback)
100
- // 3. raw topic (mode=generate, length ≤ 60, no @-prefix)
101
- // 4. @file stem — when text was loaded from @./script.txt
102
- // 5. "reelforge" literal
103
- // Always suffixed with "-<task_id[:8]>" to avoid collisions.
104
122
  const FILENAME_MAX_CHARS = 40;
105
123
  function sanitizeFilename(name) {
106
124
  const cleaned = name
@@ -120,14 +138,8 @@ function computeDefaultFilename(args) {
120
138
  if (args.resultTitle && args.resultTitle.trim()) {
121
139
  base = sanitizeFilename(args.resultTitle);
122
140
  }
123
- else if (args.bodyTitle && args.bodyTitle.trim()) {
124
- base = sanitizeFilename(args.bodyTitle);
125
- }
126
- else if (args.mode === "generate" &&
127
- args.rawTextInput &&
128
- !args.rawTextInput.startsWith("@") &&
129
- Array.from(args.rawTextInput).length <= 60) {
130
- base = sanitizeFilename(args.rawTextInput);
141
+ else if (args.topic && Array.from(args.topic).length <= 60) {
142
+ base = sanitizeFilename(args.topic);
131
143
  }
132
144
  else if (args.fileStemFromAt) {
133
145
  base = sanitizeFilename(args.fileStemFromAt);
@@ -152,28 +164,35 @@ async function validateOutputPath(out) {
152
164
  /** Camel-case CLI options → snake_case body, only including provided fields */
153
165
  function optsToBody(opts) {
154
166
  const out = {};
155
- if (opts.text !== undefined)
156
- out.text = opts.text;
157
- if (opts.mode !== undefined)
158
- out.mode = opts.mode;
159
- if (opts.title !== undefined)
160
- out.title = opts.title;
161
- if (opts.nScenes !== undefined)
162
- out.n_scenes = opts.nScenes;
163
- if (opts.splitMode !== undefined)
164
- out.split_mode = opts.splitMode;
165
- if (opts.ttsInferenceMode !== undefined)
166
- out.tts_inference_mode = opts.ttsInferenceMode;
167
- if (opts.ttsVoice !== undefined)
168
- out.tts_voice = opts.ttsVoice;
167
+ if (opts.topic !== undefined)
168
+ out.topic = opts.topic;
169
+ if (opts.script !== undefined)
170
+ out.script = opts.script;
171
+ if (opts.duration !== undefined)
172
+ out.duration = opts.duration;
173
+ if (opts.pace !== undefined)
174
+ out.pace = opts.pace;
175
+ if (opts.llmModel !== undefined)
176
+ out.llm_model = opts.llmModel;
177
+ if (opts.ttsModel !== undefined)
178
+ out.tts_model = opts.ttsModel;
179
+ if (opts.asrModel !== undefined)
180
+ out.asr_model = opts.asrModel;
181
+ if (opts.imageModel !== undefined)
182
+ out.image_model = opts.imageModel;
183
+ if (opts.promptPrefix !== undefined)
184
+ out.prompt_prefix = opts.promptPrefix;
185
+ if (opts.characterRef !== undefined)
186
+ out.character_ref = opts.characterRef;
187
+ if (opts.styleRef !== undefined)
188
+ out.style_ref = opts.styleRef;
169
189
  if (opts.voiceId !== undefined)
170
190
  out.voice_id = opts.voiceId;
171
191
  if (opts.ttsSpeed !== undefined)
172
192
  out.tts_speed = opts.ttsSpeed;
173
- if (opts.imageModel !== undefined)
174
- out.image_model = opts.imageModel;
193
+ if (opts.videoFps !== undefined)
194
+ out.video_fps = opts.videoFps;
175
195
  if (opts.frameTemplate !== undefined) {
176
- // Local .html path → read and send as inline; preset key → send as-is.
177
196
  if (looksLikeLocalHtmlPath(opts.frameTemplate)) {
178
197
  const abs = path.resolve(opts.frameTemplate);
179
198
  if (!fsSync.existsSync(abs)) {
@@ -189,35 +208,14 @@ function optsToBody(opts) {
189
208
  out.frame_template_size = opts.frameTemplateSize;
190
209
  if (opts.frameTemplateType !== undefined)
191
210
  out.frame_template_type = opts.frameTemplateType;
192
- if (opts.promptPrefix !== undefined)
193
- out.prompt_prefix = opts.promptPrefix;
194
- if (opts.bgm !== undefined)
195
- out.bgm_path = opts.bgm;
196
- if (opts.bgmVolume !== undefined)
197
- out.bgm_volume = opts.bgmVolume;
198
- if (opts.bgmMode !== undefined)
199
- out.bgm_mode = opts.bgmMode;
200
- if (opts.minNarrationWords !== undefined)
201
- out.min_narration_words = opts.minNarrationWords;
202
- if (opts.maxNarrationWords !== undefined)
203
- out.max_narration_words = opts.maxNarrationWords;
204
- if (opts.minImagePromptWords !== undefined)
205
- out.min_image_prompt_words = opts.minImagePromptWords;
206
- if (opts.maxImagePromptWords !== undefined)
207
- out.max_image_prompt_words = opts.maxImagePromptWords;
208
- if (opts.videoFps !== undefined)
209
- out.video_fps = opts.videoFps;
210
211
  if (opts.templateParams !== undefined)
211
212
  out.template_params = opts.templateParams;
213
+ if (opts.subtitleMinChars !== undefined)
214
+ out.subtitle_min_chars = opts.subtitleMinChars;
215
+ if (opts.subtitleHardMax !== undefined)
216
+ out.subtitle_hard_max = opts.subtitleHardMax;
212
217
  return out;
213
218
  }
214
- const DEFAULTS = {
215
- mode: "generate",
216
- n_scenes: 5,
217
- frame_template: "1080x1920/image_default.html",
218
- tts_voice: "zh-CN-YunjianNeural",
219
- tts_speed: 1.2,
220
- };
221
219
  const STYLE_PRESETS = {
222
220
  matchstick: {
223
221
  prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style",
@@ -305,7 +303,6 @@ const STYLE_PRESETS = {
305
303
  scene: "奢华品牌 / 复古优雅",
306
304
  },
307
305
  };
308
- // CJK chars take 2 display columns in monospace terminals; pad accordingly.
309
306
  function displayWidth(s) {
310
307
  let w = 0;
311
308
  for (const c of s)
@@ -331,36 +328,34 @@ function formatStylePresetsList() {
331
328
  export function registerCreate(program) {
332
329
  program
333
330
  .command("create [topic]")
334
- .description("One-click: topic → fully-generated MP4. 23 tunable params + recipe files.")
331
+ .description("One-click: topic (or your own script) → fully-generated MP4. Audio-first pipeline.")
335
332
  .helpOption("-h, --help", "show help")
336
- // --- Content ---
337
- .option("-t, --text <text>", "topic (mode=generate) or fixed script (mode=fixed). Prefix with @ to read from a file (e.g. @script.txt).")
338
- .option("--mode <mode>", "generate | fixed (default: generate)")
339
- .option("--title <text>", "explicit video title (default: LLM-generated from topic)")
340
- .option("-n, --n-scenes <N>", "number of scenes", (v) => parseInt(v, 10))
341
- .option("--split-mode <mode>", "paragraph | line | sentence (mode=fixed only)")
342
- .option("--min-narration-words <N>", "narration min words per scene", (v) => parseInt(v, 10))
343
- .option("--max-narration-words <N>", "narration max words per scene", (v) => parseInt(v, 10))
344
- .option("--min-image-prompt-words <N>", "image prompt min words", (v) => parseInt(v, 10))
345
- .option("--max-image-prompt-words <N>", "image prompt max words", (v) => parseInt(v, 10))
333
+ // --- Content (exactly one of --topic / --script) ---
334
+ .option("-t, --topic <text>", "video topic; AI writes the script (mode=generate). Prefix with @file to read from disk.")
335
+ .option("--script <text>", "your own master script text; AI just plans scenes + visuals (mode=fixed). Prefix with @file to read from disk.")
336
+ .option("-d, --duration <sec>", "target video duration in seconds (generate mode only; default 45). LLM aims for ~duration × 5 chars of narration.", (v) => parseInt(v, 10))
337
+ .option("-p, --pace <pace>", "visual rhythm hint passed to the LLM: slow | normal | fast (default normal). LLM still decides the actual scene count from semantic structure.")
346
338
  // --- Visual ---
347
339
  .option("--frame-template <keyOrPath>", "HTML frame template: preset key (e.g. 1080x1920/image_default.html) OR path to a local .html (auto-sent inline)")
348
340
  .option("--frame-template-size <wxh>", "size for inline HTML when the file lacks <meta template:width|height>, e.g. 1080x1920")
349
- .option("--frame-template-type <type>", "inline template type: image (default) | static | asset. Controls whether AI image generation runs per frame. Can also be set via <meta name=\"template:type\" content=\"...\"> in the HTML.")
350
- .option("--image-model <id>", "RelayX image model (rx-image-z | rx-image-flux | rx-image-qwen)")
341
+ .option("--frame-template-type <type>", "inline template type: image (default) | static | asset. Controls whether AI image generation runs per scene.")
342
+ .option("--image-model <id>", "RelayX image model (rx-image-z | rx-image-flux | rx-image-qwen | rx-image-qwen-edit). Auto-switches to rx-image-qwen-edit when --character-ref or --style-ref is set.")
351
343
  .option("--prompt-prefix <text>", "raw style prefix prepended to every image prompt (overrides --style)")
352
- .option("--style <preset>", "image style preset — shortcut for --prompt-prefix; see 'Style presets' below for the full list")
344
+ .option("--style <preset>", "image style preset — shortcut for --prompt-prefix; see 'Style presets' below")
345
+ .option("--character-ref <urlOrPath>", "reference image of the main character — locks identity across scenes. URL, data: URI, or local png/jpg/webp path (auto-encoded). Auto-enables rx-image-qwen-edit.")
346
+ .option("--style-ref <urlOrPath>", "reference image of the overall visual style — locks palette/composition/mood across scenes. Same input formats as --character-ref.")
353
347
  // --- Audio (TTS) ---
354
- .option("--tts-voice <id>", "TTS voice id; for edge use e.g. zh-CN-YunjianNeural / en-US-AriaNeural; for relayx use vox voice ids (default: 专业解说)")
355
- .option("--tts-speed <n>", "speech speed 0.5..2", parseFloat)
356
- .option("--tts-inference-mode <mode>", "edge (default, local Microsoft Edge TTS) | relayx (vox/index-tts-2 via RelayX)")
357
- .option("--voice-id <id>", "alias of --tts-voice (legacy compat)")
358
- // --- Audio (BGM) ---
359
- .option("--bgm <path>", "background music file path (server-side relative to bgm/)")
360
- .option("--bgm-volume <n>", "BGM volume 0..1", parseFloat)
361
- .option("--bgm-mode <mode>", "loop | once")
348
+ .option("--voice-id <id>", "RelayX TTS voice id (default 专业解说); see `rf tts voices`")
349
+ .option("--tts-speed <n>", "speech speed 0.5..2 (default 1.0)", parseFloat)
350
+ // --- Service overrides ---
351
+ .option("--llm-model <id>", "override the LLM model used for scene-plan")
352
+ .option("--tts-model <id>", "override the TTS model (default vox/index-tts-2)")
353
+ .option("--asr-model <id>", "override the ASR model (default alibaba/paraformer-v2)")
354
+ // --- Subtitle splitter knobs (advanced) ---
355
+ .option("--subtitle-min-chars <N>", "subtitle line min chars (default 10)", (v) => parseInt(v, 10))
356
+ .option("--subtitle-hard-max <N>", "subtitle line absolute max chars (default 24)", (v) => parseInt(v, 10))
362
357
  // --- Output / extra ---
363
- .option("--video-fps <n>", "output video fps", (v) => parseInt(v, 10))
358
+ .option("--video-fps <n>", "output video fps (default 30)", (v) => parseInt(v, 10))
364
359
  .option("--template-params <json>", "extra template placeholders as JSON string", (v) => {
365
360
  try {
366
361
  return JSON.parse(v);
@@ -374,110 +369,73 @@ export function registerCreate(program) {
374
369
  .option("--redo", "replay last successful create from ~/.reelforge/last-create.json")
375
370
  .option("--dry-run", "print the final request body + estimated units; do NOT submit")
376
371
  .option("--no-wait", "submit and return task_id immediately (do not poll)")
377
- .option("-o, --output <file>", "save the final video to this exact path (must include filename, e.g. ./out/space.mp4). Default: auto-named file in current directory.")
378
- .option("--no-download", "do not save the video locally — just print the JSON result with video_url")
372
+ .option("-o, --output <file>", "save the final video to this exact path (must include filename, e.g. ./out/space.mp4).")
373
+ .option("--no-download", "do not save the video locally — just print JSON with video_url")
379
374
  .option("--poll-ms <ms>", "poll interval while waiting", (v) => parseInt(v, 10), 1500)
380
375
  .option("--timeout-ms <ms>", "max wait time before aborting (default unlimited)", (v) => parseInt(v, 10))
381
376
  .addHelpText("after", [
382
377
  "",
383
- "Defaults match the /create web page:",
384
- " mode=generate · n-scenes=5 · frame-template=1080x1920/image_default.html",
385
- " tts-voice=zh-CN-YunjianNeural · tts-speed=1.2",
378
+ "Two content modes (one is required):",
379
+ " generate AI writes the script. --topic / -t <text> + optional --duration -d",
380
+ " fixed You supply the script. --script <text-or-@file>",
381
+ "",
382
+ "Pace (visual rhythm hint to the LLM):",
383
+ " slow fewer scenes, glued to semantic boundaries",
384
+ " normal balance semantic edges with visual variety (default)",
385
+ " fast split long semantic chunks into multiple shots for variety",
386
386
  "",
387
- "Param groups:",
388
- " Content : --mode --title -n --split-mode --min/max-narration-words --min/max-image-prompt-words",
389
- " Visual : --frame-template --image-model --style --prompt-prefix",
390
- " TTS : --tts-voice --tts-speed --tts-inference-mode --voice-id",
391
- " BGM : --bgm --bgm-volume --bgm-mode",
392
- " Output : --video-fps --template-params -o --no-download --no-wait --poll-ms --timeout-ms",
393
- " Workflow: --recipe --redo --dry-run",
387
+ "Defaults:",
388
+ " duration=45s · pace=normal · frame-template=1080x1920/image_default.html · tts-speed=1.0",
394
389
  "",
395
390
  "Style presets (--style <preset>) — quick shortcut for --prompt-prefix:",
396
391
  formatStylePresetsList(),
397
392
  " · Pass --prompt-prefix to override (raw string always wins).",
398
- " · Omit both to use the server's configured default style.",
393
+ " · Omit both to use the server's configured default style (if any).",
399
394
  "",
400
395
  "Output behavior:",
401
- " No flag → saves to ./<title>-<task_id>.mp4 in current directory, prints the path",
402
- " -o <path> → saves to that exact path (must include filename, not just a directory)",
396
+ " No flag → saves to ./<title>-<task_id>.mp4 in current directory, prints the path",
397
+ " -o <path> → saves to that exact path (must include filename)",
403
398
  " --no-download → skips local save, just prints JSON result with video_url",
404
399
  " (when stdout is piped, --no-download is implied automatically)",
405
400
  "",
406
- "Explore available resources (separate commands):",
407
- " reelforge templates list # all HTML templates",
408
- " reelforge tts voices --locale zh # Edge TTS voice ids",
409
- " reelforge bgm list # built-in BGM files",
410
- "",
411
- "Examples (`rf` is a short alias for `reelforge`):",
412
- " # Minimum — saves to ./<title>-<short_id>.mp4 in cwd",
401
+ "Examples (`rf` is the short alias):",
402
+ " # Minimum AI writes a 45s script",
413
403
  ' rf create "为什么我们还没找到外星文明?"',
414
404
  "",
415
- " # Pick the exact output path",
416
- ' rf create "..." -o ./videos/space.mp4',
417
- "",
418
- " # Long script from a file, fixed mode (no LLM scriptwriting)",
419
- " rf create @./script.txt --mode fixed --split-mode paragraph",
420
- "",
421
- " # Landscape (1920x1080)",
422
- ' rf create "..." --frame-template 1920x1080/image_default.html',
405
+ " # 60-second video with slow visual pace",
406
+ ' rf create "..." -d 60 -p slow',
423
407
  "",
424
- " # Add BGM",
425
- ' rf create "..." --bgm bgm/Echoes.mp3 --bgm-volume 0.3 --bgm-mode loop',
408
+ " # Your own script, you decide the wording",
409
+ " rf create --script @./script.txt",
410
+ ' rf create --script "整段文案文本..."',
426
411
  "",
427
- " # Change voice + speed",
428
- ' rf create "..." --tts-voice zh-CN-XiaoxiaoNeural --tts-speed 1.0',
412
+ " # Custom HTML template (auto-detected when --frame-template is a local path)",
413
+ " rf create '...' --frame-template ./my-brand.html",
429
414
  "",
430
415
  " # Pick a built-in style preset",
431
416
  ' rf create "..." --style cinematic',
432
- ' rf create "美食教程" --style photorealistic',
433
417
  "",
434
- " # Free-form style write your own prefix from scratch",
435
- ' rf create "..." --prompt-prefix "Studio Ghibli, pastel, dreamy"',
418
+ " # Cross-scene character consistency (auto-switches image model)",
419
+ ' rf create "主角小女孩的一天" --character-ref ./hero.png',
420
+ " rf create '...' --character-ref ./hero.png --style-ref ./mood.jpg",
421
+ ' rf create "..." --style-ref https://example.com/style.png',
436
422
  "",
437
- " # Custom HTML template (auto-detected when --frame-template is a local path)",
438
- " rf create '...' --frame-template ./my-brand.html",
439
- " # ↳ default type=image (best-practice; AI image generated per scene).",
440
- " # ↳ pure-text template? declare `--frame-template-type static`",
441
- " # OR add `<meta name=\"template:type\" content=\"static\">` inside the HTML.",
442
- "",
443
- " # Full recipe in one file",
423
+ " # Recipe + replay last",
444
424
  " rf create --recipe ./space.recipe.json",
445
- "",
446
- " # Override a field on top of a recipe",
447
- ' rf create --recipe ./space.recipe.json --text "新主题" -n 8',
448
- "",
449
- " # Replay last successful create",
450
- " rf create --redo",
451
- "",
452
- " # Replay last but tweak one knob",
453
- " rf create --redo --tts-speed 1.0",
425
+ " rf create --redo # replay last successful create",
426
+ " rf create --redo -p fast # replay with one knob tweaked",
454
427
  "",
455
428
  " # See exactly what would be sent (no submission)",
456
- ' rf create "..." -n 7 --bgm bgm/Echoes.mp3 --dry-run',
429
+ ' rf create "..." -d 60 --dry-run',
457
430
  "",
458
- " # Pipe-friendly: skip local download, take video_url for downstream",
431
+ " # Pipe-friendly",
459
432
  ' rf create "..." --no-download --json | jq -r .video_url',
460
- "",
461
- "Recipe file format (every field is optional; all keys match the REST API body):",
462
- " {",
463
- ' "text": "为什么我们还没找到外星文明?",',
464
- ' "n_scenes": 7,',
465
- ' "frame_template": "1080x1920/image_default.html",',
466
- ' "image_model": "rx-image-flux",',
467
- ' "prompt_prefix": "Minimalist matchstick figure style",',
468
- ' "tts_voice": "zh-CN-YunjianNeural",',
469
- ' "tts_speed": 1.2,',
470
- ' "bgm_path": "bgm/Echoes.mp3",',
471
- ' "bgm_volume": 0.2',
472
- " }",
473
433
  ].join("\n"))
474
434
  .action(async (topicArg, opts) => {
475
- // Validate -o early so we fail before submitting a paid task
476
435
  if (opts.output) {
477
436
  await validateOutputPath(opts.output);
478
437
  }
479
- // Expand --style preset to --prompt-prefix unless an explicit
480
- // --prompt-prefix is also given (the raw string always wins).
438
+ // Expand --style preset to --prompt-prefix unless --prompt-prefix is given.
481
439
  if (opts.style) {
482
440
  const preset = STYLE_PRESETS[opts.style];
483
441
  if (!preset) {
@@ -487,6 +445,9 @@ export function registerCreate(program) {
487
445
  opts.promptPrefix = preset.prefix;
488
446
  }
489
447
  }
448
+ if (opts.pace && !["slow", "normal", "fast"].includes(opts.pace)) {
449
+ throw new Error(`--pace must be one of slow|normal|fast (got: ${opts.pace})`);
450
+ }
490
451
  // 1. Layer defaults: --redo → --recipe → CLI opts → positional topic
491
452
  let body = {};
492
453
  if (opts.redo) {
@@ -502,51 +463,62 @@ export function registerCreate(program) {
502
463
  body = { ...body, ...recipe };
503
464
  info(`Loaded recipe from ${opts.recipe}`);
504
465
  }
505
- // CLI options layer
506
466
  const fromOpts = optsToBody(opts);
507
467
  body = { ...body, ...fromOpts };
508
- // Capture the raw text input (with potential @-prefix) for filename derivation.
509
- // After `resolveText` we lose the @path file stem mapping.
510
- const rawTextInput = topicArg ?? (typeof body.text === "string" ? body.text : undefined);
511
- const fileStemFromAt = rawTextInput?.startsWith("@")
512
- ? path.parse(rawTextInput.slice(1)).name
513
- : undefined;
514
- // Positional topic wins for `text` (with @file support)
468
+ // Positional arg always wins for `topic`.
469
+ // Resolve @file prefix on whichever of topic/script is set.
470
+ const rawTopicInput = topicArg ?? (typeof body.topic === "string" ? body.topic : undefined);
471
+ const fileStemFromAt = rawTopicInput?.startsWith("@") ? path.parse(rawTopicInput.slice(1)).name :
472
+ body.script?.startsWith("@") ? path.parse(body.script.slice(1)).name :
473
+ undefined;
515
474
  if (topicArg) {
516
- body.text = await resolveText(topicArg);
475
+ body.topic = await resolveTextOrFile(topicArg);
476
+ }
477
+ else if (typeof body.topic === "string") {
478
+ body.topic = await resolveTextOrFile(body.topic);
479
+ }
480
+ if (typeof body.script === "string") {
481
+ body.script = await resolveTextOrFile(body.script);
517
482
  }
518
- else if (typeof body.text === "string") {
519
- body.text = await resolveText(body.text);
483
+ // Resolve refs: local file paths → data: URIs (RelayX accepts both
484
+ // https:// and data: in image_urls). Done after layering so a recipe
485
+ // can carry refs by path too.
486
+ const resolvedChar = await resolveRefImage(body.character_ref, "--character-ref");
487
+ const resolvedStyle = await resolveRefImage(body.style_ref, "--style-ref");
488
+ if (resolvedChar !== undefined)
489
+ body.character_ref = resolvedChar;
490
+ else
491
+ delete body.character_ref;
492
+ if (resolvedStyle !== undefined)
493
+ body.style_ref = resolvedStyle;
494
+ else
495
+ delete body.style_ref;
496
+ // Validate content mode
497
+ const hasTopic = typeof body.topic === "string" && body.topic.trim().length > 0;
498
+ const hasScript = typeof body.script === "string" && body.script.trim().length > 0;
499
+ if (!hasTopic && !hasScript) {
500
+ throw new Error("either --topic (or positional arg) or --script is required.");
520
501
  }
521
- if (!body.text) {
522
- throw new Error("text is required pass it as the positional arg, or via --text / --recipe / --redo.");
502
+ if (hasTopic && hasScript) {
503
+ throw new Error("--topic and --script are mutually exclusive (pick one mode).");
523
504
  }
524
- // 2. Apply defaults for fields still unset
525
- const finalBody = {
526
- ...DEFAULTS,
527
- ...body,
528
- text: body.text,
529
- };
530
- // When the user supplied inline HTML, the DEFAULTS' `frame_template`
531
- // key is irrelevant — drop it so the server-side request body stays
532
- // clean and the dry-run output isn't misleading.
505
+ // 3. Final body drop empty / null fields
506
+ const finalBody = { ...body };
533
507
  if (finalBody.frame_template_inline && finalBody.frame_template) {
534
508
  delete finalBody.frame_template;
535
509
  }
536
- // 3. Estimate cost
510
+ // 4. Estimate cost
537
511
  const estimate = estimateUnits(finalBody);
538
- // 4. Dry-run: print & exit
539
512
  if (opts.dryRun) {
540
513
  info("--- DRY RUN ---");
541
514
  info("Final request body:");
542
515
  print(finalBody);
543
- info(`Estimated cost: ${estimate} units`);
516
+ info(`Estimated cost: ${estimate} units`);
544
517
  info("(use without --dry-run to actually submit)");
545
518
  return;
546
519
  }
547
520
  info(`Submitting create task (≈ ${estimate} units)...`);
548
521
  const submitted = await post("/api/v1/pipelines/standard", finalBody);
549
- // 5. Save as last (post-submit, before wait — so even cancelled tasks can be replayed)
550
522
  await saveLastCreate(finalBody).catch((e) => {
551
523
  warn(`Could not save last-create.json: ${e.message}`);
552
524
  });
@@ -563,11 +535,6 @@ export function registerCreate(program) {
563
535
  throw new Error(t.error || `Task ended with status ${t.status}`);
564
536
  }
565
537
  const result = t.result;
566
- // Decide where (or whether) to save locally.
567
- // -o → that exact path
568
- // --no-download → skip
569
- // stdout piped → skip (clig.dev: don't dump binary-touching side effects into a script)
570
- // otherwise → auto-named in cwd
571
538
  if (result?.video_url) {
572
539
  const stdoutIsPipe = !process.stdout.isTTY;
573
540
  const skipDownload = !!opts.noDownload || (stdoutIsPipe && !opts.output);
@@ -576,11 +543,10 @@ export function registerCreate(program) {
576
543
  savedPath = opts.output;
577
544
  }
578
545
  else if (!skipDownload) {
546
+ const topicForFilename = hasTopic && finalBody.topic ? finalBody.topic : undefined;
579
547
  savedPath = computeDefaultFilename({
580
548
  resultTitle: result.title,
581
- bodyTitle: finalBody.title,
582
- mode: finalBody.mode,
583
- rawTextInput,
549
+ topic: topicForFilename,
584
550
  fileStemFromAt,
585
551
  taskId: t.id,
586
552
  ext: "mp4",