reelforge 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -79,15 +79,14 @@ Run `rf <command> --help` for full details on any of these.
79
79
  | `tts voices [--locale zh]` | List supported Edge TTS voices |
80
80
  | `images generate -p <prompt> -m rx-image-flux` | Image generation via RelayX (rx-image-z / rx-image-flux / rx-image-qwen) |
81
81
 
82
- ### Content generation
82
+ ### Content / audio / subtitle atomics
83
83
 
84
84
  | command | what it does |
85
85
  |---|---|
86
- | `content narration -t <topic>` | Generate N narration sentences from a topic |
87
- | `content split -s <script>` | Split a fixed script into narrations |
88
- | `content image-prompts -i <file>` | English image prompts from narration list |
89
- | `content title -c <content>` | Generate a short video title |
90
- | `content asset-script --intent ... --assets <file>` | Asset-based scene script |
86
+ | `content scene-plan -t <topic>` | Single LLM call: title + master script + per-scene image prompts (replaces the old narration / split / image-prompts / title commands) |
87
+ | `content scene-plan --script <text-or-@file>` | Same, but the user supplies the script verbatim; the LLM only segments it and writes image prompts |
88
+ | `audio transcribe -f <file>` / `--url <url>` | RelayX paraformer-v2 ASR with word + segment timestamps |
89
+ | `subtitles split -t <text-or-@file>` | Deterministic tiered-punctuation subtitle line splitter (pure function, zero billing) |
91
90
 
92
91
  ### Composition
93
92
 
@@ -106,9 +105,11 @@ Run `rf <command> --help` for full details on any of these.
106
105
 
107
106
  All `pipelines *` commands submit an **async task** and (by default) poll until it finishes with a live progress indicator on stderr. Use `--no-wait` to return immediately with a `task_id`, then `rf tasks wait <id>` later.
108
107
 
108
+ The standard pipeline is **audio-first**: scene-plan → one-shot TTS → ASR alignment → per-scene image generation → per-subtitle-line frame rendering → ffmpeg mux. One continuous master audio track; image cuts at scene boundaries; subtitle cuts at line boundaries.
109
+
109
110
  | command | what it does |
110
111
  |---|---|
111
- | `pipelines standard -t <topic\|script>` | Topic / script narration frames final MP4 |
112
+ | `pipelines standard -t <topic>` (or `--script <text>`) | Audio-first pipeline; `-d/--duration` and `-p/--pace` are the two main knobs |
112
113
 
113
114
  ### Resources
114
115
 
@@ -132,35 +133,49 @@ All `pipelines *` commands submit an **async task** and (by default) poll until
132
133
  ## Examples
133
134
 
134
135
  ```bash
135
- # 1. One-click out a video (auto-saves to ./<title>-<id>.mp4 in cwd)
136
+ # 1. One-click video (45s default, AI writes the script)
136
137
  rf create "为什么我们还没找到外星文明?"
137
138
 
138
- # 2. Same, but with a fixed script and explicit output path
139
+ # 2. Longer video with a slower visual rhythm
140
+ rf create "深夜便利店的灯光" -d 90 -p slow
141
+
142
+ # 3. Your own script — no narration-splitting on your side; the pipeline handles it
143
+ rf create --script @./my-script.txt
144
+ rf create --script "雨水缓缓滑落在玻璃窗上,像是无声的泪珠。"
145
+
146
+ # 4. Pick a built-in visual style preset
147
+ rf create "美食教程" --style photorealistic
148
+
149
+ # 5. Pipeline form with explicit output path
139
150
  rf pipelines standard \
140
- -t "Hello world. This is scene one.\n\nThis is scene two." \
141
- --mode fixed --title "Smoke Test" \
142
- --frame-template 1080x1920/static_default.html \
143
- --tts-voice en-US-AriaNeural -o smoke.mp4
151
+ --script @./script.txt \
152
+ --frame-template 1080x1920/image_default.html \
153
+ -p normal -o smoke.mp4
144
154
 
145
- # 3. Inspect existing tasks & redownload a finished video
155
+ # 6. Inspect existing tasks & redownload a finished video
146
156
  rf tasks list --limit 5
147
157
  rf history get <task-id> --download recovered.mp4
148
158
 
149
- # 4. JSON pipe for automation
159
+ # 7. Atomics for stand-alone use
160
+ rf content scene-plan -t "雨天的玻璃窗" -d 45 --json | jq .scenes
161
+ rf audio transcribe -f narration.mp3 --json | jq '.words[:5]'
162
+ rf subtitles split -t @./narration.txt --min 10 --hard-max 24
163
+
164
+ # 8. JSON pipe for automation
150
165
  rf llm presets --json | jq '.[].defaultModel'
151
166
 
152
- # 5. Configure & test LLM (self-hosted)
167
+ # 9. Configure & test LLM (self-hosted)
153
168
  rf config set llm.api_key rx-xxxxx # RelayX key (or your own provider key)
154
169
  rf config set llm.base_url https://relayx.timor419.com/v1
155
170
  rf config set llm.model anthropic/claude-4-7-sonnet
156
171
  rf llm chat -p 'one-sentence summary of antifragile'
157
172
 
158
- # 6. Use your own HTML template (no PR/release needed)
159
- # Any of -t / --frame-template that points to a local .html file is read and
160
- # sent inline. Declare size inside the file via
161
- # <meta name="template:width" content="1080">
162
- # <meta name="template:height" content="1920">
163
- # or pass --size 1080x1920 on the CLI.
173
+ # 10. Use your own HTML template (no PR/release needed)
174
+ # Any --frame-template that points to a local .html file is read and sent
175
+ # inline. Declare size inside the file via
176
+ # <meta name="template:width" content="1080">
177
+ # <meta name="template:height" content="1920">
178
+ # or pass --frame-template-size 1080x1920.
164
179
  rf templates show 1080x1920/image_default.html -o my-brand.html # copy a preset
165
180
  # ...edit my-brand.html to suit your style...
166
181
  rf templates preview ./my-brand.html --title "Hello" -o preview.png
@@ -180,12 +195,32 @@ rf templates show 1080x1920/image_default.html -o my-brand.html # save and ed
180
195
 
181
196
  `{{title}}`, `{{text}}`, `{{image}}`, `{{index}}` are reserved built-ins; everything else uses the `{{name:type=default}}` DSL (`type` ∈ `text|number|color|bool`). Pass extras through `--values '{"author":"Alice"}'` (or `template_params` on the pipeline API).
182
197
 
198
+ #### Template type — does the pipeline generate an AI image per scene?
199
+
200
+ When you ship an inline template through `rf create` / `rf pipelines standard`, ReelForge needs to know whether each scene should kick off RelayX image generation. Resolution priority (high → low):
201
+
202
+ 1. Explicit flag — `--frame-template-type image|static|asset` (or `frame_template_type` in the API body).
203
+ 2. Inside the HTML — `<meta name="template:type" content="image">` (or `static` / `asset`).
204
+ 3. **Default: `image`** — best practice for zero-config users. If your template doesn't reference scene imagery (pure-text card, etc.), declare `static` explicitly to skip image generation and its cost.
205
+
206
+ The placeholder `{{image}}` no longer doubles as a type signal — declare type explicitly.
207
+
183
208
  Limits and safety:
184
209
 
185
210
  - Max 2 MB per inline HTML.
186
211
  - The render sandbox blocks `file://`, loopback / private / link-local IPs, CGNAT range, cloud-metadata, and `*.local` / `*.internal` hostnames. So your template can only reference public `https`/`http` resources or `data:` URIs.
187
212
  - If the CLI is talking to a hosted server, local-path `--image` won't reach the server; either upload to `rf files upload` first or use an HTTPS URL / data: URI.
188
213
 
214
+ #### API field reference
215
+
216
+ | endpoint | inline HTML field | size field | type field |
217
+ |---|---|---|---|
218
+ | `POST /api/v1/frames/render` | `template_html` | `size` | — (n/a, no image generation) |
219
+ | `POST /api/v1/templates/preview` | `template_html` | `size` | — |
220
+ | `POST /api/v1/pipelines/standard` | `frame_template_inline` | `frame_template_size` | `frame_template_type` |
221
+
222
+ The pipeline endpoint uses the `frame_template_*` prefix because it already has a `frame_template` field (preset key). The single-frame endpoints use the shorter `template_html` because they don't.
223
+
189
224
  ## Tip — getting unstuck
190
225
 
191
226
  Every level has `--help`:
@@ -0,0 +1,73 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { uploadMultipart, post } from "../client.js";
4
+ import { print } from "../utils/output.js";
5
+ export function registerAudio(program) {
6
+ const audio = program
7
+ .command("audio")
8
+ .description("Audio atomics — transcription / forced alignment")
9
+ .helpOption("-h, --help", "show help");
10
+ audio
11
+ .command("transcribe")
12
+ .description("Transcribe an audio file to text + word-level timestamps (RelayX paraformer-v2)")
13
+ .helpOption("-h, --help", "show help")
14
+ .option("-f, --file <path>", "local audio file (mp3/wav/m4a). Use this OR --url.")
15
+ .option("-u, --url <url>", "remote audio URL — server downloads and transcribes.")
16
+ .option("-l, --language <code>", "language hint (e.g. zh, en). Optional — paraformer-v2 auto-detects.")
17
+ .option("-m, --model <id>", "override ASR model id (default alibaba/paraformer-v2)")
18
+ .option("-o, --output <file>", "write the full JSON response to this file as well as stdout")
19
+ .addHelpText("after", [
20
+ "",
21
+ "Examples:",
22
+ " rf audio transcribe -f ./narration.mp3",
23
+ " rf audio transcribe --url https://example.com/clip.mp3 --language zh",
24
+ " rf audio transcribe -f ./voice.wav --json | jq '.words[:5]'",
25
+ ].join("\n"))
26
+ .action(async (opts) => {
27
+ if (!opts.file && !opts.url) {
28
+ throw new Error("either --file or --url is required");
29
+ }
30
+ if (opts.file && opts.url) {
31
+ throw new Error("--file and --url are mutually exclusive");
32
+ }
33
+ let r;
34
+ if (opts.file) {
35
+ const buf = await fs.readFile(opts.file);
36
+ const filename = path.basename(opts.file);
37
+ const ext = path.extname(filename).toLowerCase();
38
+ const mime = ext === ".wav" ? "audio/wav" :
39
+ ext === ".m4a" ? "audio/mp4" :
40
+ ext === ".flac" ? "audio/flac" :
41
+ ext === ".ogg" ? "audio/ogg" :
42
+ "audio/mpeg";
43
+ const fileBlob = new File([new Uint8Array(buf)], filename, { type: mime });
44
+ const fields = { file: fileBlob };
45
+ if (opts.language)
46
+ fields.language = opts.language;
47
+ if (opts.model)
48
+ fields.model = opts.model;
49
+ r = await uploadMultipart("/api/v1/audio/transcribe", fields);
50
+ }
51
+ else {
52
+ const body = { audio_url: opts.url };
53
+ if (opts.language)
54
+ body.language = opts.language;
55
+ if (opts.model)
56
+ body.model = opts.model;
57
+ r = await post("/api/v1/audio/transcribe", body);
58
+ }
59
+ if (opts.output) {
60
+ await fs.writeFile(opts.output, JSON.stringify(r, null, 2), "utf-8");
61
+ }
62
+ print({
63
+ model: r.model,
64
+ language: r.language,
65
+ duration: r.duration,
66
+ text: r.text,
67
+ n_segments: r.segments.length,
68
+ n_words: r.words.length,
69
+ segments: r.segments,
70
+ words: r.words,
71
+ });
72
+ });
73
+ }
@@ -4,109 +4,63 @@ import { print } from "../utils/output.js";
4
4
  export function registerContent(program) {
5
5
  const content = program
6
6
  .command("content")
7
- .description("LLM-based content generators (script, image prompts, titles, asset scripts)")
7
+ .description("Content atomics scene planning (master script + image prompts in one call)")
8
8
  .helpOption("-h, --help", "show help");
9
9
  content
10
- .command("narration")
11
- .description("Generate N narration sentences from a topic")
10
+ .command("scene-plan")
11
+ .description("Generate a master script + per-scene image prompts (replaces narration/image-prompts/title)")
12
12
  .helpOption("-h, --help", "show help")
13
- .requiredOption("-t, --topic <text>", "the video topic")
14
- .option("-n, --n-scenes <n>", "number of scenes", parseInt, 5)
15
- .option("--min-words <n>", "minimum words per narration", parseInt, 5)
16
- .option("--max-words <n>", "maximum words per narration", parseInt, 20)
17
- .addHelpText("after", "\nExample:\n reelforge content narration -t 'why we explore space' -n 5")
18
- .action(async (opts) => {
19
- const r = await post("/api/v1/content/narration", {
20
- topic: opts.topic,
21
- n_scenes: opts.nScenes,
22
- min_words: opts.minWords,
23
- max_words: opts.maxWords,
24
- });
25
- print(r);
26
- });
27
- content
28
- .command("split")
29
- .description("Split a fixed script into narrations (no LLM cost)")
30
- .helpOption("-h, --help", "show help")
31
- .requiredOption("-s, --script <text>", "raw script text (use @file for a file)")
32
- .option("-m, --mode <mode>", "paragraph | line | sentence", "paragraph")
33
- .addHelpText("after", "\nExample:\n reelforge content split -s @script.txt -m sentence")
34
- .action(async (opts) => {
35
- let script = opts.script;
36
- if (script.startsWith("@"))
37
- script = await fs.readFile(script.slice(1), "utf-8");
38
- const r = await post("/api/v1/content/narration/split", { script, mode: opts.mode });
39
- print(r);
40
- });
41
- content
42
- .command("image-prompts")
43
- .description("Generate English image-generation prompts from narrations")
44
- .helpOption("-h, --help", "show help")
45
- .requiredOption("-i, --narrations <file>", "file with one narration per line (or @file)")
46
- .option("--prefix <text>", "style prefix prepended to each prompt")
47
- .option("--min-words <n>", "minimum words per prompt", parseInt, 30)
48
- .option("--max-words <n>", "maximum words per prompt", parseInt, 60)
49
- .addHelpText("after", "\nExample:\n reelforge content image-prompts -i narrations.txt --prefix 'cinematic'")
50
- .action(async (opts) => {
51
- let src = opts.narrations;
52
- if (src.startsWith("@"))
53
- src = src.slice(1);
54
- const text = await fs.readFile(src, "utf-8");
55
- const narrations = text.split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
56
- const r = await post("/api/v1/content/image-prompts", {
57
- narrations,
58
- prompt_prefix: opts.prefix,
59
- min_words: opts.minWords,
60
- max_words: opts.maxWords,
61
- });
62
- print(r);
63
- });
64
- content
65
- .command("title")
66
- .description("Generate a short video title from content")
67
- .helpOption("-h, --help", "show help")
68
- .requiredOption("-c, --content <text>", "content to title (use @file)")
69
- .option("--max-length <n>", "maximum characters", parseInt, 15)
70
- .action(async (opts) => {
71
- let body = opts.content;
72
- if (body.startsWith("@"))
73
- body = await fs.readFile(body.slice(1), "utf-8");
74
- const r = await post("/api/v1/content/title", {
75
- content: body,
76
- max_length: opts.maxLength,
77
- });
78
- print(r);
79
- });
80
- content
81
- .command("asset-script")
82
- .description("Generate a scene script that assigns user-uploaded assets to scenes")
83
- .helpOption("-h, --help", "show help")
84
- .requiredOption("--intent <text>", "video intent / purpose")
85
- .option("--title <text>", "optional video title")
86
- .option("--duration <s>", "target duration in seconds", parseInt, 30)
87
- .requiredOption("--assets <file>", "file with one asset per line, format: `path | description`")
13
+ .option("-t, --topic <text>", "video topic; AI writes the script (generate mode). Use @file for disk input.")
14
+ .option("--script <text>", "your own master script text (fixed mode). Use @file for disk input.")
15
+ .option("-d, --duration <sec>", "target video duration in seconds (generate mode; default 45)", (v) => parseInt(v, 10))
16
+ .option("-p, --pace <pace>", "visual rhythm hint: slow | normal | fast (default normal)")
17
+ .option("-m, --model <id>", "override LLM model")
88
18
  .addHelpText("after", [
89
19
  "",
90
- "Example assets.txt:",
91
- " data/uploads/cat.jpg | A fluffy cat",
92
- " data/uploads/dog.jpg | A happy dog wagging tail",
20
+ "Two modes (exactly one required):",
21
+ " generate -t / --topic <text> LLM writes both script and image prompts",
22
+ " fixed --script @file or text LLM only segments + writes image prompts; text unchanged verbatim",
23
+ "",
24
+ "Examples:",
25
+ " rf content scene-plan -t '深夜便利店' -d 60 -p slow",
26
+ " rf content scene-plan --script @./my-script.txt -p fast",
27
+ " rf content scene-plan -t '雨天的玻璃窗' --json | jq .scenes",
93
28
  ].join("\n"))
94
29
  .action(async (opts) => {
95
- const raw = await fs.readFile(opts.assets, "utf-8");
96
- const assets = raw
97
- .split(/\r?\n/)
98
- .map((s) => s.trim())
99
- .filter(Boolean)
100
- .map((line) => {
101
- const [p, d] = line.split("|").map((s) => s.trim());
102
- return { path: p, description: d || "" };
103
- });
104
- const r = await post("/api/v1/content/asset-script", {
105
- intent: opts.intent,
106
- title: opts.title,
107
- duration: opts.duration,
108
- assets,
30
+ const hasTopic = typeof opts.topic === "string" && opts.topic.length > 0;
31
+ const hasScript = typeof opts.script === "string" && opts.script.length > 0;
32
+ if (!hasTopic && !hasScript) {
33
+ throw new Error("either --topic / -t or --script is required");
34
+ }
35
+ if (hasTopic && hasScript) {
36
+ throw new Error("--topic and --script are mutually exclusive");
37
+ }
38
+ if (opts.pace && !["slow", "normal", "fast"].includes(opts.pace)) {
39
+ throw new Error(`--pace must be one of slow|normal|fast (got: ${opts.pace})`);
40
+ }
41
+ let topic = opts.topic;
42
+ let script = opts.script;
43
+ if (topic?.startsWith("@"))
44
+ topic = (await fs.readFile(topic.slice(1), "utf-8")).trim();
45
+ if (script?.startsWith("@"))
46
+ script = (await fs.readFile(script.slice(1), "utf-8")).trim();
47
+ const body = {};
48
+ if (topic)
49
+ body.topic = topic;
50
+ if (script)
51
+ body.script = script;
52
+ if (opts.duration !== undefined)
53
+ body.duration = opts.duration;
54
+ if (opts.pace)
55
+ body.pace = opts.pace;
56
+ if (opts.model)
57
+ body.model = opts.model;
58
+ const r = await post("/api/v1/content/scene-plan", body);
59
+ print({
60
+ mode: r.mode,
61
+ title: r.title,
62
+ n_scenes: r.scenes.length,
63
+ scenes: r.scenes,
109
64
  });
110
- print(r);
111
65
  });
112
66
  }
@@ -1,4 +1,5 @@
1
1
  import fs from "node:fs/promises";
2
+ import fsSync from "node:fs";
2
3
  import path from "node:path";
3
4
  import os from "node:os";
4
5
  import { post } from "../client.js";
@@ -7,30 +8,58 @@ import { downloadTo } from "../utils/download.js";
7
8
  import { info, print, success, warn } from "../utils/output.js";
8
9
  const LAST_CREATE_PATH = path.join(os.homedir(), ".reelforge", "last-create.json");
9
10
  // ── Cost estimation (mirrors server src/lib/billing.ts) ──────────
10
- const IMAGE_UNITS = 3; // matches ATOMIC_UNITS["images.generate"] in src/lib/billing.ts
11
- const TTS_RELAYX_UNITS = 1; // matches ATOMIC_UNITS["tts.relayx"]
11
+ const PLAN_UNITS = 1;
12
+ const TTS_UNITS = 1;
13
+ const ASR_UNITS = 1;
14
+ const IMAGE_UNITS = 3;
15
+ const CHARS_PER_SEC_ZH = 5;
16
+ const TARGET_SEC_PER_SCENE = 8;
12
17
  function estimateUnits(body) {
13
- const mode = body.mode || "generate";
14
- const titleExplicit = !!body.title;
15
- const N = body.n_scenes ?? 5;
16
- // Template type from filename prefix
17
- const tplKey = body.frame_template || "1080x1920/static_default.html";
18
- const tplBase = (tplKey.split("/").pop() || "").toLowerCase();
19
- const tplType = tplBase.startsWith("static_")
20
- ? "static"
21
- : tplBase.startsWith("asset_")
22
- ? "asset"
23
- : "image";
24
- const mediaPerFrame = tplType === "image" ? IMAGE_UNITS : 0;
25
- const ttsMode = body.tts_inference_mode || "edge";
26
- const ttsPerFrame = ttsMode === "relayx" ? TTS_RELAYX_UNITS : 0;
27
- const narrations = mode === "generate" ? 1 : 0;
28
- const title = titleExplicit ? 0 : 1;
29
- const imagePrompts = tplType === "static" ? 0 : 1;
30
- return narrations + title + imagePrompts + N * (ttsPerFrame + mediaPerFrame);
18
+ let tplType;
19
+ if (body.frame_template_inline) {
20
+ if (body.frame_template_type) {
21
+ tplType = body.frame_template_type;
22
+ }
23
+ else {
24
+ const m = body.frame_template_inline.match(/<meta[^>]+name=["']template:type["'][^>]+content=["']([a-z]+)["']/i);
25
+ const v = m?.[1].toLowerCase();
26
+ tplType = v === "static" || v === "asset" || v === "image" ? v : "image";
27
+ }
28
+ }
29
+ else {
30
+ const tplKey = body.frame_template || "1080x1920/image_default.html";
31
+ const tplBase = (tplKey.split("/").pop() || "").toLowerCase();
32
+ tplType = tplBase.startsWith("static_")
33
+ ? "static"
34
+ : tplBase.startsWith("asset_")
35
+ ? "asset"
36
+ : "image";
37
+ }
38
+ // Estimated scene count: from script length (fixed) or from duration (generate).
39
+ let estimatedScenes;
40
+ if (body.script) {
41
+ const estSec = body.script.length / CHARS_PER_SEC_ZH;
42
+ estimatedScenes = Math.max(2, Math.round(estSec / TARGET_SEC_PER_SCENE));
43
+ }
44
+ else {
45
+ const dur = body.duration ?? 45;
46
+ estimatedScenes = Math.max(2, Math.round(dur / TARGET_SEC_PER_SCENE));
47
+ }
48
+ const imageUnits = tplType === "image" ? estimatedScenes * IMAGE_UNITS : 0;
49
+ return PLAN_UNITS + TTS_UNITS + ASR_UNITS + imageUnits;
31
50
  }
32
51
  // ── Helpers ─────────────────────────────────────────────────────
33
- async function resolveText(input) {
52
+ function looksLikeLocalHtmlPath(value) {
53
+ if (/^[.~]|^\//.test(value))
54
+ return true;
55
+ if (value.includes("\\"))
56
+ return true;
57
+ if (value.endsWith(".html") && fsSync.existsSync(value))
58
+ return true;
59
+ return false;
60
+ }
61
+ /** `@file` prefix → load file contents; raw text → return as-is. */
62
+ async function resolveTextOrFile(input) {
34
63
  if (input.startsWith("@")) {
35
64
  const file = input.slice(1);
36
65
  return (await fs.readFile(file, "utf-8")).trim();
@@ -59,14 +88,6 @@ async function saveLastCreate(body) {
59
88
  await fs.writeFile(LAST_CREATE_PATH, JSON.stringify(body, null, 2) + "\n", "utf-8");
60
89
  }
61
90
  // ── Filename derivation ─────────────────────────────────────────
62
- //
63
- // Cascade (highest → lowest):
64
- // 1. result.title — server's actual video title (LLM or explicit)
65
- // 2. body.title — user-supplied --title (pre-task fallback)
66
- // 3. raw topic (mode=generate, length ≤ 60, no @-prefix)
67
- // 4. @file stem — when text was loaded from @./script.txt
68
- // 5. "reelforge" literal
69
- // Always suffixed with "-<task_id[:8]>" to avoid collisions.
70
91
  const FILENAME_MAX_CHARS = 40;
71
92
  function sanitizeFilename(name) {
72
93
  const cleaned = name
@@ -86,14 +107,8 @@ function computeDefaultFilename(args) {
86
107
  if (args.resultTitle && args.resultTitle.trim()) {
87
108
  base = sanitizeFilename(args.resultTitle);
88
109
  }
89
- else if (args.bodyTitle && args.bodyTitle.trim()) {
90
- base = sanitizeFilename(args.bodyTitle);
91
- }
92
- else if (args.mode === "generate" &&
93
- args.rawTextInput &&
94
- !args.rawTextInput.startsWith("@") &&
95
- Array.from(args.rawTextInput).length <= 60) {
96
- base = sanitizeFilename(args.rawTextInput);
110
+ else if (args.topic && Array.from(args.topic).length <= 60) {
111
+ base = sanitizeFilename(args.topic);
97
112
  }
98
113
  else if (args.fileStemFromAt) {
99
114
  base = sanitizeFilename(args.fileStemFromAt);
@@ -118,57 +133,54 @@ async function validateOutputPath(out) {
118
133
  /** Camel-case CLI options → snake_case body, only including provided fields */
119
134
  function optsToBody(opts) {
120
135
  const out = {};
121
- if (opts.text !== undefined)
122
- out.text = opts.text;
123
- if (opts.mode !== undefined)
124
- out.mode = opts.mode;
125
- if (opts.title !== undefined)
126
- out.title = opts.title;
127
- if (opts.nScenes !== undefined)
128
- out.n_scenes = opts.nScenes;
129
- if (opts.splitMode !== undefined)
130
- out.split_mode = opts.splitMode;
131
- if (opts.ttsInferenceMode !== undefined)
132
- out.tts_inference_mode = opts.ttsInferenceMode;
133
- if (opts.ttsVoice !== undefined)
134
- out.tts_voice = opts.ttsVoice;
135
- if (opts.voiceId !== undefined)
136
- out.voice_id = opts.voiceId;
137
- if (opts.ttsSpeed !== undefined)
138
- out.tts_speed = opts.ttsSpeed;
136
+ if (opts.topic !== undefined)
137
+ out.topic = opts.topic;
138
+ if (opts.script !== undefined)
139
+ out.script = opts.script;
140
+ if (opts.duration !== undefined)
141
+ out.duration = opts.duration;
142
+ if (opts.pace !== undefined)
143
+ out.pace = opts.pace;
144
+ if (opts.llmModel !== undefined)
145
+ out.llm_model = opts.llmModel;
146
+ if (opts.ttsModel !== undefined)
147
+ out.tts_model = opts.ttsModel;
148
+ if (opts.asrModel !== undefined)
149
+ out.asr_model = opts.asrModel;
139
150
  if (opts.imageModel !== undefined)
140
151
  out.image_model = opts.imageModel;
141
- if (opts.frameTemplate !== undefined)
142
- out.frame_template = opts.frameTemplate;
143
152
  if (opts.promptPrefix !== undefined)
144
153
  out.prompt_prefix = opts.promptPrefix;
145
- if (opts.bgm !== undefined)
146
- out.bgm_path = opts.bgm;
147
- if (opts.bgmVolume !== undefined)
148
- out.bgm_volume = opts.bgmVolume;
149
- if (opts.bgmMode !== undefined)
150
- out.bgm_mode = opts.bgmMode;
151
- if (opts.minNarrationWords !== undefined)
152
- out.min_narration_words = opts.minNarrationWords;
153
- if (opts.maxNarrationWords !== undefined)
154
- out.max_narration_words = opts.maxNarrationWords;
155
- if (opts.minImagePromptWords !== undefined)
156
- out.min_image_prompt_words = opts.minImagePromptWords;
157
- if (opts.maxImagePromptWords !== undefined)
158
- out.max_image_prompt_words = opts.maxImagePromptWords;
154
+ if (opts.voiceId !== undefined)
155
+ out.voice_id = opts.voiceId;
156
+ if (opts.ttsSpeed !== undefined)
157
+ out.tts_speed = opts.ttsSpeed;
159
158
  if (opts.videoFps !== undefined)
160
159
  out.video_fps = opts.videoFps;
160
+ if (opts.frameTemplate !== undefined) {
161
+ if (looksLikeLocalHtmlPath(opts.frameTemplate)) {
162
+ const abs = path.resolve(opts.frameTemplate);
163
+ if (!fsSync.existsSync(abs)) {
164
+ throw new Error(`--frame-template: local file not found: ${abs}`);
165
+ }
166
+ out.frame_template_inline = fsSync.readFileSync(abs, "utf-8");
167
+ }
168
+ else {
169
+ out.frame_template = opts.frameTemplate;
170
+ }
171
+ }
172
+ if (opts.frameTemplateSize !== undefined)
173
+ out.frame_template_size = opts.frameTemplateSize;
174
+ if (opts.frameTemplateType !== undefined)
175
+ out.frame_template_type = opts.frameTemplateType;
161
176
  if (opts.templateParams !== undefined)
162
177
  out.template_params = opts.templateParams;
178
+ if (opts.subtitleMinChars !== undefined)
179
+ out.subtitle_min_chars = opts.subtitleMinChars;
180
+ if (opts.subtitleHardMax !== undefined)
181
+ out.subtitle_hard_max = opts.subtitleHardMax;
163
182
  return out;
164
183
  }
165
- const DEFAULTS = {
166
- mode: "generate",
167
- n_scenes: 5,
168
- frame_template: "1080x1920/image_default.html",
169
- tts_voice: "zh-CN-YunjianNeural",
170
- tts_speed: 1.2,
171
- };
172
184
  const STYLE_PRESETS = {
173
185
  matchstick: {
174
186
  prefix: "Minimalist black-and-white matchstick figure style illustration, clean lines, simple sketch style",
@@ -256,7 +268,6 @@ const STYLE_PRESETS = {
256
268
  scene: "奢华品牌 / 复古优雅",
257
269
  },
258
270
  };
259
- // CJK chars take 2 display columns in monospace terminals; pad accordingly.
260
271
  function displayWidth(s) {
261
272
  let w = 0;
262
273
  for (const c of s)
@@ -282,34 +293,32 @@ function formatStylePresetsList() {
282
293
  export function registerCreate(program) {
283
294
  program
284
295
  .command("create [topic]")
285
- .description("One-click: topic → fully-generated MP4. 23 tunable params + recipe files.")
296
+ .description("One-click: topic (or your own script) → fully-generated MP4. Audio-first pipeline.")
286
297
  .helpOption("-h, --help", "show help")
287
- // --- Content ---
288
- .option("-t, --text <text>", "topic (mode=generate) or fixed script (mode=fixed). Prefix with @ to read from a file (e.g. @script.txt).")
289
- .option("--mode <mode>", "generate | fixed (default: generate)")
290
- .option("--title <text>", "explicit video title (default: LLM-generated from topic)")
291
- .option("-n, --n-scenes <N>", "number of scenes", (v) => parseInt(v, 10))
292
- .option("--split-mode <mode>", "paragraph | line | sentence (mode=fixed only)")
293
- .option("--min-narration-words <N>", "narration min words per scene", (v) => parseInt(v, 10))
294
- .option("--max-narration-words <N>", "narration max words per scene", (v) => parseInt(v, 10))
295
- .option("--min-image-prompt-words <N>", "image prompt min words", (v) => parseInt(v, 10))
296
- .option("--max-image-prompt-words <N>", "image prompt max words", (v) => parseInt(v, 10))
298
+ // --- Content (exactly one of --topic / --script) ---
299
+ .option("-t, --topic <text>", "video topic; AI writes the script (mode=generate). Prefix with @file to read from disk.")
300
+ .option("--script <text>", "your own master script text; AI just plans scenes + visuals (mode=fixed). Prefix with @file to read from disk.")
301
+ .option("-d, --duration <sec>", "target video duration in seconds (generate mode only; default 45). LLM aims for ~duration × 5 chars of narration.", (v) => parseInt(v, 10))
302
+ .option("-p, --pace <pace>", "visual rhythm hint passed to the LLM: slow | normal | fast (default normal). LLM still decides the actual scene count from semantic structure.")
297
303
  // --- Visual ---
298
- .option("--frame-template <key>", "HTML frame template, e.g. 1080x1920/image_default.html")
304
+ .option("--frame-template <keyOrPath>", "HTML frame template: preset key (e.g. 1080x1920/image_default.html) OR path to a local .html (auto-sent inline)")
305
+ .option("--frame-template-size <wxh>", "size for inline HTML when the file lacks <meta template:width|height>, e.g. 1080x1920")
306
+ .option("--frame-template-type <type>", "inline template type: image (default) | static | asset. Controls whether AI image generation runs per scene.")
299
307
  .option("--image-model <id>", "RelayX image model (rx-image-z | rx-image-flux | rx-image-qwen)")
300
308
  .option("--prompt-prefix <text>", "raw style prefix prepended to every image prompt (overrides --style)")
301
- .option("--style <preset>", "image style preset — shortcut for --prompt-prefix; see 'Style presets' below for the full list")
309
+ .option("--style <preset>", "image style preset — shortcut for --prompt-prefix; see 'Style presets' below")
302
310
  // --- Audio (TTS) ---
303
- .option("--tts-voice <id>", "TTS voice id; for edge use e.g. zh-CN-YunjianNeural / en-US-AriaNeural; for relayx use vox voice ids (default: 专业解说)")
304
- .option("--tts-speed <n>", "speech speed 0.5..2", parseFloat)
305
- .option("--tts-inference-mode <mode>", "edge (default, local Microsoft Edge TTS) | relayx (vox/index-tts-2 via RelayX)")
306
- .option("--voice-id <id>", "alias of --tts-voice (legacy compat)")
307
- // --- Audio (BGM) ---
308
- .option("--bgm <path>", "background music file path (server-side relative to bgm/)")
309
- .option("--bgm-volume <n>", "BGM volume 0..1", parseFloat)
310
- .option("--bgm-mode <mode>", "loop | once")
311
+ .option("--voice-id <id>", "RelayX TTS voice id (default 专业解说); see `rf tts voices`")
312
+ .option("--tts-speed <n>", "speech speed 0.5..2 (default 1.0)", parseFloat)
313
+ // --- Service overrides ---
314
+ .option("--llm-model <id>", "override the LLM model used for scene-plan")
315
+ .option("--tts-model <id>", "override the TTS model (default vox/index-tts-2)")
316
+ .option("--asr-model <id>", "override the ASR model (default alibaba/paraformer-v2)")
317
+ // --- Subtitle splitter knobs (advanced) ---
318
+ .option("--subtitle-min-chars <N>", "subtitle line min chars (default 10)", (v) => parseInt(v, 10))
319
+ .option("--subtitle-hard-max <N>", "subtitle line absolute max chars (default 24)", (v) => parseInt(v, 10))
311
320
  // --- Output / extra ---
312
- .option("--video-fps <n>", "output video fps", (v) => parseInt(v, 10))
321
+ .option("--video-fps <n>", "output video fps (default 30)", (v) => parseInt(v, 10))
313
322
  .option("--template-params <json>", "extra template placeholders as JSON string", (v) => {
314
323
  try {
315
324
  return JSON.parse(v);
@@ -323,104 +332,68 @@ export function registerCreate(program) {
323
332
  .option("--redo", "replay last successful create from ~/.reelforge/last-create.json")
324
333
  .option("--dry-run", "print the final request body + estimated units; do NOT submit")
325
334
  .option("--no-wait", "submit and return task_id immediately (do not poll)")
326
- .option("-o, --output <file>", "save the final video to this exact path (must include filename, e.g. ./out/space.mp4). Default: auto-named file in current directory.")
327
- .option("--no-download", "do not save the video locally — just print the JSON result with video_url")
335
+ .option("-o, --output <file>", "save the final video to this exact path (must include filename, e.g. ./out/space.mp4).")
336
+ .option("--no-download", "do not save the video locally — just print JSON with video_url")
328
337
  .option("--poll-ms <ms>", "poll interval while waiting", (v) => parseInt(v, 10), 1500)
329
338
  .option("--timeout-ms <ms>", "max wait time before aborting (default unlimited)", (v) => parseInt(v, 10))
330
339
  .addHelpText("after", [
331
340
  "",
332
- "Defaults match the /create web page:",
333
- " mode=generate · n-scenes=5 · frame-template=1080x1920/image_default.html",
334
- " tts-voice=zh-CN-YunjianNeural · tts-speed=1.2",
341
+ "Two content modes (one is required):",
342
+ " generate AI writes the script. --topic / -t <text> + optional --duration -d",
343
+ " fixed You supply the script. --script <text-or-@file>",
344
+ "",
345
+ "Pace (visual rhythm hint to the LLM):",
346
+ " slow fewer scenes, glued to semantic boundaries",
347
+ " normal balance semantic edges with visual variety (default)",
348
+ " fast split long semantic chunks into multiple shots for variety",
335
349
  "",
336
- "Param groups:",
337
- " Content : --mode --title -n --split-mode --min/max-narration-words --min/max-image-prompt-words",
338
- " Visual : --frame-template --image-model --style --prompt-prefix",
339
- " TTS : --tts-voice --tts-speed --tts-inference-mode --voice-id",
340
- " BGM : --bgm --bgm-volume --bgm-mode",
341
- " Output : --video-fps --template-params -o --no-download --no-wait --poll-ms --timeout-ms",
342
- " Workflow: --recipe --redo --dry-run",
350
+ "Defaults:",
351
+ " duration=45s · pace=normal · frame-template=1080x1920/image_default.html · tts-speed=1.0",
343
352
  "",
344
353
  "Style presets (--style <preset>) — quick shortcut for --prompt-prefix:",
345
354
  formatStylePresetsList(),
346
355
  " · Pass --prompt-prefix to override (raw string always wins).",
347
- " · Omit both to use the server's configured default style.",
356
+ " · Omit both to use the server's configured default style (if any).",
348
357
  "",
349
358
  "Output behavior:",
350
- " No flag → saves to ./<title>-<task_id>.mp4 in current directory, prints the path",
351
- " -o <path> → saves to that exact path (must include filename, not just a directory)",
359
+ " No flag → saves to ./<title>-<task_id>.mp4 in current directory, prints the path",
360
+ " -o <path> → saves to that exact path (must include filename)",
352
361
  " --no-download → skips local save, just prints JSON result with video_url",
353
362
  " (when stdout is piped, --no-download is implied automatically)",
354
363
  "",
355
- "Explore available resources (separate commands):",
356
- " reelforge templates list # all HTML templates",
357
- " reelforge tts voices --locale zh # Edge TTS voice ids",
358
- " reelforge bgm list # built-in BGM files",
359
- "",
360
- "Examples (`rf` is a short alias for `reelforge`):",
361
- " # Minimum — saves to ./<title>-<short_id>.mp4 in cwd",
364
+ "Examples (`rf` is the short alias):",
365
+ " # Minimum — AI writes a 45s script",
362
366
  ' rf create "为什么我们还没找到外星文明?"',
363
367
  "",
364
- " # Pick the exact output path",
365
- ' rf create "..." -o ./videos/space.mp4',
366
- "",
367
- " # Long script from a file, fixed mode (no LLM scriptwriting)",
368
- " rf create @./script.txt --mode fixed --split-mode paragraph",
368
+ " # 60-second video with slow visual pace",
369
+ ' rf create "..." -d 60 -p slow',
369
370
  "",
370
- " # Landscape (1920x1080)",
371
- ' rf create "..." --frame-template 1920x1080/image_default.html',
371
+ " # Your own script, you decide the wording",
372
+ " rf create --script @./script.txt",
373
+ ' rf create --script "整段文案文本..."',
372
374
  "",
373
- " # Add BGM",
374
- ' rf create "..." --bgm bgm/Echoes.mp3 --bgm-volume 0.3 --bgm-mode loop',
375
- "",
376
- " # Change voice + speed",
377
- ' rf create "..." --tts-voice zh-CN-XiaoxiaoNeural --tts-speed 1.0',
375
+ " # Custom HTML template (auto-detected when --frame-template is a local path)",
376
+ " rf create '...' --frame-template ./my-brand.html",
378
377
  "",
379
378
  " # Pick a built-in style preset",
380
379
  ' rf create "..." --style cinematic',
381
- ' rf create "美食教程" --style photorealistic',
382
- "",
383
- " # Free-form style — write your own prefix from scratch",
384
- ' rf create "..." --prompt-prefix "Studio Ghibli, pastel, dreamy"',
385
380
  "",
386
- " # Full recipe in one file",
381
+ " # Recipe + replay last",
387
382
  " rf create --recipe ./space.recipe.json",
388
- "",
389
- " # Override a field on top of a recipe",
390
- ' rf create --recipe ./space.recipe.json --text "新主题" -n 8',
391
- "",
392
- " # Replay last successful create",
393
- " rf create --redo",
394
- "",
395
- " # Replay last but tweak one knob",
396
- " rf create --redo --tts-speed 1.0",
383
+ " rf create --redo # replay last successful create",
384
+ " rf create --redo -p fast # replay with one knob tweaked",
397
385
  "",
398
386
  " # See exactly what would be sent (no submission)",
399
- ' rf create "..." -n 7 --bgm bgm/Echoes.mp3 --dry-run',
387
+ ' rf create "..." -d 60 --dry-run',
400
388
  "",
401
- " # Pipe-friendly: skip local download, take video_url for downstream",
389
+ " # Pipe-friendly",
402
390
  ' rf create "..." --no-download --json | jq -r .video_url',
403
- "",
404
- "Recipe file format (every field is optional; all keys match the REST API body):",
405
- " {",
406
- ' "text": "为什么我们还没找到外星文明?",',
407
- ' "n_scenes": 7,',
408
- ' "frame_template": "1080x1920/image_default.html",',
409
- ' "image_model": "rx-image-flux",',
410
- ' "prompt_prefix": "Minimalist matchstick figure style",',
411
- ' "tts_voice": "zh-CN-YunjianNeural",',
412
- ' "tts_speed": 1.2,',
413
- ' "bgm_path": "bgm/Echoes.mp3",',
414
- ' "bgm_volume": 0.2',
415
- " }",
416
391
  ].join("\n"))
417
392
  .action(async (topicArg, opts) => {
418
- // Validate -o early so we fail before submitting a paid task
419
393
  if (opts.output) {
420
394
  await validateOutputPath(opts.output);
421
395
  }
422
- // Expand --style preset to --prompt-prefix unless an explicit
423
- // --prompt-prefix is also given (the raw string always wins).
396
+ // Expand --style preset to --prompt-prefix unless --prompt-prefix is given.
424
397
  if (opts.style) {
425
398
  const preset = STYLE_PRESETS[opts.style];
426
399
  if (!preset) {
@@ -430,6 +403,9 @@ export function registerCreate(program) {
430
403
  opts.promptPrefix = preset.prefix;
431
404
  }
432
405
  }
406
+ if (opts.pace && !["slow", "normal", "fast"].includes(opts.pace)) {
407
+ throw new Error(`--pace must be one of slow|normal|fast (got: ${opts.pace})`);
408
+ }
433
409
  // 1. Layer defaults: --redo → --recipe → CLI opts → positional topic
434
410
  let body = {};
435
411
  if (opts.redo) {
@@ -445,45 +421,49 @@ export function registerCreate(program) {
445
421
  body = { ...body, ...recipe };
446
422
  info(`Loaded recipe from ${opts.recipe}`);
447
423
  }
448
- // CLI options layer
449
424
  const fromOpts = optsToBody(opts);
450
425
  body = { ...body, ...fromOpts };
451
- // Capture the raw text input (with potential @-prefix) for filename derivation.
452
- // After `resolveText` we lose the @path file stem mapping.
453
- const rawTextInput = topicArg ?? (typeof body.text === "string" ? body.text : undefined);
454
- const fileStemFromAt = rawTextInput?.startsWith("@")
455
- ? path.parse(rawTextInput.slice(1)).name
456
- : undefined;
457
- // Positional topic wins for `text` (with @file support)
426
+ // Positional arg always wins for `topic`.
427
+ // Resolve @file prefix on whichever of topic/script is set.
428
+ const rawTopicInput = topicArg ?? (typeof body.topic === "string" ? body.topic : undefined);
429
+ const fileStemFromAt = rawTopicInput?.startsWith("@") ? path.parse(rawTopicInput.slice(1)).name :
430
+ body.script?.startsWith("@") ? path.parse(body.script.slice(1)).name :
431
+ undefined;
458
432
  if (topicArg) {
459
- body.text = await resolveText(topicArg);
433
+ body.topic = await resolveTextOrFile(topicArg);
434
+ }
435
+ else if (typeof body.topic === "string") {
436
+ body.topic = await resolveTextOrFile(body.topic);
437
+ }
438
+ if (typeof body.script === "string") {
439
+ body.script = await resolveTextOrFile(body.script);
440
+ }
441
+ // Validate content mode
442
+ const hasTopic = typeof body.topic === "string" && body.topic.trim().length > 0;
443
+ const hasScript = typeof body.script === "string" && body.script.trim().length > 0;
444
+ if (!hasTopic && !hasScript) {
445
+ throw new Error("either --topic (or positional arg) or --script is required.");
460
446
  }
461
- else if (typeof body.text === "string") {
462
- body.text = await resolveText(body.text);
447
+ if (hasTopic && hasScript) {
448
+ throw new Error("--topic and --script are mutually exclusive (pick one mode).");
463
449
  }
464
- if (!body.text) {
465
- throw new Error("text is required — pass it as the positional arg, or via --text / --recipe / --redo.");
450
+ // 3. Final body — drop empty / null fields
451
+ const finalBody = { ...body };
452
+ if (finalBody.frame_template_inline && finalBody.frame_template) {
453
+ delete finalBody.frame_template;
466
454
  }
467
- // 2. Apply defaults for fields still unset
468
- const finalBody = {
469
- ...DEFAULTS,
470
- ...body,
471
- text: body.text,
472
- };
473
- // 3. Estimate cost
455
+ // 4. Estimate cost
474
456
  const estimate = estimateUnits(finalBody);
475
- // 4. Dry-run: print & exit
476
457
  if (opts.dryRun) {
477
458
  info("--- DRY RUN ---");
478
459
  info("Final request body:");
479
460
  print(finalBody);
480
- info(`Estimated cost: ${estimate} units`);
461
+ info(`Estimated cost: ${estimate} units`);
481
462
  info("(use without --dry-run to actually submit)");
482
463
  return;
483
464
  }
484
465
  info(`Submitting create task (≈ ${estimate} units)...`);
485
466
  const submitted = await post("/api/v1/pipelines/standard", finalBody);
486
- // 5. Save as last (post-submit, before wait — so even cancelled tasks can be replayed)
487
467
  await saveLastCreate(finalBody).catch((e) => {
488
468
  warn(`Could not save last-create.json: ${e.message}`);
489
469
  });
@@ -500,11 +480,6 @@ export function registerCreate(program) {
500
480
  throw new Error(t.error || `Task ended with status ${t.status}`);
501
481
  }
502
482
  const result = t.result;
503
- // Decide where (or whether) to save locally.
504
- // -o → that exact path
505
- // --no-download → skip
506
- // stdout piped → skip (clig.dev: don't dump binary-touching side effects into a script)
507
- // otherwise → auto-named in cwd
508
483
  if (result?.video_url) {
509
484
  const stdoutIsPipe = !process.stdout.isTTY;
510
485
  const skipDownload = !!opts.noDownload || (stdoutIsPipe && !opts.output);
@@ -513,11 +488,10 @@ export function registerCreate(program) {
513
488
  savedPath = opts.output;
514
489
  }
515
490
  else if (!skipDownload) {
491
+ const topicForFilename = hasTopic && finalBody.topic ? finalBody.topic : undefined;
516
492
  savedPath = computeDefaultFilename({
517
493
  resultTitle: result.title,
518
- bodyTitle: finalBody.title,
519
- mode: finalBody.mode,
520
- rawTextInput,
494
+ topic: topicForFilename,
521
495
  fileStemFromAt,
522
496
  taskId: t.id,
523
497
  ext: "mp4",
@@ -36,54 +36,74 @@ export function registerPipelines(program) {
36
36
  const pl = program
37
37
  .command("pipelines")
38
38
  .alias("pipeline")
39
- .description("End-to-end video pipelines (standard)")
39
+ .description("End-to-end video pipelines (standard, audio-first)")
40
40
  .helpOption("-h, --help", "show help");
41
41
  // ---------- standard ----------
42
42
  commonOptions(pl
43
43
  .command("standard")
44
- .description("Topic / script → narration → frames → final MP4")
44
+ .description("Audio-first pipeline: topic|script → master TTS → ASR → scene/subtitle layers → final MP4")
45
45
  .helpOption("-h, --help", "show help")
46
- .requiredOption("-t, --text <text>", "topic OR fixed script (use @file)")
47
- .option("--mode <mode>", "generate | fixed", "generate")
48
- .option("--title <text>", "explicit video title (skip LLM title gen)")
49
- .option("-n, --n-scenes <n>", "number of scenes (mode=generate)", parseInt, 5)
50
- .option("--split-mode <mode>", "paragraph | line | sentence (mode=fixed)", "paragraph")
51
- .option("--frame-template <keyOrPath>", "preset key (e.g. 1080x1920/static_default.html) OR path to a local .html file", "1080x1920/static_default.html")
46
+ .option("-t, --topic <text>", "video topic (mode=generate). Use @file to read from disk.")
47
+ .option("--script <text>", "your own master script text (mode=fixed). Use @file to read from disk.")
48
+ .option("-d, --duration <sec>", "target video duration in seconds (generate mode; default 45)", (v) => parseInt(v, 10))
49
+ .option("-p, --pace <pace>", "visual rhythm hint: slow | normal | fast (default normal)")
50
+ .option("--frame-template <keyOrPath>", "preset key (e.g. 1080x1920/image_default.html) OR path to a local .html file")
52
51
  .option("--frame-template-size <wxh>", "size for inline HTML when the file lacks <meta template:width|height>")
53
- .option("--image-model <id>", "RelayX image model (rx-image-z | rx-image-flux | rx-image-qwen) — only when template requires AI images")
54
- .option("--prompt-prefix <text>", "style prefix prepended to image prompts")
55
- .option("--tts-voice <id>", "Edge TTS voice", "zh-CN-YunjianNeural")
56
- .option("--tts-speed <n>", "speech speed (0.5..2)", parseFloat, 1.2)
57
- .option("--bgm <path>", "BGM file path")
58
- .option("--bgm-volume <n>", "BGM volume", parseFloat, 0.2)
52
+ .option("--frame-template-type <type>", "inline type: image (default) | static | asset")
53
+ .option("--image-model <id>", "RelayX image model (rx-image-z | rx-image-flux | rx-image-qwen)")
54
+ .option("--prompt-prefix <text>", "style prefix prepended to every image prompt")
55
+ .option("--voice-id <id>", "RelayX TTS voice id (default 专业解说); see `rf tts voices`")
56
+ .option("--tts-speed <n>", "speech speed (0.5..2; default 1.0)", parseFloat)
57
+ .option("--video-fps <n>", "output video fps (default 30)", (v) => parseInt(v, 10))
58
+ .option("--subtitle-min-chars <N>", "subtitle line min chars (default 10)", (v) => parseInt(v, 10))
59
+ .option("--subtitle-hard-max <N>", "subtitle line absolute max chars (default 24)", (v) => parseInt(v, 10))
59
60
  .addHelpText("after", [
60
61
  "",
61
- "Examples:",
62
- " reelforge pipelines standard -t 'why we explore space' -n 5 -o space.mp4",
63
- " reelforge pipelines standard -t @script.txt --mode fixed --split-mode paragraph --title 'My Show' -o out.mp4",
64
- " reelforge pipelines standard -t '宠物' --frame-template 1080x1920/image_default.html --image-model rx-image-flux --prompt-prefix 'cinematic'",
62
+ "Two content modes (exactly one required):",
63
+ " generate AI writes the script. --topic / -t <text> + optional --duration -d",
64
+ " fixed You supply the script. --script <text-or-@file>",
65
+ "",
66
+ "Pace (LLM visual rhythm hint): slow | normal | fast",
65
67
  "",
66
- " Custom HTML template (sent inline; no upload needed):",
67
- " reelforge pipelines standard -t '宠物' --frame-template ./my-brand.html -o final.mp4",
68
- " (declare size via <meta name=\"template:width|height\"> or pass --frame-template-size 1080x1920)",
68
+ "Examples:",
69
+ " rf pipelines standard -t 'why we explore space' -d 60 -o space.mp4",
70
+ " rf pipelines standard --script @script.txt -p slow -o out.mp4",
71
+ " rf pipelines standard -t '宠物' --frame-template ./my-brand.html -o final.mp4",
69
72
  ].join("\n"))).action(async (opts) => {
70
- let text = opts.text;
71
- if (text.startsWith("@"))
72
- text = await fs.readFile(text.slice(1), "utf-8");
73
- const tpl = resolveTemplateArg(opts.frameTemplate, opts.frameTemplateSize);
73
+ const hasTopic = typeof opts.topic === "string" && opts.topic.length > 0;
74
+ const hasScript = typeof opts.script === "string" && opts.script.length > 0;
75
+ if (!hasTopic && !hasScript) {
76
+ throw new Error("either --topic / -t or --script is required");
77
+ }
78
+ if (hasTopic && hasScript) {
79
+ throw new Error("--topic and --script are mutually exclusive");
80
+ }
81
+ if (opts.pace && !["slow", "normal", "fast"].includes(opts.pace)) {
82
+ throw new Error(`--pace must be one of slow|normal|fast (got: ${opts.pace})`);
83
+ }
84
+ let topic = opts.topic;
85
+ let script = opts.script;
86
+ if (topic?.startsWith("@"))
87
+ topic = await fs.readFile(topic.slice(1), "utf-8");
88
+ if (script?.startsWith("@"))
89
+ script = await fs.readFile(script.slice(1), "utf-8");
90
+ const tpl = opts.frameTemplate
91
+ ? resolveTemplateArg(opts.frameTemplate, opts.frameTemplateSize)
92
+ : {};
74
93
  await submitAndMaybeWait("/api/v1/pipelines/standard", {
75
- text,
76
- mode: opts.mode,
77
- title: opts.title,
78
- n_scenes: opts.nScenes,
79
- split_mode: opts.splitMode,
94
+ topic,
95
+ script,
96
+ duration: opts.duration,
97
+ pace: opts.pace,
80
98
  ...tpl,
99
+ frame_template_type: opts.frameTemplateType,
81
100
  image_model: opts.imageModel,
82
101
  prompt_prefix: opts.promptPrefix,
83
- tts_voice: opts.ttsVoice,
102
+ voice_id: opts.voiceId,
84
103
  tts_speed: opts.ttsSpeed,
85
- bgm_path: opts.bgm,
86
- bgm_volume: opts.bgmVolume,
104
+ video_fps: opts.videoFps,
105
+ subtitle_min_chars: opts.subtitleMinChars,
106
+ subtitle_hard_max: opts.subtitleHardMax,
87
107
  }, { wait: opts.wait, output: opts.output, pollMs: opts.pollMs, timeoutMs: opts.timeoutMs });
88
108
  });
89
109
  }
@@ -0,0 +1,40 @@
1
+ import fs from "node:fs/promises";
2
+ import { post } from "../client.js";
3
+ import { print } from "../utils/output.js";
4
+ export function registerSubtitles(program) {
5
+ const sub = program
6
+ .command("subtitles")
7
+ .alias("subtitle")
8
+ .description("Subtitle atomics — deterministic line splitter (no LLM, no billing)")
9
+ .helpOption("-h, --help", "show help");
10
+ sub
11
+ .command("split")
12
+ .description("Split a chunk of text into subtitle-sized lines using tiered punctuation priority")
13
+ .helpOption("-h, --help", "show help")
14
+ .requiredOption("-t, --text <text>", "text to split. Use @file to read from disk.")
15
+ .option("--min <N>", "minimum line length in chars (default 10)", (v) => parseInt(v, 10))
16
+ .option("--hard-max <N>", "absolute maximum line length in chars (default 24)", (v) => parseInt(v, 10))
17
+ .addHelpText("after", [
18
+ "",
19
+ "Rule:",
20
+ " Within [min, hard-max], pick the highest-tier punctuation; same tier → latest position.",
21
+ " Tier 1 (。!?) > Tier 2 (;:) > Tier 3 (,、)",
22
+ " No punctuation in window → force-cut at hard-max.",
23
+ "",
24
+ "Examples:",
25
+ " rf subtitles split -t '雨水缓缓滑落在玻璃窗上,像是无声的泪珠。'",
26
+ " rf subtitles split -t @./narration.txt --min 8 --hard-max 20",
27
+ ].join("\n"))
28
+ .action(async (opts) => {
29
+ let text = opts.text;
30
+ if (text.startsWith("@"))
31
+ text = (await fs.readFile(text.slice(1), "utf-8")).trim();
32
+ const body = { text };
33
+ if (opts.min !== undefined)
34
+ body.min_chars = opts.min;
35
+ if (opts.hardMax !== undefined)
36
+ body.hard_max = opts.hardMax;
37
+ const r = await post("/api/v1/subtitles/split", body);
38
+ print({ count: r.count, lines: r.lines });
39
+ });
40
+ }
package/dist/index.js CHANGED
@@ -19,6 +19,8 @@ import { registerModels } from "./commands/models.js";
19
19
  import { registerTts } from "./commands/tts.js";
20
20
  import { registerImages } from "./commands/images.js";
21
21
  import { registerContent } from "./commands/content.js";
22
+ import { registerAudio } from "./commands/audio.js";
23
+ import { registerSubtitles } from "./commands/subtitles.js";
22
24
  import { registerTemplates } from "./commands/templates.js";
23
25
  import { registerFrames } from "./commands/frames.js";
24
26
  import { registerCompositions } from "./commands/compositions.js";
@@ -70,7 +72,7 @@ program.addHelpText("afterAll", [
70
72
  " rf llm chat --prompt 'explain antifragile in 3 sentences'",
71
73
  " rf tts edge --text 'hello world' --voice en-US-AriaNeural -o out.mp3",
72
74
  " rf images generate --prompt 'a cat' --model rx-image-flux -o cat.png",
73
- " rf pipelines standard --text 'why we explore space' --tts-voice zh-CN-YunjianNeural",
75
+ " rf pipelines standard -t 'why we explore space' -d 60",
74
76
  " rf tasks list --status running",
75
77
  " rf config get",
76
78
  ].join("\n"));
@@ -81,6 +83,8 @@ registerModels(program);
81
83
  registerTts(program);
82
84
  registerImages(program);
83
85
  registerContent(program);
86
+ registerAudio(program);
87
+ registerSubtitles(program);
84
88
  registerTemplates(program);
85
89
  registerFrames(program);
86
90
  registerCompositions(program);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "reelforge",
3
- "version": "0.5.4",
3
+ "version": "0.6.0",
4
4
  "description": "CLI for ReelForge Studio — AI video engine. Installs as both `reelforge` and the short alias `rf`. Every REST API exposed as a command, with --help on every level.",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",