reelforge 1.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +295 -0
  2. package/bin/reelforge.js +8 -0
  3. package/dist/client.js +120 -0
  4. package/dist/commands/assets-workflow-text.js +22 -0
  5. package/dist/commands/assets.js +231 -0
  6. package/dist/commands/audio.js +73 -0
  7. package/dist/commands/auth.js +170 -0
  8. package/dist/commands/bgm.js +45 -0
  9. package/dist/commands/compose.js +293 -0
  10. package/dist/commands/compositions.js +143 -0
  11. package/dist/commands/config.js +62 -0
  12. package/dist/commands/content.js +66 -0
  13. package/dist/commands/cover.js +397 -0
  14. package/dist/commands/create.js +629 -0
  15. package/dist/commands/extract.js +102 -0
  16. package/dist/commands/fetch.js +129 -0
  17. package/dist/commands/files.js +56 -0
  18. package/dist/commands/health.js +12 -0
  19. package/dist/commands/history.js +44 -0
  20. package/dist/commands/images.js +88 -0
  21. package/dist/commands/llm.js +67 -0
  22. package/dist/commands/media.js +128 -0
  23. package/dist/commands/models.js +36 -0
  24. package/dist/commands/pipelines.js +142 -0
  25. package/dist/commands/platform.js +218 -0
  26. package/dist/commands/regen.js +134 -0
  27. package/dist/commands/render.js +82 -0
  28. package/dist/commands/script.js +128 -0
  29. package/dist/commands/styles.js +113 -0
  30. package/dist/commands/subtitles.js +246 -0
  31. package/dist/commands/tasks.js +59 -0
  32. package/dist/commands/tts.js +134 -0
  33. package/dist/index.js +173 -0
  34. package/dist/utils/config-file.js +37 -0
  35. package/dist/utils/download.js +13 -0
  36. package/dist/utils/file-upload.js +59 -0
  37. package/dist/utils/output.js +91 -0
  38. package/dist/utils/task-waiter.js +40 -0
  39. package/package.json +44 -0
package/README.md ADDED
@@ -0,0 +1,295 @@
1
+ # reelforge
2
+
3
+ > Turn a topic or script into a finished vertical video — narration, visuals, and subtitles, assembled for you. Every capability is a command, with `--help` at every level.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install -g reelforge
9
+ ```
10
+
11
+ Or use directly without install:
12
+
13
+ ```bash
14
+ npx reelforge <command>
15
+ ```
16
+
17
+ After install, two binaries are on your `PATH` — `reelforge` and the short alias `rf`. Both behave identically; the docs use `rf` from here on.
18
+
19
+ ```bash
20
+ rf --version # same as `reelforge --version`
21
+ ```
22
+
23
+ ## Quick start
24
+
25
+ The CLI ships pointing at the hosted service. Log in once, then call:
26
+
27
+ ```bash
28
+ npm install -g reelforge
29
+ rf login # opens browser; headless? rf login <api_key>
30
+ rf whoami # balance + api_keys
31
+ rf create "为什么我们还没找到外星文明?" # auto-saves to ./<title>-<id>.mp4 in cwd
32
+ ```
33
+
34
+ That's the whole story — no server to run.
35
+
36
+ ### Output behavior
37
+
38
+ | invocation | result |
39
+ |---|---|
40
+ | `rf create "..."` | Saves to `./<sanitized-title>-<task_id_short>.mp4`, prints the path |
41
+ | `rf create "..." -o ./videos/space.mp4` | Saves to that exact path (must include filename, not just a directory) |
42
+ | `rf create "..." --no-download` | Skips local save, prints JSON result with `video_url` |
43
+ | `rf create "..." \| jq .video_url` | When stdout is piped, download is skipped automatically |
44
+
45
+ ## Global options
46
+
47
+ | flag | description |
48
+ |---|---|
49
+ | `-s, --server <url>` | Server URL (overrides `$REELFORGE_SERVER`; defaults to the hosted service) |
50
+ | `-k, --api-key <key>` | API key (overrides `$REELFORGE_API_KEY` and `reelforge login` saved key) |
51
+ | `--json` | Output raw JSON instead of pretty text — pipe-friendly |
52
+ | `--quiet` | Suppress informational messages on stderr |
53
+ | `-v, --version` | Show CLI version |
54
+ | `-h, --help` | Show help (works on every sub-command) |
55
+
56
+ ## Command map
57
+
58
+ Run `rf <command> --help` for full details on any of these.
59
+
60
+ ### Core capabilities
61
+
62
+ | command | what it does |
63
+ |---|---|
64
+ | `llm chat -p <text>` | Send one prompt to the configured model |
65
+ | `llm presets` | List built-in model presets |
66
+ | `tts ... -t <text> -o out.mp3` | Text-to-speech (free local + cloud voice options) |
67
+ | `tts voices [--locale zh]` | List supported voices |
68
+ | `images generate -p <prompt>` | Image generation |
69
+
70
+ ### Content / audio / subtitle atomics
71
+
72
+ | command | what it does |
73
+ |---|---|
74
+ | `content scene-plan -t <topic>` | Single LLM call: title + master script + per-scene image prompts (replaces the old narration / split / image-prompts / title trio) |
75
+ | `content scene-plan --script <text-or-@file>` | Same, but the user supplies the script verbatim — LLM only segments and writes image prompts |
76
+ | `audio transcribe -f <file>` / `--url <url>` | Speech-to-text with word + segment timestamps |
77
+ | `subtitles split -t <text-or-@file>` | Deterministic tiered-punctuation subtitle line splitter (pure function, zero billing) |
78
+
79
+ ### Composition
80
+
81
+ | command | what it does |
82
+ |---|---|
83
+ | `templates list [--size 1080x1920] [--type image]` | List HTML frame templates |
84
+ | `templates preview <keyOrPath> [-o out.png]` | Render a preview from a preset key **or your own local .html file** |
85
+ | `templates show <key> [-o file.html]` | Print or save the source HTML of any preset — copy it as a starting point for a custom template |
86
+ | `frames render -t <keyOrPath> --title ... --text ...` | Render a single composed frame to PNG. `-t` accepts a preset key **or a local .html path** |
87
+ | `compositions concat <v1> <v2> -o out.mp4` | FFmpeg concat (+ optional BGM) |
88
+ | `compositions bgm -i video.mp4 --bgm bgm.mp3 -o out.mp4` | Add background music |
89
+ | `compositions image-to-video -i img.png -a aud.mp3 -o out.mp4` | Build video from image + audio |
90
+ | `compositions overlay -v video.mp4 --overlay overlay.png -o out.mp4` | Overlay PNG on video |
91
+
92
+ ### End-to-end pipelines
93
+
94
+ All `pipelines *` commands submit an **async task** and (by default) poll until it finishes with a live progress indicator on stderr. Use `--no-wait` to return immediately with a `task_id`, then `rf tasks wait <id>` later.
95
+
96
+ The standard pipeline turns a topic or script into a finished vertical video — narration, visuals, and subtitles are generated and assembled for you. One continuous narration track; visuals cut at scene boundaries; subtitles cut at line boundaries.
97
+
98
+ | command | what it does |
99
+ |---|---|
100
+ | `pipelines standard -t <topic>` (or `--script <text>`) | Audio-first pipeline; `-d/--duration` and `-p/--pace` are the two main knobs |
101
+
102
+ #### Composition knobs
103
+
104
+ Three independent axes — mix and match as you like:
105
+
106
+ | flag | values | default | what changes |
107
+ |---|---|---|---|
108
+ | `--motion` | `off` / `lite` / `max` | `lite` | per-scene zoompan + crossfade intensity |
109
+ | `--layout` | `full` / `blur-bg` / `letterbox` | `full` | how the image sits in the canvas |
110
+ | `--subtitle-style` | `plate` / `stroke` / `cinema` | `plate` | subtitle look |
111
+
112
+ **Layout presets**:
113
+ - `full` — image fills the whole 1080×1920 canvas. Generates a 1080×1920 image. High-impact; best for human portraits, landscapes, 9:16-native content.
114
+ - `blur-bg` — image at 1080×1080 centered, top/bottom is a gaussian-blurred copy of the **same image** moving in sync with the foreground. Generates a 1080×1080 image (cheaper + no wasted pixels). Best for charts, screenshots, non-9:16 source content (小红书 / 抖音 style).
115
+ - `letterbox` — image at 1080×1080 centered, top/bottom is a solid matte (CSS color). Generates a 1080×1080 image. Cinematic / calm. Customize the matte with `--layout-matte-color "#1a1a1a"` (default `black`).
116
+
117
+ Examples:
118
+ ```bash
119
+ rf create "财经日报" --layout blur-bg # 小红书 / 抖音
120
+ rf create "纪录片片段" --layout letterbox --motion max # 电影感
121
+ rf create "..." --layout letterbox --layout-matte-color "#1a1a1a" # 柔和黑
122
+ ```
123
+
124
+ ### Resources
125
+
126
+ | command | what it does |
127
+ |---|---|
128
+ | `bgm list / upload <file> / delete <name>` | Manage background music |
129
+ | `files list / upload <file> / download <path> / delete <path>` | Manage user assets |
130
+
131
+ ### System
132
+
133
+ | command | what it does |
134
+ |---|---|
135
+ | `config get` | Read server config (keys masked) |
136
+ | `config set <key> <value>` | Update a dotted-path setting (e.g. `llm.api_key sk-xxx`) |
137
+ | `config patch <file>` | Apply a JSON-merge patch |
138
+ | `tasks list [--status running]` | List recent tasks |
139
+ | `tasks get <id>` / `tasks wait <id>` / `tasks cancel <id>` | Task lifecycle |
140
+ | `history list / get <id> / delete <id>` | Browse / delete completed runs |
141
+ | `health` | Server health + capability check |
142
+
143
+ ### Heavy brand customization (custom overlay templates)
144
+
145
+ `--motion` / `--layout` / `--subtitle-*` / `--brand-*` cover the common cases. For full visual identity ownership (custom path bar, accent decorations, light theme, footer block, etc.) pass a custom overlay HTML via `--frame-template <local.html | preset_key>`:
146
+
147
+ ```bash
148
+ rf templates show 1080x1920/default.html -o ./my-brand.html
149
+ # ...edit my-brand.html (change colors, add structure, etc.)...
150
+ rf create "我的视频" --frame-template ./my-brand.html
151
+ ```
152
+
153
+ **Contract for custom HTML**:
154
+
155
+ - Canvas is **1080×1920** (pipeline-fixed; don't declare a different size in `<meta>`).
156
+ - Background must be **transparent** — the scene image is composited by the renderer outside the HTML layer.
157
+ - **`{{image}}` no longer exists.** Using `<img src="{{image}}">` is a hard error at submit time. The image is composited by the renderer.
158
+ - The pipeline injects these placeholders for you:
159
+
160
+ | Placeholder | What it is |
161
+ |---|---|
162
+ | `{{title}}` `{{text}}` | per-frame content |
163
+ | `{{index}}` `{{total}}` | "scene N of M" — `{{total}}` is the LLM-decided scene count, don't hardcode |
164
+ | `{{layout}}` | `"full"` / `"blur-bg"` / `"letterbox"` — react via `body[data-layout="..."]` CSS to put title/subtitle in the matte zones when layout ≠ full |
165
+ | `{{subtitle_style}}` `{{subtitle_color}}` `{{subtitle_background}}` | subtitle preset + overrides |
166
+ | `{{brand_position}}` `{{brand_handle}}` `{{brand_slogan}}` `{{brand_logo}}` `{{brand_color}}` | brand-chrome inputs (use or ignore as you like) |
167
+
168
+ Inline HTML is hard-capped at 2 MB. The audio + motion + character-ref + scene-plan stages all keep working identically — only the overlay layer is yours.
169
+
170
+ **Publishing to short-video platforms (Douyin / TikTok / WeChat Channels)** — the default composition renders to the full 1080×1920 canvas, but those apps overlay UI on top/bottom/right and cover-crop ~96-180px on each side on taller phones. ReelForge does NOT bake platform-specific padding into the renderer. Look up reference safe-zone numbers + recommended `--media-anchor-y` values per platform:
171
+
172
+ ```bash
173
+ rf platform # overview table for all platforms
174
+ rf platform 抖音 # detail + anchor variants (alias of douyin)
175
+ rf platform tiktok
176
+ rf platform wechat # 视频号
177
+ ```
178
+
179
+ ## Examples
180
+
181
+ ```bash
182
+ # 1. Generate a video with a single command (45s default, AI writes the script)
183
+ rf create "为什么我们还没找到外星文明?"
184
+
185
+ # 2. Longer video with a slower visual rhythm
186
+ rf create "深夜便利店的灯光" -d 90 -p slow
187
+
188
+ # 3. Your own script — no narration-splitting on your side, the pipeline handles it
189
+ rf create --script @./my-script.txt
190
+ rf create --script "雨水缓缓滑落在玻璃窗上,像是无声的泪珠。"
191
+
192
+ # 4. Preview-first workflow: scrub the storyboard in the browser
193
+ # before paying for the MP4 render. Finalize with `rf render` when satisfied.
194
+ rf create "..." --preview-only # opens browser studio
195
+ rf render <task-id> # produce the MP4
196
+
197
+ # 5. Push the image square up to grow the bottom matte (抖音 long-description
198
+ # case); subtitle position follows the image automatically.
199
+ rf create "..." --layout blur-bg --media-anchor-y 0.40
200
+ # See `rf platform 抖音` for the safe range and per-scenario recommendations.
201
+
202
+ # 6. Encoder budget — pick a quality preset or set an exact bitrate.
203
+ rf create "..." --quality draft # ~1 Mbps, ~3-4× smaller than default
204
+ rf create "..." --video-bitrate 750k # exact bitrate
205
+ rf create "..." --crf 28 # fine-grained control
206
+
207
+ # 7. Pick a built-in visual style preset
208
+ rf create "美食教程" --style photorealistic
209
+
210
+ # 8. Pipeline form with explicit output path
211
+ rf pipelines standard \
212
+ --script @./script.txt \
213
+ --frame-template 1080x1920/image_default.html \
214
+ -p normal -o smoke.mp4
215
+
216
+ # 9. Inspect existing tasks & redownload a finished video
217
+ rf tasks list --limit 5
218
+ rf history get <task-id> --download recovered.mp4
219
+
220
+ # 10. Atomics for stand-alone use
221
+ rf content scene-plan -t "雨天的玻璃窗" -d 45 --json | jq .scenes
222
+ rf audio transcribe -f narration.mp3 --json | jq '.words[:5]'
223
+ rf subtitles split -t @./narration.txt --min 10 --hard-max 24
224
+
225
+ # 11. JSON pipe for automation
226
+ rf llm presets --json | jq '.[].defaultModel'
227
+
228
+ # 12. Use your own HTML template (no PR/release needed)
229
+ # Any --frame-template that points to a local .html file is read and sent
230
+ # inline. Declare size inside the file via
231
+ # <meta name="template:width" content="1080">
232
+ # <meta name="template:height" content="1920">
233
+ # or pass --frame-template-size 1080x1920.
234
+ rf templates show 1080x1920/image_default.html -o my-brand.html # copy a preset
235
+ # ...edit my-brand.html to suit your style...
236
+ rf templates preview ./my-brand.html --title "Hello" -o preview.png
237
+ rf frames render -t ./my-brand.html --values '{"author":"Alice"}' -o frame.png
238
+ rf pipelines standard -t "宠物" --frame-template ./my-brand.html -o final.mp4
239
+ ```
240
+
241
+ ### Custom HTML templates
242
+
243
+ Easiest way to start: grab a preset as a reference.
244
+
245
+ ```bash
246
+ rf templates list # see all keys
247
+ rf templates show 1080x1920/static_default.html # print to stdout
248
+ rf templates show 1080x1920/image_default.html -o my-brand.html # save and edit
249
+ ```
250
+
251
+ `{{title}}`, `{{text}}`, `{{index}}`, `{{total}}` are reserved built-ins auto-injected by the pipeline (`{{image}}` is also a reserved name, but pipeline overlay templates must not reference it — see "Contract for custom HTML" above); everything else uses the `{{name:type=default}}` DSL (`type` ∈ `text|number|color|bool`). Pass extras through `--values '{"author":"Alice"}'` (or `template_params` on the pipeline API).
252
+
253
+ - `{{index}}` — current scene number, 1-based
254
+ - `{{total}}` — scene count the LLM actually produced (use this for "scene N of M" badges; don't hardcode in `template_params`, the scene count is decided at runtime)
255
+
256
+ #### Template type — does the pipeline generate an AI image per scene?
257
+
258
+ When you ship an inline template through `rf create` / `rf pipelines standard`, ReelForge needs to know whether each scene should kick off image generation. Resolution priority (high → low):
259
+
260
+ 1. Explicit flag — `--frame-template-type image|static|asset` (or `frame_template_type` in the API body).
261
+ 2. Inside the HTML — `<meta name="template:type" content="image">` (or `static` / `asset`).
262
+ 3. **Default: `image`** — best practice for zero-config users. If your template doesn't reference scene imagery (pure-text card, etc.), declare `static` explicitly to skip image generation and its cost.
263
+
264
+ The placeholder `{{image}}` no longer doubles as a type signal — declare type explicitly.
265
+
266
+ Limits and safety:
267
+
268
+ - Max 2 MB per inline HTML.
269
+ - The render sandbox blocks `file://`, loopback / private / link-local IPs, CGNAT range, cloud-metadata, and `*.local` / `*.internal` hostnames. So your template can only reference public `https`/`http` resources or `data:` URIs.
270
+ - If the CLI is talking to a hosted server, local-path `--image` won't reach the server; either upload to `rf files upload` first or use an HTTPS URL / data: URI.
271
+
272
+ #### API field reference
273
+
274
+ | endpoint | inline HTML field | size field | type field |
275
+ |---|---|---|---|
276
+ | `POST /api/v1/frames/render` | `template_html` | `size` | — (n/a, no image generation) |
277
+ | `POST /api/v1/templates/preview` | `template_html` | `size` | — |
278
+ | `POST /api/v1/pipelines/standard` | `frame_template_inline` | `frame_template_size` | `frame_template_type` |
279
+
280
+ The pipeline endpoint uses the `frame_template_*` prefix because it already has a `frame_template` field (preset key). The single-frame endpoints use the shorter `template_html` because they don't.
281
+
282
+ ## Tip — getting unstuck
283
+
284
+ Every level has `--help`:
285
+
286
+ ```bash
287
+ rf --help # top-level overview
288
+ rf pipelines --help # list of pipelines
289
+ rf pipelines standard --help # full option reference
290
+ rf tts edge --help # one specific command
291
+ ```
292
+
293
+ ## License
294
+
295
+ Apache-2.0
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ // Thin shim so `reelforge` (alias `rf`) is invokable globally. The actual
3
+ // implementation is bundled to dist/ by `npm run build`.
4
/**
 * Report a failure to load or start the bundled CLI, then exit with status 1.
 * Prints the stack when the rejection value has one, otherwise the value itself.
 */
function reportFatal(err) {
  const report = err?.stack || err;
  // eslint-disable-next-line no-console
  console.error(report);
  process.exit(1);
}

// Dynamically load the bundled entry point; any rejection is routed to reportFatal.
import("../dist/index.js").then(undefined, reportFatal);
package/dist/client.js ADDED
@@ -0,0 +1,120 @@
1
+ import { readFileSync } from "node:fs";
2
+ import path from "node:path";
3
+ import os from "node:os";
4
+ const DEFAULT_SERVER = "https://reelforge.timor419.com";
5
/**
 * Read the saved CLI settings from `<home>/.reelforge/config.json`.
 *
 * Best-effort: any failure while locating, reading or parsing the file
 * yields an empty object instead of an error.
 *
 * @returns {object} The parsed JSON value when it is a non-null object,
 *   otherwise `{}`.
 */
function loadConfigSync() {
  let parsed;
  try {
    const configPath = path.join(os.homedir(), ".reelforge", "config.json");
    parsed = JSON.parse(readFileSync(configPath, "utf-8"));
  } catch {
    return {};
  }
  if (parsed === null || typeof parsed !== "object") {
    return {};
  }
  return parsed;
}
16
+ const fileConfig = loadConfigSync();
17
+ let overrideServer = null;
18
+ let overrideApiKey = null;
19
/**
 * Set the process-wide server override consulted first by `getServer()`.
 * A single trailing slash, if present, is dropped before storing.
 *
 * @param {string} url - Base URL of the server.
 */
export function setServer(url) {
  const trimmed = url.endsWith("/") ? url.slice(0, -1) : url;
  overrideServer = trimmed;
}
22
/**
 * Resolve the server base URL.
 *
 * Priority: explicit override (returned as stored) → `$REELFORGE_SERVER` →
 * `server` from the config file → built-in default. Env and config values
 * have one trailing slash removed.
 *
 * @returns {string} Base URL to prefix request paths with.
 */
export function getServer() {
  if (overrideServer) {
    return overrideServer;
  }
  const configured = process.env.REELFORGE_SERVER || fileConfig.server;
  return configured ? configured.replace(/\/$/, "") : DEFAULT_SERVER;
}
32
/**
 * Set the process-wide API key override; `getApiKey()` checks it before the
 * environment variable and the config file.
 *
 * @param {string} key - API key to use for subsequent requests.
 */
export function setApiKey(key) {
  overrideApiKey = key;
}
35
/**
 * Resolve the API key: explicit override → `$REELFORGE_API_KEY` →
 * `api_key` from the config file.
 *
 * @returns {string|null} The first non-empty candidate, or `null` when none is set.
 */
export function getApiKey() {
  return (
    overrideApiKey ||
    process.env.REELFORGE_API_KEY ||
    fileConfig.api_key ||
    null
  );
}
44
/**
 * Error thrown for any failed API call.
 *
 * Fields mirror the descriptor passed to the constructor:
 * - `status`  — HTTP status code (the request helper in this file uses 0 for
 *               network-level failures)
 * - `code`    — optional machine-readable error code from the response body
 * - `details` — optional extra payload from the response body
 * - `cause`   — optional underlying error, set only when the descriptor has one
 */
export class ApiCallError extends Error {
  status;
  code;
  details;
  /**
   * @param {{message: string, status: number, code?: string, details?: unknown, cause?: unknown}} err
   */
  constructor(err) {
    // Only forward `cause` when supplied so plain errors get no own `cause` property.
    super(err.message, err.cause === undefined ? undefined : { cause: err.cause });
    // Without this, instances print as a bare "Error: ..." in logs and stacks.
    this.name = "ApiCallError";
    this.status = err.status;
    this.code = err.code;
    this.details = err.details;
  }
}
55
/**
 * Perform one HTTP request against the configured server.
 *
 * - Prefixes `path` with the server base URL (adding a leading "/" if missing).
 * - Adds `Authorization: Bearer <key>` when an API key is configured and the
 *   caller did not supply an Authorization header of its own.
 * - Parses the body as JSON; a non-JSON body on a successful response is
 *   returned as raw text.
 *
 * @param {string} path - API path, with or without a leading slash.
 * @param {RequestInit} [init] - Options forwarded to `fetch`.
 * @returns {Promise<any>} Parsed JSON, `null` for an empty body, or raw text.
 * @throws {ApiCallError} With `status: 0` when the request cannot be sent;
 *   with the HTTP status when the body cannot be read or the response is not ok.
 */
async function request(path, init = {}) {
  const url = `${getServer()}${path.startsWith("/") ? path : `/${path}`}`;
  const headers = new Headers(init.headers);
  const key = getApiKey();
  // Header names are matched case-insensitively, so one lookup covers every spelling.
  if (key && !headers.has("Authorization")) {
    headers.set("Authorization", `Bearer ${key}`);
  }
  let res;
  try {
    res = await fetch(url, { ...init, headers });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    throw new ApiCallError({
      status: 0,
      message: `Network error contacting ${url}: ${msg}`,
      cause: err,
    });
  }
  let text;
  try {
    text = await res.text();
  } catch (err) {
    // The connection can drop after headers arrive; surface that as an
    // ApiCallError too instead of leaking a raw stream/TypeError.
    const msg = err instanceof Error ? err.message : String(err);
    throw new ApiCallError({
      status: res.status,
      message: `Failed to read response body from ${url}: ${msg}`,
      cause: err,
    });
  }
  let json = null;
  try {
    json = text ? JSON.parse(text) : null;
  } catch {
    if (!res.ok) {
      // Non-JSON error page: keep a bounded excerpt as the message.
      throw new ApiCallError({ status: res.status, message: text.slice(0, 500) });
    }
    return text;
  }
  if (!res.ok) {
    const err = json?.error;
    // Accept both `{error: {message, code, details}}` and `{error: "text"}`.
    const message = typeof err === "string" && err
      ? err
      : err?.message || `HTTP ${res.status}`;
    throw new ApiCallError({
      status: res.status,
      code: err?.code,
      message,
      details: err?.details,
    });
  }
  return json;
}
95
/**
 * Issue a request for `path` with default options.
 *
 * @param {string} path - API path.
 * @returns {Promise<any>} Whatever `request` resolves to.
 */
export function get(path) {
  const pending = request(path);
  return pending;
}
98
/**
 * POST `body` as JSON to `path`.
 *
 * @param {string} path - API path.
 * @param {unknown} body - Value serialized with `JSON.stringify`.
 * @returns {Promise<any>} Whatever `request` resolves to.
 */
export function post(path, body) {
  const init = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  };
  return request(path, init);
}
105
/**
 * PATCH `path` with `body` serialized as JSON.
 *
 * @param {string} path - API path.
 * @param {unknown} body - Value serialized with `JSON.stringify`.
 * @returns {Promise<any>} Whatever `request` resolves to.
 */
export function patch(path, body) {
  const init = {
    method: "PATCH",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  };
  return request(path, init);
}
112
/**
 * Issue a DELETE request for `path`.
 *
 * @param {string} path - API path.
 * @returns {Promise<any>} Whatever `request` resolves to.
 */
export function del(path) {
  const init = { method: "DELETE" };
  return request(path, init);
}
115
/**
 * POST `fields` to `path` as a multipart form.
 *
 * No Content-Type header is set here; the `FormData` body is handed to
 * `request` unchanged.
 *
 * @param {string} path - API path.
 * @param {Record<string, string|Blob>} fields - Form entries, appended in
 *   `Object.entries` order.
 * @returns {Promise<any>} Whatever `request` resolves to.
 */
export async function uploadMultipart(path, fields) {
  const form = new FormData();
  Object.entries(fields).forEach(([name, value]) => {
    form.append(name, value);
  });
  return request(path, { method: "POST", body: form });
}
@@ -0,0 +1,22 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ const __filename = fileURLToPath(import.meta.url);
5
+ const __dirname = path.dirname(__filename);
6
/**
 * Load the assets→video workflow guide.
 *
 * Tries `../data/assets-to-video-workflow.md` first, then
 * `../../../docs/assets-to-video-workflow.md` (both relative to this module's
 * directory). If neither can be read, a short built-in stub is returned.
 *
 * @returns {string} Markdown text of the guide.
 */
function loadWorkflowText() {
  // Unreadable candidates are expected; null means "try the next one".
  const readOrNull = (file) => {
    try {
      return fs.readFileSync(file, "utf8");
    } catch {
      return null;
    }
  };
  const primary = path.resolve(__dirname, "../data/assets-to-video-workflow.md");
  const secondary = path.resolve(__dirname, "../../../docs/assets-to-video-workflow.md");
  const fromPrimary = readOrNull(primary);
  if (fromPrimary !== null) {
    return fromPrimary;
  }
  const fromSecondary = readOrNull(secondary);
  if (fromSecondary !== null) {
    return fromSecondary;
  }
  return [
    "# 素材 → 视频 工作流\n\n",
    "用 `rf compose <spec.json>` 把图片/视频素材 + 旁白拼成竖屏视频。\n",
    "spec 的结构、字段与完整示例见 `rf compose --help`。\n",
  ].join("");
}
22
+ export const ASSETS_WORKFLOW_TEXT = loadWorkflowText();
@@ -0,0 +1,231 @@
1
+ import fs from "node:fs/promises";
2
+ import fsSync from "node:fs";
3
+ import path from "node:path";
4
+ import { post } from "../client.js";
5
+ import { info, isJson, print, success, warn } from "../utils/output.js";
6
+ import { ASSETS_WORKFLOW_TEXT } from "./assets-workflow-text.js";
7
// Lower-case file extensions (with leading dot) treated as still images.
const IMAGE_EXT = new Set(".jpg .jpeg .png .webp".split(" "));
// Lower-case file extensions (with leading dot) treated as videos.
const VIDEO_EXT = new Set(".mp4 .mov .webm .mkv .m4v".split(" "));
// Every accepted extension: images first, then videos.
const SUPPORTED_EXT = new Set([...IMAGE_EXT].concat([...VIDEO_EXT]));
// Worker count passed to pMap when describing a directory.
const BATCH_CONCURRENCY = 4;
11
// Extension → MIME type lookup used by mimeForExt.
const MIME_BY_EXT = new Map([
  [".png", "image/png"],
  [".jpg", "image/jpeg"],
  [".jpeg", "image/jpeg"],
  [".webp", "image/webp"],
  [".mp4", "video/mp4"],
  [".m4v", "video/mp4"],
  [".mov", "video/quicktime"],
  [".webm", "video/webm"],
  [".mkv", "video/x-matroska"],
]);

/**
 * Map a lower-case file extension (with leading dot) to its MIME type.
 *
 * @param {string} ext - Extension such as ".png".
 * @returns {string|null} MIME type, or `null` for an unknown extension.
 */
function mimeForExt(ext) {
  return MIME_BY_EXT.get(ext) ?? null;
}
25
/**
 * Classify a lower-case file extension as an image or a video.
 *
 * @param {string} ext - Extension with leading dot, e.g. ".mp4".
 * @returns {"image"|"video"|null} Asset kind, or `null` when unsupported.
 */
function typeForExt(ext) {
  return IMAGE_EXT.has(ext) ? "image" : VIDEO_EXT.has(ext) ? "video" : null;
}
32
/**
 * Turn a user-supplied asset reference into `{ url, type }`.
 *
 * Accepted inputs (surrounding whitespace is trimmed first):
 * - `data:image/...` / `data:video/...` URIs — passed through unchanged.
 * - `http(s)://` URLs — passed through; the type is inferred from the
 *   extension of the URL path.
 * - Local paths — the whole file is read into memory and inlined as a
 *   base64 `data:` URI.
 *
 * @param {string} input - data: URI, http(s) URL, or local file path.
 * @returns {Promise<{url: string, type: "image"|"video"}>}
 * @throws {Error} For an unsupported data: MIME, a URL or file whose
 *   extension is not supported, or a local path that does not exist.
 */
async function resolveAsset(input) {
  const t = input.trim();
  if (t.startsWith("data:image/"))
    return { url: t, type: "image" };
  if (t.startsWith("data:video/"))
    return { url: t, type: "video" };
  if (t.startsWith("data:")) {
    throw new Error(`unsupported data: URI MIME (need data:image/... or data:video/...)`);
  }
  if (/^https?:\/\//i.test(t)) {
    let ext = "";
    try {
      ext = path.extname(new URL(t).pathname).toLowerCase();
    }
    catch {
      // Unparseable URL: leave ext empty so the type-inference error below fires.
    }
    const guessed = typeForExt(ext);
    if (!guessed) {
      throw new Error(`cannot infer asset type from URL (extension "${ext || "<none>"}"). ` +
        `Use a URL ending in a supported extension, or download the file and pass the local path.`);
    }
    return { url: t, type: guessed };
  }
  const abs = path.resolve(t);
  if (!fsSync.existsSync(abs)) {
    throw new Error(`asset not found: ${abs}`);
  }
  const ext = path.extname(abs).toLowerCase();
  const type = typeForExt(ext);
  const mime = mimeForExt(ext);
  if (!type || !mime) {
    const supported = [...IMAGE_EXT, ...VIDEO_EXT].join(" / ");
    // Show "<none>" for extensionless paths (same wording as the URL branch)
    // rather than leaving a blank in the message.
    throw new Error(`unsupported extension ${ext || "<none>"} (supported: ${supported})`);
  }
  const buf = await fs.readFile(abs);
  return { url: `data:${mime};base64,${buf.toString("base64")}`, type };
}
69
/**
 * Request a description of one asset from `/api/v1/assets/describe`.
 *
 * The resolved asset is sent as `image_url` or `video_url` depending on its
 * type; `model` and `prompt` are included only when set to a truthy value.
 *
 * @param {string} input - Asset reference accepted by `resolveAsset`.
 * @param {{model?: string, prompt?: string}} opts - Optional overrides.
 * @returns {Promise<any>} The API response.
 */
async function describeOne(input, opts) {
  const asset = await resolveAsset(input);
  const body = {
    [`${asset.type}_url`]: asset.url,
    ...(opts.model ? { model: opts.model } : {}),
    ...(opts.prompt ? { prompt: opts.prompt } : {}),
  };
  return post("/api/v1/assets/describe", body);
}
79
/**
 * List the supported asset files directly inside `dir`.
 *
 * Names are kept when their lower-cased extension is in `SUPPORTED_EXT` and
 * they do not start with a dot. The result is sorted with the default
 * string ordering.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @returns {string[]} Matching entry names (not full paths).
 */
function listAssetFiles(dir) {
  const wanted = [];
  for (const name of fsSync.readdirSync(dir)) {
    const supported = SUPPORTED_EXT.has(path.extname(name).toLowerCase());
    const hidden = name.startsWith(".");
    if (supported && !hidden) {
      wanted.push(name);
    }
  }
  wanted.sort();
  return wanted;
}
86
/**
 * Map `items` through an async function with bounded concurrency.
 *
 * Results are stored at the index of their input, so output order matches
 * input order regardless of completion order.
 *
 * @param {Array<T>} items - Inputs to process.
 * @param {(item: T, index: number) => Promise<R>|R} fn - Mapper.
 * @param {number} limit - Maximum number of concurrent `fn` calls. Values
 *   below 1 (or NaN) are treated as 1 so every item is still processed.
 * @returns {Promise<Array<R>>} Results in input order.
 * @template T, R
 */
async function pMap(items, fn, limit) {
  const out = new Array(items.length);
  if (items.length === 0) {
    return out;
  }
  // Previously a limit of 0 / negative / NaN started no workers and silently
  // returned an array of holes; clamp to at least one worker instead.
  const requested = Math.floor(Number(limit));
  const workerCount = Math.min(items.length, requested >= 1 ? requested : 1);
  let next = 0;
  // Each worker claims the next unclaimed index until the list is exhausted.
  const worker = async () => {
    for (let i = next++; i < items.length; i = next++) {
      out[i] = await fn(items[i], i);
    }
  };
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return out;
}
100
// Help epilogue shown after `assets workflow --help`.
const WORKFLOW_HELP = [
  "",
  "打印素材→视频工作流指南到 stdout。",
  "适合 agent 启动时一次性读入;普通用户也可以 `rf assets workflow | less` 浏览。",
  "spec 的结构与示例见 `rf compose --help`。",
].join("\n");

// Help epilogue shown after `assets describe --help`.
const DESCRIBE_HELP = [
  "",
  "支持的扩展名:",
  " 图片: .jpg .jpeg .png .webp",
  " 视频: .mp4 .mov .webm .mkv .m4v",
  "",
  "Examples:",
  " # 图片,直接打到终端",
  " rf assets describe ./cat.jpg",
  "",
  " # 视频(自动检测扩展名),输出 ~80 字含动作和氛围的描述",
  " rf assets describe ./clip.mp4",
  "",
  " # 整个相册目录批量(图片视频混着扫),输出 recipe JSON",
  " rf assets describe ./trip/ -o trip.recipe.json",
  "",
  " # 自定义提示词",
  " rf assets describe ./clip.mp4 --prompt 'List 3 key visual moments in this video, comma-separated.'",
  "",
  "Output:",
  " 单文件: 直接打印 description 到 stdout(--json 时打印完整 JSON)",
  " 目录: { assets: [{image|video, description}, ...] }",
  "",
  "成本参考(默认 qwen3-vl-flash):",
  " 图片 ~$0.00007 (¥0.0005)",
  " 视频 ~$0.00007/秒 (60s 视频 ¥0.03)",
].join("\n");

/** True when `target` is neither an http(s) URL nor a data: URI and names an existing directory. */
function isLocalDirectory(target) {
  if (/^https?:\/\//i.test(target) || target.startsWith("data:")) {
    return false;
  }
  return fsSync.existsSync(target) && fsSync.statSync(target).isDirectory();
}

/** Create the parent directory of `file` if needed, write `content`, and return the absolute path. */
async function writeOutputFile(file, content) {
  const outAbs = path.resolve(file);
  await fs.mkdir(path.dirname(outAbs), { recursive: true });
  await fs.writeFile(outAbs, content);
  return outAbs;
}

/** Describe one file of a batch; a failure is logged via warn() and recorded instead of thrown. */
async function captionFile(dirAbs, name, opts) {
  const filePath = path.join(dirAbs, name);
  const type = typeForExt(path.extname(name).toLowerCase());
  try {
    const r = await describeOne(filePath, opts);
    return {
      type,
      path: filePath,
      description: r.description,
      cost_usd: r.cost_usd ?? 0,
      intrinsic_duration_ms: r.intrinsic_duration_ms,
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    warn(` ${name}: failed — ${msg}`);
    return { type, path: filePath, description: "", error: msg, cost_usd: 0 };
  }
}

/** Convert one batch result into its recipe entry (`video` or `image` keyed). */
function toRecipeEntry(r) {
  if (r.type !== "video") {
    return { image: r.path, description: r.description };
  }
  const entry = {
    video: r.path,
    description: r.description,
  };
  const ms = r.intrinsic_duration_ms;
  // Only a positive numeric duration is carried into the recipe.
  if (typeof ms === "number" && ms > 0) {
    entry.intrinsic_duration_ms = ms;
  }
  return entry;
}

/** Directory mode: describe every supported file and emit a recipe-style `{ assets: [...] }` object. */
async function describeDirectory(dirInput, opts) {
  const abs = path.resolve(dirInput);
  const names = listAssetFiles(abs);
  if (names.length === 0) {
    throw new Error(`no supported asset files in ${abs} ` +
      `(looked for ${[...SUPPORTED_EXT].join(" / ")})`);
  }
  info(`Captioning ${names.length} assets from ${abs} (concurrency=${BATCH_CONCURRENCY})...`);
  const results = await pMap(names, (name) => captionFile(abs, name, opts), BATCH_CONCURRENCY);
  let totalCost = 0;
  let okCount = 0;
  for (const r of results) {
    totalCost += r.cost_usd ?? 0;
    if (r.description) {
      okCount += 1;
    }
  }
  const stub = { assets: results.map((r) => toRecipeEntry(r)) };
  const json = JSON.stringify(stub, null, 2);
  if (!opts.output) {
    print(stub);
    info(`(${okCount}/${names.length} ok, total cost $${totalCost.toFixed(4)})`);
    return;
  }
  const outAbs = await writeOutputFile(opts.output, json);
  success(`Wrote ${okCount}/${names.length} captions → ${outAbs} (total cost $${totalCost.toFixed(4)})`);
}

/** Single-asset mode: write the description to a file, or print the JSON result, or print the text plus a summary line. */
async function describeSingle(input, opts) {
  const r = await describeOne(input, opts);
  if (opts.output) {
    const outAbs = await writeOutputFile(opts.output, r.description);
    success(`Wrote description (${r.type}) → ${outAbs} (${r.model} · ${(r.duration_ms / 1000).toFixed(2)}s · $${(r.cost_usd ?? 0).toFixed(6)})`);
    return;
  }
  if (isJson()) {
    print(r);
    return;
  }
  print(r.description);
  info(`${r.type} · ${r.model} · ${(r.duration_ms / 1000).toFixed(2)}s · cost $${(r.cost_usd ?? 0).toFixed(6)}`);
}

/**
 * Register the `assets` command group on the CLI program.
 *
 * Sub-commands:
 * - `assets workflow` — writes the workflow guide text to stdout.
 * - `assets describe <pathOrDir>` — describes one asset, or every supported
 *   file in a directory.
 *
 * @param {object} program - CLI program object exposing `command()`.
 */
export function registerAssets(program) {
  const assets = program
    .command("assets")
    .description("素材相关能力: describe(图片/视频 → 一句话描述) / workflow(agent 用户素材→视频 SOP)")
    .helpOption("-h, --help", "show help");

  assets
    .command("workflow")
    .description("打印 agent 用户素材→视频 完整 SOP(给 Claude / Cursor 等 agent 看;底层使用 rf compose 渲染)")
    .helpOption("-h, --help", "show help")
    .addHelpText("after", WORKFLOW_HELP)
    .action(() => {
      process.stdout.write(ASSETS_WORKFLOW_TEXT);
      // Ensure the output ends with a newline.
      if (!ASSETS_WORKFLOW_TEXT.endsWith("\n")) {
        process.stdout.write("\n");
      }
    });

  assets
    .command("describe <pathOrDir>")
    .description("用多模态 LLM 给图片或视频生成中文描述。" +
      "单文件打印一句话;目录批量,扫描所有 jpg/png/mp4/mov 等并输出 recipe 风格 JSON。")
    .helpOption("-h, --help", "show help")
    .option("-o, --output <file>", "写到文件而不是 stdout(单文件写描述文本,目录写 JSON)")
    .option("--model <id>", "指定 RelayX 上的 vision-capable 模型(默认 qwen/qwen3-vl-flash;其他可选: qwen/qwen3-vl-plus / openai/gpt-4.1-mini / google/gemini-3-flash / anthropic/claude-4-7-sonnet)")
    .option("--prompt <text>", "覆盖默认提示词(图片默认 30 字简描,视频默认 80 字动作+氛围描述)")
    .addHelpText("after", DESCRIBE_HELP)
    .action(async (pathOrDir, opts) => {
      if (isLocalDirectory(pathOrDir)) {
        await describeDirectory(pathOrDir, opts);
        return;
      }
      await describeSingle(pathOrDir, opts);
    });
}