npm - zerocut-cli - Versions diffs - 0.2.2 → 0.3.1 - Mend

zerocut-cli 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +5 -2
package/dist/commands/image.js +2 -0
package/dist/commands/skill.js +22 -9
package/dist/commands/video.js +109 -14
package/dist/services/config.js +7 -2
package/dist/skill/SKILL.md +153 -18
package/dist/skill/edit-video/SKILL.md +111 -0
package/dist/skill/one-click-video/SKILL.md +140 -0
package/package.json +2 -2
package/scripts/copy-skill-md.cjs +4 -4
package/src/commands/image.ts +2 -0
package/src/commands/skill.ts +26 -9
package/src/commands/video.ts +49 -3
package/src/services/config.ts +10 -2
package/src/skill/SKILL.md +21 -2
package/src/skill/edit-video/SKILL.md +111 -0
package/src/skill/one-click-video/SKILL.md +140 -0

package/README.md CHANGED

@@ -101,7 +101,7 @@ zerocut config --ott <token> --region <cn|us>   # non-interactive
 - `image` — create a new image (default action; requires `--prompt`)
   - Options:
     - `--prompt <prompt>` (required)
-    - `--model <model>` (seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan)
+    - `--model <model>` (seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan|wan-pro)
     - `--aspectRatio <ratio>` (1:1|3:4|4:3|16:9|9:16|2:3|3:2|21:9|1:4|4:1|1:8|8:1)
     - `--resolution <resolution>` (1K|2K|4K)
     - `--refs <img1,img2,...>` (comma-separated paths/URLs)
@@ -110,15 +110,18 @@ zerocut config --ott <token> --region <cn|us>   # non-interactive
   - Options:
     - `--prompt <prompt>` (required)
     - `--duration <seconds>` (integer 1–16; when `--sourceVideo` is set, must be 3–10)
-    - `--model <model>` (enum: `zerocut3.0|zerocut3.0-pro|zerocut3.0-pro-fast|seedance-1.5-pro|vidu|vidu-pro|viduq3|viduq3-turbo|kling|kling-v3|wan|wan-flash|sora2|sora2-pro|veo3.1|veo3.1-pro`; default `vidu`)
+    - `--model <model>` (enum: `zerocut3.0|zerocut3.0-pro|zerocut3.0-pro-fast|zerocut3.0-turbo|seedance-1.5-pro|seedance-2.0|seedance-2.0-fast|vidu|vidu-pro|viduq3|viduq3-turbo|kling|kling-v3|wan|wan-flash|sora2|sora2-pro|veo3.1|veo3.1-pro`; default `vidu`)
     - `--sourceVideo <video>` (base video path/url for edit mode)
     - `--seed <seed>`
     - `--firstFrame <image>`
     - `--lastFrame <image>`
+    - `--storyboard <image>`
+    - `--persons <persons>` (comma-separated person image paths/URLs)
     - `--refs <assets>`
     - `--resolution <resolution>`
     - `--aspectRatio <ratio>` (9:16|16:9|1:1)
     - `--withAudio`
+    - `--withBGM <withBGM>` (true|false, default true)
     - `--optimizeCameraMotion`
     - `--output <file>`
   - Notes:

package/dist/commands/image.js CHANGED

@@ -21,6 +21,7 @@ function register(program) {
         "banana2",
         "banana-pro",
         "wan",
+        "wan-pro",
     ];
     const allowedAspectRatios = [
         "1:1",
@@ -46,6 +47,7 @@ function register(program) {
             aspect_ratio: aspectRatio,
             resolution,
             reference_images: referenceImages,
+            sequential_image_generation: "disabled",
             onProgress,
         };
         const res = await session.ai.generateImage(payload);

package/dist/commands/skill.js CHANGED

@@ -9,16 +9,29 @@ const node_fs_1 = __importDefault(require("node:fs"));
 const node_path_1 = __importDefault(require("node:path"));
 exports.name = "skill";
 exports.description = "Print built-in SKILL.md content";
+function printSkill(relativePath) {
+    const filePath = node_path_1.default.resolve(__dirname, relativePath);
+    const content = node_fs_1.default.readFileSync(filePath, "utf8");
+    process.stdout.write(content);
+    if (!content.endsWith("\n")) {
+        process.stdout.write("\n");
+    }
+}
 function register(program) {
-    program
-        .command("skill")
-        .description("Print built-in skill markdown")
+    const parent = program.command("skill").description("Print built-in skill markdown");
+    parent
+        .command("one-click-video")
+        .description("Print one-click-video skill markdown")
+        .action(() => {
+        printSkill("../skill/one-click-video/SKILL.md");
+    });
+    parent
+        .command("edit-video")
+        .description("Print edit-video skill markdown")
         .action(() => {
-        const filePath = node_path_1.default.resolve(__dirname, "../skill/SKILL.md");
-        const content = node_fs_1.default.readFileSync(filePath, "utf8");
-        process.stdout.write(content);
-        if (!content.endsWith("\n")) {
-            process.stdout.write("\n");
-        }
+        printSkill("../skill/edit-video/SKILL.md");
+    });
+    parent.action(() => {
+        printSkill("../skill/SKILL.md");
     });
 }

package/dist/commands/video.js CHANGED

@@ -11,11 +11,35 @@ const node_path_1 = __importDefault(require("node:path"));
 const progress_1 = require("../utils/progress");
 exports.name = "video";
 exports.description = "Video command: create video";
+function resolveResultUrl(result) {
+    if (!result || typeof result !== "object") {
+        return undefined;
+    }
+    const record = result;
+    if (typeof record.url === "string" && record.url.length > 0) {
+        return record.url;
+    }
+    const data = record.data;
+    if (data && typeof data === "object") {
+        const dataRecord = data;
+        if (typeof dataRecord.url === "string" && dataRecord.url.length > 0) {
+            return dataRecord.url;
+        }
+    }
+    return undefined;
+}
 function register(program) {
+    const avatarModels = ["zerocut-avatar-1.0", "zerocut-avatar-1.5"];
+    const mvModels = ["zerocut-mv-1.0"];
     const parent = program.command("video").description("Create a new video; requires --prompt");
     const allowedTypes = [
         "zerocut3.0",
+        "zerocut3.0-pro",
+        "zerocut3.0-pro-fast",
+        "zerocut3.0-turbo",
         "seedance-1.5-pro",
+        "seedance-2.0",
+        "seedance-2.0-fast",
         "vidu",
         "vidu-pro",
         "viduq3",
@@ -28,6 +52,8 @@ function register(program) {
         "sora2-pro",
         "veo3.1",
         "veo3.1-pro",
+        ...avatarModels,
+        ...mvModels,
     ];
     async function videoCreateAction(opts) {
         const session = (0, cerevox_1.getSessionFromCommand)(this);
@@ -41,20 +67,30 @@ function register(program) {
             process.exitCode = 1;
             return;
         }
-        let model = typeof opts.video === "string" ? opts.video.trim() : undefined;
+        let model = typeof opts.model === "string" ? opts.model.trim() : undefined;
         if (model && !allowedTypes.includes(model)) {
-            process.stderr.write(`Invalid value for --video: ${model}. Allowed: ${allowedTypes.join("|")}\n`);
+            process.stderr.write(`Invalid value for --model: ${model}. Allowed: ${allowedTypes.join("|")}\n`);
             process.exitCode = 1;
             return;
         }
         if (!model)
             model = "vidu";
         const durationStr = typeof opts.duration === "string" ? opts.duration.trim() : undefined;
+        const sourceVideo = typeof opts.sourceVideo === "string" ? opts.sourceVideo.trim() : undefined;
         let duration = 0;
+        const durationRange = (() => {
+            if (avatarModels.includes(model)) {
+                return { min: 5, max: 240 };
+            }
+            if (mvModels.includes(model)) {
+                return { min: 1, max: 240 };
+            }
+            return { min: 1, max: 16 };
+        })();
         if (durationStr) {
             const n = Number.parseInt(durationStr, 10);
-            if (!Number.isFinite(n) || n < 1 || n > 16) {
-                process.stderr.write("Invalid value for --duration: must be integer 1-16\n");
+            if (!Number.isFinite(n) || n < durationRange.min || n > durationRange.max) {
+                process.stderr.write(`Invalid value for --duration: model ${model} supports integer ${durationRange.min}-${durationRange.max}\n`);
                 process.exitCode = 1;
                 return;
             }
@@ -68,6 +104,21 @@ function register(program) {
             return;
         }
         const aspectRatio = ar;
+        let withBGM = true;
+        if (typeof opts.withBGM === "string") {
+            const withBGMRaw = opts.withBGM.trim().toLowerCase();
+            if (withBGMRaw === "true") {
+                withBGM = true;
+            }
+            else if (withBGMRaw === "false") {
+                withBGM = false;
+            }
+            else {
+                process.stderr.write("Invalid value for --withBGM: expected true|false\n");
+                process.exitCode = 1;
+                return;
+            }
+        }
         const images = [];
         if (opts.firstFrame) {
             images.push({
@@ -81,6 +132,24 @@ function register(program) {
                 url: await (0, cerevox_1.getMaterialUri)(session, opts.lastFrame),
             });
         }
+        if (opts.storyboard) {
+            images.push({
+                type: "storyboard",
+                url: await (0, cerevox_1.getMaterialUri)(session, opts.storyboard),
+            });
+        }
+        const personList = typeof opts.persons === "string" && opts.persons.length > 0
+            ? opts.persons
+                .split(",")
+                .map((s) => s.trim())
+                .filter((s) => s.length > 0)
+            : [];
+        for (const person of personList) {
+            images.push({
+                type: "person",
+                url: await (0, cerevox_1.getMaterialUri)(session, person),
+            });
+        }
         const refsList = typeof opts.refs === "string" && opts.refs.length > 0
             ? opts.refs
                 .split(",")
@@ -93,21 +162,33 @@ function register(program) {
                 url: await (0, cerevox_1.getMaterialUri)(session, ref),
             });
         }
-        const res = await session.ai.generateVideo({
+        const request = {
             prompt,
             model: model,
             duration: duration || undefined,
             resolution: opts.resolution,
             aspect_ratio: aspectRatio,
-            mute: !opts.withAudio,
+            mute: !(opts.withAudio ?? true),
+            bgm: withBGM,
             optimize_camera: opts.optimizeCameraMotion,
             seed: opts.seed ? Number.parseInt(opts.seed, 10) : undefined,
             images: images.length > 0 ? images : undefined,
+            videos: sourceVideo
+                ? [
+                    {
+                        type: "base",
+                        url: await (0, cerevox_1.getMaterialUri)(session, sourceVideo),
+                    },
+                ]
+                : undefined,
             onProgress: (0, progress_1.createProgressSpinner)("inferencing"),
-        });
+            timeout: 7200000,
+        };
+        const res = await session.ai.generateVideo(request);
+        const initialUrl = resolveResultUrl(res);
         try {
-            if (res?.url) {
-                const tosUrl = await (0, cerevox_1.syncToTOS)(res.url);
+            if (initialUrl) {
+                const tosUrl = await (0, cerevox_1.syncToTOS)(initialUrl);
                 if (tosUrl) {
                     res.url = tosUrl;
                 }
@@ -118,7 +199,13 @@ function register(program) {
         const output = typeof opts.output === "string" ? opts.output : undefined;
         if (output) {
             const dir = process.cwd();
-            const url = res.url;
+            const url = resolveResultUrl(res);
+            if (!url) {
+                process.stderr.write("Cannot save --output because no video URL was returned. Please retry later or run without --output to inspect raw response.\n");
+                process.exitCode = 1;
+                console.log(res);
+                return;
+            }
             const response = await fetch(url);
             const buffer = Buffer.from(await response.arrayBuffer());
             const filePath = node_path_1.default.resolve(dir, output);
@@ -133,15 +220,19 @@ function register(program) {
     // default action on `zerocut video`
     parent
         .option("--prompt <prompt>", "Text prompt for video generation (required)")
-        .option("--duration <duration>", "Video duration in seconds")
-        .option("--video <video>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
+        .option("--duration <duration>", "Video duration in seconds (default models: 1-16, avatar: 5-240, mv: 1-240)")
+        .option("--model <model>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
+        .option("--sourceVideo <video>", "Base video path/url for edit mode (requires --duration 3-10)")
         .option("--seed <seed>", "Random seed")
         .option("--firstFrame <image>", "First frame image path/url")
         .option("--lastFrame <image>", "Last frame image path/url")
+        .option("--storyboard <image>", "Storyboard image path/url")
+        .option("--persons <persons>", "Comma-separated person image paths/urls")
         .option("--refs <refs>", "Comma-separated reference image/video paths/urls")
         .option("--resolution <resolution>", "Resolution, e.g., 720p")
         .option("--aspectRatio <ratio>", "Aspect ratio: 9:16|16:9|1:1")
         .option("--withAudio", "Include audio track")
+        .option("--withBGM <withBGM>", "Include background music: true|false (default: true)")
         .option("--optimizeCameraMotion", "Optimize camera motion")
         .option("--output <file>", "Output file path")
         .action(videoCreateAction);
@@ -150,15 +241,19 @@ function register(program) {
         .command("create")
         .description("Create a new video; requires --prompt")
         .option("--prompt <prompt>", "Text prompt for video generation (required)")
-        .option("--duration <duration>", "Video duration in seconds")
-        .option("--video <video>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
+        .option("--duration <duration>", "Video duration in seconds (default models: 1-16, avatar: 5-240, mv: 1-240)")
+        .option("--model <model>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
+        .option("--sourceVideo <video>", "Base video path/url for edit mode (requires --duration 3-10)")
         .option("--seed <seed>", "Random seed")
         .option("--firstFrame <image>", "First frame image path/url")
         .option("--lastFrame <image>", "Last frame image path/url")
+        .option("--storyboard <image>", "Storyboard image path/url")
+        .option("--persons <persons>", "Comma-separated person image paths/urls")
         .option("--refs <refs>", "Comma-separated reference image/video paths/urls")
         .option("--resolution <resolution>", "Resolution, e.g., 720p")
         .option("--aspectRatio <ratio>", "Aspect ratio: 9:16|16:9|1:1")
         .option("--withAudio", "Include audio track")
+        .option("--withBGM <withBGM>", "Include background music: true|false (default: true)")
         .option("--optimizeCameraMotion", "Optimize camera motion")
         .option("--output <file>", "Output file path")
         .action(videoCreateAction);

package/dist/services/config.js CHANGED

@@ -207,7 +207,11 @@ function applyConfigInterceptor(program) {
         const current = (actionCommand ?? thisCommand);
         const name = current?.name?.();
         const parentName = current?.parent?.name?.();
-        if (name === "help" || name === "skill" || name === "config" || parentName === "config")
+        if (name === "help" ||
+            name === "skill" ||
+            name === "config" ||
+            parentName === "config" ||
+            parentName === "skill")
             return;
         const ok = await ensureConfig();
         if (!ok) {
@@ -221,7 +225,8 @@ function applyConfigInterceptor(program) {
     });
     program.hook("postAction", async (thisCommand, actionCommand) => {
         const name = actionCommand?.name?.() ?? thisCommand?.name?.();
-        if (name === "help" || name === "skill")
+        const parentName = actionCommand?.parent?.name?.() ?? thisCommand?.parent?.name?.();
+        if (name === "help" || name === "skill" || parentName === "skill")
             return;
         try {
             const cmd = (actionCommand ?? thisCommand);

package/dist/skill/SKILL.md CHANGED

@@ -1,6 +1,11 @@
 ---
 name: "zerocut-cli-tools"
 description: "Use ZeroCut CLI media and document tools. Invoke when user needs generate media, run ffmpeg/pandoc, sync resources, or save outputs."
+homepage: "https://github.com/liubei-ai/zerocut-cli"
+source: "https://github.com/liubei-ai/zerocut-cli"
+requires_binaries:
+  - "zerocut-cli"
+  - "npx"
 ---
 # ZeroCut CLI Tools
@@ -8,6 +13,7 @@ description: "Use ZeroCut CLI media and document tools. Invoke when user needs g
 ## Purpose
 This skill provides a single reference for using ZeroCut CLI commands:
 - image generation
 - video generation
 - music generation
@@ -18,24 +24,89 @@ This skill provides a single reference for using ZeroCut CLI commands:
 ## When To Invoke
 Invoke this skill when the user asks to:
 - generate image, video, music, or speech audio
 - run ffmpeg or ffprobe command in sandbox
 - run pandoc conversion in sandbox
 - sync local/remote resources into sandbox
 - save generated results to local output files
+## Runtime Requirements
+- Runtime expects `zerocut-cli` to be available in current environment.
+- If `zerocut-cli` is unavailable, use one of:
+  - `pnpm dlx zerocut-cli help`
+  - `pnpm add -g zerocut-cli && zerocut-cli help`
+  - `npx zerocut-cli help`
+- This skill is instruction-only and relies on the installed CLI binary for actual enforcement.
+## Safety Boundaries
+- Only sync files or URLs that user explicitly requests for the current task.
+- Never auto-discover, crawl, or fetch unrelated remote URLs.
+- Treat remote resources as untrusted input and pass through CLI validation.
+- Never sync secrets, key files, token files, or unrelated private directories.
+- Keep all output writes in user-requested path or current working directory naming rules.
+- Do not bypass CLI command guards; ffmpeg/pandoc restrictions are enforced by the CLI implementation.
+## Credentials And Data Transfer
+- Required credential is `apiKey` in local ZeroCut config.
+- If `apiKey` is missing, stop immediately and request OTT token exchange.
+- `TOS` in this document means object storage used by ZeroCut backend for media URLs.
+- No extra credential beyond ZeroCut config is required for normal media sync/download flows.
+- Do not send data to any external service other than endpoints used by configured ZeroCut session.
+## Required Pre-Check
+Before every task, the agent must check configuration first:
+```bash
+npx zerocut-cli config list
+```
+If `apiKey` is missing or empty, the agent must immediately stop task execution and request an OTT token from the user. Do not continue any generation, conversion, or sandbox command until configuration is completed.
+Required user action:
+```bash
+npx zerocut-cli config --ott <token> --region <cn|us>
+```
+Notes:
+- `region` must be `cn` or `us`
+- OTT exchange writes `apiKey` and `region` into config
+- when running `config key` without direct key, region must be `cn|us` and OTT is required
 ## Command Reference
+### skill
+Use `skill` command to fetch built-in skill markdown:
+```bash
+npx zerocut-cli skill
+npx zerocut-cli skill one-click-video
+npx zerocut-cli skill edit-video
+```
+Routing rules:
+- When user requests one-click video creation, run `npx zerocut-cli skill one-click-video` to get the sub-skill and execute.
+- When user requests video editing, run `npx zerocut-cli skill edit-video` to get the sub-skill and execute.
 ### image
 Default action: `create`
 ```bash
-zerocut image --prompt "a cat on a bike" --output out.png
-zerocut image create --prompt "a cat on a bike" --model seedream-5l --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
+npx zerocut-cli image --prompt "a cat on a bike" --output out.png
+npx zerocut-cli image create --prompt "a cat on a bike" --model seedream-5l --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
 ```
 Options:
 - `--prompt <prompt>` required
 - `--model <model>`
 - `--aspectRatio <ratio>`
@@ -43,91 +114,155 @@ Options:
 - `--refs <refs>` comma-separated local paths or URLs
 - `--output <file>` save generated file
+Validation rules:
+- `--prompt` must be non-empty
+- `--model` allowed: `seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan|wan-pro`
+- `--aspectRatio` allowed: `1:1|3:4|4:3|16:9|9:16|2:3|3:2|21:9|1:4|4:1|1:8|8:1`
+- unless user specifies aspect ratio, default to `16:9`
+- `--resolution` allowed: `1K|2K|4K`
+- unless user specifies resolution, default to `1K`
 ### video
 Default action: `create`
 ```bash
-zerocut video --prompt "city night drive" --video vidu --duration 8 --output out.mp4
-zerocut video create --prompt "city night drive" --video vidu --aspectRatio 1:1 --refs ref1.png,ref2.png --output out.mp4
+npx zerocut-cli video --prompt "city night drive" --model vidu --duration 8 --output out.mp4
+npx zerocut-cli video create --prompt "city night drive" --model vidu --aspectRatio 1:1 --refs ref1.png,ref2.png --output out.mp4
+npx zerocut-cli video --prompt "remix this clip" --model vidu --sourceVideo input.mp4 --duration 6 --output edited.mp4
 ```
 Options:
 - `--prompt <prompt>` required
-- `--video <model>`
-- `--duration <seconds>`
+- `--model <model>`
+- `--duration <seconds>` model-dependent integer
+- `--sourceVideo <video>` base video for edit mode
 - `--seed <seed>`
 - `--firstFrame <image>`
 - `--lastFrame <image>`
+- `--storyboard <image>`
+- `--persons <persons>`
 - `--refs <assets>`
 - `--resolution <resolution>`
 - `--aspectRatio <ratio>`
 - `--withAudio`
+- `--withBGM <withBGM>`
 - `--optimizeCameraMotion`
 - `--output <file>`
+Validation rules:
+- `--prompt` must be non-empty
+- `--model` allowed: `zerocut3.0|zerocut3.0-pro|zerocut3.0-pro-fast|zerocut3.0-turbo|seedance-1.5-pro|seedance-2.0|seedance-2.0-fast|vidu|vidu-pro|viduq3|viduq3-turbo|kling|kling-v3|wan|wan-flash|sora2|sora2-pro|veo3.1|veo3.1-pro|zerocut-avatar-1.0|zerocut-avatar-1.5|zerocut-mv-1.0`
+- `--duration` must follow model range:
+  - default models: `1-16`
+  - `zerocut-avatar-1.0` / `zerocut-avatar-1.5`: `5-240`
+  - `zerocut-mv-1.0`: `1-240`
+- `--aspectRatio` allowed: `9:16|16:9|1:1`
+- unless user specifies aspect ratio, default to `16:9`
+- unless user specifies resolution, default to `720p`
+- `--withBGM` allowed: `true|false`, default to `true`
+Long video guidance:
+- for default models, if required duration is over 16s, split into multiple generations (each 1-16s)
+- then concatenate clips with ffmpeg
+- example:
+```bash
+printf "file 'part1.mp4'\nfile 'part2.mp4'\nfile 'part3.mp4'\n" > concat.txt
+npx zerocut-cli ffmpeg --args -f concat -safe 0 -i concat.txt -c copy final.mp4 --resources concat.txt part1.mp4 part2.mp4 part3.mp4
+```
 ### music
 Default action: `create`
 ```bash
-zerocut music --prompt "lofi beat" --output music.mp3
-zerocut music create --prompt "lofi beat" --output music.mp3
+npx zerocut-cli music --prompt "lofi beat" --output music.mp3
+npx zerocut-cli music create --prompt "lofi beat" --output music.mp3
 ```
 Options:
 - `--prompt <prompt>` required
 - `--output <file>`
+Validation rules:
+- `--prompt` must be non-empty
 ### tts
 Default action: `create`
 ```bash
-zerocut tts --text "你好，欢迎使用 ZeroCut" --voiceId voice_xxx --output speech.mp3
-zerocut tts create --prompt "calm tone" --text "Hello world" --voiceId voice_xxx --output speech.mp3
+npx zerocut-cli tts --text "你好，欢迎使用 ZeroCut" --voiceId voice_xxx --output speech.mp3
+npx zerocut-cli tts create --prompt "calm tone" --text "Hello world" --voiceId voice_xxx --output speech.mp3
 ```
 Options:
 - `--prompt <prompt>`
 - `--text <text>` required
 - `--voiceId <voiceId>`
 - `--output <file>`
+Validation rules:
+- `--text` must be non-empty
 ### ffmpeg
 ```bash
-zerocut ffmpeg --args -i input.mp4 -vn output.mp3 --resources input.mp4
-zerocut ffmpeg --args -i input.mp4 -vf scale=1280:720 output.mp4 --resources input.mp4
+npx zerocut-cli ffmpeg --args -i input.mp4 -vn output.mp3 --resources input.mp4
+npx zerocut-cli ffmpeg --args -i input.mp4 -vf scale=1280:720 output.mp4 --resources input.mp4
 ```
 Options:
 - `--args <args...>` required, arguments appended after `ffmpeg`
 - `--resources <resources...>` optional, files/URLs to sync into sandbox materials
 Behavior:
-- command is validated to only allow `ffmpeg` or `ffprobe`
+- `--args` must be provided
+- command prefix is fixed as `ffmpeg`
 - for `ffmpeg`, `-y` is auto-injected when absent
 - output file is auto-downloaded from sandbox to local current directory
 ### pandoc
 ```bash
-zerocut pandoc --args input.md -o output.pdf --resources input.md
-zerocut pandoc --args input.md --output=output.docx --resources input.md template.docx
+npx zerocut-cli pandoc --args input.md -o output.pdf --resources input.md
+npx zerocut-cli pandoc --args input.md --output=output.docx --resources input.md template.docx
 ```
 Options:
 - `--args <args...>` required, arguments appended after `pandoc`
 - `--resources <resources...>` optional, files/URLs to sync into sandbox materials
 Behavior:
-- command is validated to only allow `pandoc`
-- output file must be specified in args with `-o`, `--output`, or `--output=...`
-- output file is auto-downloaded from sandbox to local current directory
+- `--args` must be provided
+- command prefix is fixed as `pandoc`
+- output file is auto-downloaded only when args include `-o`, `--output`, or `--output=...`
 ## Output And Sync Rules
 - Media URLs from generation are synced to TOS when available.
 - `--output` saves files to an absolute path resolved from current working directory.
 - Missing parent directories for `--output` are created automatically.
+- File type constraints:
+  - image output uses `.png`
+  - video output uses `.mp4`
+  - audio output (`music`/`tts`) uses `.mp3`
+- If user does not explicitly provide output file name, agent must generate one in current directory:
+  - use 3-digit incremental prefix to avoid collisions, like `001_...`, `002_...`
+  - keep file name meaningful by task content, e.g. `001_city-night-drive.mp4`, `002_lofi-beat.mp3`
+- ffmpeg and pandoc outputs follow the same naming rule:
+  - if output path is not explicitly specified by user, agent should generate a meaningful file name with `NNN_` prefix and correct extension
+  - for pandoc, keep extension aligned with conversion target format