zerocut-cli 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,7 +101,7 @@ zerocut config --ott <token> --region <cn|us> # non-interactive
101
101
  - `image` — create a new image (default action; requires `--prompt`)
102
102
  - Options:
103
103
  - `--prompt <prompt>` (required)
104
- - `--model <model>` (seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan)
104
+ - `--model <model>` (seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan|wan-pro)
105
105
  - `--aspectRatio <ratio>` (1:1|3:4|4:3|16:9|9:16|2:3|3:2|21:9|1:4|4:1|1:8|8:1)
106
106
  - `--resolution <resolution>` (1K|2K|4K)
107
107
  - `--refs <img1,img2,...>` (comma-separated paths/URLs)
@@ -110,15 +110,18 @@ zerocut config --ott <token> --region <cn|us> # non-interactive
110
110
  - Options:
111
111
  - `--prompt <prompt>` (required)
112
112
  - `--duration <seconds>` (integer 1–16; when `--sourceVideo` is set, must be 3–10)
113
- - `--model <model>` (enum: `zerocut3.0|zerocut3.0-pro|zerocut3.0-pro-fast|seedance-1.5-pro|vidu|vidu-pro|viduq3|viduq3-turbo|kling|kling-v3|wan|wan-flash|sora2|sora2-pro|veo3.1|veo3.1-pro`; default `vidu`)
113
+ - `--model <model>` (enum: `zerocut3.0|zerocut3.0-pro|zerocut3.0-pro-fast|zerocut3.0-turbo|seedance-1.5-pro|seedance-2.0|seedance-2.0-fast|vidu|vidu-pro|viduq3|viduq3-turbo|kling|kling-v3|wan|wan-flash|sora2|sora2-pro|veo3.1|veo3.1-pro`; default `vidu`)
114
114
  - `--sourceVideo <video>` (base video path/url for edit mode)
115
115
  - `--seed <seed>`
116
116
  - `--firstFrame <image>`
117
117
  - `--lastFrame <image>`
118
+ - `--storyboard <image>`
119
+ - `--persons <persons>` (comma-separated person image paths/URLs)
118
120
  - `--refs <assets>`
119
121
  - `--resolution <resolution>`
120
122
  - `--aspectRatio <ratio>` (9:16|16:9|1:1)
121
123
  - `--withAudio`
124
+ - `--withBGM <withBGM>` (true|false, default true)
122
125
  - `--optimizeCameraMotion`
123
126
  - `--output <file>`
124
127
  - Notes:
@@ -21,6 +21,7 @@ function register(program) {
21
21
  "banana2",
22
22
  "banana-pro",
23
23
  "wan",
24
+ "wan-pro",
24
25
  ];
25
26
  const allowedAspectRatios = [
26
27
  "1:1",
@@ -46,6 +47,7 @@ function register(program) {
46
47
  aspect_ratio: aspectRatio,
47
48
  resolution,
48
49
  reference_images: referenceImages,
50
+ sequential_image_generation: "disabled",
49
51
  onProgress,
50
52
  };
51
53
  const res = await session.ai.generateImage(payload);
@@ -9,16 +9,29 @@ const node_fs_1 = __importDefault(require("node:fs"));
9
9
  const node_path_1 = __importDefault(require("node:path"));
10
10
  exports.name = "skill";
11
11
  exports.description = "Print built-in SKILL.md content";
12
+ function printSkill(relativePath) {
13
+ const filePath = node_path_1.default.resolve(__dirname, relativePath);
14
+ const content = node_fs_1.default.readFileSync(filePath, "utf8");
15
+ process.stdout.write(content);
16
+ if (!content.endsWith("\n")) {
17
+ process.stdout.write("\n");
18
+ }
19
+ }
12
20
  function register(program) {
13
- program
14
- .command("skill")
15
- .description("Print built-in skill markdown")
21
+ const parent = program.command("skill").description("Print built-in skill markdown");
22
+ parent
23
+ .command("one-click-video")
24
+ .description("Print one-click-video skill markdown")
25
+ .action(() => {
26
+ printSkill("../skill/one-click-video/SKILL.md");
27
+ });
28
+ parent
29
+ .command("edit-video")
30
+ .description("Print edit-video skill markdown")
16
31
  .action(() => {
17
- const filePath = node_path_1.default.resolve(__dirname, "../skill/SKILL.md");
18
- const content = node_fs_1.default.readFileSync(filePath, "utf8");
19
- process.stdout.write(content);
20
- if (!content.endsWith("\n")) {
21
- process.stdout.write("\n");
22
- }
32
+ printSkill("../skill/edit-video/SKILL.md");
33
+ });
34
+ parent.action(() => {
35
+ printSkill("../skill/SKILL.md");
23
36
  });
24
37
  }
@@ -11,11 +11,35 @@ const node_path_1 = __importDefault(require("node:path"));
11
11
  const progress_1 = require("../utils/progress");
12
12
  exports.name = "video";
13
13
  exports.description = "Video command: create video";
14
+ function resolveResultUrl(result) {
15
+ if (!result || typeof result !== "object") {
16
+ return undefined;
17
+ }
18
+ const record = result;
19
+ if (typeof record.url === "string" && record.url.length > 0) {
20
+ return record.url;
21
+ }
22
+ const data = record.data;
23
+ if (data && typeof data === "object") {
24
+ const dataRecord = data;
25
+ if (typeof dataRecord.url === "string" && dataRecord.url.length > 0) {
26
+ return dataRecord.url;
27
+ }
28
+ }
29
+ return undefined;
30
+ }
14
31
  function register(program) {
32
+ const avatarModels = ["zerocut-avatar-1.0", "zerocut-avatar-1.5"];
33
+ const mvModels = ["zerocut-mv-1.0"];
15
34
  const parent = program.command("video").description("Create a new video; requires --prompt");
16
35
  const allowedTypes = [
17
36
  "zerocut3.0",
37
+ "zerocut3.0-pro",
38
+ "zerocut3.0-pro-fast",
39
+ "zerocut3.0-turbo",
18
40
  "seedance-1.5-pro",
41
+ "seedance-2.0",
42
+ "seedance-2.0-fast",
19
43
  "vidu",
20
44
  "vidu-pro",
21
45
  "viduq3",
@@ -28,6 +52,8 @@ function register(program) {
28
52
  "sora2-pro",
29
53
  "veo3.1",
30
54
  "veo3.1-pro",
55
+ ...avatarModels,
56
+ ...mvModels,
31
57
  ];
32
58
  async function videoCreateAction(opts) {
33
59
  const session = (0, cerevox_1.getSessionFromCommand)(this);
@@ -41,20 +67,30 @@ function register(program) {
41
67
  process.exitCode = 1;
42
68
  return;
43
69
  }
44
- let model = typeof opts.video === "string" ? opts.video.trim() : undefined;
70
+ let model = typeof opts.model === "string" ? opts.model.trim() : undefined;
45
71
  if (model && !allowedTypes.includes(model)) {
46
- process.stderr.write(`Invalid value for --video: ${model}. Allowed: ${allowedTypes.join("|")}\n`);
72
+ process.stderr.write(`Invalid value for --model: ${model}. Allowed: ${allowedTypes.join("|")}\n`);
47
73
  process.exitCode = 1;
48
74
  return;
49
75
  }
50
76
  if (!model)
51
77
  model = "vidu";
52
78
  const durationStr = typeof opts.duration === "string" ? opts.duration.trim() : undefined;
79
+ const sourceVideo = typeof opts.sourceVideo === "string" ? opts.sourceVideo.trim() : undefined;
53
80
  let duration = 0;
81
+ const durationRange = (() => {
82
+ if (avatarModels.includes(model)) {
83
+ return { min: 5, max: 240 };
84
+ }
85
+ if (mvModels.includes(model)) {
86
+ return { min: 1, max: 240 };
87
+ }
88
+ return { min: 1, max: 16 };
89
+ })();
54
90
  if (durationStr) {
55
91
  const n = Number.parseInt(durationStr, 10);
56
- if (!Number.isFinite(n) || n < 1 || n > 16) {
57
- process.stderr.write("Invalid value for --duration: must be integer 1-16\n");
92
+ if (!Number.isFinite(n) || n < durationRange.min || n > durationRange.max) {
93
+ process.stderr.write(`Invalid value for --duration: model ${model} supports integer ${durationRange.min}-${durationRange.max}\n`);
58
94
  process.exitCode = 1;
59
95
  return;
60
96
  }
@@ -68,6 +104,21 @@ function register(program) {
68
104
  return;
69
105
  }
70
106
  const aspectRatio = ar;
107
+ let withBGM = true;
108
+ if (typeof opts.withBGM === "string") {
109
+ const withBGMRaw = opts.withBGM.trim().toLowerCase();
110
+ if (withBGMRaw === "true") {
111
+ withBGM = true;
112
+ }
113
+ else if (withBGMRaw === "false") {
114
+ withBGM = false;
115
+ }
116
+ else {
117
+ process.stderr.write("Invalid value for --withBGM: expected true|false\n");
118
+ process.exitCode = 1;
119
+ return;
120
+ }
121
+ }
71
122
  const images = [];
72
123
  if (opts.firstFrame) {
73
124
  images.push({
@@ -81,6 +132,24 @@ function register(program) {
81
132
  url: await (0, cerevox_1.getMaterialUri)(session, opts.lastFrame),
82
133
  });
83
134
  }
135
+ if (opts.storyboard) {
136
+ images.push({
137
+ type: "storyboard",
138
+ url: await (0, cerevox_1.getMaterialUri)(session, opts.storyboard),
139
+ });
140
+ }
141
+ const personList = typeof opts.persons === "string" && opts.persons.length > 0
142
+ ? opts.persons
143
+ .split(",")
144
+ .map((s) => s.trim())
145
+ .filter((s) => s.length > 0)
146
+ : [];
147
+ for (const person of personList) {
148
+ images.push({
149
+ type: "person",
150
+ url: await (0, cerevox_1.getMaterialUri)(session, person),
151
+ });
152
+ }
84
153
  const refsList = typeof opts.refs === "string" && opts.refs.length > 0
85
154
  ? opts.refs
86
155
  .split(",")
@@ -93,21 +162,33 @@ function register(program) {
93
162
  url: await (0, cerevox_1.getMaterialUri)(session, ref),
94
163
  });
95
164
  }
96
- const res = await session.ai.generateVideo({
165
+ const request = {
97
166
  prompt,
98
167
  model: model,
99
168
  duration: duration || undefined,
100
169
  resolution: opts.resolution,
101
170
  aspect_ratio: aspectRatio,
102
- mute: !opts.withAudio,
171
+ mute: !(opts.withAudio ?? true),
172
+ bgm: withBGM,
103
173
  optimize_camera: opts.optimizeCameraMotion,
104
174
  seed: opts.seed ? Number.parseInt(opts.seed, 10) : undefined,
105
175
  images: images.length > 0 ? images : undefined,
176
+ videos: sourceVideo
177
+ ? [
178
+ {
179
+ type: "base",
180
+ url: await (0, cerevox_1.getMaterialUri)(session, sourceVideo),
181
+ },
182
+ ]
183
+ : undefined,
106
184
  onProgress: (0, progress_1.createProgressSpinner)("inferencing"),
107
- });
185
+ timeout: 7200000,
186
+ };
187
+ const res = await session.ai.generateVideo(request);
188
+ const initialUrl = resolveResultUrl(res);
108
189
  try {
109
- if (res?.url) {
110
- const tosUrl = await (0, cerevox_1.syncToTOS)(res.url);
190
+ if (initialUrl) {
191
+ const tosUrl = await (0, cerevox_1.syncToTOS)(initialUrl);
111
192
  if (tosUrl) {
112
193
  res.url = tosUrl;
113
194
  }
@@ -118,7 +199,13 @@ function register(program) {
118
199
  const output = typeof opts.output === "string" ? opts.output : undefined;
119
200
  if (output) {
120
201
  const dir = process.cwd();
121
- const url = res.url;
202
+ const url = resolveResultUrl(res);
203
+ if (!url) {
204
+ process.stderr.write("Cannot save --output because no video URL was returned. Please retry later or run without --output to inspect raw response.\n");
205
+ process.exitCode = 1;
206
+ console.log(res);
207
+ return;
208
+ }
122
209
  const response = await fetch(url);
123
210
  const buffer = Buffer.from(await response.arrayBuffer());
124
211
  const filePath = node_path_1.default.resolve(dir, output);
@@ -133,15 +220,19 @@ function register(program) {
133
220
  // default action on `zerocut video`
134
221
  parent
135
222
  .option("--prompt <prompt>", "Text prompt for video generation (required)")
136
- .option("--duration <duration>", "Video duration in seconds")
137
- .option("--video <video>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
223
+ .option("--duration <duration>", "Video duration in seconds (default models: 1-16, avatar: 5-240, mv: 1-240)")
224
+ .option("--model <model>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
225
+ .option("--sourceVideo <video>", "Base video path/url for edit mode (requires --duration 3-10)")
138
226
  .option("--seed <seed>", "Random seed")
139
227
  .option("--firstFrame <image>", "First frame image path/url")
140
228
  .option("--lastFrame <image>", "Last frame image path/url")
229
+ .option("--storyboard <image>", "Storyboard image path/url")
230
+ .option("--persons <persons>", "Comma-separated person image paths/urls")
141
231
  .option("--refs <refs>", "Comma-separated reference image/video paths/urls")
142
232
  .option("--resolution <resolution>", "Resolution, e.g., 720p")
143
233
  .option("--aspectRatio <ratio>", "Aspect ratio: 9:16|16:9|1:1")
144
234
  .option("--withAudio", "Include audio track")
235
+ .option("--withBGM <withBGM>", "Include background music: true|false (default: true)")
145
236
  .option("--optimizeCameraMotion", "Optimize camera motion")
146
237
  .option("--output <file>", "Output file path")
147
238
  .action(videoCreateAction);
@@ -150,15 +241,19 @@ function register(program) {
150
241
  .command("create")
151
242
  .description("Create a new video; requires --prompt")
152
243
  .option("--prompt <prompt>", "Text prompt for video generation (required)")
153
- .option("--duration <duration>", "Video duration in seconds")
154
- .option("--video <video>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
244
+ .option("--duration <duration>", "Video duration in seconds (default models: 1-16, avatar: 5-240, mv: 1-240)")
245
+ .option("--model <model>", `Video model: ${allowedTypes.join("|")} (default: vidu)`)
246
+ .option("--sourceVideo <video>", "Base video path/url for edit mode (requires --duration 3-10)")
155
247
  .option("--seed <seed>", "Random seed")
156
248
  .option("--firstFrame <image>", "First frame image path/url")
157
249
  .option("--lastFrame <image>", "Last frame image path/url")
250
+ .option("--storyboard <image>", "Storyboard image path/url")
251
+ .option("--persons <persons>", "Comma-separated person image paths/urls")
158
252
  .option("--refs <refs>", "Comma-separated reference image/video paths/urls")
159
253
  .option("--resolution <resolution>", "Resolution, e.g., 720p")
160
254
  .option("--aspectRatio <ratio>", "Aspect ratio: 9:16|16:9|1:1")
161
255
  .option("--withAudio", "Include audio track")
256
+ .option("--withBGM <withBGM>", "Include background music: true|false (default: true)")
162
257
  .option("--optimizeCameraMotion", "Optimize camera motion")
163
258
  .option("--output <file>", "Output file path")
164
259
  .action(videoCreateAction);
@@ -207,7 +207,11 @@ function applyConfigInterceptor(program) {
207
207
  const current = (actionCommand ?? thisCommand);
208
208
  const name = current?.name?.();
209
209
  const parentName = current?.parent?.name?.();
210
- if (name === "help" || name === "skill" || name === "config" || parentName === "config")
210
+ if (name === "help" ||
211
+ name === "skill" ||
212
+ name === "config" ||
213
+ parentName === "config" ||
214
+ parentName === "skill")
211
215
  return;
212
216
  const ok = await ensureConfig();
213
217
  if (!ok) {
@@ -221,7 +225,8 @@ function applyConfigInterceptor(program) {
221
225
  });
222
226
  program.hook("postAction", async (thisCommand, actionCommand) => {
223
227
  const name = actionCommand?.name?.() ?? thisCommand?.name?.();
224
- if (name === "help" || name === "skill")
228
+ const parentName = actionCommand?.parent?.name?.() ?? thisCommand?.parent?.name?.();
229
+ if (name === "help" || name === "skill" || parentName === "skill")
225
230
  return;
226
231
  try {
227
232
  const cmd = (actionCommand ?? thisCommand);
@@ -1,6 +1,11 @@
1
1
  ---
2
2
  name: "zerocut-cli-tools"
3
3
  description: "Use ZeroCut CLI media and document tools. Invoke when user needs generate media, run ffmpeg/pandoc, sync resources, or save outputs."
4
+ homepage: "https://github.com/liubei-ai/zerocut-cli"
5
+ source: "https://github.com/liubei-ai/zerocut-cli"
6
+ requires_binaries:
7
+ - "zerocut-cli"
8
+ - "npx"
4
9
  ---
5
10
 
6
11
  # ZeroCut CLI Tools
@@ -8,6 +13,7 @@ description: "Use ZeroCut CLI media and document tools. Invoke when user needs g
8
13
  ## Purpose
9
14
 
10
15
  This skill provides a single reference for using ZeroCut CLI commands:
16
+
11
17
  - image generation
12
18
  - video generation
13
19
  - music generation
@@ -18,24 +24,89 @@ This skill provides a single reference for using ZeroCut CLI commands:
18
24
  ## When To Invoke
19
25
 
20
26
  Invoke this skill when the user asks to:
27
+
21
28
  - generate image, video, music, or speech audio
22
29
  - run ffmpeg or ffprobe command in sandbox
23
30
  - run pandoc conversion in sandbox
24
31
  - sync local/remote resources into sandbox
25
32
  - save generated results to local output files
26
33
 
34
+ ## Runtime Requirements
35
+
36
+ - The runtime expects `zerocut-cli` to be available in the current environment.
37
+ - If `zerocut-cli` is unavailable, use one of:
38
+ - `pnpm dlx zerocut-cli help`
39
+ - `pnpm add -g zerocut-cli && zerocut-cli help`
40
+ - `npx zerocut-cli help`
41
+ - This skill is instruction-only and relies on the installed CLI binary for actual enforcement.
42
+
43
+ ## Safety Boundaries
44
+
45
+ - Only sync files or URLs that the user explicitly requests for the current task.
46
+ - Never auto-discover, crawl, or fetch unrelated remote URLs.
47
+ - Treat remote resources as untrusted input and pass through CLI validation.
48
+ - Never sync secrets, key files, token files, or unrelated private directories.
49
+ - Write outputs only to the user-requested path, or to the current working directory following the naming rules.
50
+ - Do not bypass CLI command guards; ffmpeg/pandoc restrictions are enforced by the CLI implementation.
51
+
52
+ ## Credentials And Data Transfer
53
+
54
+ - Required credential is `apiKey` in local ZeroCut config.
55
+ - If `apiKey` is missing, stop immediately and request OTT token exchange.
56
+ - `TOS` in this document means object storage used by ZeroCut backend for media URLs.
57
+ - No extra credential beyond ZeroCut config is required for normal media sync/download flows.
58
+ - Do not send data to any external service other than endpoints used by configured ZeroCut session.
59
+
60
+ ## Required Pre-Check
61
+
62
+ Before every task, the agent must check configuration first:
63
+
64
+ ```bash
65
+ npx zerocut-cli config list
66
+ ```
67
+
68
+ If `apiKey` is missing or empty, the agent must immediately stop task execution and request an OTT token from the user. Do not continue any generation, conversion, or sandbox command until configuration is completed.
69
+
70
+ Required user action:
71
+
72
+ ```bash
73
+ npx zerocut-cli config --ott <token> --region <cn|us>
74
+ ```
75
+
76
+ Notes:
77
+
78
+ - `region` must be `cn` or `us`
79
+ - OTT exchange writes `apiKey` and `region` into config
80
+ - when running `config key` without passing a key directly, region must be `cn` or `us` and an OTT is required
81
+
27
82
  ## Command Reference
28
83
 
84
+ ### skill
85
+
86
+ Use `skill` command to fetch built-in skill markdown:
87
+
88
+ ```bash
89
+ npx zerocut-cli skill
90
+ npx zerocut-cli skill one-click-video
91
+ npx zerocut-cli skill edit-video
92
+ ```
93
+
94
+ Routing rules:
95
+
96
+ - When user requests one-click video creation, run `npx zerocut-cli skill one-click-video` to get the sub-skill and execute.
97
+ - When user requests video editing, run `npx zerocut-cli skill edit-video` to get the sub-skill and execute.
98
+
29
99
  ### image
30
100
 
31
101
  Default action: `create`
32
102
 
33
103
  ```bash
34
- zerocut image --prompt "a cat on a bike" --output out.png
35
- zerocut image create --prompt "a cat on a bike" --model seedream-5l --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
104
+ npx zerocut-cli image --prompt "a cat on a bike" --output out.png
105
+ npx zerocut-cli image create --prompt "a cat on a bike" --model seedream-5l --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
36
106
  ```
37
107
 
38
108
  Options:
109
+
39
110
  - `--prompt <prompt>` required
40
111
  - `--model <model>`
41
112
  - `--aspectRatio <ratio>`
@@ -43,91 +114,155 @@ Options:
43
114
  - `--refs <refs>` comma-separated local paths or URLs
44
115
  - `--output <file>` save generated file
45
116
 
117
+ Validation rules:
118
+
119
+ - `--prompt` must be non-empty
120
+ - `--model` allowed: `seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan|wan-pro`
121
+ - `--aspectRatio` allowed: `1:1|3:4|4:3|16:9|9:16|2:3|3:2|21:9|1:4|4:1|1:8|8:1`
122
+ - unless user specifies aspect ratio, default to `16:9`
123
+ - `--resolution` allowed: `1K|2K|4K`
124
+ - unless user specifies resolution, default to `1K`
125
+
46
126
  ### video
47
127
 
48
128
  Default action: `create`
49
129
 
50
130
  ```bash
51
- zerocut video --prompt "city night drive" --video vidu --duration 8 --output out.mp4
52
- zerocut video create --prompt "city night drive" --video vidu --aspectRatio 1:1 --refs ref1.png,ref2.png --output out.mp4
131
+ npx zerocut-cli video --prompt "city night drive" --model vidu --duration 8 --output out.mp4
132
+ npx zerocut-cli video create --prompt "city night drive" --model vidu --aspectRatio 1:1 --refs ref1.png,ref2.png --output out.mp4
133
+ npx zerocut-cli video --prompt "remix this clip" --model vidu --sourceVideo input.mp4 --duration 6 --output edited.mp4
53
134
  ```
54
135
 
55
136
  Options:
137
+
56
138
  - `--prompt <prompt>` required
57
- - `--video <model>`
58
- - `--duration <seconds>`
139
+ - `--model <model>`
140
+ - `--duration <seconds>` model-dependent integer
141
+ - `--sourceVideo <video>` base video for edit mode
59
142
  - `--seed <seed>`
60
143
  - `--firstFrame <image>`
61
144
  - `--lastFrame <image>`
145
+ - `--storyboard <image>`
146
+ - `--persons <persons>`
62
147
  - `--refs <assets>`
63
148
  - `--resolution <resolution>`
64
149
  - `--aspectRatio <ratio>`
65
150
  - `--withAudio`
151
+ - `--withBGM <withBGM>`
66
152
  - `--optimizeCameraMotion`
67
153
  - `--output <file>`
68
154
 
155
+ Validation rules:
156
+
157
+ - `--prompt` must be non-empty
158
+ - `--model` allowed: `zerocut3.0|zerocut3.0-pro|zerocut3.0-pro-fast|zerocut3.0-turbo|seedance-1.5-pro|seedance-2.0|seedance-2.0-fast|vidu|vidu-pro|viduq3|viduq3-turbo|kling|kling-v3|wan|wan-flash|sora2|sora2-pro|veo3.1|veo3.1-pro|zerocut-avatar-1.0|zerocut-avatar-1.5|zerocut-mv-1.0`
159
+ - `--duration` must follow model range:
160
+ - default models: `1-16`
161
+ - `zerocut-avatar-1.0` / `zerocut-avatar-1.5`: `5-240`
162
+ - `zerocut-mv-1.0`: `1-240`
163
+ - `--aspectRatio` allowed: `9:16|16:9|1:1`
164
+ - unless user specifies aspect ratio, default to `16:9`
165
+ - unless user specifies resolution, default to `720p`
166
+ - `--withBGM` allowed: `true|false`, default to `true`
167
+
168
+ Long video guidance:
169
+
170
+ - for default models, if required duration is over 16s, split into multiple generations (each 1-16s)
171
+ - then concatenate clips with ffmpeg
172
+ - example:
173
+
174
+ ```bash
175
+ printf "file 'part1.mp4'\nfile 'part2.mp4'\nfile 'part3.mp4'\n" > concat.txt
176
+ npx zerocut-cli ffmpeg --args -f concat -safe 0 -i concat.txt -c copy final.mp4 --resources concat.txt part1.mp4 part2.mp4 part3.mp4
177
+ ```
178
+
69
179
  ### music
70
180
 
71
181
  Default action: `create`
72
182
 
73
183
  ```bash
74
- zerocut music --prompt "lofi beat" --output music.mp3
75
- zerocut music create --prompt "lofi beat" --output music.mp3
184
+ npx zerocut-cli music --prompt "lofi beat" --output music.mp3
185
+ npx zerocut-cli music create --prompt "lofi beat" --output music.mp3
76
186
  ```
77
187
 
78
188
  Options:
189
+
79
190
  - `--prompt <prompt>` required
80
191
  - `--output <file>`
81
192
 
193
+ Validation rules:
194
+
195
+ - `--prompt` must be non-empty
196
+
82
197
  ### tts
83
198
 
84
199
  Default action: `create`
85
200
 
86
201
  ```bash
87
- zerocut tts --text "你好,欢迎使用 ZeroCut" --voiceId voice_xxx --output speech.mp3
88
- zerocut tts create --prompt "calm tone" --text "Hello world" --voiceId voice_xxx --output speech.mp3
202
+ npx zerocut-cli tts --text "你好,欢迎使用 ZeroCut" --voiceId voice_xxx --output speech.mp3
203
+ npx zerocut-cli tts create --prompt "calm tone" --text "Hello world" --voiceId voice_xxx --output speech.mp3
89
204
  ```
90
205
 
91
206
  Options:
207
+
92
208
  - `--prompt <prompt>`
93
209
  - `--text <text>` required
94
210
  - `--voiceId <voiceId>`
95
211
  - `--output <file>`
96
212
 
213
+ Validation rules:
214
+
215
+ - `--text` must be non-empty
216
+
97
217
  ### ffmpeg
98
218
 
99
219
  ```bash
100
- zerocut ffmpeg --args -i input.mp4 -vn output.mp3 --resources input.mp4
101
- zerocut ffmpeg --args -i input.mp4 -vf scale=1280:720 output.mp4 --resources input.mp4
220
+ npx zerocut-cli ffmpeg --args -i input.mp4 -vn output.mp3 --resources input.mp4
221
+ npx zerocut-cli ffmpeg --args -i input.mp4 -vf scale=1280:720 output.mp4 --resources input.mp4
102
222
  ```
103
223
 
104
224
  Options:
225
+
105
226
  - `--args <args...>` required, arguments appended after `ffmpeg`
106
227
  - `--resources <resources...>` optional, files/URLs to sync into sandbox materials
107
228
 
108
229
  Behavior:
109
- - command is validated to only allow `ffmpeg` or `ffprobe`
230
+
231
+ - `--args` must be provided
232
+ - command prefix is fixed as `ffmpeg`
110
233
  - for `ffmpeg`, `-y` is auto-injected when absent
111
234
  - output file is auto-downloaded from sandbox to local current directory
112
235
 
113
236
  ### pandoc
114
237
 
115
238
  ```bash
116
- zerocut pandoc --args input.md -o output.pdf --resources input.md
117
- zerocut pandoc --args input.md --output=output.docx --resources input.md template.docx
239
+ npx zerocut-cli pandoc --args input.md -o output.pdf --resources input.md
240
+ npx zerocut-cli pandoc --args input.md --output=output.docx --resources input.md template.docx
118
241
  ```
119
242
 
120
243
  Options:
244
+
121
245
  - `--args <args...>` required, arguments appended after `pandoc`
122
246
  - `--resources <resources...>` optional, files/URLs to sync into sandbox materials
123
247
 
124
248
  Behavior:
125
- - command is validated to only allow `pandoc`
126
- - output file must be specified in args with `-o`, `--output`, or `--output=...`
127
- - output file is auto-downloaded from sandbox to local current directory
249
+
250
+ - `--args` must be provided
251
+ - command prefix is fixed as `pandoc`
252
+ - output file is auto-downloaded only when args include `-o`, `--output`, or `--output=...`
128
253
 
129
254
  ## Output And Sync Rules
130
255
 
131
256
  - Media URLs from generation are synced to TOS when available.
132
257
  - `--output` saves files to an absolute path resolved from current working directory.
133
258
  - Missing parent directories for `--output` are created automatically.
259
+ - File type constraints:
260
+ - image output uses `.png`
261
+ - video output uses `.mp4`
262
+ - audio output (`music`/`tts`) uses `.mp3`
263
+ - If the user does not explicitly provide an output file name, the agent must generate one in the current directory:
264
+ - use 3-digit incremental prefix to avoid collisions, like `001_...`, `002_...`
265
+ - keep file name meaningful by task content, e.g. `001_city-night-drive.mp4`, `002_lofi-beat.mp3`
266
+ - ffmpeg and pandoc outputs follow the same naming rule:
267
+ - if output path is not explicitly specified by user, agent should generate a meaningful file name with `NNN_` prefix and correct extension
268
+ - for pandoc, keep extension aligned with conversion target format