npm - zerocut-cli - Versions diffs - 0.3.4 → 0.4.0 - Mend

zerocut-cli 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +2 -1
package/dist/commands/image.js +13 -1
package/dist/services/cerevox.d.ts +1 -0
package/dist/services/cerevox.js +5 -2
package/dist/skill/SKILL.md +4 -1
package/dist/skill/one-click-video/SKILL.md +339 -66
package/dist/skill/one-click-video/SKILL_CN.md +851 -0
package/package.json +1 -1
package/src/commands/image.ts +18 -0
package/src/services/cerevox.ts +5 -4
package/src/skill/SKILL.md +4 -1
package/src/skill/one-click-video/SKILL.md +339 -66
package/src/skill/one-click-video/SKILL_CN.md +851 -0

package/README.md CHANGED Viewed

@@ -102,6 +102,7 @@ zerocut config --ott <token> --region <cn|us>   # non-interactive
   - Options:
     - `--prompt <prompt>` (required)
     - `--model <model>` (seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan|wan-pro)
+    - `--type <type>` (default|storyboard|subject-turnaround, default: default)
     - `--aspectRatio <ratio>` (1:1|3:4|4:3|16:9|9:16|2:3|3:2|21:9|1:4|4:1|1:8|8:1)
     - `--resolution <resolution>` (1K|2K|4K)
     - `--refs <img1,img2,...>` (comma-separated paths/URLs)
@@ -158,7 +159,7 @@ zerocut config --ott <token> --region <cn|us>   # non-interactive
 ```bash
 # Create an image (default action)
-npx zerocut-cli image --prompt "a cat" --model seedream --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
+npx zerocut-cli image --prompt "a cat" --model seedream --type default --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
 # Create video (default action)
 npx zerocut-cli video --prompt "city night drive" --duration 12 --model vidu --refs frame1.png,frame2.png --resolution 720p --output movie.mp4

package/dist/commands/image.js CHANGED Viewed

@@ -38,11 +38,13 @@ function register(program) {
         "8:1",
     ];
     const allowedResolutions = ["1K", "2K", "4K"];
-    async function performImageGeneration(session, { prompt, model, aspectRatio, resolution, refsList, output, }) {
+    const allowedTypes = ["default", "storyboard", "subject-turnaround"];
+    async function performImageGeneration(session, { prompt, model, type, aspectRatio, resolution, refsList, output, }) {
         const referenceImages = await Promise.all(refsList.map(async (ref) => ({ url: await (0, cerevox_1.getMaterialUri)(session, ref) })));
         const onProgress = (0, progress_1.createProgressSpinner)("inferencing");
         const payload = {
             model: model || "seedream-5l",
+            type,
             prompt,
             aspect_ratio: aspectRatio,
             resolution,
@@ -95,6 +97,13 @@ function register(program) {
             return;
         }
         const modelArg = (model ?? undefined);
+        const type = typeof opts.type === "string" ? opts.type.trim() : "default";
+        if (!allowedTypes.includes(type)) {
+            process.stderr.write(`Invalid value for --type: ${type}. Allowed: ${allowedTypes.join("|")}\n`);
+            process.exitCode = 1;
+            return;
+        }
+        const typeArg = type;
         const aspectRatio = typeof opts.aspectRatio === "string"
             ? opts.aspectRatio.trim()
             : undefined;
@@ -121,6 +130,7 @@ function register(program) {
         await performImageGeneration(session, {
             prompt,
             model: modelArg,
+            type: typeArg,
             aspectRatio,
             resolution,
             refsList,
@@ -131,6 +141,7 @@ function register(program) {
     parent
         .option("--prompt <prompt>", "Text prompt for image generation (required)")
         .option("--model <model>", `Generator model: ${allowedModels.join("|")}`)
+        .option("--type <type>", `Image type: ${allowedTypes.join("|")}`)
         .option("--aspectRatio <ratio>", `Aspect ratio: ${allowedAspectRatios.join("|")}`)
         .option("--resolution <resolution>", `Resolution: ${allowedResolutions.join("|")}`)
         .option("--refs <refs>", "Comma-separated reference image paths/urls")
@@ -142,6 +153,7 @@ function register(program) {
         .description("Create a new image; requires --prompt")
         .option("--prompt <prompt>", "Text prompt for image generation (required)")
         .option("--model <model>", `Generator model: ${allowedModels.join("|")}`)
+        .option("--type <type>", `Image type: ${allowedTypes.join("|")}`)
         .option("--aspectRatio <ratio>", `Aspect ratio: ${allowedAspectRatios.join("|")}`)
         .option("--resolution <resolution>", `Resolution: ${allowedResolutions.join("|")}`)
         .option("--refs <refs>", "Comma-separated reference image paths/urls")

package/dist/services/cerevox.d.ts CHANGED Viewed

@@ -18,6 +18,7 @@ export declare function syncToTOS(url: string): Promise<string>;
 export declare function runFFMpegCommand(session: Session, command: string, resources?: string[]): Promise<{
     exitCode: number;
     outputFilePath: string;
+    tosUrl: string | undefined;
     data: {
         stdout: string;
         stderr: string | undefined;

package/dist/services/cerevox.js CHANGED Viewed

@@ -193,20 +193,23 @@ async function runFFMpegCommand(session, command, resources = []) {
         cwd: workDir,
     });
     const outputFilePath = trimmedCommand.startsWith("ffmpeg")
-        ? finalCommand.split(" ").pop() || ""
+        ? (finalCommand.split(" ").pop() || "").replace(/^["']|["']$/g, "")
         : "";
     const sandboxFilePath = (0, node_path_2.join)(workDir, outputFilePath);
+    let tosUrl;
     // 等待命令完成
     const result = await response.json();
     if (result.exitCode === 0 && outputFilePath) {
         const savePath = (0, node_path_2.join)(process.cwd(), (0, node_path_1.basename)(outputFilePath));
-        console.log(sandboxFilePath, savePath);
         const files = session.files;
         await files.download(sandboxFilePath, savePath);
+        const sandboxUrl = await getMaterialUri(session, savePath);
+        tosUrl = await syncToTOS(sandboxUrl);
     }
     return {
         exitCode: result.exitCode,
         outputFilePath,
+        tosUrl,
         data: {
             stdout: result.stdout || (!result.exitCode && result.stderr) || "",
             stderr: result.exitCode ? result.stderr : undefined,

package/dist/skill/SKILL.md CHANGED Viewed

@@ -102,13 +102,14 @@ Default action: `create`
 ```bash
 npx zerocut-cli image --prompt "a cat on a bike" --output out.png
-npx zerocut-cli image create --prompt "a cat on a bike" --model seedream-5l --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
+npx zerocut-cli image create --prompt "a cat on a bike" --model seedream-5l --type default --aspectRatio 1:1 --resolution 1K --refs ref1.png,ref2.jpg --output out.png
 ```
 Options:
 - `--prompt <prompt>` required
 - `--model <model>`
+- `--type <type>`
 - `--aspectRatio <ratio>`
 - `--resolution <resolution>`
 - `--refs <refs>` comma-separated local paths or URLs
@@ -118,6 +119,8 @@ Validation rules:
 - `--prompt` must be non-empty
 - `--model` allowed: `seedream|seedream-pro|seedream-5l|banana|banana2|banana-pro|wan|wan-pro`
+- `--type` allowed: `default|storyboard|subject-turnaround`
+- unless user specifies type, default to `default`
 - `--aspectRatio` allowed: `1:1|3:4|4:3|16:9|9:16|2:3|3:2|21:9|1:4|4:1|1:8|8:1`
 - unless user specifies aspect ratio, default to `16:9`
 - `--resolution` allowed: `1K|2K|4K`

package/dist/skill/one-click-video/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: one-click-video
-description: Use this skill when the user wants to produce a complete short video from a topic with fast CLI-driven workflow: scene planning, storyboard creation, scene video generation, optional background music, and final ffmpeg composition.
+description: Use this skill when the user asks for one-click end-to-end video creation. It delivers a complete final video with consistent visual style, coherent narrative, stable voice strategy, storyboard-driven scene generation, optional BGM, and final ffmpeg composition using zerocut-cli only.
 homepage: "https://github.com/liubei-ai/zerocut-cli"
 source: "https://github.com/liubei-ai/zerocut-cli"
 requires_binaries:
@@ -10,35 +10,309 @@ requires_binaries:
 # One-Click Video
-## Purpose
+## Role
-Create a deliverable final video by orchestrating `zerocut-cli` commands only.
+You are a film-level director, storyboard designer, dialogue writer, and final delivery coordinator. Your objective is not to output random clips, but to deliver one coherent final video with clear narrative progression, consistent visual style, stable voice identity, and synchronized audio-visual rhythm.
-## Runtime Requirements
+## Mission
-- Use CLI commands only, never MCP tool names.
-- Ensure `zerocut-cli` is available:
-  - `pnpm dlx zerocut-cli help`
-  - `pnpm add -g zerocut-cli && zerocut-cli help`
-  - `npx zerocut-cli help`
+Based on user topic, purpose, duration, style, references, and subject assets:
-## Required Pre-Check
+1. Decide whether recurring subjects are required.
+2. Define subject, outfit, prop, and style constraints.
+3. Split concept into 1-5 scenes.
+4. Create storyboard and camera rhythm for every scene.
+5. Generate scene videos with consistent constraints.
+6. Create matching BGM.
+7. Compose final deliverable.
-Run config check first:
+## Highest-Priority Constraints
+1. Follow full workflow. Do not skip core steps.
+2. Perform quality checks at each step.
+3. Use CLI commands only, never MCP tool names.
+Required workflow:
+project preparation -> subject creation -> scene planning (ensure `./materials/scene-bible.md` exists) -> storyboard generation -> scene video generation -> BGM generation -> final composition
+## Runtime Preparation
+Verify CLI availability:
+```bash
+npx zerocut-cli help
+```
+Run config pre-check:
 ```bash
 npx zerocut-cli config list
 ```
-If `apiKey` is missing, stop and request user OTT token:
+If `apiKey` is missing, stop and ask the user for an OTT token:
 ```bash
 npx zerocut-cli config --ott <token> --region <cn|us>
 ```
-## Video Parameter Contract
+## Model Policy
+### Image model policy
+- Default image model is `banana2` when user does not explicitly name a model.
+- Do not switch image model because of aesthetic preference words like "more cinematic" or "more realistic" unless model name is explicit.
+- This applies to subject turnaround, storyboard, and any reference image generation.
+### Video model policy
+- Default video model is `seedance-2.0-turbo` when user does not explicitly specify a compliant model.
+- Allowed video models in this skill:
+  - `seedance-2.0`
+  - `seedance-2.0-turbo`
+  - `seedance-2.0-fast`
+- If user requests an unsupported video model, fallback to `seedance-2.0-turbo`.
+### Priority
+1. Explicit user model
+2. Skill default model
+## Audio Strategy
+- Unless user explicitly asks for mute, keep scene audio (`--withAudio`).
+- Disable BGM at scene generation stage (`--withBGM false`).
+- Add BGM only in final composition stage.
+- If scenes include narration/dialogue, preserve intelligibility first.
+## Narration And Dialogue Rules
+- If user does not provide exact script, generate concise narration/dialogue aligned with story and duration.
+- If user provides script or key message, keep original intent, wording priority, and brand keywords.
+- Estimate speech duration with normal-slow pace.
+- Per-scene narration/dialogue total should not exceed 12 seconds.
+- If over limit, compress script or split into more scenes.
+## Voice Consistency Rules
+- Keep stable voice identity per character across scenes.
+- Keep narration voice stable across the full video unless story explicitly changes narrator.
+- Use voice formula internally:
+  - gender + age range + vocal traits + speaking pace + emotional baseline + language
+## Subject Creation Rules
+- Not all tasks require recurring subjects.
+- For narrative/ad/commercial stories with recurring characters, subject creation is mandatory.
+- If user provides subject reference images, use them to maintain consistency.
+- If user does not provide references, design stable subject specs first.
+Suggested subject turnaround command:
+```bash
+npx zerocut-cli image --prompt "<subject turnaround prompt>" --model banana2 --type subject-turnaround --aspectRatio 1:1 --resolution 1K --output 001_subject_turnaround.png
+```
+## Scene Planning And Scene-Bible Rules
+Scene planning is a critical quality gate. Before any storyboard or video generation, create a complete `./materials/scene-bible.md` and treat it as the single source of truth for all downstream prompts.
+### Mandatory Scene-Bible Checklist
+`scene-bible.md` must include:
+1. **Project intent**
+   - user goal, platform, target audience, runtime target, delivery format
+2. **Global style lock**
+   - style keywords, texture/look, color system, lighting logic, camera language, post-look constraints
+3. **Model lock**
+   - image model and video model selected by policy
+4. **Subject lock**
+   - subject roster, appearance lock, outfit lock, prop lock, relationship rules
+5. **Voice lock**
+   - narrator/character voice formula and language rules
+6. **Scene plan**
+   - 1-5 scenes, each with objective, emotion shift, estimated duration, and shot count
+7. **Shot plan per scene**
+   - each shot has self-contained prompt requirements and camera intention
+8. **Narration/dialogue plan**
+   - per scene script and estimated speech duration (must stay within 12s per scene)
+9. **Asset binding**
+   - which references are required for each scene (`--storyboard`, `--persons`, `--refs`)
+10. **Output plan**
+    - deterministic output filenames for storyboard, scene clips, bgm, and final output
+11. **Quality gates**
+    - pass/fail checks before moving to storyboard and before moving to video generation
+### Scene-Bible Template
+Use this structure when writing `./materials/scene-bible.md`:
+```markdown
+# Scene Bible
+## 1. Project Intent
+- Goal:
+- Platform:
+- Audience:
+- Runtime Target:
+- Delivery:
+## 2. Global Style Lock
+- Style:
+- Texture:
+- Color System:
+- Lighting Logic:
+- Camera Language:
+- Post-Look:
+## 3. Model Lock
+- Image Model:
+- Video Model:
+## 4. Subject Lock And References
+- Subject A:
+  - Appearance Lock:
+  - Outfit Lock:
+  - Prop Lock:
+  - Reference Files:
+## 5. Voice Lock
+- Narrator Formula:
+- Character A Formula:
+- Language:
+## 6. Scene Plan (1-5 Scenes)
+### Scene 1
+- Goal:
+- Emotion:
+- Duration:
+- Shot Count:
+- Storyboard Output:
+- Video Output:
+## 7. Shot Plan
+### Scene 1 Shot 1
+- Shot Purpose:
+- Camera:
+- Action:
+- Prompt Must Include:
-When generating scene videos, only use legal `video` command parameters:
+## 8. Narration/Dialogue Plan
+### Scene 1
+- Script:
+- Estimated Speech Duration:
+## 9. Asset Binding
+### Scene 1
+- Storyboard:
+- Persons:
+- Refs:
+## 10. Output Naming Plan
+- 001_subject_turnaround.png
+- 010_storyboard_scene1.png
+- 020_scene1.mp4
+- 090_bgm.mp3
+- 110_final.mp4
+## 11. Quality Gates
+- Gate A (Before Storyboard):
+- Gate B (Before Video):
+```
+### Enforced Planning Rules
+- Do not generate storyboard until `scene-bible.md` exists and all mandatory sections are filled.
+- Do not generate scene video until scene-level entries are complete in scene bible.
+- All storyboard prompts and video prompts must inherit locked constraints from scene bible.
+- If user updates style/story/character constraints, update `scene-bible.md` first, then regenerate affected assets.
+- If quality gates fail, revise the plan instead of forcing downstream generation.
+### Quality Gate Definitions
+- Gate A (Before Storyboard) passes only when:
+  - global style lock is explicit and non-ambiguous
+  - scene count, duration, and shot count are fully defined
+  - subject lock and voice lock are complete for all recurring characters
+  - asset binding is ready for each scene
+- Gate B (Before Video) passes only when:
+  - every scene has a storyboard prompt and output target
+  - every scene has narration/dialogue text and duration estimate
+  - every scene prompt is self-contained and does not rely on previous context
+  - scene durations and speech durations are within constraints
+### Scene Planning Deliverables Per Scene
+For each scene, planning output must include:
+1. one-sentence scene objective
+2. emotional transition
+3. exact duration target
+4. shot list with camera intention
+5. storyboard prompt draft
+6. video prompt draft
+7. narration/dialogue draft
+8. required assets list (`--storyboard`, `--persons`, `--refs`)
+## Scene And Camera Rules
+- Split into 1-5 scenes.
+- Recommended scene duration is 12-15s, and scene target should not exceed 15s.
+- Each scene can contain 1-6 shots.
+- Maintain consistency of style, lighting logic, camera language, character identity, and voice design across scenes.
+## Storyboard Rules
+- Every scene must have a storyboard image.
+- Storyboard must include environment, subject position/action, framing, camera movement, rhythm, and key emotion.
+- If a subject appears in that scene, include matching subject references.
+- Storyboard prompts must be complete and self-contained.
+Storyboard command:
+```bash
+npx zerocut-cli image --prompt "<scene storyboard prompt>" --model banana2 --type storyboard --aspectRatio 16:9 --resolution 1K --refs 001_subject_turnaround.png --output 010_storyboard_scene1.png
+```
+If no subject references are required in that scene, omit `--refs`.
+## Prompt Independence Hard Constraint
+- Every shot prompt and every scene video prompt must be fully self-contained.
+- Do not use shorthand such as:
+  - "same as previous shot"
+  - "continue above"
+  - "keep unchanged"
+  - "refer to previous settings"
+## Video Generation Rules
+- Scene video must be grounded on storyboard and scene-specific references.
+- Every scene prompt must repeat key constraints explicitly.
+- Defaults:
+  - `--resolution 720p`
+  - `--aspectRatio 9:16` unless user requests otherwise
+  - `--withAudio`
+  - `--withBGM false`
+- Keep each scene duration compatible with script and pacing.
+Allowed video parameters:
 - `--prompt <prompt>` required
 - `--duration <seconds>`
@@ -48,93 +322,92 @@ When generating scene videos, only use legal `video` command parameters:
 - `--firstFrame <image>`
 - `--lastFrame <image>`
 - `--storyboard <image>`
-- `--persons <persons>` comma-separated image paths/URLs, mapped to `type=person`
+- `--persons <persons>`
 - `--refs <assets>`
 - `--resolution <resolution>`
 - `--aspectRatio <ratio>`
 - `--withAudio`
-- `--withBGM <withBGM>` `true|false`, default `true`
+- `--withBGM <withBGM>`
 - `--optimizeCameraMotion`
 - `--output <file>`
-## Output Naming Rules
-- If user does not provide output path, generate meaningful names with 3-digit prefix:
-  - `001_storyboard_scene1.png`
-  - `002_scene1.mp4`
-  - `003_scene2.mp4`
-  - `004_bgm.mp3`
-  - `005_final.mp4`
-## Workflow
-1. Understand topic, goal, duration, platform orientation, and style.
-2. Split into 1-5 scenes with clear narrative progression.
-3. Create one storyboard image for each scene.
-4. Generate one video clip for each scene using only legal video parameters.
-5. Optionally generate one background music track with `music` command.
-6. Compose final video with `ffmpeg` command.
-## Scene Storyboard Step
-Generate storyboard for each scene:
-```bash
-npx zerocut-cli image --prompt "<scene storyboard prompt>" --model banana2 --aspectRatio 16:9 --resolution 1K --output 001_storyboard_scene1.png
-```
-## Scene Video Step
-Use storyboard as `--storyboard`, optional character images via `--persons`, and optional extra references via `--refs`.
+Scene video example:
 ```bash
 npx zerocut-cli video \
-  --prompt "<scene video prompt>" \
-  --model seedance-2.0 \
+  --prompt "<self-contained scene video prompt>" \
+  --model seedance-2.0-turbo \
   --duration 12 \
   --resolution 720p \
   --aspectRatio 16:9 \
-  --storyboard 001_storyboard_scene1.png \
+  --storyboard 010_storyboard_scene1.png \
   --persons actor_front.png,actor_side.png \
   --refs prop_ref.png,env_ref.png \
   --withAudio \
-  --withBGM true \
-  --output 002_scene1.mp4
+  --withBGM false \
+  --output 020_scene1.mp4
 ```
-## Background Music Step
+## BGM Rules
+- Generate BGM only after all scene videos are ready.
+- Recommended durations: `30|60|90|120|150` seconds.
+- BGM duration must be longer than total scene duration.
+- Keep BGM supportive, never overpower dialogue/narration.
-Generate one BGM track when needed:
+BGM example:
 ```bash
-npx zerocut-cli music --prompt "<bgm prompt>" --output 004_bgm.mp3
+npx zerocut-cli music --prompt "<bgm prompt>" --output 090_bgm.mp3
 ```
-## Final Composition Step (ffmpeg only)
+## Final Composition Rules
+- Concatenate scene videos in order.
+- Mix original scene audio and BGM with proper balance.
+- Keep final pacing, narrative continuity, and style consistency.
 Create concat list:
 ```bash
-printf "file '002_scene1.mp4'\nfile '003_scene2.mp4'\n" > concat.txt
+printf "file '020_scene1.mp4'\nfile '030_scene2.mp4'\n" > concat.txt
 ```
-Concatenate scene clips:
+Concatenate:
 ```bash
-npx zerocut-cli ffmpeg --args -f concat -safe 0 -i concat.txt -c copy 005_concat.mp4 --resources concat.txt 002_scene1.mp4 003_scene2.mp4
+npx zerocut-cli ffmpeg --args -f concat -safe 0 -i concat.txt -c copy 100_concat.mp4 --resources concat.txt 020_scene1.mp4 030_scene2.mp4
 ```
-Mix BGM with original video audio:
+Mix BGM:
 ```bash
-npx zerocut-cli ffmpeg --args -i 005_concat.mp4 -i 004_bgm.mp3 -filter_complex "[1:a]volume=0.2[bgm];[0:a][bgm]amix=inputs=2:duration=first:dropout_transition=2[aout]" -map 0:v -map "[aout]" -c:v copy -c:a aac 005_final.mp4 --resources 005_concat.mp4 004_bgm.mp3
+npx zerocut-cli ffmpeg --args -i 100_concat.mp4 -i 090_bgm.mp3 -filter_complex "[1:a]volume=0.2[bgm];[0:a][bgm]amix=inputs=2:duration=first:dropout_transition=2[aout]" -map 0:v -map "[aout]" -c:v copy -c:a aac 110_final.mp4 --resources 100_concat.mp4 090_bgm.mp3
 ```
-## Hard Rules
+## Failure Handling
+- If command output contains `Not enough credits`, stop immediately and ask user to recharge.
+- If a scene drifts away from global style, revise prompt and regenerate before final composition.
+- If voice identity drifts, revise script/constraints and regenerate the affected scene.
+- Never skip core steps or output an incomplete, low-quality final result.
+## Output Naming Rules
+If user does not provide explicit output names, use meaningful incremental names:
+- `001_subject_turnaround.png`
+- `010_storyboard_scene1.png`
+- `020_scene1.mp4`
+- `030_scene2.mp4`
+- `090_bgm.mp3`
+- `100_concat.mp4`
+- `110_final.mp4`
+## Non-Negotiable Rules
-- Do not introduce non-CLI tool calls.
-- Do not use parameters outside the legal `video` parameter contract.
-- Keep single scene duration within model limits.
-- Keep visual style consistent across all scenes.
-- Keep role identity consistent when using `--persons`.
-- Do not generate subtitles in this workflow.
+- Use only `zerocut-cli` commands.
+- Keep prompts complete and executable without hidden context.
+- Keep style consistency across all scenes.
+- Keep character and voice consistency across all scenes.
+- Keep per-scene narration/dialogue duration within 12 seconds at normal-slow pace.