@renoise/video-maker 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Gemini API client via Renoise gateway.
5
+ * Zero npm dependencies — uses native fetch.
6
+ *
7
+ * Usage:
8
+ * # Text only
9
+ * node gemini.mjs "Explain quantum computing"
10
+ *
11
+ * # With image(s)
12
+ * node gemini.mjs --file photo.jpg "Describe this product"
13
+ * node gemini.mjs --file a.jpg --file b.jpg "Compare these two"
14
+ *
15
+ * # With video
16
+ * node gemini.mjs --file clip.mp4 --resolution low "Summarize this clip"
17
+ *
18
+ * # With uploaded file URI (from file upload skill)
19
+ * node gemini.mjs --file-uri "https://...fileUri" --file-mime video/mp4 "Analyze this video"
20
+ *
21
+ * # JSON output mode
22
+ * node gemini.mjs --json "Return a JSON object with name and age"
23
+ *
24
+ * Options:
25
+ * --file <path> Attach a local file (image/video). Repeatable.
26
+ * --file-uri <uri> Attach an uploaded file by URI. Requires --file-mime.
27
+ * --file-mime <mime> MIME type for --file-uri.
28
+ * --resolution <level> Media resolution: low|medium|high|ultra_high (default: medium)
29
+ * --model <name> Model name (default: gemini-3.1-pro)
30
+ * --temperature <n> Temperature (default: 1.0)
31
+ * --max-tokens <n> Max output tokens (default: 8192)
32
+ * --json Request JSON response format
33
+ *
34
+ * Environment:
35
+ * RENOISE_API_KEY Required. Get one at https://www.renoise.ai
36
+ */
37
+
38
+ import fs from "fs/promises";
39
+ import path from "path";
40
+
41
+ // --- Auth ---
42
+ const RENOISE_API_KEY = process.env.RENOISE_API_KEY;
43
+ if (!RENOISE_API_KEY) {
44
+ console.error(
45
+ "RENOISE_API_KEY not set. Get one at: https://www.renoise.ai"
46
+ );
47
+ process.exit(1);
48
+ }
49
+
50
+ // --- MIME detection ---
51
+ const MIME_MAP = {
52
+ ".jpg": "image/jpeg",
53
+ ".jpeg": "image/jpeg",
54
+ ".png": "image/png",
55
+ ".webp": "image/webp",
56
+ ".gif": "image/gif",
57
+ ".mp4": "video/mp4",
58
+ ".mov": "video/quicktime",
59
+ ".webm": "video/webm",
60
+ };
61
+
62
+ function getMimeType(filePath) {
63
+ return (
64
+ MIME_MAP[path.extname(filePath).toLowerCase()] ?? "application/octet-stream"
65
+ );
66
+ }
67
+
68
+ // --- Resolution mapping ---
69
+ const RESOLUTION_MAP = {
70
+ low: "media_resolution_low",
71
+ medium: "media_resolution_medium",
72
+ high: "media_resolution_high",
73
+ ultra_high: "media_resolution_ultra_high",
74
+ };
75
+
76
+ // --- Parse args ---
77
+ function parseArgs(argv) {
78
+ const files = [];
79
+ let fileUri = null;
80
+ let fileMime = null;
81
+ let resolution = "medium";
82
+ let model = "gemini-3.1-pro";
83
+ let temperature = 1.0;
84
+ let maxTokens = 8192;
85
+ let json = false;
86
+ const textParts = [];
87
+
88
+ for (let i = 0; i < argv.length; i++) {
89
+ switch (argv[i]) {
90
+ case "--file":
91
+ files.push(argv[++i]);
92
+ break;
93
+ case "--file-uri":
94
+ fileUri = argv[++i];
95
+ break;
96
+ case "--file-mime":
97
+ fileMime = argv[++i];
98
+ break;
99
+ case "--resolution":
100
+ resolution = argv[++i];
101
+ break;
102
+ case "--model":
103
+ model = argv[++i];
104
+ break;
105
+ case "--temperature":
106
+ temperature = parseFloat(argv[++i]);
107
+ break;
108
+ case "--max-tokens":
109
+ maxTokens = parseInt(argv[++i], 10);
110
+ break;
111
+ case "--json":
112
+ json = true;
113
+ break;
114
+ default:
115
+ textParts.push(argv[i]);
116
+ }
117
+ }
118
+
119
+ return {
120
+ files,
121
+ fileUri,
122
+ fileMime,
123
+ resolution,
124
+ model,
125
+ temperature,
126
+ maxTokens,
127
+ json,
128
+ prompt: textParts.join(" "),
129
+ };
130
+ }
131
+
132
+ // --- Build parts ---
133
+ async function buildParts(opts) {
134
+ const parts = [];
135
+ const resLevel = RESOLUTION_MAP[opts.resolution] ?? RESOLUTION_MAP.medium;
136
+
137
+ // Local files → inline base64
138
+ for (const filePath of opts.files) {
139
+ const data = await fs.readFile(filePath);
140
+ parts.push({
141
+ inlineData: {
142
+ mimeType: getMimeType(filePath),
143
+ data: data.toString("base64"),
144
+ },
145
+ mediaResolution: { level: resLevel },
146
+ });
147
+ }
148
+
149
+ // Uploaded file URI
150
+ if (opts.fileUri) {
151
+ parts.push({
152
+ fileData: {
153
+ mimeType: opts.fileMime ?? "application/octet-stream",
154
+ fileUri: opts.fileUri,
155
+ },
156
+ });
157
+ }
158
+
159
+ // Text prompt (always last)
160
+ if (opts.prompt) {
161
+ parts.push({ text: opts.prompt });
162
+ }
163
+
164
+ return parts;
165
+ }
166
+
167
+ // --- Main ---
168
+ async function main() {
169
+ const opts = parseArgs(process.argv.slice(2));
170
+
171
+ if (!opts.prompt && opts.files.length === 0 && !opts.fileUri) {
172
+ console.error(
173
+ "Usage: node gemini.mjs [--file <path>] [--resolution low|medium|high|ultra_high] <prompt>"
174
+ );
175
+ process.exit(1);
176
+ }
177
+
178
+ const endpoint = `https://staging--ujgsvru36x4korjj10nq.edgespark.app/api/public/llm/proxy/v1beta/models/${opts.model}:generateContent?key=${RENOISE_API_KEY}`;
179
+
180
+ const parts = await buildParts(opts);
181
+
182
+ const body = {
183
+ contents: [{ role: "user", parts }],
184
+ generationConfig: {
185
+ temperature: opts.temperature,
186
+ maxOutputTokens: opts.maxTokens,
187
+ },
188
+ };
189
+
190
+ if (opts.json) {
191
+ body.generationConfig.responseMimeType = "application/json";
192
+ }
193
+
194
+ const res = await fetch(endpoint, {
195
+ method: "POST",
196
+ headers: { "Content-Type": "application/json" },
197
+ body: JSON.stringify(body),
198
+ });
199
+
200
+ if (!res.ok) {
201
+ const errText = await res.text();
202
+ console.error(`Gemini API error ${res.status}: ${errText}`);
203
+ process.exit(1);
204
+ }
205
+
206
+ const data = await res.json();
207
+ const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
208
+
209
+ if (!text) {
210
+ console.error("No text in response:", JSON.stringify(data, null, 2));
211
+ process.exit(1);
212
+ }
213
+
214
+ console.log(text);
215
+ }
216
+
217
+ main().catch((err) => {
218
+ console.error("ERROR:", err.message);
219
+ process.exit(1);
220
+ });
@@ -13,6 +13,8 @@ metadata:
13
13
 
14
14
  Generate AI videos and images through the Renoise platform.
15
15
 
16
+ > **IMPORTANT**: The Renoise website is **https://www.renoise.ai** — NOT renoise.com. Always use `renoise.ai` when referencing the platform URL.
17
+
16
18
  ## Supported Models
17
19
 
18
20
  | Model | Type | Description |
@@ -96,7 +98,7 @@ See `${CLAUDE_SKILL_DIR}/references/video-capabilities.md` for details.
96
98
 
97
99
  CLI path: `${CLAUDE_SKILL_DIR}/renoise-cli.mjs` (Node.js 18+)
98
100
 
99
- API Key and base URL are configured via environment variables (`RENOISE_API_KEY`, `RENOISE_BASE_URL`).
101
+ API Key and base URL are configured via environment variables (`RENOISE_API_KEY`, `RENOISE_BASE_URL`). Get your API key at https://www.renoise.ai (NOT renoise.com).
100
102
 
101
103
  ## CLI Commands
102
104
 
@@ -1,15 +1,17 @@
1
1
  ---
2
2
  name: short-film-editor
3
3
  description: >
4
- 短片剪辑师:音乐先行,按节拍卡点拆分故事为多段(5-15s不等),
5
- 生成HTML分镜预览供确认,维护角色/画风一致性,批量生成并输出组装指南。
6
- 触发词:short film, multi-clip, 短片, 1分钟视频, story video, 多段视频
4
+ Short film editor: music-first workflow, splits story into beat-synced segments (5-15s each),
5
+ generates HTML storyboard preview for confirmation, maintains character/style consistency,
6
+ batch generates and outputs assembly guide.
7
+ Use when user says "short film", "multi-clip", "story video", "multi-segment video",
8
+ "1-minute video".
7
9
  allowed-tools: Bash, Read
8
10
  metadata:
9
11
  author: renoise
10
12
  version: 0.1.0
11
13
  category: video-production
12
- tags: [short-film, multi-clip, narrative, story, 短片]
14
+ tags: [short-film, multi-clip, narrative, story]
13
15
  ---
14
16
 
15
17
  # Short Film Editor
@@ -86,7 +88,7 @@ You are a short film editor specializing in multi-clip AI video production. You
86
88
 
87
89
  **If user has or wants music**, follow Steps 1-3 below.
88
90
 
89
- **If user skips music** (e.g. "先不用音乐" / "no music for now"), skip to **Step 4 — Manual Rhythm**:
91
+ **If user skips music** (e.g. "no music for now"), skip to **Step 4 — Manual Rhythm**:
90
92
  - Define segments based on narrative pacing, not equal splits.
91
93
  - Vary durations: establishing shots 7-10s, action bursts 5-6s, aftermath/resolution 5-7s.
92
94
  - Aim for 4-7 segments depending on total duration.
@@ -149,7 +151,7 @@ Output JSON:
149
151
  | Climax / collision | 6-8s | Intense, dense |
150
152
  | Aftermath / resolution | 5-7s | Slow, lingering |
151
153
 
152
- **In-clip cutting (镜内切换)**:
154
+ **In-clip cutting**:
153
155
  Real films average 2-4s per camera angle (action films: 1-2s). Seedance minimum is 5s per clip, so use **time-annotated camera changes within each clip** to simulate fast cutting:
154
156
 
155
157
  ```
@@ -247,13 +249,14 @@ Store each prompt in the shot's `prompt` field in `project.json`.
247
249
 
248
250
  ### Step 2 — Generate Reference Images
249
251
 
250
- Three image sources (ask user preference, or default to Gemini):
252
+ Three image sources (ask user preference, or default to Renoise):
251
253
 
252
- **Option A — Gemini (default, automatic, free)**:
254
+ **Option A — Renoise (default)**:
255
+ Use `renoise-gen` with `nano-banana-2` model to generate a reference image. Prompt should describe the shot scene + character appearance + key action + lighting (NO camera movement). Save the result to `${PROJECT_DIR}/storyboard/${shot_id}.png`.
253
256
  ```bash
254
- npx tsx ${CLAUDE_PLUGIN_ROOT}/skills/scene-generate/scripts/generate-scene.ts \
255
- "<shot scene + character appearance + key action + lighting — NO camera movement>" \
256
- "${PROJECT_DIR}/storyboard/${shot_id}.png"
257
+ node ${CLAUDE_PLUGIN_ROOT}/skills/renoise-gen/renoise-cli.mjs task generate \
258
+ --model nano-banana-2 --resolution 2k --ratio 16:9 \
259
+ --prompt "<shot scene + character appearance + key action + lighting>"
257
260
  ```
258
261
 
259
262
  **Option B — Midjourney (higher quality, recommended for stylized projects)**:
@@ -271,12 +274,14 @@ Submit all shots in parallel via `/v1/tob/diffusion`, poll for completion, downl
271
274
  **Option C — User-provided**:
272
275
  User manually places reference images in `${PROJECT_DIR}/storyboard/S1.png`, `S2.png`, etc.
273
276
 
274
- **Option D — Gemini Grid Storyboard (recommended for best consistency)**:
275
- Generate ALL shots in a single grid image so characters and style are naturally consistent across panels, then split into individual reference images.
277
+ **Option D — Renoise Grid Storyboard (recommended for best consistency)**:
278
+ Generate ALL shots in a single grid image via `renoise-gen` `nano-banana-2` so characters and style are naturally consistent across panels, then split into individual reference images.
276
279
 
277
- 1. Generate a single N-panel grid with Gemini:
278
- ```
279
- Prompt: "Generate a single N-panel [manga/cinematic] storyboard grid image.
280
+ 1. Generate a single N-panel grid:
281
+ ```bash
282
+ node ${CLAUDE_PLUGIN_ROOT}/skills/renoise-gen/renoise-cli.mjs task generate \
283
+ --model nano-banana-2 --resolution 2k --ratio 16:9 \
284
+ --prompt "Generate a single N-panel [manga/cinematic] storyboard grid image.
280
285
  Layout: 2 rows x 4 columns grid with thin white borders.
281
286
  The SAME two characters must appear consistently across all panels:
282
287
  Character A: [verbatim from Character Bible]
@@ -312,18 +317,12 @@ Reference image prompts should include:
312
317
 
313
318
  ### Step 3 — Generate HTML Storyboard Preview
314
319
 
315
- ```bash
316
- npx tsx ${CLAUDE_SKILL_DIR}/scripts/generate-storyboard-html.ts \
317
- --project-file "${PROJECT_DIR}/project.json" \
318
- --output "${PROJECT_DIR}/storyboard.html"
319
- ```
320
-
321
- This generates a single self-contained HTML file with:
320
+ Generate a single self-contained HTML file from `${PROJECT_DIR}/project.json` and save it to `${PROJECT_DIR}/storyboard.html`. The HTML should include:
322
321
  - **Header**: Project title, total duration, clip count, BPM, character summary, style summary
323
322
  - **Music timeline**: Visual bar showing sections and cut points
324
323
  - **Shot cards**: One card per shot with reference image, scene/action, dialogue/beats, continuity, and collapsible Seedance prompt
325
324
  - **Reference images**: Base64-embedded inline (single-file, shareable). Use `--skip-images` to skip Gemini generation and read existing images from `storyboard/` directory (useful when using Midjourney or user-provided images).
326
- - **UI language**: Matches user's language (default: Chinese). White theme, bold confident design.
325
+ - **UI language**: English. White theme, bold confident design.
327
326
  - **Responsive**: Viewable on phone
328
327
 
329
328
  Open for preview:
@@ -129,7 +129,7 @@ AI-generated clips will achieve ~80% visual consistency when following these tec
129
129
  - Subtle speed adjustments for timing
130
130
  - Audio continuity (shared BGM) creates perceived visual continuity
131
131
 
132
- ## Grid Storyboard Method (宫格分镜法)
132
+ ## Grid Storyboard Method
133
133
 
134
134
  ### Why One Image > Many Images
135
135
 
@@ -147,7 +147,7 @@ When generating reference images for each shot independently (even with the same
147
147
  ### Workflow
148
148
 
149
149
  1. Write a single prompt describing all panels with verbatim character descriptions
150
- 2. Generate one grid image via Gemini (`gemini-3-pro-image-preview`)
150
+ 2. Generate one grid image via `renoise-gen` (`nano-banana-2`)
151
151
  3. Split into individual panels: `bash split-grid.sh grid.png storyboard/ 2 4`
152
152
  4. Upload each panel as material for Image-to-Video generation
153
153
  5. Each Seedance clip now has a visual anchor from the same source
@@ -3,8 +3,8 @@ name: tiktok-content-maker
3
3
  description: >
4
4
  TikTok e-commerce short video script generator. Analyzes product photos,
5
5
  generates 15s video scripts with video prompts and English dialogue.
6
- Use when user says "TikTok product video", "ecommerce video", "电商视频",
7
- "带货视频", "商品视频", "拍商品". Do NOT use for non-ecommerce videos or
6
+ Use when user says "TikTok product video", "ecommerce video",
7
+ "product video", "sales video", "shoot product". Do NOT use for non-ecommerce videos or
8
8
  general creative direction (use director instead).
9
9
  allowed-tools: Bash, Read
10
10
  metadata:
@@ -14,130 +14,127 @@ metadata:
14
14
  tags: [product, ecommerce, tiktok]
15
15
  ---
16
16
 
17
- # Content Maker — 电商短视频脚本 + 视频生成
17
+ # Content Maker — E-commerce Short Video Script + Generation
18
18
 
19
19
  ## Overview
20
20
 
21
- 电商短视频全流程工具:用户提供商品图(+ 可选模特图)→ 分析商品信息生成 15 TikTok 脚本(视频 prompt,含英文台词嵌入)→ 提交视频生成任务。
21
+ End-to-end e-commerce short video tool: user provides product images (+ optional model images) → analyze product info → generate a 15-second TikTok script (video prompt with embedded English dialogue) → submit video generation task.
22
22
 
23
23
  ## Workflow
24
24
 
25
- ### Phase 1: 素材收集 & 商品分析
25
+ ### Phase 1: Material Collection & Product Analysis
26
26
 
27
- 1. **收集素材路径**:向用户索要图片
28
- - `商品图路径`(必需):产品主图。**最佳:干净白底纯产品图,无文字/标注/装饰**。有营销文字覆盖的图会干扰模型。
29
- - `模特图路径`(可选,仅供分析参考):展示穿搭/使用效果的图。**注意:模特图仅用于理解产品使用方式,不上传到 Renoise**(隐私检测会拦截含真人面孔的图片)。
27
+ 1. **Collect material paths**: Ask user for images
28
+ - `Product image path` (required): Product hero image. **Best: clean white-background product photo with no text/labels/decorations**. Images with marketing text overlays will interfere with the model.
29
+ - `Model image path` (optional, for analysis reference only): Shows how the product is worn/used. **Note: Model images are only used to understand product usage — they are NOT uploaded to Renoise** (privacy detection will block images containing realistic human faces).
30
30
 
31
- 2. **分析商品信息**:
32
- - 如果有 Gemini API 可用,调用 Gemini 分析:
33
- ```bash
34
- cd ${CLAUDE_PLUGIN_ROOT} && npm install --silent && npx tsx ${CLAUDE_SKILL_DIR}/scripts/analyze-images.ts "<商品图路径>" "<模特图路径>"
35
- ```
36
- - 也可以直接通过 Read 工具查看图片,人工分析商品信息
37
- - 需要提取:商品类型、颜色、材质、卖点、品牌调性、适用场景
38
- - **(关键)从使用场景图中理解产品的正确使用方式**:
39
- - 用户的姿势是什么?(站/坐/躺/走)
40
- - 产品放在身体哪个位置?(手持/地面/桌面/身体下方)
41
- - 产品与身体的交互方式?(用手按压 vs 用体重压 vs 穿戴 vs 涂抹)
42
- - 使用场景在哪?(健身房/办公室/家里/户外)
43
- - 如果用户提供了商品链接,用 WebFetch 抓取产品详情页补充理解
31
+ 2. **Analyze product info**:
32
+ - Use the `gemini-gen` skill to analyze product images — send the image(s) with a prompt requesting product analysis (type, color, material, selling points, brand tone, scene suggestions)
33
+ - Alternatively, view images directly via the Read tool and analyze manually
34
+ - Extract: product type, color, material, selling points, brand tone, applicable scenarios
35
+ - **(Critical) Understand correct product usage from lifestyle images**:
36
+ - What is the user's posture? (standing/sitting/lying/walking)
37
+ - Where is the product positioned on the body? (handheld/floor/table/under body)
38
+ - How does the product interact with the body? (hand pressure vs body weight vs wearing vs applying)
39
+ - Where is the usage scenario? (gym/office/home/outdoors)
40
+ - If the user provides a product link, use WebFetch to scrape product detail page for additional context
44
41
 
45
- 3. **展示分析结果**,让用户确认或补充信息。分析结果中必须包含一条明确的「**使用方式描述**」,例如:
46
- > 使用方式:将花生球放在地面/瑜伽垫上,用户躺在球上方,通过自身体重施压按摩脊柱两侧肌肉。花生形凹槽避开脊柱,两侧球体作用于竖脊肌。
42
+ 3. **Present analysis results** for user to confirm or supplement. Results must include a clear "**Usage description**", e.g.:
43
+ > Usage: Place the peanut ball on the floor/yoga mat, user lies on top of the ball, using body weight to massage the muscles along both sides of the spine. The peanut-shaped groove avoids the spine while the two ball ends work the erector spinae muscles.
47
44
 
48
- ### Phase 2: 15 秒脚本 + Prompt 生成
45
+ ### Phase 2: 15-Second Script + Prompt Generation
49
46
 
50
- 基于分析结果 + 参考指南,生成完整的 15 秒视频脚本。
47
+ Based on analysis results + reference guide, generate a complete 15-second video script.
51
48
 
52
- **必须参考以下指南**(先 Read 再生成):
53
- - `${CLAUDE_SKILL_DIR}/references/ecom-prompt-guide.md` — 电商视频 prompt 指南
49
+ **Must reference the following guide** (Read before generating):
50
+ - `${CLAUDE_SKILL_DIR}/references/ecom-prompt-guide.md` — E-commerce video prompt guide
54
51
 
55
- **Prompt 结构(3 个必需组成部分):**
52
+ **Prompt structure (3 required components):**
56
53
 
57
- #### Part A: 产品锚定(Prompt 开头,一句话)
54
+ #### Part A: Product Anchoring (first line of prompt)
58
55
 
59
- 产品外观靠参考图传达,prompt 里只需**一句话**说明产品是什么 + 用途:
56
+ Product appearance is conveyed by the reference image. The prompt only needs **one sentence** stating what the product is + its use case:
60
57
 
61
58
  ```
62
59
  The product is a [brand] [product type] for [primary use case], shown in the reference image.
63
60
  The product must match the reference image exactly in every frame. Do not invent any packaging, box, or container unless the reference image shows one.
64
61
  ```
65
62
 
66
- **关键**:不要在 prompt 里重复描述颜色、材质、形状、logo — 这些信息已在参考图里。把 prompt 空间留给 hook 和画面叙事。
63
+ **Key**: Do not repeat color, material, shape, or logo descriptions in the prompt — that information is already in the reference image. Save prompt space for the hook and visual narrative.
67
64
 
68
- #### Part B: 台词嵌入(贯穿全段)
65
+ #### Part B: Dialogue Embedding (throughout)
69
66
 
70
- 台词必须是英文,以强制口型同步格式嵌入叙事段落中:
67
+ Dialogue must be in English, embedded in the narrative using forced lip-sync format:
71
68
  ```
72
69
  Spoken dialogue (say EXACTLY, word-for-word): "..."
73
70
  Mouth clearly visible when speaking, lip-sync aligned.
74
71
  ```
75
72
 
76
- **台词风格要求**:
77
- - **闺蜜聊天感**:像在跟朋友推荐,不像在念广告词
78
- - **高信息密度**:每句话都带具体信息(数字、对比、使用场景),没有废话
79
- - **不硬推销**:结尾不用 "link below" / "点击链接" 这种生硬 CTA,用自然的个人推荐收尾(如 "Best money I have spent this year""Trust me just start"
73
+ **Dialogue style requirements**:
74
+ - **Best-friend casual tone**: Like recommending to a friend, not reading ad copy
75
+ - **High information density**: Every sentence includes specific details (numbers, comparisons, usage scenarios) — no filler
76
+ - **No hard sell**: Don't end with "link below" or generic CTAs. Use natural personal recommendations (e.g., "Best money I have spent this year", "Trust me just start")
80
77
 
81
- **台词节奏**(4 句,对应 4 个时间段):
78
+ **Dialogue pacing** (4 lines, matching 4 time segments):
82
79
  ```
83
- [0-3s] Hook — 一句话喊停用户(痛点/悬念/结果前置)
84
- [3-8s] 卖点具体参数 + 使用体验
85
- [8-12s] 场景在哪用 + 便携性/多功能
86
- [12-15s] 收尾个人真实推荐感,不硬推销
80
+ [0-3s] Hook — One sentence to stop the scroll (pain point / suspense / result-first)
81
+ [3-8s] Selling point — Specific specs + personal experience
82
+ [8-12s] Scene — Where to use + portability / versatility
83
+ [12-15s] Close — Genuine personal recommendation, no hard sell
87
84
  ```
88
85
 
89
- #### Part C: 画面叙事(一段连续叙事)
86
+ #### Part C: Visual Narrative (one continuous narrative)
90
87
 
91
- **视频结构(一个连续 15 秒视频):**
88
+ **Video structure (one continuous 15-second video):**
92
89
  ```
93
- [0-3s] HOOK — 高冲击力开场。必须:快速运镜(whip pan / snap dolly in)+ 动态动作 + 立即开口说台词。绝对不能慢热。
94
- [3-8s] SHOWCASE — 产品展示 + 模特互动。运镜变化展示材质细节。
95
- [8-12s] SCENE — 生活场景使用。拉远到中景/全景。
96
- [12-15s] CLOSE — 模特面对镜头 + 产品在画面中 + 自然收尾。frame holds steady
90
+ [0-3s] HOOK — High-impact opening. Must: fast camera movement (whip pan / snap dolly in) + dynamic action + start speaking immediately. Never start slow.
91
+ [3-8s] SHOWCASE — Product display + model interaction. Camera transitions to reveal material details.
92
+ [8-12s] SCENE — Real-life usage scenario. Pull back to medium/wide shot.
93
+ [12-15s] CLOSE — Model faces camera + product in frame + natural ending. Frame holds steady.
97
94
  ```
98
95
 
99
- **输出 3 项内容:**
96
+ **Output 3 items:**
100
97
 
101
- #### 1. Video Prompt(英文,含台词)
102
- 导演口述式段落(6-10 句,每句只做一件事),包含:
103
- - 产品锚定(一句话,Part A)在最开头
104
- - 台词以 `Spoken dialogue (say EXACTLY, word-for-word):` 格式嵌入(Part B
105
- - 每句台词后跟 `Mouth clearly visible when speaking, lip-sync aligned.`
106
- - Ad-6D Protocol 元素穿插
107
- - 模特外观一致性描述(性别、发型、肤色、体型、服装)
108
- - 运镜变化至少 3
109
- - 光线/氛围描述
98
+ #### 1. Video Prompt (English, with dialogue)
99
+ Director-dictation style paragraph (6-10 sentences, one thing per sentence), containing:
100
+ - Product anchoring (one sentence, Part A) at the very beginning
101
+ - Dialogue embedded with `Spoken dialogue (say EXACTLY, word-for-word):` format (Part B)
102
+ - `Mouth clearly visible when speaking, lip-sync aligned.` after each dialogue line
103
+ - Ad-6D Protocol elements interspersed
104
+ - Model appearance consistency description (gender, hair, skin tone, body type, outfit)
105
+ - At least 3 camera movement changes
106
+ - Lighting/atmosphere description
110
107
 
111
- #### 2. 台词脚本(英文,标注时间段)
112
- 单独列出 4 句台词及对应时间段,方便审阅。
108
+ #### 2. Dialogue Script (English, with timestamps)
109
+ List the 4 dialogue lines separately with their time segments for easy review.
113
110
 
114
- #### 3. BGM/音效建议
115
- - 推荐适合产品调性的音乐风格
116
- - 关键节点的音效提示
111
+ #### 3. BGM / Sound Design Suggestions
112
+ - Recommend music style matching the product tone
113
+ - Key moment sound effect cues
117
114
 
118
- **参考示例**:Read `${CLAUDE_SKILL_DIR}/examples/dress-demo.md` 了解最新标准输出格式。
115
+ **Reference example**: Read `${CLAUDE_SKILL_DIR}/examples/dress-demo.md` for the latest standard output format.
119
116
 
120
- ### Phase 3: 用户确认
117
+ ### Phase 3: User Confirmation
121
118
 
122
- 展示完整脚本后,询问用户:
123
- - 是否调整台词
124
- - 是否更换场景
125
- - 是否修改 prompt 细节
126
- - 确认后进入提交
119
+ After presenting the full script, ask the user:
120
+ - Whether to adjust dialogue
121
+ - Whether to change the scene
122
+ - Whether to modify prompt details
123
+ - Proceed to submission after confirmation
127
124
 
128
- ### Phase 4: 上传素材 + 提交视频生成任务
125
+ ### Phase 4: Upload Materials + Submit Video Generation Task
129
126
 
130
- 用户确认脚本后,上传商品图并提交视频生成任务。
127
+ After user confirms the script, upload the product image and submit the video generation task.
131
128
 
132
- **重要规则**:
133
- - 只上传商品图,**不上传模特/真人图**(隐私检测会拦截含真人面孔的图片,报错 `InputImageSensitiveContentDetected.PrivacyInformation`)
134
- - 模特外观完全靠 prompt 文字描述控制
135
- - 商品图最好用干净白底纯产品图,避免有营销文字覆盖的图
136
- - 批量生成时:商品图只需上传一次,复用 material ID 提交多个不同场景的任务
129
+ **Important rules**:
130
+ - Only upload product images — **never upload model/real person photos** (privacy detection will block images containing realistic human faces, error: `InputImageSensitiveContentDetected.PrivacyInformation`)
131
+ - Model appearance is controlled entirely by prompt text description
132
+ - Product images should ideally be clean white-background product photos, avoid images with marketing text overlays
133
+ - For batch generation: upload the product image once, reuse the material ID to submit multiple tasks with different scenes
137
134
 
138
135
  ## Important Notes
139
136
 
140
- - 图片支持 jpg/jpeg/png/webp 格式
141
- - 视频 prompt 必须全英文
142
- - 台词必须英文,嵌入 prompt(`Spoken dialogue (say EXACTLY, word-for-word): "..."`)
143
- - **不输出单独的字幕文案**台词已在 prompt 中,不需要额外字幕层
137
+ - Images support jpg/jpeg/png/webp formats
138
+ - Video prompts must be entirely in English
139
+ - Dialogue must be in English, embedded in the prompt (`Spoken dialogue (say EXACTLY, word-for-word): "..."`)
140
+ - **Do not output separate subtitle text** dialogue is already in the prompt, no additional subtitle layer needed