@renoise/video-maker 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Gemini API client via Renoise gateway.
5
+ * Zero npm dependencies — uses native fetch.
6
+ *
7
+ * Usage:
8
+ * # Text only
9
+ * node gemini.mjs "Explain quantum computing"
10
+ *
11
+ * # With image(s)
12
+ * node gemini.mjs --file photo.jpg "Describe this product"
13
+ * node gemini.mjs --file a.jpg --file b.jpg "Compare these two"
14
+ *
15
+ * # With video
16
+ * node gemini.mjs --file clip.mp4 --resolution low "Summarize this clip"
17
+ *
18
+ * # With uploaded file URI (from file upload skill)
19
+ * node gemini.mjs --file-uri "https://...fileUri" --file-mime video/mp4 "Analyze this video"
20
+ *
21
+ * # JSON output mode
22
+ * node gemini.mjs --json "Return a JSON object with name and age"
23
+ *
24
+ * Options:
25
+ * --file <path> Attach a local file (image/video). Repeatable.
26
+ * --file-uri <uri> Attach an uploaded file by URI. Requires --file-mime.
27
+ * --file-mime <mime> MIME type for --file-uri.
28
+ * --resolution <level> Media resolution: low|medium|high|ultra_high (default: medium)
29
+ * --model <name> Model name (default: gemini-3.1-pro)
30
+ * --temperature <n> Temperature (default: 1.0)
31
+ * --max-tokens <n> Max output tokens (default: 8192)
32
+ * --json Request JSON response format
33
+ *
34
+ * Environment:
35
+ * RENOISE_API_KEY Required. Get one at https://www.renoise.ai
36
+ */
37
+
38
+ import fs from "fs/promises";
39
+ import path from "path";
40
+
41
+ // --- Auth ---
42
+ const RENOISE_API_KEY = process.env.RENOISE_API_KEY;
43
+ if (!RENOISE_API_KEY) {
44
+ console.error(
45
+ "RENOISE_API_KEY not set. Get one at: https://www.renoise.ai"
46
+ );
47
+ process.exit(1);
48
+ }
49
+
50
+ // --- MIME detection ---
51
+ const MIME_MAP = {
52
+ ".jpg": "image/jpeg",
53
+ ".jpeg": "image/jpeg",
54
+ ".png": "image/png",
55
+ ".webp": "image/webp",
56
+ ".gif": "image/gif",
57
+ ".mp4": "video/mp4",
58
+ ".mov": "video/quicktime",
59
+ ".webm": "video/webm",
60
+ };
61
+
62
+ function getMimeType(filePath) {
63
+ return (
64
+ MIME_MAP[path.extname(filePath).toLowerCase()] ?? "application/octet-stream"
65
+ );
66
+ }
67
+
68
+ // --- Resolution mapping ---
69
+ const RESOLUTION_MAP = {
70
+ low: "media_resolution_low",
71
+ medium: "media_resolution_medium",
72
+ high: "media_resolution_high",
73
+ ultra_high: "media_resolution_ultra_high",
74
+ };
75
+
76
+ // --- Parse args ---
77
+ function parseArgs(argv) {
78
+ const files = [];
79
+ let fileUri = null;
80
+ let fileMime = null;
81
+ let resolution = "medium";
82
+ let model = "gemini-3.1-pro";
83
+ let temperature = 1.0;
84
+ let maxTokens = 8192;
85
+ let json = false;
86
+ const textParts = [];
87
+
88
+ for (let i = 0; i < argv.length; i++) {
89
+ switch (argv[i]) {
90
+ case "--file":
91
+ files.push(argv[++i]);
92
+ break;
93
+ case "--file-uri":
94
+ fileUri = argv[++i];
95
+ break;
96
+ case "--file-mime":
97
+ fileMime = argv[++i];
98
+ break;
99
+ case "--resolution":
100
+ resolution = argv[++i];
101
+ break;
102
+ case "--model":
103
+ model = argv[++i];
104
+ break;
105
+ case "--temperature":
106
+ temperature = parseFloat(argv[++i]);
107
+ break;
108
+ case "--max-tokens":
109
+ maxTokens = parseInt(argv[++i], 10);
110
+ break;
111
+ case "--json":
112
+ json = true;
113
+ break;
114
+ default:
115
+ textParts.push(argv[i]);
116
+ }
117
+ }
118
+
119
+ return {
120
+ files,
121
+ fileUri,
122
+ fileMime,
123
+ resolution,
124
+ model,
125
+ temperature,
126
+ maxTokens,
127
+ json,
128
+ prompt: textParts.join(" "),
129
+ };
130
+ }
131
+
132
+ // --- Build parts ---
133
+ async function buildParts(opts) {
134
+ const parts = [];
135
+ const resLevel = RESOLUTION_MAP[opts.resolution] ?? RESOLUTION_MAP.medium;
136
+
137
+ // Local files → inline base64
138
+ for (const filePath of opts.files) {
139
+ const data = await fs.readFile(filePath);
140
+ parts.push({
141
+ inlineData: {
142
+ mimeType: getMimeType(filePath),
143
+ data: data.toString("base64"),
144
+ },
145
+ mediaResolution: { level: resLevel },
146
+ });
147
+ }
148
+
149
+ // Uploaded file URI
150
+ if (opts.fileUri) {
151
+ parts.push({
152
+ fileData: {
153
+ mimeType: opts.fileMime ?? "application/octet-stream",
154
+ fileUri: opts.fileUri,
155
+ },
156
+ });
157
+ }
158
+
159
+ // Text prompt (always last)
160
+ if (opts.prompt) {
161
+ parts.push({ text: opts.prompt });
162
+ }
163
+
164
+ return parts;
165
+ }
166
+
167
+ // --- Main ---
168
+ async function main() {
169
+ const opts = parseArgs(process.argv.slice(2));
170
+
171
+ if (!opts.prompt && opts.files.length === 0 && !opts.fileUri) {
172
+ console.error(
173
+ "Usage: node gemini.mjs [--file <path>] [--resolution low|medium|high|ultra_high] <prompt>"
174
+ );
175
+ process.exit(1);
176
+ }
177
+
178
+ const endpoint = `https://staging--ujgsvru36x4korjj10nq.edgespark.app/api/public/llm/proxy/v1beta/models/${opts.model}:generateContent?key=${RENOISE_API_KEY}`;
179
+
180
+ const parts = await buildParts(opts);
181
+
182
+ const body = {
183
+ contents: [{ role: "user", parts }],
184
+ generationConfig: {
185
+ temperature: opts.temperature,
186
+ maxOutputTokens: opts.maxTokens,
187
+ },
188
+ };
189
+
190
+ if (opts.json) {
191
+ body.generationConfig.responseMimeType = "application/json";
192
+ }
193
+
194
+ const res = await fetch(endpoint, {
195
+ method: "POST",
196
+ headers: { "Content-Type": "application/json" },
197
+ body: JSON.stringify(body),
198
+ });
199
+
200
+ if (!res.ok) {
201
+ const errText = await res.text();
202
+ console.error(`Gemini API error ${res.status}: ${errText}`);
203
+ process.exit(1);
204
+ }
205
+
206
+ const data = await res.json();
207
+ const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
208
+
209
+ if (!text) {
210
+ console.error("No text in response:", JSON.stringify(data, null, 2));
211
+ process.exit(1);
212
+ }
213
+
214
+ console.log(text);
215
+ }
216
+
217
+ main().catch((err) => {
218
+ console.error("ERROR:", err.message);
219
+ process.exit(1);
220
+ });
@@ -13,6 +13,8 @@ metadata:
13
13
 
14
14
  Generate AI videos and images through the Renoise platform.
15
15
 
16
+ > **IMPORTANT**: The Renoise website is **https://www.renoise.ai** — NOT renoise.com. Always use `renoise.ai` when referencing the platform URL.
17
+
16
18
  ## Supported Models
17
19
 
18
20
  | Model | Type | Description |
@@ -96,7 +98,7 @@ See `${CLAUDE_SKILL_DIR}/references/video-capabilities.md` for details.
96
98
 
97
99
  CLI path: `${CLAUDE_SKILL_DIR}/renoise-cli.mjs` (Node.js 18+)
98
100
 
99
- API Key and base URL are configured via environment variables (`RENOISE_API_KEY`, `RENOISE_BASE_URL`).
101
+ API Key and base URL are configured via environment variables (`RENOISE_API_KEY`, `RENOISE_BASE_URL`). Get your API key at https://www.renoise.ai (NOT renoise.com).
100
102
 
101
103
  ## CLI Commands
102
104
 
@@ -1,15 +1,17 @@
1
1
  ---
2
2
  name: short-film-editor
3
3
  description: >
4
- 短片剪辑师:音乐先行,按节拍卡点拆分故事为多段(5-15s不等),
5
- 生成HTML分镜预览供确认,维护角色/画风一致性,批量生成并输出组装指南。
6
- 触发词:short film, multi-clip, 短片, 1分钟视频, story video, 多段视频
4
+ Short film editor: music-first workflow, splits story into beat-synced segments (5-15s each),
5
+ generates HTML storyboard preview for confirmation, maintains character/style consistency,
6
+ batch generates and outputs assembly guide.
7
+ Use when user says "short film", "multi-clip", "story video", "multi-segment video",
8
+ "1-minute video".
7
9
  allowed-tools: Bash, Read
8
10
  metadata:
9
11
  author: renoise
10
12
  version: 0.1.0
11
13
  category: video-production
12
- tags: [short-film, multi-clip, narrative, story, 短片]
14
+ tags: [short-film, multi-clip, narrative, story]
13
15
  ---
14
16
 
15
17
  # Short Film Editor
@@ -86,7 +88,7 @@ You are a short film editor specializing in multi-clip AI video production. You
86
88
 
87
89
  **If user has or wants music**, follow Steps 1-3 below.
88
90
 
89
- **If user skips music** (e.g. "先不用音乐" / "no music for now"), skip to **Step 4 — Manual Rhythm**:
91
+ **If user skips music** (e.g. "no music for now"), skip to **Step 4 — Manual Rhythm**:
90
92
  - Define segments based on narrative pacing, not equal splits.
91
93
  - Vary durations: establishing shots 7-10s, action bursts 5-6s, aftermath/resolution 5-7s.
92
94
  - Aim for 4-7 segments depending on total duration.
@@ -149,7 +151,7 @@ Output JSON:
149
151
  | Climax / collision | 6-8s | Intense, dense |
150
152
  | Aftermath / resolution | 5-7s | Slow, lingering |
151
153
 
152
- **In-clip cutting (镜内切换)**:
154
+ **In-clip cutting**:
153
155
  Real films average 2-4s per camera angle (action films: 1-2s). Seedance minimum is 5s per clip, so use **time-annotated camera changes within each clip** to simulate fast cutting:
154
156
 
155
157
  ```
@@ -247,13 +249,14 @@ Store each prompt in the shot's `prompt` field in `project.json`.
247
249
 
248
250
  ### Step 2 — Generate Reference Images
249
251
 
250
- Three image sources (ask user preference, or default to Gemini):
252
+ Three image sources (ask user preference, or default to Renoise):
251
253
 
252
- **Option A — Gemini (default, automatic, free)**:
254
+ **Option A — Renoise (default)**:
255
+ Use `renoise-gen` with `nano-banana-2` model to generate a reference image. Prompt should describe the shot scene + character appearance + key action + lighting (NO camera movement). Save the result to `${PROJECT_DIR}/storyboard/${shot_id}.png`.
253
256
  ```bash
254
- npx tsx ${CLAUDE_PLUGIN_ROOT}/skills/scene-generate/scripts/generate-scene.ts \
255
- "<shot scene + character appearance + key action + lighting — NO camera movement>" \
256
- "${PROJECT_DIR}/storyboard/${shot_id}.png"
257
+ node ${CLAUDE_PLUGIN_ROOT}/skills/renoise-gen/renoise-cli.mjs task generate \
258
+ --model nano-banana-2 --resolution 2k --ratio 16:9 \
259
+ --prompt "<shot scene + character appearance + key action + lighting>"
257
260
  ```
258
261
 
259
262
  **Option B — Midjourney (higher quality, recommended for stylized projects)**:
@@ -271,12 +274,14 @@ Submit all shots in parallel via `/v1/tob/diffusion`, poll for completion, downl
271
274
  **Option C — User-provided**:
272
275
  User manually places reference images in `${PROJECT_DIR}/storyboard/S1.png`, `S2.png`, etc.
273
276
 
274
- **Option D — Gemini Grid Storyboard (recommended for best consistency)**:
275
- Generate ALL shots in a single grid image so characters and style are naturally consistent across panels, then split into individual reference images.
277
+ **Option D — Renoise Grid Storyboard (recommended for best consistency)**:
278
+ Generate ALL shots in a single grid image via `renoise-gen` `nano-banana-2` so characters and style are naturally consistent across panels, then split into individual reference images.
276
279
 
277
- 1. Generate a single N-panel grid with Gemini:
278
- ```
279
- Prompt: "Generate a single N-panel [manga/cinematic] storyboard grid image.
280
+ 1. Generate a single N-panel grid:
281
+ ```bash
282
+ node ${CLAUDE_PLUGIN_ROOT}/skills/renoise-gen/renoise-cli.mjs task generate \
283
+ --model nano-banana-2 --resolution 2k --ratio 16:9 \
284
+ --prompt "Generate a single N-panel [manga/cinematic] storyboard grid image.
280
285
  Layout: 2 rows x 4 columns grid with thin white borders.
281
286
  The SAME two characters must appear consistently across all panels:
282
287
  Character A: [verbatim from Character Bible]
@@ -312,18 +317,12 @@ Reference image prompts should include:
312
317
 
313
318
  ### Step 3 — Generate HTML Storyboard Preview
314
319
 
315
- ```bash
316
- npx tsx ${CLAUDE_SKILL_DIR}/scripts/generate-storyboard-html.ts \
317
- --project-file "${PROJECT_DIR}/project.json" \
318
- --output "${PROJECT_DIR}/storyboard.html"
319
- ```
320
-
321
- This generates a single self-contained HTML file with:
320
+ Generate a single self-contained HTML file from `${PROJECT_DIR}/project.json` and save it to `${PROJECT_DIR}/storyboard.html`. The HTML should include:
322
321
  - **Header**: Project title, total duration, clip count, BPM, character summary, style summary
323
322
  - **Music timeline**: Visual bar showing sections and cut points
324
323
  - **Shot cards**: One card per shot with reference image, scene/action, dialogue/beats, continuity, and collapsible Seedance prompt
325
324
  - **Reference images**: Base64-embedded inline (single-file, shareable). Use `--skip-images` to skip Gemini generation and read existing images from `storyboard/` directory (useful when using Midjourney or user-provided images).
326
- - **UI language**: Matches user's language (default: Chinese). White theme, bold confident design.
325
+ - **UI language**: English. White theme, bold confident design.
327
326
  - **Responsive**: Viewable on phone
328
327
 
329
328
  Open for preview:
@@ -129,7 +129,7 @@ AI-generated clips will achieve ~80% visual consistency when following these tec
129
129
  - Subtle speed adjustments for timing
130
130
  - Audio continuity (shared BGM) creates perceived visual continuity
131
131
 
132
- ## Grid Storyboard Method (宫格分镜法)
132
+ ## Grid Storyboard Method
133
133
 
134
134
  ### Why One Image > Many Images
135
135
 
@@ -147,7 +147,7 @@ When generating reference images for each shot independently (even with the same
147
147
  ### Workflow
148
148
 
149
149
  1. Write a single prompt describing all panels with verbatim character descriptions
150
- 2. Generate one grid image via Gemini (`gemini-3-pro-image-preview`)
150
+ 2. Generate one grid image via `renoise-gen` (`nano-banana-2`)
151
151
  3. Split into individual panels: `bash split-grid.sh grid.png storyboard/ 2 4`
152
152
  4. Upload each panel as material for Image-to-Video generation
153
153
  5. Each Seedance clip now has a visual anchor from the same source
@@ -3,8 +3,8 @@ name: tiktok-content-maker
3
3
  description: >
4
4
  TikTok e-commerce short video script generator. Analyzes product photos,
5
5
  generates 15s video scripts with video prompts and English dialogue.
6
- Use when user says "TikTok product video", "ecommerce video", "电商视频",
7
- "带货视频", "商品视频", "拍商品". Do NOT use for non-ecommerce videos or
6
+ Use when user says "TikTok product video", "ecommerce video",
7
+ "product video", "sales video", "shoot product". Do NOT use for non-ecommerce videos or
8
8
  general creative direction (use director instead).
9
9
  allowed-tools: Bash, Read
10
10
  metadata:
@@ -14,130 +14,127 @@ metadata:
14
14
  tags: [product, ecommerce, tiktok]
15
15
  ---
16
16
 
17
- # Content Maker — 电商短视频脚本 + 视频生成
17
+ # Content Maker — E-commerce Short Video Script + Generation
18
18
 
19
19
  ## Overview
20
20
 
21
- 电商短视频全流程工具:用户提供商品图(+ 可选模特图)→ 分析商品信息生成 15 TikTok 脚本(视频 prompt,含英文台词嵌入)→ 提交视频生成任务。
21
+ End-to-end e-commerce short video tool: user provides product images (+ optional model images) → analyze product info → generate a 15-second TikTok script (video prompt with embedded English dialogue) → submit video generation task.
22
22
 
23
23
  ## Workflow
24
24
 
25
- ### Phase 1: 素材收集 & 商品分析
25
+ ### Phase 1: Material Collection & Product Analysis
26
26
 
27
- 1. **收集素材路径**:向用户索要图片
28
- - `商品图路径`(必需):产品主图。**最佳:干净白底纯产品图,无文字/标注/装饰**。有营销文字覆盖的图会干扰模型。
29
- - `模特图路径`(可选,仅供分析参考):展示穿搭/使用效果的图。**注意:模特图仅用于理解产品使用方式,不上传到 Renoise**(隐私检测会拦截含真人面孔的图片)。
27
+ 1. **Collect material paths**: Ask user for images
28
+ - `Product image path` (required): Product hero image. **Best: clean white-background product photo with no text/labels/decorations**. Images with marketing text overlays will interfere with the model.
29
+ - `Model image path` (optional, for analysis reference only): Shows how the product is worn/used. **Note: Model images are only used to understand product usage — they are NOT uploaded to Renoise** (privacy detection will block images containing realistic human faces).
30
30
 
31
- 2. **分析商品信息**:
32
- - 如果有 Gemini API 可用,调用 Gemini 分析:
33
- ```bash
34
- cd ${CLAUDE_PLUGIN_ROOT} && npm install --silent && npx tsx ${CLAUDE_SKILL_DIR}/scripts/analyze-images.ts "<商品图路径>" "<模特图路径>"
35
- ```
36
- - 也可以直接通过 Read 工具查看图片,人工分析商品信息
37
- - 需要提取:商品类型、颜色、材质、卖点、品牌调性、适用场景
38
- - **(关键)从使用场景图中理解产品的正确使用方式**:
39
- - 用户的姿势是什么?(站/坐/躺/走)
40
- - 产品放在身体哪个位置?(手持/地面/桌面/身体下方)
41
- - 产品与身体的交互方式?(用手按压 vs 用体重压 vs 穿戴 vs 涂抹)
42
- - 使用场景在哪?(健身房/办公室/家里/户外)
43
- - 如果用户提供了商品链接,用 WebFetch 抓取产品详情页补充理解
31
+ 2. **Analyze product info**:
32
+ - Use the `gemini-gen` skill to analyze product images — send the image(s) with a prompt requesting product analysis (type, color, material, selling points, brand tone, scene suggestions)
33
+ - Alternatively, view images directly via the Read tool and analyze manually
34
+ - Extract: product type, color, material, selling points, brand tone, applicable scenarios
35
+ - **(Critical) Understand correct product usage from lifestyle images**:
36
+ - What is the user's posture? (standing/sitting/lying/walking)
37
+ - Where is the product positioned on the body? (handheld/floor/table/under body)
38
+ - How does the product interact with the body? (hand pressure vs body weight vs wearing vs applying)
39
+ - Where is the usage scenario? (gym/office/home/outdoors)
40
+ - If the user provides a product link, use WebFetch to scrape product detail page for additional context
44
41
 
45
- 3. **展示分析结果**,让用户确认或补充信息。分析结果中必须包含一条明确的「**使用方式描述**」,例如:
46
- > 使用方式:将花生球放在地面/瑜伽垫上,用户躺在球上方,通过自身体重施压按摩脊柱两侧肌肉。花生形凹槽避开脊柱,两侧球体作用于竖脊肌。
42
+ 3. **Present analysis results** for user to confirm or supplement. Results must include a clear "**Usage description**", e.g.:
43
+ > Usage: Place the peanut ball on the floor/yoga mat, user lies on top of the ball, using body weight to massage the muscles along both sides of the spine. The peanut-shaped groove avoids the spine while the two ball ends work the erector spinae muscles.
47
44
 
48
- ### Phase 2: 15 秒脚本 + Prompt 生成
45
+ ### Phase 2: 15-Second Script + Prompt Generation
49
46
 
50
- 基于分析结果 + 参考指南,生成完整的 15 秒视频脚本。
47
+ Based on analysis results + reference guide, generate a complete 15-second video script.
51
48
 
52
- **必须参考以下指南**(先 Read 再生成):
53
- - `${CLAUDE_SKILL_DIR}/references/ecom-prompt-guide.md` — 电商视频 prompt 指南
49
+ **Must reference the following guide** (Read before generating):
50
+ - `${CLAUDE_SKILL_DIR}/references/ecom-prompt-guide.md` — E-commerce video prompt guide
54
51
 
55
- **Prompt 结构(3 个必需组成部分):**
52
+ **Prompt structure (3 required components):**
56
53
 
57
- #### Part A: 产品锚定(Prompt 开头,一句话)
54
+ #### Part A: Product Anchoring (first line of prompt)
58
55
 
59
- 产品外观靠参考图传达,prompt 里只需**一句话**说明产品是什么 + 用途:
56
+ Product appearance is conveyed by the reference image. The prompt only needs **one sentence** stating what the product is + its use case:
60
57
 
61
58
  ```
62
59
  The product is a [brand] [product type] for [primary use case], shown in the reference image.
63
60
  The product must match the reference image exactly in every frame. Do not invent any packaging, box, or container unless the reference image shows one.
64
61
  ```
65
62
 
66
- **关键**:不要在 prompt 里重复描述颜色、材质、形状、logo — 这些信息已在参考图里。把 prompt 空间留给 hook 和画面叙事。
63
+ **Key**: Do not repeat color, material, shape, or logo descriptions in the prompt — that information is already in the reference image. Save prompt space for the hook and visual narrative.
67
64
 
68
- #### Part B: 台词嵌入(贯穿全段)
65
+ #### Part B: Dialogue Embedding (throughout)
69
66
 
70
- 台词必须是英文,以强制口型同步格式嵌入叙事段落中:
67
+ Dialogue must be in English, embedded in the narrative using forced lip-sync format:
71
68
  ```
72
69
  Spoken dialogue (say EXACTLY, word-for-word): "..."
73
70
  Mouth clearly visible when speaking, lip-sync aligned.
74
71
  ```
75
72
 
76
- **台词风格要求**:
77
- - **闺蜜聊天感**:像在跟朋友推荐,不像在念广告词
78
- - **高信息密度**:每句话都带具体信息(数字、对比、使用场景),没有废话
79
- - **不硬推销**:结尾不用 "link below" / "点击链接" 这种生硬 CTA,用自然的个人推荐收尾(如 "Best money I have spent this year""Trust me just start"
73
+ **Dialogue style requirements**:
74
+ - **Best-friend casual tone**: Like recommending to a friend, not reading ad copy
75
+ - **High information density**: Every sentence includes specific details (numbers, comparisons, usage scenarios) — no filler
76
+ - **No hard sell**: Don't end with "link below" or generic CTAs. Use natural personal recommendations (e.g., "Best money I have spent this year", "Trust me just start")
80
77
 
81
- **台词节奏**(4 句,对应 4 个时间段):
78
+ **Dialogue pacing** (4 lines, matching 4 time segments):
82
79
  ```
83
- [0-3s] Hook — 一句话喊停用户(痛点/悬念/结果前置)
84
- [3-8s] 卖点具体参数 + 使用体验
85
- [8-12s] 场景在哪用 + 便携性/多功能
86
- [12-15s] 收尾个人真实推荐感,不硬推销
80
+ [0-3s] Hook — One sentence to stop the scroll (pain point / suspense / result-first)
81
+ [3-8s] Selling point — Specific specs + personal experience
82
+ [8-12s] Scene — Where to use + portability / versatility
83
+ [12-15s] Close — Genuine personal recommendation, no hard sell
87
84
  ```
88
85
 
89
- #### Part C: 画面叙事(一段连续叙事)
86
+ #### Part C: Visual Narrative (one continuous narrative)
90
87
 
91
- **视频结构(一个连续 15 秒视频):**
88
+ **Video structure (one continuous 15-second video):**
92
89
  ```
93
- [0-3s] HOOK — 高冲击力开场。必须:快速运镜(whip pan / snap dolly in)+ 动态动作 + 立即开口说台词。绝对不能慢热。
94
- [3-8s] SHOWCASE — 产品展示 + 模特互动。运镜变化展示材质细节。
95
- [8-12s] SCENE — 生活场景使用。拉远到中景/全景。
96
- [12-15s] CLOSE — 模特面对镜头 + 产品在画面中 + 自然收尾。frame holds steady
90
+ [0-3s] HOOK — High-impact opening. Must: fast camera movement (whip pan / snap dolly in) + dynamic action + start speaking immediately. Never start slow.
91
+ [3-8s] SHOWCASE — Product display + model interaction. Camera transitions to reveal material details.
92
+ [8-12s] SCENE — Real-life usage scenario. Pull back to medium/wide shot.
93
+ [12-15s] CLOSE — Model faces camera + product in frame + natural ending. Frame holds steady.
97
94
  ```
98
95
 
99
- **输出 3 项内容:**
96
+ **Output 3 items:**
100
97
 
101
- #### 1. Video Prompt(英文,含台词)
102
- 导演口述式段落(6-10 句,每句只做一件事),包含:
103
- - 产品锚定(一句话,Part A)在最开头
104
- - 台词以 `Spoken dialogue (say EXACTLY, word-for-word):` 格式嵌入(Part B
105
- - 每句台词后跟 `Mouth clearly visible when speaking, lip-sync aligned.`
106
- - Ad-6D Protocol 元素穿插
107
- - 模特外观一致性描述(性别、发型、肤色、体型、服装)
108
- - 运镜变化至少 3
109
- - 光线/氛围描述
98
+ #### 1. Video Prompt (English, with dialogue)
99
+ Director-dictation style paragraph (6-10 sentences, one thing per sentence), containing:
100
+ - Product anchoring (one sentence, Part A) at the very beginning
101
+ - Dialogue embedded with `Spoken dialogue (say EXACTLY, word-for-word):` format (Part B)
102
+ - `Mouth clearly visible when speaking, lip-sync aligned.` after each dialogue line
103
+ - Ad-6D Protocol elements interspersed
104
+ - Model appearance consistency description (gender, hair, skin tone, body type, outfit)
105
+ - At least 3 camera movement changes
106
+ - Lighting/atmosphere description
110
107
 
111
- #### 2. 台词脚本(英文,标注时间段)
112
- 单独列出 4 句台词及对应时间段,方便审阅。
108
+ #### 2. Dialogue Script (English, with timestamps)
109
+ List the 4 dialogue lines separately with their time segments for easy review.
113
110
 
114
- #### 3. BGM/音效建议
115
- - 推荐适合产品调性的音乐风格
116
- - 关键节点的音效提示
111
+ #### 3. BGM / Sound Design Suggestions
112
+ - Recommend music style matching the product tone
113
+ - Key moment sound effect cues
117
114
 
118
- **参考示例**:Read `${CLAUDE_SKILL_DIR}/examples/dress-demo.md` 了解最新标准输出格式。
115
+ **Reference example**: Read `${CLAUDE_SKILL_DIR}/examples/dress-demo.md` for the latest standard output format.
119
116
 
120
- ### Phase 3: 用户确认
117
+ ### Phase 3: User Confirmation
121
118
 
122
- 展示完整脚本后,询问用户:
123
- - 是否调整台词
124
- - 是否更换场景
125
- - 是否修改 prompt 细节
126
- - 确认后进入提交
119
+ After presenting the full script, ask the user:
120
+ - Whether to adjust dialogue
121
+ - Whether to change the scene
122
+ - Whether to modify prompt details
123
+ - Proceed to submission after confirmation
127
124
 
128
- ### Phase 4: 上传素材 + 提交视频生成任务
125
+ ### Phase 4: Upload Materials + Submit Video Generation Task
129
126
 
130
- 用户确认脚本后,上传商品图并提交视频生成任务。
127
+ After user confirms the script, upload the product image and submit the video generation task.
131
128
 
132
- **重要规则**:
133
- - 只上传商品图,**不上传模特/真人图**(隐私检测会拦截含真人面孔的图片,报错 `InputImageSensitiveContentDetected.PrivacyInformation`)
134
- - 模特外观完全靠 prompt 文字描述控制
135
- - 商品图最好用干净白底纯产品图,避免有营销文字覆盖的图
136
- - 批量生成时:商品图只需上传一次,复用 material ID 提交多个不同场景的任务
129
+ **Important rules**:
130
+ - Only upload product images — **never upload model/real person photos** (privacy detection will block images containing realistic human faces, error: `InputImageSensitiveContentDetected.PrivacyInformation`)
131
+ - Model appearance is controlled entirely by prompt text description
132
+ - Product images should ideally be clean white-background product photos, avoid images with marketing text overlays
133
+ - For batch generation: upload the product image once, reuse the material ID to submit multiple tasks with different scenes
137
134
 
138
135
  ## Important Notes
139
136
 
140
- - 图片支持 jpg/jpeg/png/webp 格式
141
- - 视频 prompt 必须全英文
142
- - 台词必须英文,嵌入 prompt(`Spoken dialogue (say EXACTLY, word-for-word): "..."`)
143
- - **不输出单独的字幕文案**台词已在 prompt 中,不需要额外字幕层
137
+ - Images support jpg/jpeg/png/webp formats
138
+ - Video prompts must be entirely in English
139
+ - Dialogue must be in English, embedded in the prompt (`Spoken dialogue (say EXACTLY, word-for-word): "..."`)
140
+ - **Do not output separate subtitle text** dialogue is already in the prompt, no additional subtitle layer needed