@renoise/video-maker 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +15 -0
- package/.claude-plugin/plugin.json +17 -3
- package/README.md +20 -33
- package/hooks/check-api-key.sh +28 -0
- package/hooks/hooks.json +3 -3
- package/index.mjs +1 -0
- package/openclaw.plugin.json +5 -3
- package/package.json +6 -10
- package/skills/director/SKILL.md +4 -7
- package/skills/file-upload/SKILL.md +79 -0
- package/skills/file-upload/scripts/upload.mjs +103 -0
- package/skills/gemini-gen/SKILL.md +232 -0
- package/skills/gemini-gen/scripts/gemini.mjs +220 -0
- package/skills/renoise-gen/SKILL.md +3 -1
- package/skills/short-film-editor/SKILL.md +23 -24
- package/skills/short-film-editor/references/continuity-guide.md +2 -2
- package/skills/tiktok-content-maker/SKILL.md +78 -81
- package/skills/tiktok-content-maker/examples/dress-demo.md +42 -42
- package/skills/tiktok-content-maker/references/ecom-prompt-guide.md +157 -152
- package/skills/video-download/SKILL.md +1 -1
- package/hooks/session-start.sh +0 -17
- package/lib/gemini.ts +0 -49
- package/skills/short-film-editor/scripts/generate-storyboard-html.ts +0 -714
- package/skills/tiktok-content-maker/scripts/analyze-images.ts +0 -122
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Gemini API client via Renoise gateway.
|
|
5
|
+
* Zero npm dependencies — uses native fetch.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* # Text only
|
|
9
|
+
* node gemini.mjs "Explain quantum computing"
|
|
10
|
+
*
|
|
11
|
+
* # With image(s)
|
|
12
|
+
* node gemini.mjs --file photo.jpg "Describe this product"
|
|
13
|
+
* node gemini.mjs --file a.jpg --file b.jpg "Compare these two"
|
|
14
|
+
*
|
|
15
|
+
* # With video
|
|
16
|
+
* node gemini.mjs --file clip.mp4 --resolution low "Summarize this clip"
|
|
17
|
+
*
|
|
18
|
+
* # With uploaded file URI (from file upload skill)
|
|
19
|
+
* node gemini.mjs --file-uri "https://...fileUri" --file-mime video/mp4 "Analyze this video"
|
|
20
|
+
*
|
|
21
|
+
* # JSON output mode
|
|
22
|
+
* node gemini.mjs --json "Return a JSON object with name and age"
|
|
23
|
+
*
|
|
24
|
+
* Options:
|
|
25
|
+
* --file <path> Attach a local file (image/video). Repeatable.
|
|
26
|
+
* --file-uri <uri> Attach an uploaded file by URI. Requires --file-mime.
|
|
27
|
+
* --file-mime <mime> MIME type for --file-uri.
|
|
28
|
+
* --resolution <level> Media resolution: low|medium|high|ultra_high (default: medium)
|
|
29
|
+
* --model <name> Model name (default: gemini-3.1-pro)
|
|
30
|
+
* --temperature <n> Temperature (default: 1.0)
|
|
31
|
+
* --max-tokens <n> Max output tokens (default: 8192)
|
|
32
|
+
* --json Request JSON response format
|
|
33
|
+
*
|
|
34
|
+
* Environment:
|
|
35
|
+
* RENOISE_API_KEY Required. Get one at https://www.renoise.ai
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
import fs from "fs/promises";
|
|
39
|
+
import path from "path";
|
|
40
|
+
|
|
41
|
+
// --- Auth ---
|
|
42
|
+
// Gateway credential, read once at startup from the environment.
const { RENOISE_API_KEY } = process.env;

// Without a key no request can be made, so tell the user where to get one
// and stop with a non-zero exit code.
if (!RENOISE_API_KEY) {
  const missingKeyMessage =
    "RENOISE_API_KEY not set. Get one at: https://www.renoise.ai";
  console.error(missingKeyMessage);
  process.exit(1);
}
|
|
49
|
+
|
|
50
|
+
// --- MIME detection ---
|
|
51
|
+
// Lower-cased file extension (including the leading dot) -> MIME type.
const MIME_MAP = {
  // Still images
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".png": "image/png",
  ".webp": "image/webp",
  ".gif": "image/gif",
  // Video containers
  ".mp4": "video/mp4",
  ".mov": "video/quicktime",
  ".webm": "video/webm",
};

/**
 * Look up the MIME type for a file path by its extension.
 *
 * @param {string} filePath - Path whose extension is matched case-insensitively.
 * @returns {string} The mapped MIME type, or "application/octet-stream" when
 *   the extension is not listed in MIME_MAP.
 */
function getMimeType(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  const knownType = MIME_MAP[extension];
  return knownType ?? "application/octet-stream";
}
|
|
67
|
+
|
|
68
|
+
// --- Resolution mapping ---
|
|
69
|
+
// CLI resolution level -> value sent in each part's `mediaResolution.level`.
// Every value is the level name prefixed with "media_resolution_".
const RESOLUTION_MAP = Object.fromEntries(
  ["low", "medium", "high", "ultra_high"].map((levelName) => [
    levelName,
    `media_resolution_${levelName}`,
  ])
);
|
|
75
|
+
|
|
76
|
+
// --- Parse args ---
|
|
77
|
+
/**
 * Parse the command-line arguments (without the node/script entries).
 *
 * Recognised flags: --file (repeatable), --file-uri, --file-mime,
 * --resolution, --model, --temperature, --max-tokens, --json. Every other
 * token is treated as part of the prompt; prompt tokens are joined with a
 * single space.
 *
 * @param {string[]} argv - Arguments to parse.
 * @returns {{files: string[], fileUri: (string|null), fileMime: (string|null),
 *   resolution: string, model: string, temperature: number, maxTokens: number,
 *   json: boolean, prompt: string}} Parsed options with defaults applied.
 * @throws {Error} When a flag that needs a value is the last token, or when
 *   --temperature / --max-tokens is not numeric.
 */
function parseArgs(argv) {
  const opts = {
    files: [],
    fileUri: null,
    fileMime: null,
    resolution: "medium",
    model: "gemini-3.1-pro",
    temperature: 1.0,
    maxTokens: 8192,
    json: false,
  };
  const textParts = [];
  let i = 0;

  // Consume the token after a flag. A flag at the very end of argv used to
  // yield `undefined`, which only failed later with an unrelated message.
  const takeValue = (flag) => {
    i += 1;
    const value = argv[i];
    if (value === undefined) {
      throw new Error(`Missing value for ${flag}`);
    }
    return value;
  };

  // Consume and parse a numeric value. NaN is rejected because
  // JSON.stringify would serialise it as null in the request body.
  const takeNumber = (flag, parse) => {
    const raw = takeValue(flag);
    const parsed = parse(raw);
    if (Number.isNaN(parsed)) {
      throw new Error(`Invalid number for ${flag}: ${raw}`);
    }
    return parsed;
  };

  for (; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case "--file":
        opts.files.push(takeValue(arg));
        break;
      case "--file-uri":
        opts.fileUri = takeValue(arg);
        break;
      case "--file-mime":
        opts.fileMime = takeValue(arg);
        break;
      case "--resolution":
        opts.resolution = takeValue(arg);
        break;
      case "--model":
        opts.model = takeValue(arg);
        break;
      case "--temperature":
        opts.temperature = takeNumber(arg, (raw) => Number.parseFloat(raw));
        break;
      case "--max-tokens":
        opts.maxTokens = takeNumber(arg, (raw) => Number.parseInt(raw, 10));
        break;
      case "--json":
        opts.json = true;
        break;
      default:
        textParts.push(arg);
    }
  }

  return { ...opts, prompt: textParts.join(" ") };
}
|
|
131
|
+
|
|
132
|
+
// --- Build parts ---
|
|
133
|
+
/**
 * Assemble the `parts` array for the request from the parsed options.
 *
 * Order of the result: one inline-data part per local file (in the order
 * given), then the uploaded-file part if a URI was supplied, then the text
 * prompt if it is non-empty.
 *
 * @param {object} opts - Options as returned by parseArgs.
 * @returns {Promise<object[]>} The parts to place in `contents[0].parts`.
 */
async function buildParts(opts) {
  // An unrecognised resolution name falls back to the medium level.
  const level = RESOLUTION_MAP[opts.resolution] ?? RESOLUTION_MAP.medium;
  const { files, fileUri, fileMime, prompt } = opts;
  const collected = [];

  // Local files are read from disk and embedded as base64.
  for (const localPath of files) {
    const bytes = await fs.readFile(localPath);
    const inlineData = {
      mimeType: getMimeType(localPath),
      data: bytes.toString("base64"),
    };
    collected.push({ inlineData, mediaResolution: { level } });
  }

  // A previously uploaded file is referenced by URI; a generic MIME type is
  // used when none was supplied.
  if (fileUri) {
    const fileData = {
      mimeType: fileMime ?? "application/octet-stream",
      fileUri,
    };
    collected.push({ fileData });
  }

  // The text prompt always goes last.
  if (prompt) {
    collected.push({ text: prompt });
  }

  return collected;
}
|
|
166
|
+
|
|
167
|
+
// --- Main ---
|
|
168
|
+
/**
 * CLI entry point: parse arguments, send one generateContent request through
 * the gateway, and print the text of the first candidate to stdout.
 *
 * Exits with status 1 when no input was given, when the HTTP response is not
 * OK, or when the first candidate carries no text.
 */
async function main() {
  const opts = parseArgs(process.argv.slice(2));

  // Nothing to send: no prompt, no local file and no uploaded file.
  if (!opts.prompt && opts.files.length === 0 && !opts.fileUri) {
    console.error(
      "Usage: node gemini.mjs [--file <path>] [--resolution low|medium|high|ultra_high] <prompt>"
    );
    process.exit(1);
  }

  // NOTE(review): the API key is sent in the URL query string, where it can
  // end up in proxy/server logs. Prefer header-based auth if the gateway
  // supports it — confirm with the gateway owners. The host is also a
  // "staging" deployment; verify that this is intended for a release.
  const endpoint = `https://staging--ujgsvru36x4korjj10nq.edgespark.app/api/public/llm/proxy/v1beta/models/${opts.model}:generateContent?key=${RENOISE_API_KEY}`;

  const parts = await buildParts(opts);

  const body = {
    contents: [{ role: "user", parts }],
    generationConfig: {
      temperature: opts.temperature,
      maxOutputTokens: opts.maxTokens,
    },
  };

  if (opts.json) {
    body.generationConfig.responseMimeType = "application/json";
  }

  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });

  if (!res.ok) {
    const errText = await res.text();
    console.error(`Gemini API error ${res.status}: ${errText}`);
    process.exit(1);
  }

  const data = await res.json();

  // A candidate may carry several parts. Concatenate every text part instead
  // of reading only parts[0], which truncated multi-part answers and reported
  // "No text" whenever the first part was not a text part.
  const responseParts = data.candidates?.[0]?.content?.parts;
  const text = Array.isArray(responseParts)
    ? responseParts
        .map((part) => part?.text)
        .filter((piece) => typeof piece === "string")
        .join("")
    : "";

  if (!text) {
    console.error("No text in response:", JSON.stringify(data, null, 2));
    process.exit(1);
  }

  console.log(text);
}

// Any uncaught failure (argument errors, unreadable files, network errors)
// is reported on stderr and turned into a non-zero exit code.
main().catch((err) => {
  console.error("ERROR:", err.message);
  process.exit(1);
});
|
|
@@ -13,6 +13,8 @@ metadata:
|
|
|
13
13
|
|
|
14
14
|
Generate AI videos and images through the Renoise platform.
|
|
15
15
|
|
|
16
|
+
> **IMPORTANT**: The Renoise website is **https://www.renoise.ai** — NOT renoise.com. Always use `renoise.ai` when referencing the platform URL.
|
|
17
|
+
|
|
16
18
|
## Supported Models
|
|
17
19
|
|
|
18
20
|
| Model | Type | Description |
|
|
@@ -96,7 +98,7 @@ See `${CLAUDE_SKILL_DIR}/references/video-capabilities.md` for details.
|
|
|
96
98
|
|
|
97
99
|
CLI path: `${CLAUDE_SKILL_DIR}/renoise-cli.mjs` (Node.js 18+)
|
|
98
100
|
|
|
99
|
-
API Key and base URL are configured via environment variables (`RENOISE_API_KEY`, `RENOISE_BASE_URL`).
|
|
101
|
+
API Key and base URL are configured via environment variables (`RENOISE_API_KEY`, `RENOISE_BASE_URL`). Get your API key at https://www.renoise.ai (NOT renoise.com).
|
|
100
102
|
|
|
101
103
|
## CLI Commands
|
|
102
104
|
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: short-film-editor
|
|
3
3
|
description: >
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
Short film editor: music-first workflow, splits story into beat-synced segments (5-15s each),
|
|
5
|
+
generates HTML storyboard preview for confirmation, maintains character/style consistency,
|
|
6
|
+
batch generates and outputs assembly guide.
|
|
7
|
+
Use when user says "short film", "multi-clip", "story video", "multi-segment video",
|
|
8
|
+
"1-minute video".
|
|
7
9
|
allowed-tools: Bash, Read
|
|
8
10
|
metadata:
|
|
9
11
|
author: renoise
|
|
10
12
|
version: 0.1.0
|
|
11
13
|
category: video-production
|
|
12
|
-
tags: [short-film, multi-clip, narrative, story
|
|
14
|
+
tags: [short-film, multi-clip, narrative, story]
|
|
13
15
|
---
|
|
14
16
|
|
|
15
17
|
# Short Film Editor
|
|
@@ -86,7 +88,7 @@ You are a short film editor specializing in multi-clip AI video production. You
|
|
|
86
88
|
|
|
87
89
|
**If user has or wants music**, follow Steps 1-3 below.
|
|
88
90
|
|
|
89
|
-
**If user skips music** (e.g. "
|
|
91
|
+
**If user skips music** (e.g. "no music for now"), skip to **Step 4 — Manual Rhythm**:
|
|
90
92
|
- Define segments based on narrative pacing, not equal splits.
|
|
91
93
|
- Vary durations: establishing shots 7-10s, action bursts 5-6s, aftermath/resolution 5-7s.
|
|
92
94
|
- Aim for 4-7 segments depending on total duration.
|
|
@@ -149,7 +151,7 @@ Output JSON:
|
|
|
149
151
|
| Climax / collision | 6-8s | Intense, dense |
|
|
150
152
|
| Aftermath / resolution | 5-7s | Slow, lingering |
|
|
151
153
|
|
|
152
|
-
**In-clip cutting
|
|
154
|
+
**In-clip cutting**:
|
|
153
155
|
Real films average 2-4s per camera angle (action films: 1-2s). Seedance minimum is 5s per clip, so use **time-annotated camera changes within each clip** to simulate fast cutting:
|
|
154
156
|
|
|
155
157
|
```
|
|
@@ -247,13 +249,14 @@ Store each prompt in the shot's `prompt` field in `project.json`.
|
|
|
247
249
|
|
|
248
250
|
### Step 2 — Generate Reference Images
|
|
249
251
|
|
|
250
|
-
Three image sources (ask user preference, or default to
|
|
252
|
+
Three image sources (ask user preference, or default to Renoise):
|
|
251
253
|
|
|
252
|
-
**Option A —
|
|
254
|
+
**Option A — Renoise (default)**:
|
|
255
|
+
Use `renoise-gen` with `nano-banana-2` model to generate a reference image. Prompt should describe the shot scene + character appearance + key action + lighting (NO camera movement). Save the result to `${PROJECT_DIR}/storyboard/${shot_id}.png`.
|
|
253
256
|
```bash
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
"
|
|
257
|
+
node ${CLAUDE_PLUGIN_ROOT}/skills/renoise-gen/renoise-cli.mjs task generate \
|
|
258
|
+
--model nano-banana-2 --resolution 2k --ratio 16:9 \
|
|
259
|
+
--prompt "<shot scene + character appearance + key action + lighting>"
|
|
257
260
|
```
|
|
258
261
|
|
|
259
262
|
**Option B — Midjourney (higher quality, recommended for stylized projects)**:
|
|
@@ -271,12 +274,14 @@ Submit all shots in parallel via `/v1/tob/diffusion`, poll for completion, downl
|
|
|
271
274
|
**Option C — User-provided**:
|
|
272
275
|
User manually places reference images in `${PROJECT_DIR}/storyboard/S1.png`, `S2.png`, etc.
|
|
273
276
|
|
|
274
|
-
**Option D —
|
|
275
|
-
Generate ALL shots in a single grid image so characters and style are naturally consistent across panels, then split into individual reference images.
|
|
277
|
+
**Option D — Renoise Grid Storyboard (recommended for best consistency)**:
|
|
278
|
+
Generate ALL shots in a single grid image via `renoise-gen` `nano-banana-2` so characters and style are naturally consistent across panels, then split into individual reference images.
|
|
276
279
|
|
|
277
|
-
1. Generate a single N-panel grid
|
|
278
|
-
```
|
|
279
|
-
|
|
280
|
+
1. Generate a single N-panel grid:
|
|
281
|
+
```bash
|
|
282
|
+
node ${CLAUDE_PLUGIN_ROOT}/skills/renoise-gen/renoise-cli.mjs task generate \
|
|
283
|
+
--model nano-banana-2 --resolution 2k --ratio 16:9 \
|
|
284
|
+
--prompt "Generate a single N-panel [manga/cinematic] storyboard grid image.
|
|
280
285
|
Layout: 2 rows x 4 columns grid with thin white borders.
|
|
281
286
|
The SAME two characters must appear consistently across all panels:
|
|
282
287
|
Character A: [verbatim from Character Bible]
|
|
@@ -312,18 +317,12 @@ Reference image prompts should include:
|
|
|
312
317
|
|
|
313
318
|
### Step 3 — Generate HTML Storyboard Preview
|
|
314
319
|
|
|
315
|
-
|
|
316
|
-
npx tsx ${CLAUDE_SKILL_DIR}/scripts/generate-storyboard-html.ts \
|
|
317
|
-
--project-file "${PROJECT_DIR}/project.json" \
|
|
318
|
-
--output "${PROJECT_DIR}/storyboard.html"
|
|
319
|
-
```
|
|
320
|
-
|
|
321
|
-
This generates a single self-contained HTML file with:
|
|
320
|
+
Generate a single self-contained HTML file from `${PROJECT_DIR}/project.json` and save it to `${PROJECT_DIR}/storyboard.html`. The HTML should include:
|
|
322
321
|
- **Header**: Project title, total duration, clip count, BPM, character summary, style summary
|
|
323
322
|
- **Music timeline**: Visual bar showing sections and cut points
|
|
324
323
|
- **Shot cards**: One card per shot with reference image, scene/action, dialogue/beats, continuity, and collapsible Seedance prompt
|
|
325
324
|
- **Reference images**: Base64-embedded inline (single-file, shareable). Read the existing images from the `storyboard/` directory (generated via Renoise or Midjourney, or user-provided) rather than generating new ones at this step.
|
|
326
|
-
- **UI language**:
|
|
325
|
+
- **UI language**: English. White theme, bold confident design.
|
|
327
326
|
- **Responsive**: Viewable on phone
|
|
328
327
|
|
|
329
328
|
Open for preview:
|
|
@@ -129,7 +129,7 @@ AI-generated clips will achieve ~80% visual consistency when following these tec
|
|
|
129
129
|
- Subtle speed adjustments for timing
|
|
130
130
|
- Audio continuity (shared BGM) creates perceived visual continuity
|
|
131
131
|
|
|
132
|
-
## Grid Storyboard Method
|
|
132
|
+
## Grid Storyboard Method
|
|
133
133
|
|
|
134
134
|
### Why One Image > Many Images
|
|
135
135
|
|
|
@@ -147,7 +147,7 @@ When generating reference images for each shot independently (even with the same
|
|
|
147
147
|
### Workflow
|
|
148
148
|
|
|
149
149
|
1. Write a single prompt describing all panels with verbatim character descriptions
|
|
150
|
-
2. Generate one grid image via
|
|
150
|
+
2. Generate one grid image via `renoise-gen` (`nano-banana-2`)
|
|
151
151
|
3. Split into individual panels: `bash split-grid.sh grid.png storyboard/ 2 4`
|
|
152
152
|
4. Upload each panel as material for Image-to-Video generation
|
|
153
153
|
5. Each Seedance clip now has a visual anchor from the same source
|
|
@@ -3,8 +3,8 @@ name: tiktok-content-maker
|
|
|
3
3
|
description: >
|
|
4
4
|
TikTok e-commerce short video script generator. Analyzes product photos,
|
|
5
5
|
generates 15s video scripts with video prompts and English dialogue.
|
|
6
|
-
Use when user says "TikTok product video", "ecommerce video",
|
|
7
|
-
"
|
|
6
|
+
Use when user says "TikTok product video", "ecommerce video",
|
|
7
|
+
"product video", "sales video", "shoot product". Do NOT use for non-ecommerce videos or
|
|
8
8
|
general creative direction (use director instead).
|
|
9
9
|
allowed-tools: Bash, Read
|
|
10
10
|
metadata:
|
|
@@ -14,130 +14,127 @@ metadata:
|
|
|
14
14
|
tags: [product, ecommerce, tiktok]
|
|
15
15
|
---
|
|
16
16
|
|
|
17
|
-
# Content Maker —
|
|
17
|
+
# Content Maker — E-commerce Short Video Script + Generation
|
|
18
18
|
|
|
19
19
|
## Overview
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
End-to-end e-commerce short video tool: user provides product images (+ optional model images) → analyze product info → generate 15-second TikTok script (video prompt with embedded English dialogue) → submit video generation task.
|
|
22
22
|
|
|
23
23
|
## Workflow
|
|
24
24
|
|
|
25
|
-
### Phase 1:
|
|
25
|
+
### Phase 1: Material Collection & Product Analysis
|
|
26
26
|
|
|
27
|
-
1.
|
|
28
|
-
-
|
|
29
|
-
-
|
|
27
|
+
1. **Collect material paths**: Ask user for images
|
|
28
|
+
- `Product image path` (required): Product hero image. **Best: clean white-background product photo with no text/labels/decorations**. Images with marketing text overlays will interfere with the model.
|
|
29
|
+
- `Model image path` (optional, for analysis reference only): Shows how the product is worn/used. **Note: Model images are only used to understand product usage — they are NOT uploaded to Renoise** (privacy detection will block images containing realistic human faces).
|
|
30
30
|
|
|
31
|
-
2.
|
|
32
|
-
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
-
|
|
40
|
-
|
|
41
|
-
- 产品与身体的交互方式?(用手按压 vs 用体重压 vs 穿戴 vs 涂抹)
|
|
42
|
-
- 使用场景在哪?(健身房/办公室/家里/户外)
|
|
43
|
-
- 如果用户提供了商品链接,用 WebFetch 抓取产品详情页补充理解
|
|
31
|
+
2. **Analyze product info**:
|
|
32
|
+
- Use the `gemini-gen` skill to analyze product images — send the image(s) with a prompt requesting product analysis (type, color, material, selling points, brand tone, scene suggestions)
|
|
33
|
+
- Alternatively, view images directly via the Read tool and analyze manually
|
|
34
|
+
- Extract: product type, color, material, selling points, brand tone, applicable scenarios
|
|
35
|
+
- **(Critical) Understand correct product usage from lifestyle images**:
|
|
36
|
+
- What is the user's posture? (standing/sitting/lying/walking)
|
|
37
|
+
- Where is the product positioned on the body? (handheld/floor/table/under body)
|
|
38
|
+
- How does the product interact with the body? (hand pressure vs body weight vs wearing vs applying)
|
|
39
|
+
- Where is the usage scenario? (gym/office/home/outdoors)
|
|
40
|
+
- If the user provides a product link, use WebFetch to scrape the product detail page for additional context
|
|
44
41
|
|
|
45
|
-
3.
|
|
46
|
-
>
|
|
42
|
+
3. **Present analysis results** for user to confirm or supplement. Results must include a clear "**Usage description**", e.g.:
|
|
43
|
+
> Usage: Place the peanut ball on the floor/yoga mat, user lies on top of the ball, using body weight to massage the muscles along both sides of the spine. The peanut-shaped groove avoids the spine while the two ball ends work the erector spinae muscles.
|
|
47
44
|
|
|
48
|
-
### Phase 2: 15
|
|
45
|
+
### Phase 2: 15-Second Script + Prompt Generation
|
|
49
46
|
|
|
50
|
-
|
|
47
|
+
Based on analysis results + reference guide, generate a complete 15-second video script.
|
|
51
48
|
|
|
52
|
-
|
|
53
|
-
- `${CLAUDE_SKILL_DIR}/references/ecom-prompt-guide.md` —
|
|
49
|
+
**Must reference the following guide** (Read before generating):
|
|
50
|
+
- `${CLAUDE_SKILL_DIR}/references/ecom-prompt-guide.md` — E-commerce video prompt guide
|
|
54
51
|
|
|
55
|
-
**Prompt
|
|
52
|
+
**Prompt structure (3 required components):**
|
|
56
53
|
|
|
57
|
-
#### Part A:
|
|
54
|
+
#### Part A: Product Anchoring (first line of prompt)
|
|
58
55
|
|
|
59
|
-
|
|
56
|
+
Product appearance is conveyed by the reference image. The prompt only needs **one sentence** stating what the product is + its use case:
|
|
60
57
|
|
|
61
58
|
```
|
|
62
59
|
The product is a [brand] [product type] for [primary use case], shown in the reference image.
|
|
63
60
|
The product must match the reference image exactly in every frame. Do not invent any packaging, box, or container unless the reference image shows one.
|
|
64
61
|
```
|
|
65
62
|
|
|
66
|
-
|
|
63
|
+
**Key**: Do not repeat color, material, shape, or logo descriptions in the prompt — that information is already in the reference image. Save prompt space for the hook and visual narrative.
|
|
67
64
|
|
|
68
|
-
#### Part B:
|
|
65
|
+
#### Part B: Dialogue Embedding (throughout)
|
|
69
66
|
|
|
70
|
-
|
|
67
|
+
Dialogue must be in English, embedded in the narrative using forced lip-sync format:
|
|
71
68
|
```
|
|
72
69
|
Spoken dialogue (say EXACTLY, word-for-word): "..."
|
|
73
70
|
Mouth clearly visible when speaking, lip-sync aligned.
|
|
74
71
|
```
|
|
75
72
|
|
|
76
|
-
|
|
77
|
-
-
|
|
78
|
-
-
|
|
79
|
-
-
|
|
73
|
+
**Dialogue style requirements**:
|
|
74
|
+
- **Best-friend casual tone**: Like recommending to a friend, not reading ad copy
|
|
75
|
+
- **High information density**: Every sentence includes specific details (numbers, comparisons, usage scenarios) — no filler
|
|
76
|
+
- **No hard sell**: Don't end with "link below" or generic CTAs. Use natural personal recommendations (e.g., "Best money I have spent this year", "Trust me just start")
|
|
80
77
|
|
|
81
|
-
|
|
78
|
+
**Dialogue pacing** (4 lines, matching 4 time segments):
|
|
82
79
|
```
|
|
83
|
-
[0-3s] Hook —
|
|
84
|
-
[3-8s]
|
|
85
|
-
[8-12s]
|
|
86
|
-
[12-15s]
|
|
80
|
+
[0-3s] Hook — One sentence to stop the scroll (pain point / suspense / result-first)
|
|
81
|
+
[3-8s] Selling point — Specific specs + personal experience
|
|
82
|
+
[8-12s] Scene — Where to use + portability / versatility
|
|
83
|
+
[12-15s] Close — Genuine personal recommendation, no hard sell
|
|
87
84
|
```
|
|
88
85
|
|
|
89
|
-
#### Part C:
|
|
86
|
+
#### Part C: Visual Narrative (one continuous narrative)
|
|
90
87
|
|
|
91
|
-
|
|
88
|
+
**Video structure (one continuous 15-second video):**
|
|
92
89
|
```
|
|
93
|
-
[0-3s] HOOK —
|
|
94
|
-
[3-8s] SHOWCASE —
|
|
95
|
-
[8-12s] SCENE —
|
|
96
|
-
[12-15s] CLOSE —
|
|
90
|
+
[0-3s] HOOK — High-impact opening. Must: fast camera movement (whip pan / snap dolly in) + dynamic action + start speaking immediately. Never start slow.
|
|
91
|
+
[3-8s] SHOWCASE — Product display + model interaction. Camera transitions to reveal material details.
|
|
92
|
+
[8-12s] SCENE — Real-life usage scenario. Pull back to medium/wide shot.
|
|
93
|
+
[12-15s] CLOSE — Model faces camera + product in frame + natural ending. Frame holds steady.
|
|
97
94
|
```
|
|
98
95
|
|
|
99
|
-
|
|
96
|
+
**Output 3 items:**
|
|
100
97
|
|
|
101
|
-
#### 1. Video Prompt
|
|
102
|
-
|
|
103
|
-
-
|
|
104
|
-
-
|
|
105
|
-
-
|
|
106
|
-
- Ad-6D Protocol
|
|
107
|
-
-
|
|
108
|
-
-
|
|
109
|
-
-
|
|
98
|
+
#### 1. Video Prompt (English, with dialogue)
|
|
99
|
+
Director-dictation style paragraph (6-10 sentences, one thing per sentence), containing:
|
|
100
|
+
- Product anchoring (one sentence, Part A) at the very beginning
|
|
101
|
+
- Dialogue embedded with `Spoken dialogue (say EXACTLY, word-for-word):` format (Part B)
|
|
102
|
+
- `Mouth clearly visible when speaking, lip-sync aligned.` after each dialogue line
|
|
103
|
+
- Ad-6D Protocol elements interspersed
|
|
104
|
+
- Model appearance consistency description (gender, hair, skin tone, body type, outfit)
|
|
105
|
+
- At least 3 camera movement changes
|
|
106
|
+
- Lighting/atmosphere description
|
|
110
107
|
|
|
111
|
-
#### 2.
|
|
112
|
-
|
|
108
|
+
#### 2. Dialogue Script (English, with timestamps)
|
|
109
|
+
List the 4 dialogue lines separately with their time segments for easy review.
|
|
113
110
|
|
|
114
|
-
#### 3. BGM
|
|
115
|
-
-
|
|
116
|
-
-
|
|
111
|
+
#### 3. BGM / Sound Design Suggestions
|
|
112
|
+
- Recommend music style matching the product tone
|
|
113
|
+
- Key moment sound effect cues
|
|
117
114
|
|
|
118
|
-
|
|
115
|
+
**Reference example**: Read `${CLAUDE_SKILL_DIR}/examples/dress-demo.md` for the latest standard output format.
|
|
119
116
|
|
|
120
|
-
### Phase 3:
|
|
117
|
+
### Phase 3: User Confirmation
|
|
121
118
|
|
|
122
|
-
|
|
123
|
-
-
|
|
124
|
-
-
|
|
125
|
-
-
|
|
126
|
-
-
|
|
119
|
+
After presenting the full script, ask the user:
|
|
120
|
+
- Whether to adjust dialogue
|
|
121
|
+
- Whether to change the scene
|
|
122
|
+
- Whether to modify prompt details
|
|
123
|
+
- Proceed to submission after confirmation
|
|
127
124
|
|
|
128
|
-
### Phase 4:
|
|
125
|
+
### Phase 4: Upload Materials + Submit Video Generation Task
|
|
129
126
|
|
|
130
|
-
|
|
127
|
+
After user confirms the script, upload the product image and submit the video generation task.
|
|
131
128
|
|
|
132
|
-
|
|
133
|
-
-
|
|
134
|
-
-
|
|
135
|
-
-
|
|
136
|
-
-
|
|
129
|
+
**Important rules**:
|
|
130
|
+
- Only upload product images — **never upload model/real person photos** (privacy detection will block images containing realistic human faces, error: `InputImageSensitiveContentDetected.PrivacyInformation`)
|
|
131
|
+
- Model appearance is controlled entirely by prompt text description
|
|
132
|
+
- Product images should ideally be clean white-background product photos; avoid images with marketing text overlays
|
|
133
|
+
- For batch generation: upload the product image once, reuse the material ID to submit multiple tasks with different scenes
|
|
137
134
|
|
|
138
135
|
## Important Notes
|
|
139
136
|
|
|
140
|
-
-
|
|
141
|
-
-
|
|
142
|
-
-
|
|
143
|
-
-
|
|
137
|
+
- Images support jpg/jpeg/png/webp formats
|
|
138
|
+
- Video prompts must be entirely in English
|
|
139
|
+
- Dialogue must be in English, embedded in the prompt (`Spoken dialogue (say EXACTLY, word-for-word): "..."`)
|
|
140
|
+
- **Do not output separate subtitle text** — dialogue is already in the prompt, no additional subtitle layer needed
|