@renoise/plugin 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/.claude-plugin/marketplace.json +15 -0
  2. package/.claude-plugin/plugin.json +23 -0
  3. package/README.md +53 -0
  4. package/hooks/check-api-key.sh +28 -0
  5. package/hooks/hooks.json +28 -0
  6. package/hooks/session-start.sh +40 -0
  7. package/index.mjs +1 -0
  8. package/openclaw.plugin.json +22 -0
  9. package/package.json +22 -0
  10. package/skills/director/SKILL.md +269 -0
  11. package/skills/director/references/narrative-pacing.md +257 -0
  12. package/skills/director/references/style-library.md +179 -0
  13. package/skills/file-upload/SKILL.md +79 -0
  14. package/skills/file-upload/scripts/upload.mjs +103 -0
  15. package/skills/gemini-gen/SKILL.md +236 -0
  16. package/skills/gemini-gen/scripts/gemini.mjs +220 -0
  17. package/skills/product-sheet-generate/SKILL.md +75 -0
  18. package/skills/renoise-gen/SKILL.md +364 -0
  19. package/skills/renoise-gen/references/api-endpoints.md +142 -0
  20. package/skills/renoise-gen/references/video-capabilities.md +524 -0
  21. package/skills/renoise-gen/renoise-cli.mjs +723 -0
  22. package/skills/scene-generate/SKILL.md +52 -0
  23. package/skills/short-film-editor/SKILL.md +478 -0
  24. package/skills/short-film-editor/examples/mystery-package-4shot.md +260 -0
  25. package/skills/short-film-editor/references/continuity-guide.md +170 -0
  26. package/skills/short-film-editor/scripts/analyze-beats.py +271 -0
  27. package/skills/short-film-editor/scripts/batch-generate.sh +150 -0
  28. package/skills/short-film-editor/scripts/split-grid.sh +70 -0
  29. package/skills/tiktok-content-maker/SKILL.md +140 -0
  30. package/skills/tiktok-content-maker/examples/dress-demo.md +86 -0
  31. package/skills/tiktok-content-maker/references/ecom-prompt-guide.md +266 -0
  32. package/skills/video-download/SKILL.md +161 -0
  33. package/skills/video-download/scripts/download-video.sh +91 -0
@@ -0,0 +1,271 @@
1
#!/usr/bin/env python3
"""
Beat analysis for short film editing.

Analyzes audio to extract BPM, beat positions, section boundaries,
and suggests clip cut points within Seedance's 5-15s constraint.

Usage: python3 analyze-beats.py <audio_file>
Output: JSON to stdout

Dependencies: pip3 install librosa soundfile numpy
"""

import json
import sys
from pathlib import Path

# librosa (with its numpy/soundfile dependencies) is required for every
# analysis step; fail fast with a machine-readable JSON error on stderr so
# calling scripts can detect the missing-dependency case.
try:
    import librosa
    import numpy as np
except ImportError:
    print(
        json.dumps({
            "error": "Missing dependencies. Install with: pip3 install librosa soundfile numpy"
        }),
        file=sys.stderr,
    )
    sys.exit(1)


# Seedance clip-length constraint: every suggested segment should fall
# between MIN_SEGMENT_S and MAX_SEGMENT_S seconds.
MIN_SEGMENT_S = 5.0
MAX_SEGMENT_S = 15.0
34
def analyze_beats(audio_path: str) -> dict:
    """Run the full analysis pipeline on one audio file.

    Loads the audio at 22050 Hz, tracks tempo and the beat grid, detects
    section boundaries, labels the resulting sections, and derives cut
    suggestions that respect the 5-15s segment constraint.

    Returns a JSON-serializable dict with bpm, total duration, beat
    timestamps, labeled sections, and suggested cuts.
    """
    signal, rate = librosa.load(audio_path, sr=22050)
    duration = librosa.get_duration(y=signal, sr=rate)

    # Tempo + beat grid. Depending on the librosa version, tempo can come
    # back as a scalar or a one-element array; normalize either way.
    raw_tempo, frames = librosa.beat.beat_track(y=signal, sr=rate)
    beat_positions = librosa.frames_to_time(frames, sr=rate).tolist()
    if np.ndim(raw_tempo) == 0:
        bpm = float(np.round(raw_tempo, 1))
    else:
        bpm = float(np.round(raw_tempo[0], 1))

    # Roughly one section per ~10s of audio, clamped to the [4, 10] range
    # so short clips still get a few sections.
    segment_count = max(4, min(10, int(duration / 10)))
    section_bounds = _detect_sections(signal, rate, segment_count)
    labeled_sections = _label_sections(section_bounds, duration)
    cut_plan = _suggest_cuts(labeled_sections, beat_positions, duration)

    return {
        "bpm": bpm,
        "total_duration_s": round(duration, 2),
        "beats": [round(t, 3) for t in beat_positions],
        "sections": labeled_sections,
        "suggested_cuts": cut_plan,
    }
62
+
63
+
64
def _detect_sections(y, sr, n_segments: int) -> list[float]:
    """Detect section boundaries (seconds) using spectral features.

    Stacks chroma and MFCC features and runs librosa's agglomerative
    segmentation with ``n_segments`` clusters; falls back to evenly spaced
    boundaries if segmentation fails. Boundaries within 2s of either end
    of the track are discarded.

    Fix: the original re-evaluated ``librosa.get_duration(y=y, sr=sr)``
    once per element inside the filtering comprehension (and again in the
    fallback); the duration is now computed exactly once.
    """
    duration = librosa.get_duration(y=y, sr=sr)

    # Compute spectral features
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    features = np.vstack([chroma, mfcc])

    # Use agglomerative clustering for boundary detection
    try:
        bound_frames = librosa.segment.agglomerative(features, k=n_segments)
        bound_times = librosa.frames_to_time(bound_frames, sr=sr).tolist()
    except Exception:
        # Fallback: evenly spaced sections
        bound_times = [i * duration / n_segments for i in range(1, n_segments)]

    # Remove boundaries too close to start/end
    bound_times = [b for b in bound_times if 2.0 < b < (duration - 2.0)]

    return sorted(set(round(b, 2) for b in bound_times))
84
+
85
+
86
+ def _label_sections(boundaries: list[float], total_duration: float) -> list[dict]:
87
+ """Assign labels to sections based on position in the track."""
88
+ labels_map = ["intro", "verse", "build", "chorus", "bridge", "verse2", "chorus2", "outro"]
89
+ points = [0.0] + boundaries + [total_duration]
90
+ sections = []
91
+
92
+ for i in range(len(points) - 1):
93
+ label_idx = min(i, len(labels_map) - 1)
94
+ sections.append({
95
+ "start": round(points[i], 2),
96
+ "end": round(points[i + 1], 2),
97
+ "label": labels_map[label_idx],
98
+ })
99
+
100
+ return sections
101
+
102
+
103
def _suggest_cuts(
    sections: list[dict],
    beats: list[float],
    total_duration: float,
) -> list[dict]:
    """
    Generate suggested cut points respecting 5-15s segment constraints.

    Section boundaries act as the primary cuts; any section longer than
    MAX_SEGMENT_S is subdivided at beat positions, then undersized pieces
    are merged back together. If nothing survives the constraints, the
    whole track is split into evenly spaced segments instead.
    """
    candidates: list[dict] = []
    for section in sections:
        begin, finish = section["start"], section["end"]
        span = finish - begin
        if span <= MAX_SEGMENT_S:
            candidates.append({
                "time": begin,
                "end": finish,
                "duration": round(span, 2),
                "section": section["label"],
            })
        else:
            # Too long for a single clip: split at beat positions.
            candidates.extend(_subdivide_at_beats(begin, finish, beats, section["label"]))

    # Final pass: drop anything still below the minimum length.
    final: list[dict] = []
    for piece in _merge_short_segments(candidates):
        length = piece["end"] - piece["time"]
        if length < MIN_SEGMENT_S:
            continue
        final.append({
            "time": round(piece["time"], 2),
            "end": round(piece["end"], 2),
            "duration": round(length, 2),
            "section": piece["section"],
        })

    if final:
        return final

    # Edge case: nothing survived (e.g. a very short track) — fall back
    # to an even split so the caller always gets at least one segment.
    count = max(1, int(total_duration / 10))
    width = total_duration / count
    return [
        {
            "time": round(k * width, 2),
            "end": round(min((k + 1) * width, total_duration), 2),
            "duration": round(min((k + 1) * width, total_duration) - k * width, 2),
            "section": f"segment_{k + 1}",
        }
        for k in range(count)
    ]
156
+
157
+
158
+ def _subdivide_at_beats(
159
+ start: float,
160
+ end: float,
161
+ beats: list[float],
162
+ label: str,
163
+ ) -> list[dict]:
164
+ """Split a long section at beat points into 5-15s segments."""
165
+ section_beats = [b for b in beats if start <= b <= end]
166
+ if not section_beats:
167
+ # No beats found; split evenly
168
+ n = max(2, int((end - start) / 10))
169
+ seg_len = (end - start) / n
170
+ return [
171
+ {
172
+ "time": round(start + i * seg_len, 2),
173
+ "end": round(start + (i + 1) * seg_len, 2),
174
+ "duration": round(seg_len, 2),
175
+ "section": label,
176
+ }
177
+ for i in range(n)
178
+ ]
179
+
180
+ segments = []
181
+ current_start = start
182
+
183
+ for beat in section_beats:
184
+ if beat - current_start >= MAX_SEGMENT_S:
185
+ # Find the best beat to cut at (closest to target ~10s)
186
+ target = current_start + 10.0
187
+ candidates = [b for b in section_beats if current_start + MIN_SEGMENT_S <= b <= current_start + MAX_SEGMENT_S]
188
+ if candidates:
189
+ cut = min(candidates, key=lambda b: abs(b - target))
190
+ segments.append({
191
+ "time": round(current_start, 2),
192
+ "end": round(cut, 2),
193
+ "duration": round(cut - current_start, 2),
194
+ "section": label,
195
+ })
196
+ current_start = cut
197
+
198
+ # Final segment
199
+ if end - current_start >= MIN_SEGMENT_S:
200
+ segments.append({
201
+ "time": round(current_start, 2),
202
+ "end": round(end, 2),
203
+ "duration": round(end - current_start, 2),
204
+ "section": label,
205
+ })
206
+ elif segments:
207
+ # Extend the last segment
208
+ segments[-1]["end"] = round(end, 2)
209
+ segments[-1]["duration"] = round(end - segments[-1]["time"], 2)
210
+ else:
211
+ segments.append({
212
+ "time": round(start, 2),
213
+ "end": round(end, 2),
214
+ "duration": round(end - start, 2),
215
+ "section": label,
216
+ })
217
+
218
+ return segments
219
+
220
+
221
+ def _merge_short_segments(segments: list[dict]) -> list[dict]:
222
+ """Merge segments shorter than MIN_SEGMENT_S with their neighbors."""
223
+ if not segments:
224
+ return []
225
+
226
+ merged = [segments[0].copy()]
227
+
228
+ for seg in segments[1:]:
229
+ prev = merged[-1]
230
+ prev_dur = prev["end"] - prev["time"]
231
+ seg_dur = seg["end"] - seg["time"]
232
+
233
+ if seg_dur < MIN_SEGMENT_S:
234
+ # Try merging with previous if combined still <= MAX
235
+ combined = prev_dur + seg_dur
236
+ if combined <= MAX_SEGMENT_S:
237
+ prev["end"] = seg["end"]
238
+ prev["duration"] = round(combined, 2)
239
+ continue
240
+
241
+ if prev_dur < MIN_SEGMENT_S:
242
+ # Extend previous into current
243
+ combined = prev_dur + seg_dur
244
+ if combined <= MAX_SEGMENT_S:
245
+ prev["end"] = seg["end"]
246
+ prev["duration"] = round(combined, 2)
247
+ prev["section"] = seg["section"]
248
+ continue
249
+
250
+ merged.append(seg.copy())
251
+
252
+ return merged
253
+
254
+
255
def main():
    """CLI entry point: validate argv, run the analysis, emit JSON on stdout."""
    if len(sys.argv) < 2:
        print("Usage: python3 analyze-beats.py <audio_file>", file=sys.stderr)
        print("Supported formats: mp3, aac, wav, m4a, ogg, flac", file=sys.stderr)
        sys.exit(1)

    target = sys.argv[1]
    if not Path(target).exists():
        # Machine-readable error on stderr, matching the dependency guard.
        print(json.dumps({"error": f"File not found: {target}"}), file=sys.stderr)
        sys.exit(1)

    print(json.dumps(analyze_beats(target), indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()
@@ -0,0 +1,150 @@
1
#!/usr/bin/env bash
#
# Batch video generation for short film projects.
# Reads a prompts JSON file and sequentially creates/waits/retrieves each shot.
# Stops at the first failure so the operator can fix the issue and re-run.
#
# Usage:
#   bash batch-generate.sh --project <project-id> --ratio <ratio> --prompts-file <prompts.json>
#
# Prompts JSON format:
#   [
#     { "shot_id": "S1", "prompt": "...", "duration": 8 },
#     { "shot_id": "S2", "prompt": "...", "duration": 13 },
#     ...
#   ]
#
# Dependencies: node, jq

set -euo pipefail

# ---- Parse args ----
PROJECT=""
RATIO="16:9"
PROMPTS_FILE=""
TIMEOUT=600

while [[ $# -gt 0 ]]; do
  case "$1" in
    --project) PROJECT="$2"; shift 2 ;;
    --ratio) RATIO="$2"; shift 2 ;;
    --prompts-file) PROMPTS_FILE="$2"; shift 2 ;;
    --timeout) TIMEOUT="$2"; shift 2 ;;
    *) echo "Unknown arg: $1"; exit 1 ;;
  esac
done

if [[ -z "$PROMPTS_FILE" ]]; then
  echo "Error: --prompts-file is required."
  echo "Usage: bash batch-generate.sh --project <id> --ratio <ratio> --prompts-file <prompts.json>"
  exit 1
fi

if [[ ! -f "$PROMPTS_FILE" ]]; then
  echo "Error: File not found: $PROMPTS_FILE"
  exit 1
fi

# ---- Check tool dependencies up front (both are required below) ----
for tool in jq node; do
  if ! command -v "$tool" &>/dev/null; then
    echo "Error: required tool not found in PATH: $tool"
    exit 1
  fi
done

# ---- Locate renoise-cli ----
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CLI="${SCRIPT_DIR}/../../renoise-gen/renoise-cli.mjs"

if [[ ! -f "$CLI" ]]; then
  echo "Error: renoise-cli.mjs not found at $CLI"
  exit 1
fi

# ---- Check balance ----
echo "=== Checking balance ==="
node "$CLI" me
echo ""

# ---- Read prompts ----
SHOT_COUNT=$(jq 'length' "$PROMPTS_FILE")
echo "=== Batch generation: $SHOT_COUNT shots ==="
echo "Project: ${PROJECT:-'(none)'}"
echo "Ratio: $RATIO"
echo "Timeout per shot: ${TIMEOUT}s"
echo ""

# ---- Results tracking ----
RESULTS=()
FAILED=0

# C-style loop rather than `seq 0 $((SHOT_COUNT - 1))`: BSD/macOS seq counts
# DOWN when first > last, so an empty prompts array would yield bogus indices.
for ((i = 0; i < SHOT_COUNT; i++)); do
  SHOT_ID=$(jq -r ".[$i].shot_id" "$PROMPTS_FILE")
  PROMPT=$(jq -r ".[$i].prompt" "$PROMPTS_FILE")
  DURATION=$(jq -r ".[$i].duration" "$PROMPTS_FILE")

  echo "--- [$((i + 1))/$SHOT_COUNT] $SHOT_ID (${DURATION}s) ---"

  # Build tags
  TAGS="$SHOT_ID"
  if [[ -n "$PROJECT" ]]; then
    TAGS="$PROJECT,$SHOT_ID"
  fi

  # Create task
  CREATE_OUTPUT=$(node "$CLI" create \
    --prompt "$PROMPT" \
    --duration "$DURATION" \
    --ratio "$RATIO" \
    --tags "$TAGS" 2>&1) || {
    echo "[FAILED] $SHOT_ID — create error:"
    echo "$CREATE_OUTPUT"
    FAILED=$((FAILED + 1))
    RESULTS+=("$SHOT_ID|FAILED|—|create error")
    echo ""
    echo "Stopping batch — fix the issue and re-run."
    break
  }

  # Extract task ID from create output
  TASK_ID=$(echo "$CREATE_OUTPUT" | grep -oE 'id=[0-9]+' | head -1 | cut -d= -f2)

  if [[ -z "$TASK_ID" ]]; then
    echo "[FAILED] $SHOT_ID — could not parse task ID from output:"
    echo "$CREATE_OUTPUT"
    FAILED=$((FAILED + 1))
    RESULTS+=("$SHOT_ID|FAILED|—|no task ID")
    break
  fi

  echo "Task created: #$TASK_ID"

  # Wait for completion
  WAIT_OUTPUT=$(node "$CLI" wait "$TASK_ID" --timeout "$TIMEOUT" 2>&1) || {
    echo "[FAILED] $SHOT_ID (task #$TASK_ID) — wait error:"
    echo "$WAIT_OUTPUT"
    FAILED=$((FAILED + 1))
    RESULTS+=("$SHOT_ID|FAILED|#$TASK_ID|wait timeout/error")
    echo ""
    echo "Stopping batch — the task may still be running. Check with: node renoise-cli.mjs get $TASK_ID"
    break
  }

  # Get result. `|| true` keeps `set -e` from killing the whole batch here;
  # a failed fetch simply reports the URL as "unknown" via the jq fallback.
  RESULT_OUTPUT=$(node "$CLI" result "$TASK_ID" 2>&1) || true
  VIDEO_URL=$(echo "$RESULT_OUTPUT" | jq -r '.result.videoUrl // .videoUrl // "unknown"' 2>/dev/null || echo "unknown")

  echo "[SUCCESS] $SHOT_ID → $VIDEO_URL"
  RESULTS+=("$SHOT_ID|SUCCESS|#$TASK_ID|$VIDEO_URL")
  echo ""
done

# ---- Summary ----
echo ""
echo "========================================="
echo " BATCH GENERATION SUMMARY"
echo "========================================="
printf "%-8s %-10s %-10s %s\n" "Shot" "Status" "Task" "URL"
printf "%-8s %-10s %-10s %s\n" "----" "------" "----" "---"

# Guard the expansion: under `set -u`, bash < 4.4 treats "${RESULTS[@]}" on
# an empty array as an unbound-variable error.
if [[ ${#RESULTS[@]} -gt 0 ]]; then
  for entry in "${RESULTS[@]}"; do
    IFS='|' read -r shot status task url <<< "$entry"
    printf "%-8s %-10s %-10s %s\n" "$shot" "$status" "$task" "$url"
  done
fi

echo ""
echo "Total: ${#RESULTS[@]}/$SHOT_COUNT completed, $FAILED failed"

if [[ $FAILED -gt 0 ]]; then
  exit 1
fi
@@ -0,0 +1,70 @@
1
#!/usr/bin/env bash
#
# Split a storyboard grid image into individual panel images.
# Uses ImageMagick to crop each cell from an NxM grid.
#
# Usage:
#   bash split-grid.sh <grid_image> <output_dir> <rows> <cols>
#
# Example:
#   bash split-grid.sh grid.png storyboard/ 2 4
#   → storyboard/S1.png, S2.png, S3.png, ..., S8.png
#
# Dependencies: ImageMagick (brew install imagemagick)

set -euo pipefail

GRID_IMAGE="${1:-}"
OUTPUT_DIR="${2:-}"
ROWS="${3:-2}"
COLS="${4:-4}"

if [[ -z "$GRID_IMAGE" || -z "$OUTPUT_DIR" ]]; then
  echo "Usage: split-grid.sh <grid_image> <output_dir> [rows] [cols]"
  echo "Example: split-grid.sh grid.png storyboard/ 2 4"
  exit 1
fi

# Fail early with a clear message instead of letting identify error out.
if [[ ! -f "$GRID_IMAGE" ]]; then
  echo "Error: File not found: $GRID_IMAGE"
  exit 1
fi

# Rows/cols feed integer arithmetic below; reject anything but positive ints.
if ! [[ "$ROWS" =~ ^[1-9][0-9]*$ && "$COLS" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: rows and cols must be positive integers (got rows=$ROWS, cols=$COLS)"
  exit 1
fi

if ! command -v magick &>/dev/null && ! command -v convert &>/dev/null; then
  echo "Error: ImageMagick not found. Install with: brew install imagemagick"
  exit 1
fi

# Determine ImageMagick command (v7: magick, v6: convert/identify)
if command -v magick &>/dev/null; then
  IDENTIFY="magick identify"
  CONVERT="magick"
else
  IDENTIFY="identify"
  CONVERT="convert"
fi

mkdir -p "$OUTPUT_DIR"

# Get image dimensions
DIMS=$($IDENTIFY -format "%wx%h" "$GRID_IMAGE")
IMG_W=$(echo "$DIMS" | cut -dx -f1)
IMG_H=$(echo "$DIMS" | cut -dx -f2)

CELL_W=$((IMG_W / COLS))
CELL_H=$((IMG_H / ROWS))

# A grid denser than the pixel dimensions would produce 0-size crops.
if (( CELL_W == 0 || CELL_H == 0 )); then
  echo "Error: image ${IMG_W}x${IMG_H} is too small for a ${ROWS}x${COLS} grid"
  exit 1
fi

echo "Grid: ${IMG_W}x${IMG_H}, Layout: ${ROWS}x${COLS}, Cell: ${CELL_W}x${CELL_H}"

INDEX=1
for row in $(seq 0 $((ROWS - 1))); do
  for col in $(seq 0 $((COLS - 1))); do
    X=$((col * CELL_W))
    Y=$((row * CELL_H))
    OUT_FILE="${OUTPUT_DIR}/S${INDEX}.png"

    $CONVERT "$GRID_IMAGE" -crop "${CELL_W}x${CELL_H}+${X}+${Y}" +repage "$OUT_FILE"

    # Human-readable size for display only (parsing ls is fine for cosmetics).
    SIZE=$(ls -lh "$OUT_FILE" | awk '{print $5}')
    echo "  S${INDEX}: ${CELL_W}x${CELL_H} @ +${X}+${Y} → ${OUT_FILE} (${SIZE})"

    INDEX=$((INDEX + 1))
  done
done

echo "Done: $((INDEX - 1)) panels extracted"
@@ -0,0 +1,140 @@
1
+ ---
2
+ name: tiktok-content-maker
3
+ description: >
4
+ TikTok e-commerce short video script generator. Analyzes product photos,
5
+ generates 15s video scripts with video prompts and English dialogue.
6
+ Use when user says "TikTok product video", "ecommerce video",
7
+ "product video", "sales video", "shoot product". Do NOT use for non-ecommerce videos or
8
+ general creative direction (use director instead).
9
+ allowed-tools: Bash, Read
10
+ metadata:
11
+ author: renoise
12
+ version: 0.1.0
13
+ category: video-production
14
+ tags: [product, ecommerce, tiktok]
15
+ ---
16
+
17
+ # Content Maker — E-commerce Short Video Script + Generation
18
+
19
+ ## Overview
20
+
21
+ End-to-end e-commerce short video tool: user provides product images (+ optional model images) → analyze product info → generate 15-second TikTok script (video prompt with embedded English dialogue) → submit video generation task.
22
+
23
+ ## Workflow
24
+
25
+ ### Phase 1: Material Collection & Product Analysis
26
+
27
+ 1. **Collect material paths**: Ask user for images
28
+ - `Product image path` (required): Product hero image. **Best: clean white-background product photo with no text/labels/decorations**. Images with marketing text overlays will interfere with the model.
29
+ - `Model image path` (optional, for analysis reference only): Shows how the product is worn/used. **Note: Model images are only used to understand product usage — they are NOT uploaded to Renoise** (privacy detection will block images containing realistic human faces).
30
+
31
+ 2. **Analyze product info**:
32
+ - Use the `gemini-gen` skill to analyze product images — send the image(s) with a prompt requesting product analysis (type, color, material, selling points, brand tone, scene suggestions)
33
+ - Alternatively, view images directly via the Read tool and analyze manually
34
+ - Extract: product type, color, material, selling points, brand tone, applicable scenarios
35
+ - **(Critical) Understand correct product usage from lifestyle images**:
36
+ - What is the user's posture? (standing/sitting/lying/walking)
37
+ - Where is the product positioned on the body? (handheld/floor/table/under body)
38
+ - How does the product interact with the body? (hand pressure vs body weight vs wearing vs applying)
39
+ - Where is the usage scenario? (gym/office/home/outdoors)
40
+ - If the user provides a product link, use WebFetch to scrape product detail page for additional context
41
+
42
+ 3. **Present analysis results** for user to confirm or supplement. Results must include a clear "**Usage description**", e.g.:
43
+ > Usage: Place the peanut ball on the floor/yoga mat, user lies on top of the ball, using body weight to massage the muscles along both sides of the spine. The peanut-shaped groove avoids the spine while the two ball ends work the erector spinae muscles.
44
+
45
+ ### Phase 2: 15-Second Script + Prompt Generation
46
+
47
+ Based on analysis results + reference guide, generate a complete 15-second video script.
48
+
49
+ **Must reference the following guide** (Read before generating):
50
+ - `${CLAUDE_SKILL_DIR}/references/ecom-prompt-guide.md` — E-commerce video prompt guide
51
+
52
+ **Prompt structure (3 required components):**
53
+
54
+ #### Part A: Product Anchoring (first line of prompt)
55
+
56
+ Product appearance is conveyed by the reference image. The prompt only needs **one sentence** stating what the product is + its use case:
57
+
58
+ ```
59
+ The product is a [brand] [product type] for [primary use case], shown in the reference image.
60
+ The product must match the reference image exactly in every frame. Do not invent any packaging, box, or container unless the reference image shows one.
61
+ ```
62
+
63
+ **Key**: Do not repeat color, material, shape, or logo descriptions in the prompt — that information is already in the reference image. Save prompt space for the hook and visual narrative.
64
+
65
+ #### Part B: Dialogue Embedding (throughout)
66
+
67
+ Dialogue must be in English, embedded in the narrative using forced lip-sync format:
68
+ ```
69
+ Spoken dialogue (say EXACTLY, word-for-word): "..."
70
+ Mouth clearly visible when speaking, lip-sync aligned.
71
+ ```
72
+
73
+ **Dialogue style requirements**:
74
+ - **Best-friend casual tone**: Like recommending to a friend, not reading ad copy
75
+ - **High information density**: Every sentence includes specific details (numbers, comparisons, usage scenarios) — no filler
76
+ - **No hard sell**: Don't end with "link below" or generic CTAs. Use natural personal recommendations (e.g., "Best money I have spent this year", "Trust me, just start")
77
+
78
+ **Dialogue pacing** (4 lines, matching 4 time segments):
79
+ ```
80
+ [0-3s] Hook — One sentence to stop the scroll (pain point / suspense / result-first)
81
+ [3-8s] Selling point — Specific specs + personal experience
82
+ [8-12s] Scene — Where to use + portability / versatility
83
+ [12-15s] Close — Genuine personal recommendation, no hard sell
84
+ ```
85
+
86
+ #### Part C: Visual Narrative (one continuous narrative)
87
+
88
+ **Video structure (one continuous 15-second video):**
89
+ ```
90
+ [0-3s] HOOK — High-impact opening. Must: fast camera movement (whip pan / snap dolly in) + dynamic action + start speaking immediately. Never start slow.
91
+ [3-8s] SHOWCASE — Product display + model interaction. Camera transitions to reveal material details.
92
+ [8-12s] SCENE — Real-life usage scenario. Pull back to medium/wide shot.
93
+ [12-15s] CLOSE — Model faces camera + product in frame + natural ending. Frame holds steady.
94
+ ```
95
+
96
+ **Output 3 items:**
97
+
98
+ #### 1. Video Prompt (English, with dialogue)
99
+ Director-dictation style paragraph (6-10 sentences, one thing per sentence), containing:
100
+ - Product anchoring (one sentence, Part A) at the very beginning
101
+ - Dialogue embedded with `Spoken dialogue (say EXACTLY, word-for-word):` format (Part B)
102
+ - `Mouth clearly visible when speaking, lip-sync aligned.` after each dialogue line
103
+ - Ad-6D Protocol elements interspersed
104
+ - Model appearance consistency description (gender, hair, skin tone, body type, outfit)
105
+ - At least 3 camera movement changes
106
+ - Lighting/atmosphere description
107
+
108
+ #### 2. Dialogue Script (English, with timestamps)
109
+ List the 4 dialogue lines separately with their time segments for easy review.
110
+
111
+ #### 3. BGM / Sound Design Suggestions
112
+ - Recommend music style matching the product tone
113
+ - Key moment sound effect cues
114
+
115
+ **Reference example**: Read `${CLAUDE_SKILL_DIR}/examples/dress-demo.md` for the latest standard output format.
116
+
117
+ ### Phase 3: User Confirmation
118
+
119
+ After presenting the full script, ask the user:
120
+ - Whether to adjust dialogue
121
+ - Whether to change the scene
122
+ - Whether to modify prompt details
123
+ - Proceed to submission after confirmation
124
+
125
+ ### Phase 4: Upload Materials + Submit Video Generation Task
126
+
127
+ After user confirms the script, upload the product image and submit the video generation task.
128
+
129
+ **Important rules**:
130
+ - Only upload product images — **never upload model/real person photos** (privacy detection will block images containing realistic human faces, error: `InputImageSensitiveContentDetected.PrivacyInformation`)
131
+ - Model appearance is controlled entirely by prompt text description
132
+ - Product images should ideally be clean white-background product photos, avoid images with marketing text overlays
133
+ - For batch generation: upload the product image once, reuse the material ID to submit multiple tasks with different scenes
134
+
135
+ ## Important Notes
136
+
137
+ - Images support jpg/jpeg/png/webp formats
138
+ - Video prompts must be entirely in English
139
+ - Dialogue must be in English, embedded in the prompt (`Spoken dialogue (say EXACTLY, word-for-word): "..."`)
140
+ - **Do not output separate subtitle text** — dialogue is already in the prompt, no additional subtitle layer needed