RubyGems - buttercut - Versions diffs - 0.5.0 → 0.6.0 - Mend

buttercut 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

checksums.yaml +4 -4
data/.claude/settings.local.json +2 -1
data/.claude/skills/analyze-video/SKILL.md +11 -78
data/.claude/skills/analyze-video/agent_prompt.md +84 -0
data/.claude/skills/backup-library/SKILL.md +1 -1
data/.claude/skills/cut-planner/SKILL.md +74 -0
data/.claude/skills/roughcut/SKILL.md +41 -47
data/.claude/skills/roughcut/agent_prompt.md +153 -0
data/.claude/skills/summarize-video/SKILL.md +31 -0
data/.claude/skills/summarize-video/agent_prompt.md +39 -0
data/.claude/skills/summarize-video/summary_skeleton.rb +78 -0
data/.claude/skills/summarize-video/visual_script_extractor.rb +78 -0
data/.claude/skills/transcribe-audio/SKILL.md +17 -71
data/.claude/skills/transcribe-audio/agent_prompt.md +53 -0
data/CLAUDE.md +48 -18
data/lib/buttercut/version.rb +1 -1
data/templates/library_template.yaml +1 -0
data/templates/plan_template.md +53 -0
data/templates/roughcut_template.yaml +3 -20
data/templates/settings_template.yaml +3 -0
metadata +11 -3
data/.claude/skills/roughcut/agent_instructions.md +0 -109

data/.claude/skills/summarize-video/summary_skeleton.rb ADDED Viewed

@@ -0,0 +1,78 @@
+#!/usr/bin/env ruby
+# Pre-create a summary skeleton file for the summarize-video skill,
+# with header (filename, duration) filled in and four placeholder markers
+# in the body for the sub-agent to replace via Edit.
+#
+# Usage:
+#   ruby summary_skeleton.rb <visual_transcript.json> <summary_output.md>
+require 'json'
+# Builds the markdown skeleton for the summarize-video skill: a header with
+# the video filename and duration taken from a visual transcript JSON, plus
+# four HTML-comment placeholders for the sub-agent to replace via Edit.
+class SummarySkeleton
+  # Convenience entry point: build the skeleton and write it in one call.
+  #
+  # @param transcript_path [String] path to the visual transcript JSON
+  # @param output_path [String] path of the markdown file to write
+  # @raise [ArgumentError] if either path is nil or empty
+  def self.create(transcript_path, output_path)
+    new(transcript_path, output_path).create
+  end
+  # @param transcript_path [String] path to the visual transcript JSON
+  # @param output_path [String] path of the markdown file to write
+  # @raise [ArgumentError] if either path is nil or empty
+  def initialize(transcript_path, output_path)
+    raise ArgumentError, "transcript_path is required" if transcript_path.nil? || transcript_path.empty?
+    raise ArgumentError, "output_path is required" if output_path.nil? || output_path.empty?
+    @transcript_path = transcript_path
+    @output_path = output_path
+  end
+  # Writes the skeleton to output_path and prints "skeleton: <path>" to stdout.
+  # The skeleton text is built before File.write is called, so a transcript
+  # that cannot be read or parsed raises without touching the output file.
+  #
+  # @raise [RuntimeError] if the transcript JSON has no "segments" value
+  def create
+    File.write(output_path, skeleton)
+    puts "skeleton: #{output_path}"
+  end
+  private
+  attr_reader :transcript_path, :output_path
+  # Parsed transcript JSON, read from disk once and memoized.
+  def data
+    @data ||= JSON.parse(File.read(transcript_path))
+  end
+  # Basename of the transcript's "video_path"; empty string when it is absent.
+  def video_filename
+    File.basename(data["video_path"].to_s)
+  end
+  # The transcript's "segments" value; a missing or null value is an error.
+  def segments
+    data["segments"] || raise("transcript JSON has no 'segments' key: #{transcript_path}")
+  end
+  # End time in seconds of the last segment. An empty segments array yields
+  # 0.0 instead of failing on nil, so a clip with no segments still gets a
+  # skeleton.
+  def total_duration
+    final_segment = segments.last || {}
+    final_segment["end"].to_f
+  end
+  # Formats whole seconds as zero-padded "MM:SS"; fractions are truncated and
+  # minutes are not wrapped at 60.
+  def format_timestamp(seconds)
+    "%02d:%02d" % seconds.to_i.divmod(60)
+  end
+  # Markdown body: filename heading, duration line, and the four placeholders.
+  def skeleton
+    <<~MD
+      # #{video_filename}
+      **Duration:** #{format_timestamp(total_duration)}
+      ## Overview
+      <!-- FILL_OVERVIEW -->
+      ## Key Visuals
+      <!-- FILL_KEY_VISUALS -->
+      ## Notable Dialogue
+      <!-- FILL_DIALOGUE -->
+      ## B-Roll
+      <!-- FILL_BROLL -->
+    MD
+  end
+end
+# Script entry point: runs only when this file is executed directly.
+# Requires two arguments (transcript JSON, output markdown); extras are ignored.
+if $PROGRAM_NAME == __FILE__
+  abort("usage: summary_skeleton.rb <visual_transcript.json> <summary_output.md>") if ARGV.length < 2
+  SummarySkeleton.create(ARGV[0], ARGV[1])
+end

data/.claude/skills/summarize-video/visual_script_extractor.rb ADDED Viewed

@@ -0,0 +1,78 @@
+#!/usr/bin/env ruby
+# Extract a human-readable script from a visual transcript JSON,
+# interleaving [VISUAL] descriptions with timestamped dialogue.
+# Prints to stdout for direct consumption by the summarize-video skill.
+#
+# Usage:
+#   ruby visual_script_extractor.rb <visual_transcript.json>
+require 'json'
+# Renders a visual transcript JSON as a plain-text script on stdout: a
+# two-line header followed by timestamped [VISUAL] descriptions interleaved
+# with timestamped dialogue.
+class VisualScriptExtractor
+  # Convenience entry point: build an extractor and print the script.
+  #
+  # @param transcript_path [String] path to the visual transcript JSON
+  # @raise [ArgumentError] if transcript_path is nil or empty
+  def self.extract(transcript_path)
+    new(transcript_path).extract
+  end
+  # @param transcript_path [String] path to the visual transcript JSON
+  # @raise [ArgumentError] if transcript_path is nil or empty
+  def initialize(transcript_path)
+    raise ArgumentError, "transcript_path is required" if transcript_path.nil? || transcript_path.empty?
+    @transcript_path = transcript_path
+  end
+  # Prints the header, a blank line, then the formatted script to stdout.
+  #
+  # @raise [RuntimeError] if the transcript JSON has no "segments" value
+  def extract
+    puts header
+    puts
+    puts format_script
+  end
+  private
+  attr_reader :transcript_path
+  # Parsed transcript JSON, read from disk once and memoized.
+  def data
+    @data ||= JSON.parse(File.read(transcript_path))
+  end
+  # The transcript's "segments" value; a missing or null value is an error.
+  def segments
+    data["segments"] || raise("transcript JSON has no 'segments' key: #{transcript_path}")
+  end
+  # Two comment-style lines naming the video and its duration.
+  def header
+    "# Video: #{video_filename}\n# Duration: #{format_timestamp(total_duration)}"
+  end
+  # Basename of the transcript's "video_path"; empty string when it is absent.
+  def video_filename
+    File.basename(data["video_path"].to_s)
+  end
+  # End time in seconds of the last segment. An empty segments array yields
+  # 0.0 instead of failing on nil, so the header still prints.
+  def total_duration
+    final_segment = segments.last || {}
+    final_segment["end"].to_f
+  end
+  # Joins the per-segment entries with blank lines; segments that produce no
+  # entry are dropped.
+  def format_script
+    segments.filter_map { |segment| format_segment(segment) }.join("\n\n")
+  end
+  # Builds the entry for one segment: the [VISUAL] line first, then the
+  # dialogue line, both stamped with the segment's start time. Returns nil
+  # when the segment has neither visual nor text content.
+  def format_segment(segment)
+    dialogue    = segment["text"].to_s.strip
+    description = segment["visual"].to_s.strip
+    stamp       = format_timestamp(segment["start"].to_f)
+    entry = []
+    entry << "[#{stamp}] [VISUAL] #{description}" unless description.empty?
+    entry << "[#{stamp}] #{dialogue}" unless dialogue.empty?
+    entry.empty? ? nil : entry.join("\n")
+  end
+  # Formats whole seconds as zero-padded "MM:SS"; fractions are truncated and
+  # minutes are not wrapped at 60.
+  def format_timestamp(seconds)
+    "%02d:%02d" % seconds.to_i.divmod(60)
+  end
+end
+# Script entry point: runs only when this file is executed directly.
+# Requires one argument (transcript JSON); extras are ignored.
+if $PROGRAM_NAME == __FILE__
+  abort("usage: visual_script_extractor.rb <visual_transcript.json>") if ARGV.empty?
+  VisualScriptExtractor.extract(ARGV.first)
+end

data/.claude/skills/transcribe-audio/SKILL.md CHANGED Viewed

@@ -3,88 +3,34 @@ name: transcribe-audio
 description: Transcribes video audio using WhisperX, preserving original timestamps. Creates JSON transcript with word-level timing. Use when you need to generate audio transcripts for videos.
 ---
-# Skill: Transcribe Audio
+# Skill: Transcribe Audio (parent brief)
-Transcribes video audio using WhisperX and creates clean JSON transcripts with word-level timing data.
+Transcribes video audio using WhisperX and produces a clean JSON transcript with word-level timing.
-## When to Use
-- Videos need audio transcripts before visual analysis
+`SKILL.md` is the parent's dispatch brief. The sub-agent's working prompt lives in `agent_prompt.md` — inline its contents when launching the Task agent. Don't pass `SKILL.md`.
-## Critical Requirements
+## Parallelism
-Use WhisperX, NOT standard Whisper. WhisperX preserves the original video timeline including leading silence, ensuring transcripts match actual video timestamps. Run WhisperX directly on video files. Don't extract audio separately - this ensures timestamp alignment.
+Launch at most **2 in parallel**. WhisperX is already multithreaded internally (~4 CPU threads via CTranslate2); 2 processes is the throughput-vs-RAM sweet spot on a 16GB Mac.
-## Workflow
+## Inputs to gather and pass inline
-### 1. Inputs from the parent
-This skill runs as a sub-agent. Do NOT read `library.yaml` or `settings.yaml` — the parent has that context and passes everything inline in your prompt. Expect these inputs:
+The parent reads `library.yaml` and `settings.yaml` and passes these values inline in each agent's prompt:
 - `video_path` — absolute path to the video file
 - `transcript_output_dir` — where to write the transcript JSON (e.g. `libraries/<library>/transcripts`)
-- `language_code` — ISO 639-1 code already mapped by the parent (e.g. `en`, `es`)
-- `whisper_model` — model size from the parent (e.g. `small`, `medium`, `turbo`)
-- `transcript_refinement` — boolean; if `true`, the parent will also pass `user_context` and `footage_summary` strings for Step 4
-- `user_context` (only when refinement is on) — may be empty string
-- `footage_summary` (only when refinement is on) — may be empty string
-If any required input is missing from your prompt, stop and ask the parent rather than inferring it from the filesystem.
-### 2. Run WhisperX
-```bash
-whisperx "<video_path>" \
-  --language <language_code> \
-  --model <whisper_model> \
-  --compute_type float32 \
-  --device cpu \
-  --output_format json \
-  --output_dir <transcript_output_dir>
-```
-### 3. Prepare Audio Transcript
-After WhisperX completes, format the JSON using our prepare_audio_script:
-```bash
-ruby .claude/skills/transcribe-audio/prepare_audio_script.rb \
-  <transcript_output_dir>/<video_basename>.json \
-  <video_path>
-```
-This script:
-- Adds video source path as metadata
-- Removes unnecessary fields to reduce file size
-- Prettifies JSON
-### 4. (Optional) Refine the transcript
-If the parent passed `transcript_refinement: true`, follow `.claude/skills/transcribe-audio/refine_instructions.md` using the `user_context` and `footage_summary` strings the parent supplied inline. Do NOT open `library.yaml`. If `transcript_refinement` is not set or is `false`, skip this step.
-### 5. Return Success Response
-After audio preparation completes, return this structured response to the parent agent:
-```
-✓ <video_basename.mov> transcribed successfully
-  Audio transcript: <transcript_output_dir>/<video_basename>.json
-  Video path: <video_path>
-```
-**DO NOT update library.yaml** - the parent agent will handle this to avoid race conditions when running multiple transcriptions in parallel.
-## Running in Parallel
+- `language_code` — ISO 639-1 code (e.g. `en`, `es`) — parent maps from library.yaml's `language` name
+- `whisper_model` — model size from settings.yaml (e.g. `small`, `medium`, `turbo`)
+- `transcript_refinement` — boolean from library.yaml. If `true`, also pass:
+  - `user_context` (may be empty string)
+  - `footage_summary` (may be empty string)
-This skill is designed to run inside a Task agent for parallel execution:
-- Each agent handles ONE video file
-- Multiple agents can run simultaneously
-- Parent thread updates library.yaml sequentially after each agent completes
-- No race conditions on shared YAML file
+After the agent returns, update `library.yaml` with `transcript: <filename>.json`.
-## Next Step
+## Next step
-After audio transcription, use the **analyze-video** skill to add visual descriptions and create the visual transcript.
+Once all videos have audio transcripts, dispatch `analyze-video` for visual descriptions.
-## Installation
+## Dependencies
-Ensure WhisperX is installed. Use the **setup** skill to verify dependencies.
+WhisperX must be installed. Use the **setup** skill to verify.

data/.claude/skills/transcribe-audio/agent_prompt.md ADDED Viewed

@@ -0,0 +1,53 @@
+# Transcribe Audio (sub-agent prompt)
+You are a sub-agent. Transcribe one video file using WhisperX and produce a clean JSON transcript with word-level timing.
+**Critical:** Use WhisperX, NOT standard Whisper. WhisperX preserves the original video timeline including leading silence, ensuring transcripts match actual video timestamps. Run WhisperX directly on the video file — don't extract audio separately.
+## Inputs (passed inline by the parent)
+- `video_path` — absolute path to the video file
+- `transcript_output_dir` — where to write the transcript JSON
+- `language_code` — ISO 639-1 code (e.g. `en`, `es`)
+- `whisper_model` — model size (e.g. `small`, `medium`, `turbo`)
+- `transcript_refinement` — boolean; if `true`, also expect:
+  - `user_context` — string, may be empty
+  - `footage_summary` — string, may be empty
+Do NOT read `library.yaml` or `settings.yaml`. If a required input is missing from your prompt, stop and ask the parent rather than inferring from the filesystem.
+## 1. Run WhisperX
+```bash
+whisperx "<video_path>" \
+  --language <language_code> \
+  --model <whisper_model> \
+  --compute_type float32 \
+  --device cpu \
+  --output_format json \
+  --output_dir <transcript_output_dir>
+```
+## 2. Prepare audio transcript
+```bash
+ruby .claude/skills/transcribe-audio/prepare_audio_script.rb \
+  <transcript_output_dir>/<video_basename>.json \
+  <video_path>
+```
+This script adds the video source path as metadata, removes unnecessary fields, and prettifies the JSON.
+## 3. (Optional) Refine the transcript
+If `transcript_refinement: true`, follow `.claude/skills/transcribe-audio/refine_instructions.md`, using the `user_context` and `footage_summary` strings the parent supplied inline. Do NOT open `library.yaml`. Skip if `transcript_refinement` is missing or `false`.
+## 4. Return success response
+```
+✓ <video_basename.mov> transcribed successfully
+  Audio transcript: <transcript_output_dir>/<video_basename>.json
+  Video path: <video_path>
+```
+**Do NOT update library.yaml** — the parent handles all yaml I/O to avoid race conditions in parallel runs.

data/CLAUDE.md CHANGED Viewed

@@ -23,15 +23,16 @@ You are an AI video editor assistant working with a software engineer. You gener
    - If new: gather project information (library name, video file locations, language)
    - Create directory structure and library.yaml from template
    - Automatically start footage analysis after setup
-2. **Transcribe** → Use `transcribe-audio` and `analyze-video` skills to process videos
+2. **Transcribe** → Use `transcribe-audio`, `analyze-video`, and `summarize-video` skills to process videos
    - First: `transcribe-audio` creates audio transcripts with WhisperX (word-level timing)
    - Then: `analyze-video` adds visual descriptions by extracting and analyzing frames
-   - All videos must have BOTH audio transcripts AND visual transcripts before proceeding to rough cut or sequence creation
-   - Visual transcripts are essential for B-roll selection, shot composition, and editorial decisions
-3. **Edit** → Use `roughcut` skill to create timeline scripts from transcripts
-   - **Rough cuts**: Multi-minute edits for full videos (typically 3-15+ minutes)
-   - **Sequences**: 30-60 second clips that user will build to be imported into a larger video (created using the same roughcut skill with shorter target duration)
-   - **PREREQUISITE:** Check library.yaml to verify all videos have visual_transcript populated
+   - Then: `summarize-video` generates a short markdown summary from each visual transcript
+   - All videos must have audio transcripts, visual transcripts, AND summaries before proceeding to rough cut or sequence creation
+3. **Edit** → Use `cut-planner` then `roughcut` to plan and build a timeline from transcripts
+   - `cut-planner` reads all summaries in the main thread, proposes 2–3 narrative options, iterates with the user, and writes an approved plan markdown file
+   - `roughcut` consumes that plan, spins up a sub-agent that reads the library directly, builds the YAML iteratively, reviews against format conventions, exports the XML, and returns conversational editorial notes the parent uses to dialogue with the user
+   - **Rough cuts**: 3–15+ min edits. **Sequences**: 30–60s clips. Same pair of skills, different target duration.
+   - **PREREQUISITE:** Check library.yaml to verify all videos have `visual_transcript` and `summary` populated
 4. **Backup** → Use `backup-library` skill to create compressed archives of all libraries
    - Creates timestamped ZIP backup of entire libraries directory
    - Backups are stored in `/backups/` and excluded from git
@@ -121,9 +122,11 @@ Ask the user these questions for new libraries one at a time (never all at once)
 mkdir -p libraries/[library-name]
 mkdir -p libraries/[library-name]/transcripts
 mkdir -p libraries/[library-name]/roughcuts
+mkdir -p libraries/[library-name]/summaries
+mkdir -p libraries/[library-name]/plans
 ```
-Note: A single `/tmp/` directory at the root is used for all temporary files. Create subdirectories as needed and delete after use.
+Note: A single `tmp/` directory inside the buttercut project root is used for all temporary files. Create subdirectories as needed and delete after use.
 ### Create Library File
@@ -131,7 +134,7 @@ Duplicate `templates/library_template.yaml` to create `libraries/[library-name]/
 For each video file:
 1. Use `ffprobe` to get duration
-2. Add entry to library.yaml with empty `transcript` and `visual_transcript`
+2. Add entry to library.yaml with empty `transcript`, `visual_transcript`, and `summary`
 3. Empty fields mean "todo", valid filenames mean "done"
 The `language` field stores the language code for all videos in this library.
@@ -144,16 +147,21 @@ After library setup completes, **automatically start analyzing all footage**:
 1. Inform user: "Library setup complete. Found [N] videos ([total size]). Starting footage analysis..."
 2. Read `libraries/settings.yaml` (for `whisper_model`) and the library's `library.yaml` (for `language`, `transcript_refinement`, `user_context`, `footage_summary`) ONCE in the parent thread. If any expected field is missing, run the appropriate migration first (see Critical Principles below).
-3. Launch `transcribe-audio` agents (can run in parallel for multiple videos). Pass these values inline in each agent's prompt — the sub-agent never reads `library.yaml` or `settings.yaml`:
+3. Launch `transcribe-audio` agents. Pass these values inline in each agent's prompt:
    - `video_path`, `transcript_output_dir`, `language_code`, `whisper_model`
    - `transcript_refinement` (boolean). If `true`, also pass the current `user_context` and `footage_summary` strings (empty strings are fine — refinement still catches nonsense-token and self-witness fixes).
 4. As each agent completes, update library.yaml with `transcript` (filename only, not full path).
-5. After all audio transcripts complete, launch `analyze-video` agents (can run in parallel) following the same "parent passes context inline" contract. Pass inline: `video_path`, `audio_transcript_path`, `visual_transcript_path`.
+5. After all audio transcripts complete, launch `analyze-video` agents. Pass inline: `video_path`, `audio_transcript_path`, `visual_transcript_path`.
 6. As each agent completes, update library.yaml with `visual_transcript` (filename only, not full path).
-7. Analyze ALL videos before offering to create rough cuts.
-8. **After all analysis completes, automatically create a backup** using the `backup-library` skill.
+7. After all visual transcripts complete, summarize each video using the `summarize-video` skill on the **Haiku model**:
+   - For each video, first pre-create a skeleton file in the parent: `ruby .claude/skills/summarize-video/summary_skeleton.rb <visual_transcript_path> <summary_output_path>`
+   - Then launch the agent passing inline: `visual_transcript_path`, `summary_output_path` (e.g., `libraries/[library-name]/summaries/summary_[videoname].md`)
+   - The agent fills the four placeholders via Edit. The skeleton + Edit pattern is required: without it, Haiku frequently refuses Write and dumps markdown into its reply instead.
+8. As each agent completes, update library.yaml with `summary` (filename only, not full path).
+9. Analyze ALL videos before offering to create rough cuts.
+10. **After all analysis completes, automatically create a backup** using the `backup-library` skill.
-**Contract: sub-agents don't read `library.yaml`.** The parent owns `library.yaml` (and `settings.yaml`) — it reads once, passes values inline, and writes results once per agent completion. Sub-agents should not even know those files exist. This keeps the context boundary clean and avoids race conditions when many agents run in parallel.
+**Contract: sub-agents receive `agent_prompt.md`, not `SKILL.md`.** For parallelizable skills (`transcribe-audio`, `analyze-video`, `summarize-video`), the parent reads `SKILL.md` for dispatch info (parallelism cap, required inputs) and inlines `agent_prompt.md` into the sub-agent's prompt. `SKILL.md` is parent-only.
 **Note on refinement:** When `transcript_refinement: true`, each `transcribe-audio` agent reviews and corrects its transcript in place before returning, using the `user_context` and `footage_summary` the parent passed in. Empty context strings are fine — the agent still runs and catches nonsense-token and self-witness fixes. The parent still only writes `transcript: <filename>.json` to `library.yaml` after the agent completes.
@@ -181,7 +189,8 @@ When processing multiple videos, use parallel agents for maximum throughput:
    - Run WhisperX or frame extraction.
    - Prepare and clean transcript JSON.
    - Return structured response with file paths.
-   - DO NOT read `library.yaml` or `settings.yaml`, and DO NOT update `library.yaml` (parent handles all yaml I/O).
+   Each skill's `agent_prompt.md` documents its own IO contract — including whether the sub-agent reads or writes `library.yaml`.
 3. **Benefits:**
    - Multiple videos process simultaneously
@@ -200,6 +209,7 @@ Known migration triggers (match each to a `scripts/NNN_migrate_*.rb` script via
 - `editor` missing (added in 0.4.0)
 - `transcript_refinement` missing (added in [Unreleased]; missing means "predates the feature, default to `false`" — NOT the template default of `true`)
 - `footage_summary` missing OR old name `footage_description` present (renamed in [Unreleased])
+- video entries with `summary` missing (added in [Unreleased]; missing means "todo", default to empty string)
 - video entries with `transcript_path` / `visual_transcript_path` (renamed to `transcript` / `visual_transcript` in 0.3.0)
 - video entries with `file_size_mb` (removed in 0.3.0)
@@ -209,7 +219,9 @@ A missing field is not the same as a field set to the template default — the t
 **Use actual filenames.** Never use generic labels like "Video 1" or "Clip A" - always reference actual filenames like "DJI_20250423171212_0210_D.mov" for clear traceability.
-**Visual transcripts are mandatory.** Before creating any rough cut or sequence, verify ALL videos have both audio and visual transcripts. Check `library.yaml` - every video entry must have a `visual_transcript` with a filename (not empty or null or ""). Transcripts are stored in `libraries/[library-name]/transcripts/`. Visual descriptions are essential for shot selection, pacing decisions, and B-roll placement.
+**Visual transcripts and summaries are mandatory.** Before creating any rough cut or sequence, verify ALL videos have audio transcripts, visual transcripts, AND summaries. Check `library.yaml` — every video entry must have `visual_transcript` and `summary` with filenames (not empty, null, or ""). Transcripts are stored in `libraries/[library-name]/transcripts/`; summaries in `libraries/[library-name]/summaries/`. Visual descriptions and summaries are essential for shot selection and pacing decisions.
+**Single-track timelines only.** ButterCut produces one sequential video track. Each clip's own audio plays during that clip — there is no second video track for cutaways layered over a continuing voiceover, and no separate audio track. When planning or pitching cuts, never propose "B-roll over VO," "story under meetup footage," picture-in-picture, or any structure that assumes a clip's audio continues while different visuals play on top. Cutaways are fine, but they're hard cuts: when you cut to the wide shot, you cut to that shot's audio too. Plan every cut as a strictly linear sequence of clips.
 **Be curious and ask questions.** Occasionally ask users questions about their libraries and footage to better understand context, creative intent, and preferences. When you receive answers, add this information to the `user_context` key in the library.yaml file. This builds institutional knowledge that improves future rough cut and sequence decisions and helps maintain continuity across editing sessions.
@@ -219,7 +231,7 @@ A missing field is not the same as a field set to the template default — the t
 - Flag areas needing human judgment rather than making assumptions
 - When you have lots of videos to process (dozens or hundreds isn't out of the ordinary), create a reasonable task list with 5 tasks and then a final task that says to check the yaml processing file to see if you need to then generate more tasks. This way users can see progress and the agent doesn't get overwhelmed.
 - Generally avoid writing one-off scripts, but if you do need to write one, write it in Ruby unless you have a very strong reason to write in another language.
-- Only run 4 parallel tasks at a time.
+- Parallelism caps live in each skill's `SKILL.md` (parent brief). Read it before dispatching.
 - Whenever you export XML files, include a datetime timestamp in the filename so it's clear when they were generated.
 ## Programming Style
@@ -255,6 +267,24 @@ ButterCut is designed to be simple, automatic and geared toward working with non
 The user should not need to understand video codecs, frame rates, or FCPXML structure - just provide file paths and get working XML. We should talk to the user from a video editing perspective, not a technical software engineer perspective.
+### Vocabulary — talk like an editor, not a developer
+The user is generally a video editor, not a programmer. They don't need to know what file the cut lives in, what tool transcribed their audio, or which skill or sub-agent is doing the work behind the scenes. Implementation details are for the codebase; user-facing chat stays in the language of video editing. When in doubt, drop the technical noun entirely and just say what's happening. Skills, code, and other internal artifacts should stay technical, but keep that detail out of chat with the user.
+Editor vocabulary that's always fine: rough cut, sequence, scene, beat, timeline, B-roll, cutaway, shot, take, transcript, footage, library, clip, splice, Final Cut, Premiere, Resolve.
+Don't say → say (one per category — generalize the pattern, don't treat as a lookup table):
+- *File/format nouns:* "I'll update the YAML" / "regenerate the FCPXML" → "I'll update the cut" / "I'll re-export it for Final Cut"
+- *Architecture nouns:* "I'll spin up a sub-agent" / "running the roughcut skill" / "the parent thread" → just speak in first person ("I'll build the cut")
+- *Tools and models:* "WhisperX will transcribe" / "running ffmpeg" / "I used Haiku for the summary" → "I'll transcribe the audio" / "I'll analyze the visuals" (don't name models)
+- *Internal field names:* "I'll update footage_summary" / "transcript_refinement is true" → "I'll note that about your footage" / "I'll proofread the transcripts"
+- *Paths in casual chat:* `.fcpxml`, `.json`, `libraries/foo/transcripts/…` → name the artifact ("the Final Cut export", "the transcript") and only show the path at final delivery or when the user needs to grab the file
+Two exceptions where technical detail IS appropriate:
+1. The user explicitly asks ("where is it saved?", "what format?") — answer plainly.
+2. Final delivery summary — naming the export file path is genuinely useful so they can find it.
 ## Development Commands
 ### Testing
@@ -275,4 +305,4 @@ bundle exec rspec spec/buttercut_spec.rb:10
 ## Claude Skills
-When creating new Claude skills, aim to keep them to 50 lines. Only very complicated skills (ie transcription and roughcuts) should be larger than that. If the skill is complicated and seems like it can't be explained in 50 lines, consider if they should be broken up across multiple skills or if the complexity can be contained inside a ruby script saved adjacent to the skill.
+When creating new Claude skills, aim to keep them as brief as possible. Use active voice to help condense instructions. Use simple, plain language.

data/lib/buttercut/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class ButterCut
-  VERSION = "0.5.0"
+  VERSION = "0.6.0"
 end

data/templates/library_template.yaml CHANGED Viewed

@@ -20,3 +20,4 @@ videos:
     duration: "00:05:32"
     transcript: # filename only (stored in libraries/[library-name]/transcripts/)
     visual_transcript: # filename only (visual_*.json with frame descriptions)
+    summary: # filename only (summary_*.md overview stored in libraries/[library-name]/summaries/)

data/templates/plan_template.md ADDED Viewed

@@ -0,0 +1,53 @@
+<!--
+Cut Plan Template — written by `cut-planner`, consumed by `roughcut`.
+Fill in every section. Delete this comment block before saving.
+The plan is editorial direction; the build agent picks the exact clips
+and timestamps inside each beat.
+-->
+# [Working title]
+## Concept
+<!-- 1–2 sentences on the angle, tone, or arc. What story is this cut telling? -->
+## Format
+<!-- Vlog, YouTube Short, long-form, documentary, talking-head, montage, etc.
+Include any pacing or tonal cues that flow from the format. -->
+## Target Duration
+<!-- Approximate runtime, e.g. "4–6 min" or "45–60s sequence". -->
+## Beats
+<!--
+Ordered list. Each beat is one editorial unit with intent and footage suggestions. Beats are direction, not paper-cut timestamps.
+For an 8-minute vlog, you might aim for roughly 60 seconds per beat, with good footage for both A-roll and B-roll.
+For other types of videos use your best editorial judgement, thinking about what is common in the genre you're working with. You can also talk to the user directly to determine what they want.
+-->
+### 1. [Beat name]
+- **Intent:** what this beat does for the story (open, escalate, turn, payoff, etc.)
+- **Approx. share:** rough fraction of runtime (e.g. "~30s", "~2 min", "~15% of total")
+- **Footage suggestions:** filenames likely to feed this beat (e.g. `DJI_56738.mov`, `panasonic_1234.mov`). The build agent may swap in stronger moments from elsewhere.
+### 2. [Beat name]
+- **Intent:**
+- **Approx. share:**
+- **Footage suggestions:**
+<!-- Add more beats as needed. A 6 minute video might have 4-6 beats. You'll need to use your judgement about the footage availability, target duration and cut you're making. -->
+## Required Dialogue
+<!--
+Lines the user specifically wants in. Two flavors are both fine:
+- **Exact quote:** "Here's how I learned to juggle." (`source_file_if_known.mov`)
+- **Lossy reference:** "Include the bit about Kailey's uncle the magician teaching her to juggle before he died." (`file_1.mov, file_2.mov, file_5.mov`)
+Leave this section empty if no specific lines are required.
+-->
+## Notes for the Build
+<!-- Any constraints, things to avoid, or judgment calls the build agent should know — single-track timeline assumptions, must-not-include footage, tone preferences, etc. Include decisions or direction from the user. -->

data/templates/roughcut_template.yaml CHANGED Viewed

@@ -1,25 +1,8 @@
 # Rough Cut Template
 # This template defines the structure for video rough cuts
-# User-facing description of what this rough cut contains
-description: "Brief description of the rough cut - what story it tells, target duration, and editorial approach"
-# Working notes for the agent during rough cut creation
-notes: |
-  Working notes area for editorial decisions, narrative structure planning,
-  pacing considerations, and any issues or concerns identified during editing.
-  Consider:
-  - Story arc and key narrative beats
-  - Pacing and rhythm
-  - Transitions between segments
-  - B-roll placement opportunities
-  - Audio/dialogue clarity
-# Coverage summary of available footage
-footage_coverage: |
-  Overview of what footage is available and how it could be used.
-  Include notes about strongest segments, potential issues, and creative opportunities.
+# One-line summary of what this cut is — useful when scanning a folder of cuts
+description: "Brief one-line summary of this cut — what it is and roughly how long"
 # The actual rough cut - ordered list of clips to use
 clips:
@@ -39,4 +22,4 @@ clips:
 # Rough cut metadata
 metadata:
   created_date: ""  # Will be populated when rough cut is created
-  total_duration: ""  # Calculated from all clip durations
+  total_duration: ""  # Calculated from all clip durations

data/templates/settings_template.yaml CHANGED Viewed

@@ -8,3 +8,6 @@ editor: fcpx
 # turbo is nearly as accurate as large-v3 but significantly faster
 # Recommended: `small` paired with transcript_refinement (set per-library in library.yaml)
 whisper_model: small
+# After exporting a roughcut, also drop a copy of the XML on the Desktop for easy import
+save_to_desktop_after_export: true

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: buttercut
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Andrew Ford
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-04-25 00:00:00.000000000 Z
+date: 2026-05-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -66,18 +66,25 @@ files:
 - ".claude/settings.json"
 - ".claude/settings.local.json"
 - ".claude/skills/analyze-video/SKILL.md"
+- ".claude/skills/analyze-video/agent_prompt.md"
 - ".claude/skills/analyze-video/prepare_visual_script.rb"
 - ".claude/skills/backup-library/SKILL.md"
 - ".claude/skills/backup-library/backup_libraries.rb"
+- ".claude/skills/cut-planner/SKILL.md"
 - ".claude/skills/release/SKILL.md"
 - ".claude/skills/roughcut/SKILL.md"
-- ".claude/skills/roughcut/agent_instructions.md"
+- ".claude/skills/roughcut/agent_prompt.md"
 - ".claude/skills/roughcut/export_to_fcpxml.rb"
 - ".claude/skills/setup/SKILL.md"
 - ".claude/skills/setup/advanced-setup.md"
 - ".claude/skills/setup/simple-setup.md"
 - ".claude/skills/setup/verify_install.rb"
+- ".claude/skills/summarize-video/SKILL.md"
+- ".claude/skills/summarize-video/agent_prompt.md"
+- ".claude/skills/summarize-video/summary_skeleton.rb"
+- ".claude/skills/summarize-video/visual_script_extractor.rb"
 - ".claude/skills/transcribe-audio/SKILL.md"
+- ".claude/skills/transcribe-audio/agent_prompt.md"
 - ".claude/skills/transcribe-audio/prepare_audio_script.rb"
 - ".claude/skills/transcribe-audio/refine_instructions.md"
 - ".claude/skills/update-buttercut/SKILL.md"
@@ -91,6 +98,7 @@ files:
 - lib/buttercut/fcpx.rb
 - lib/buttercut/version.rb
 - templates/library_template.yaml
+- templates/plan_template.md
 - templates/roughcut_template.yaml
 - templates/settings_template.yaml
 homepage: https://github.com/andrewford/buttercut