RubyGems - buttercut - Versions diffs - 0.4.0 → 0.5.0 - Mend

buttercut 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml +4 -4
data/.claude/scripts/script_extractor.rb +66 -0
data/.claude/settings.local.json +6 -1
data/.claude/skills/analyze-video/SKILL.md +17 -9
data/.claude/skills/backup-library/backup_libraries.rb +1 -1
data/.claude/skills/release/SKILL.md +21 -11
data/.claude/skills/roughcut/agent_instructions.md +1 -1
data/.claude/skills/roughcut/export_to_fcpxml.rb +25 -0
data/.claude/skills/transcribe-audio/SKILL.md +25 -18
data/.claude/skills/transcribe-audio/refine_instructions.md +114 -0
data/CLAUDE.md +91 -40
data/README.md +5 -1
data/lib/buttercut/version.rb +1 -1
data/templates/library_template.yaml +1 -0
data/templates/settings_template.yaml +10 -0
metadata +5 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c4de766d8aca2ec00b99b20abd177310ade3b5145424677bf6b3e515487960b2
-  data.tar.gz: 97429c1df91a51ef44a921a0d2f3e8140adfa2c6c0943abf643d0967cea2e4bc
+  metadata.gz: 78b845e8b54d03aee93f00bdbaa96f140d0f10c94910a117352b7401cf30bf63
+  data.tar.gz: 0eb609100a9e2f367b493d6aef9f45d08e784ad573c4d033733009be40ccc525
 SHA512:
-  metadata.gz: a6c30d9d4038725ef7b63cc15d5de34a8fc85e775bb3907896dae7d1bbde9f87abe7da263dec07ce0a27c7ec6b6d5940db00dc1bf40b24763a49c5381a9961b4
-  data.tar.gz: '029612e07d6fb9e806aecd10f5d89a95557d23e151a4fe53cb3ab393cd05b45ad89908881c982f6b7922e6227399306cbc51a4ef257404637b84f51a52c5e757'
+  metadata.gz: 33eb34693818323f40900bcea272781391c372fc5bf9adb71f11632da83aa3de5936538150c69a9d5407f5e5a6059342eed23eed5683e6b1fec36d9a55b37d2a
+  data.tar.gz: f022d15eb198cb32dde550d0120598cf5d8eb7f95145e21bfb70401df1c762bcd4474f4836966601aa46ce232e2bd364a25cc8d1187ad2c1e7edf02164ca1789

data/.claude/scripts/script_extractor.rb ADDED Viewed

@@ -0,0 +1,66 @@
+#!/usr/bin/env ruby
+# Extract the plain-text script from a WhisperX-style transcript JSON.
+#
+# Usage:
+#   ruby .claude/scripts/script_extractor.rb <transcript.json> <output.txt>
+#
+# Output is one segment per paragraph (blank line between), trimmed, suitable
+# for proofreading by a human or a sub-agent without the overhead of the full
+# transcript JSON (word-level timing, scores, etc.).
+require 'json'
+class ScriptExtractor
+  def self.extract(transcript_path, output_path)
+    new(transcript_path, output_path).extract
+  end
+  def initialize(transcript_path, output_path)
+    raise ArgumentError, "transcript_path is required" if transcript_path.nil? || transcript_path.empty?
+    raise ArgumentError, "output_path is required" if output_path.nil? || output_path.empty?
+    @transcript_path = transcript_path
+    @output_path = output_path
+  end
+  def extract
+    write_output(format_script)
+    report
+  end
+  private
+  attr_reader :transcript_path, :output_path
+  def data
+    @data ||= JSON.parse(File.read(transcript_path))
+  end
+  def segments
+    data["segments"] or raise "transcript JSON has no 'segments' key: #{transcript_path}"
+  end
+  def format_script
+    paragraphs = segments.map { |s| s["text"].to_s.strip }.reject(&:empty?)
+    paragraphs.join("\n\n") + "\n"
+  end
+  def write_output(text)
+    File.write(output_path, text)
+  end
+  def report
+    in_kb = (File.size(transcript_path) / 1024.0).round(1)
+    out_kb = (File.size(output_path) / 1024.0).round(1)
+    puts "Extracted script: #{output_path} (#{out_kb} KB from #{in_kb} KB source, #{segments.size} segments)"
+  end
+end
+if __FILE__ == $PROGRAM_NAME
+  transcript_path, output_path = ARGV
+  abort("usage: script_extractor.rb <transcript.json> <output.txt>") unless transcript_path && output_path
+  abort("file not found: #{transcript_path}") unless File.file?(transcript_path)
+  if File.expand_path(output_path) == File.expand_path(transcript_path)
+    abort("output path must differ from transcript path: #{transcript_path}")
+  end
+  ScriptExtractor.extract(transcript_path, output_path)
+end

data/.claude/settings.local.json CHANGED Viewed

@@ -1,6 +1,9 @@
 {
   "permissions": {
     "allow": [
+      "Agent",
+      "Read(tmp/**)",
+      "Write(tmp/**)",
       "Bash(./.claude/skills/roughcut/combine_visual_transcripts.rb:*)",
       "Bash(./.claude/skills/roughcut/export_to_fcpxml.rb:*)",
       "Skill(backup-library)",
@@ -22,7 +25,9 @@
       "Bash(git worktree add:*)",
       "Bash(cat:*)",
       "Bash(python3:*)",
-      "Bash(gh api:*)"
+      "Bash(gh api:*)",
+      "Bash(gh pr:*)",
+      "Bash(cp *)"
     ],
     "deny": [],
     "ask": []

data/.claude/skills/analyze-video/SKILL.md CHANGED Viewed

@@ -13,16 +13,24 @@ Videos must have audio transcripts. Run **transcribe-audio** skill first if need
 ## Workflow
-### 1. Copy & Clean Audio Transcript
+### 1. Inputs from the parent
+This skill runs as a sub-agent. Do NOT read `library.yaml` or `settings.yaml` — the parent has that context and passes everything inline in your prompt. Expect these inputs:
+- `video_path` — absolute path to the video file
+- `audio_transcript_path` — absolute path to the prepared audio transcript JSON
+- `visual_transcript_path` — absolute path to write the visual transcript JSON
+### 2. Copy & Clean Audio Transcript
 Don't read the audio transcript, just copy it and then prepare it by using the prepare_visual_script.rb file. This removes word-level timing data and prettifies the JSON for easier editing:
 ```bash
-cp libraries/[library]/transcripts/video.json libraries/[library]/transcripts/visual_video.json
-ruby .claude/skills/analyze-video/prepare_visual_script.rb libraries/[library]/transcripts/visual_video.json
+cp <audio_transcript_path> <visual_transcript_path>
+ruby .claude/skills/analyze-video/prepare_visual_script.rb <visual_transcript_path>
 ```
-### 2. Extract Frames (Binary Search)
+### 3. Extract Frames (Binary Search)
 Create frame directory: `mkdir -p tmp/frames/[video_name]`
@@ -37,11 +45,11 @@ ffmpeg -ss 00:00:02 -i video.mov -vframes 1 -vf "scale=1280:-1" tmp/frames/[vide
 **Stop when:** The footage no longer seems to be changing or only has minor changes
 **Never sample** more frequently than once per 30 seconds
-### 3. Add Visual Descriptions
+### 4. Add Visual Descriptions
 Read the visual video json file that you created earlier.
-**Read the JPG frames** from `tmp/frames/[video_name]/` using Read tool, then **Edit** `visual_video.json`:
+**Read the JPG frames** from `tmp/frames/[video_name]/` using Read tool, then **Edit** the file at `<visual_transcript_path>`:
 Do these incrementally. You don't need to create a program or script to do this, just incrementally edit the json whenever you read new frames.
@@ -73,7 +81,7 @@ Do these incrementally. You don't need to create a program or script to do this,
 - First segment: detailed (subject, setting, shot type, lighting, camera style)
 - Continuing shots: brief if similar, otherwise can be up to 3 sentences if drastically different.
-### 4. Cleanup & Return
+### 5. Cleanup & Return
 ```bash
 rm -rf tmp/frames/[video_name]
@@ -82,8 +90,8 @@ rm -rf tmp/frames/[video_name]
 Return structured response:
 ```
 ✓ [video_filename.mov] analyzed successfully
-  Visual transcript: libraries/[library]/transcripts/visual_video.json
-  Video path: /full/path/to/video_filename.mov
+  Visual transcript: <visual_transcript_path>
+  Video path: <video_path>
 ```
 **DO NOT update library.yaml** - parent agent handles this to avoid race conditions in parallel execution.

data/.claude/skills/backup-library/backup_libraries.rb CHANGED Viewed

@@ -29,7 +29,7 @@ class LibraryBackup
     files = Dir.glob(File.join(@libraries_dir, '**', '*')).select { |f| File.file?(f) }
-    Zip::File.open(backup_path, Zip::File::CREATE) do |zipfile|
+    Zip::File.open(backup_path, create: true) do |zipfile|
       files.each do |file|
         zipfile.add(file.sub("#{File.dirname(@libraries_dir)}/", ''), file)
       end

data/.claude/skills/release/SKILL.md CHANGED Viewed

@@ -59,7 +59,17 @@ class ButterCut
 end
 ```
-### 5. Gather Changelog Notes
+### 5. Update Gemfile.lock
+Run `bundle install` so `Gemfile.lock` reflects the new version:
+```bash
+bundle install
+```
+Verify the version updated in `Gemfile.lock` before proceeding.
+### 6. Gather Changelog Notes
 Ask user for release notes. Prompt with:
 - What changed in this release?
@@ -67,7 +77,7 @@ Ask user for release notes. Prompt with:
 - Any bug fixes?
 - Any breaking changes?
-### 6. Update or Create CHANGELOG.md
+### 7. Update or Create CHANGELOG.md
 If `CHANGELOG.md` exists, prepend new entry. Otherwise create it:
@@ -89,14 +99,14 @@ All notable changes to ButterCut will be documented in this file.
 - Improved W
 ```
-### 7. Commit Version Bump
+### 8. Commit Version Bump
 ```bash
-git add lib/buttercut/version.rb CHANGELOG.md
+git add lib/buttercut/version.rb Gemfile.lock CHANGELOG.md
 git commit -m "Bump version to 0.2.0"
 ```
-### 8. Create and Push Git Tag
+### 9. Create and Push Git Tag
 ```bash
 git tag v0.2.0
@@ -104,7 +114,7 @@ git push origin main
 git push origin v0.2.0
 ```
-### 9. Build Gem
+### 10. Build Gem
 ```bash
 gem build buttercut.gemspec
@@ -112,7 +122,7 @@ gem build buttercut.gemspec
 This creates `buttercut-0.2.0.gem` file.
-### 10. Publish to RubyGems
+### 11. Publish to RubyGems
 **First time setup check:**
@@ -133,7 +143,7 @@ gem push buttercut-0.2.0.gem
 This makes the gem available for `gem install buttercut` worldwide.
-### 11. Create GitHub Release
+### 12. Create GitHub Release
 **Using GitHub CLI:**
 ```bash
@@ -155,21 +165,21 @@ Guide user through manual release creation:
 Then wait for user confirmation that release is created before proceeding to cleanup.
-### 12. Cleanup
+### 13. Cleanup
 ```bash
 # Remove local gem file (it's on RubyGems and GitHub now)
 rm buttercut-0.2.0.gem
 ```
-### 13. Verify Release
+### 14. Verify Release
 Check that everything worked:
 - RubyGems page: https://rubygems.org/gems/buttercut
 - GitHub releases: https://github.com/andrewford/buttercut/releases
 - Git tags: `git tag -l`
-### 14. Return Success Response
+### 15. Return Success Response
 Provide summary:
 ```

data/.claude/skills/roughcut/agent_instructions.md CHANGED Viewed

@@ -82,7 +82,7 @@ Each clip needs:
 ### 5. Export to Video Editor
-Check `library.yaml` for the `editor` field. If it's set, use that value. If it's not set or empty, ask the user for their editor choice (Final Cut Pro X, Adobe Premiere Pro, or DaVinci Resolve), then save their choice back to `library.yaml` (`fcpx`, `premiere`, or `resolve`).
+Check `library.yaml` for the `editor` field. If it's set, use that value. If it's not set or empty, check `libraries/settings.yaml` for the default `editor` value and use that (also save it back to `library.yaml`). If neither has an editor set, ask the user for their editor choice (Final Cut Pro X, Adobe Premiere Pro, or DaVinci Resolve), then save their choice back to both `library.yaml` and `libraries/settings.yaml`.
 Export based on choice:
 ```bash

data/.claude/skills/roughcut/export_to_fcpxml.rb CHANGED Viewed

@@ -102,6 +102,31 @@ def main
   generator.save(output_path)
   puts "\n✓ Rough cut exported to: #{output_path}"
+  validate_fcpxml(output_path) if editor_symbol == :fcpx
+end
+def validate_fcpxml(xml_path)
+  dtd_path = File.expand_path('../../../dtd/FCPXMLv1_8.dtd', __dir__)
+  unless File.exist?(dtd_path)
+    puts "⚠ DTD not found at #{dtd_path}; skipping validation."
+    return
+  end
+  unless system('command -v xmllint > /dev/null 2>&1')
+    puts "⚠ xmllint not found; skipping validation."
+    return
+  end
+  # xmllint prints errors to stderr; --noout suppresses the doc dump on success.
+  output = `xmllint --noout --dtdvalid "#{dtd_path}" "#{xml_path}" 2>&1`
+  if $?.success?
+    puts "✓ FCPXML validates against FCPXMLv1_8.dtd"
+  else
+    warn "✗ FCPXML failed DTD validation:"
+    warn output
+    exit 1
+  end
 end
 main

data/.claude/skills/transcribe-audio/SKILL.md CHANGED Viewed

@@ -16,27 +16,30 @@ Use WhisperX, NOT standard Whisper. WhisperX preserves the original video timeli
 ## Workflow
-### 1. Read Language from Library File
+### 1. Inputs from the parent
-Read the library's `library.yaml` to get the language code:
+This skill runs as a sub-agent. Do NOT read `library.yaml` or `settings.yaml` — the parent has that context and passes everything inline in your prompt. Expect these inputs:
-```yaml
-# Library metadata
-library_name: [library-name]
-language: en  # Language code stored here
-...
-```
+- `video_path` — absolute path to the video file
+- `transcript_output_dir` — where to write the transcript JSON (e.g. `libraries/<library>/transcripts`)
+- `language_code` — ISO 639-1 code already mapped by the parent (e.g. `en`, `es`)
+- `whisper_model` — model size from the parent (e.g. `small`, `medium`, `turbo`)
+- `transcript_refinement` — boolean; if `true`, the parent will also pass `user_context` and `footage_summary` strings for Step 4
+- `user_context` (only when refinement is on) — may be empty string
+- `footage_summary` (only when refinement is on) — may be empty string
+If any required input is missing from your prompt, stop and ask the parent rather than inferring it from the filesystem.
 ### 2. Run WhisperX
 ```bash
-whisperx "/full/path/to/video.mov" \
-  --language en \
-  --model medium \
+whisperx "<video_path>" \
+  --language <language_code> \
+  --model <whisper_model> \
   --compute_type float32 \
   --device cpu \
   --output_format json \
-  --output_dir libraries/[library-name]/transcripts
+  --output_dir <transcript_output_dir>
 ```
 ### 3. Prepare Audio Transcript
@@ -45,8 +48,8 @@ After WhisperX completes, format the JSON using our prepare_audio_script:
 ```bash
 ruby .claude/skills/transcribe-audio/prepare_audio_script.rb \
-  libraries/[library-name]/transcripts/video_name.json \
-  /full/path/to/original/video_name.mov
+  <transcript_output_dir>/<video_basename>.json \
+  <video_path>
 ```
 This script:
@@ -54,14 +57,18 @@ This script:
 - Removes unnecessary fields to reduce file size
 - Prettifies JSON
-### 4. Return Success Response
+### 4. (Optional) Refine the transcript
+If the parent passed `transcript_refinement: true`, follow `.claude/skills/transcribe-audio/refine_instructions.md` using the `user_context` and `footage_summary` strings the parent supplied inline. Do NOT open `library.yaml`. If `transcript_refinement` is not set or is `false`, skip this step.
+### 5. Return Success Response
 After audio preparation completes, return this structured response to the parent agent:
 ```
-✓ [video_filename.mov] transcribed successfully
-  Audio transcript: libraries/[library-name]/transcripts/video_name.json
-  Video path: /full/path/to/video_filename.mov
+✓ <video_basename.mov> transcribed successfully
+  Audio transcript: <transcript_output_dir>/<video_basename>.json
+  Video path: <video_path>
 ```
 **DO NOT update library.yaml** - the parent agent will handle this to avoid race conditions when running multiple transcriptions in parallel.

data/.claude/skills/transcribe-audio/refine_instructions.md ADDED Viewed

@@ -0,0 +1,114 @@
+# Transcript refinement instructions
+Companion file for `SKILL.md`. Invoked from SKILL.md Step 4 when the parent passed `transcript_refinement: true`. Reviews a WhisperX transcript and corrects misheard words in place, using the context strings the parent supplied.
+## Step 1 — Gather inputs from the parent
+The parent has already supplied these inline in your prompt:
+- `transcript_path` — absolute path to the prepared transcript JSON
+- `user_context` — string, may be empty
+- `footage_summary` — string, may be empty
+Do NOT open `library.yaml` or search the filesystem for additional context — if the parent didn't pass it, treat it as unavailable. If the parent invoked refinement with only empty context strings, proceed anyway. Catch whatever issues you can using only the transcript itself and what the parent gave you.
+## Step 2 — Extract a compact script view
+Run the shared extractor to produce a plain-text view of the transcript (one segment per paragraph, no timing metadata). Pick a sibling `.txt` path next to the transcript and pass it explicitly:
+```bash
+ruby .claude/scripts/script_extractor.rb <transcript_path> <transcript_path_with_.txt_extension>
+```
+Read ONLY that `.txt` file for the analysis steps below. Do NOT `Read` the full transcript JSON yet — it's large and you don't need its word-level structure to identify corrections.
+## Step 3 — HARD RULE: preserve word count, never change timing
+WhisperX produces word-level timing. The `segments[].words[]` array is 1:1 with the space-separated tokens in `segments[].text`. Splitting or merging tokens breaks this alignment and corrupts downstream timing used by roughcut.
+Allowed:
+- **1→1 token spelling fix** (same count, different characters). Transcript: `"The bike ended up in a second-floor apartment over near the Tenderlohn, which is where the cops met us."` Fix: `Tenderlohn` → `Tenderloin` — one mangled token replaced by the correct San Francisco neighborhood spelling, same single-token slot. Surrounding words are untouched.
+- **N→N token phrase fix** (same count across a phrase). Transcript: `"We had been planning to ride out to Walnut Creak for the weekend before the whole thing happened."` Fix: `Walnut Creak` → `Walnut Creek` — two tokens stay two tokens; only one token's spelling changes, but the phrase is treated as the unit of edit for safety.
+Disallowed:
+- **1→2 token split**. Transcript: `"Her cousin grew up in Sanjose and still lives in the same house her parents bought in the sixties."` The correct spelling is "San Jose" (two tokens), but WhisperX fused it into a single token covering the speaker's fast delivery. Splitting that one timing slot into two requires guessing where "San" ends and "Jose" begins — don't do it. (See squashing technique below for the right move.)
+- **2→1 token merge**. Transcript: `"We walked every single block of the neighborhood looking for the stolen bike that afternoon."` If you wanted to "normalize" `every single` into a single `everysingle` token, you'd drop one entry from the words array. Same corruption in reverse. Don't.
+Never modify timing fields (`start`, `end`, `duration`, `word.start`, `word.end`) for any reason.
+**Squashing technique**: when the correct term is naturally multi-word but the transcript has it as a single nonsense token, squash the correction into a single-token form to preserve word count. Downstream agents (analyze-video, roughcut) care about accurate word recognition, not cosmetic spacing — prefer squashing over skipping.
+- Transcript: `"Her cousin grew up in Sanjose and still lives in the same house her parents bought in the sixties."` Fix: `Sanjose` → `SanJose` (squashed single-token form). Downstream agents will still recognize the city. NOT `San Jose` — that's a disallowed 1→2 split.
+- Transcript: `"Our rental was a tiny cottage right on the edge of Tenderknob, close to a Burmese place we ended up at every single night."` The speaker meant "Tendernob" (the informal Tenderloin/Nob Hill border). Fix: `Tenderknob` → `Tendernob` (1→1 spelling fix, stays one token).
+- Transcript: `"She went to a little Catholic school in the Mission called Saintvincent when she was a kid, and her sister went there too."` Fix: `Saintvincent` → `SaintVincent` (squashed; preserves the one-token slot).
+If even squashing won't work (genuinely requires splitting or merging tokens), do NOT edit. Note it in your return summary instead. Example: `"Skipped: 'everysingle' in segment 12 should likely be 'every single' (two words), but a 1→2 split would corrupt timing."`
+## Step 4 — Identify corrections from the compact script
+Scan the `.txt` view against the confidence rubric below. Every candidate must also satisfy Step 3's word-count rule.
+- **Context-named term match**: correct if the intended term appears in `user_context` or `footage_summary` and the transcript has a close mishearing. Example: `footage_summary` says "the couple got married at a small vineyard in Sonoma over Labor Day weekend." The transcript has `"We drove all the way up to Sanoma on Friday afternoon and the traffic was unbelievable."` "Sanoma" is a 1→1 mishearing of the context-named location — fix it.
+- **Nonsense-token match**: correct if the transcript token is a non-word nonsense string with a clear real-world spelling implied by context. Example: transcript says `"His mother grew up in Pleasantton and worked at the little cafe downtown for twenty years."` "Pleasantton" isn't a real place — but "Pleasanton" is a real East Bay city and nothing else is phonetically close. 1→1 spelling fix.
+- **Self-witness rule**: correct if the proposed correct form appears elsewhere in the SAME transcript AND the suspect token is phonetically close. Example: an early segment says `"Andrew and Gordon ended up getting dinner at a Thai place in Pacific Heights that night after everything calmed down."` A later segment says `"Pacific Heights has been Andrew's favorite neighborhood since he first moved to the city back in 2015."` If a third segment has `"We drove through Pasific Hites on the way to the station."`, fix it — the correct form is witnessed twice elsewhere in the same transcript.
+- **Do NOT correct based on general world knowledge alone**. Example: transcript says `"Andrew dropped by a little market on Fillmore for snacks before we started the ride."` Even if you happen to know of a specific famous store on Fillmore, don't invent it — the generic phrasing might be exactly what was said. Require either a context naming or a self-witness. If neither exists, leave it.
+Collect every authorized correction as an `old → new` pair before moving to Step 5.
+## Step 5 — Apply each correction to the full JSON
+Now (and only now) you need to touch the transcript JSON. For each correction, you must update three places so they stay consistent:
+1. `segments[].text` — the sentence-level text
+2. `segments[].words[].word` — the word-level array inside the owning segment
+3. `word_segments[].word` — the top-level flat word array
+Read the JSON selectively, not in full — use `Grep` to locate each occurrence and its surrounding lines, then `Edit` with a unique anchor.
+### 5a — Update `segments[].text` with phrase context
+Every correction must include at least one adjacent word of surrounding context. Never `Edit` on a bare word — even nonsense tokens — because Edit does substring matching, not word-boundary matching. Bare-word replacements silently corrupt legitimate substrings. For example, if you try to fix a misheard `"car"` by running `Edit replace_all=true old="car" new="far"`, you'll also rewrite every occurrence of `"carrot"` into `"farrot"`, every `"scared"` into `"sfared"`, and so on across the whole transcript. Always anchor the edit with at least one adjacent word.
+Correct form:
+- `Edit replace_all=true old="second-floor apartment over near the Tenderlohn" new="second-floor apartment over near the Tenderloin"` — 1→1 spelling fix in generous phrase context.
+- `Edit replace_all=true old="ride out to Walnut Creak for the weekend" new="ride out to Walnut Creek for the weekend"` — 2→2 phrase fix.
+- `Edit replace_all=true old="cousin grew up in Sanjose and still lives" new="cousin grew up in SanJose and still lives"` — squashed 1→1 fix.
+**Case rule**: preserve the transcript's existing case. The goal is accurate word recognition for downstream agents, not proper-noun capitalization. If the transcript has "tundraloin" (lowercase), replace with "tenderloin" (lowercase) — don't upgrade to "Tenderloin". If the transcript has "Tundraloin" at a sentence start, replace with "Tenderloin" there. Match case-for-case; don't normalize. Exception: the squashing technique (Step 3) may introduce an internal capital to mark a word boundary (e.g. `Sanjose` → `SanJose`); the first letter's case still follows this rule.
+### 5b — Update the two word-level arrays, anchored by `start`
+Both `segments[].words[].word` and top-level `word_segments[].word` have their own entry for each token. These arrays aren't consumed downstream yet, but they're how we'll cut a single word or phrase out of a segment later, so keeping them consistent with the corrected `segments[].text` is load-bearing — don't leave them stale.
+Anchor each word-array edit on the adjacent `start` timestamp so it's unique (the token alone may appear in many slots). Only the `word` field changes; every other field (`start`, `end`, `score`, etc.) must stay untouched.
+The transcript JSON is pretty-printed (`JSON.pretty_generate`), so each key sits on its own line. `Edit` does literal substring matching — your `old_string` must include the newline and indentation between `"word": "..."` and `"start": ...`. Use the exact whitespace from the file (open it with `Read` or `Grep -A` first to copy the indentation verbatim).
+- Two-line anchor form (copy the real indentation from the file):
+  ```
+  Edit old='"word": "Sanjose",
+              "start": 10.534' new='"word": "SanJose",
+              "start": 10.534'
+  ```
+  Updates one entry; repeat for the other array.
+- For an N→N phrase fix, update each token's word entry the same way, anchored by its own `start`.
+- For the squashing case (e.g. `Sanjose` → `SanJose`), the word count is unchanged, so there's still exactly one word entry to update per array.
+## Step 6 — Clean up the extracted script file
+Delete the `.txt` file created in Step 2. It's scaffolding, not a deliverable.
+```bash
+rm <transcript_path_with_.txt_extension>
+```
+## Step 7 — Return summary to the parent
+Append a refinement line to your SKILL.md Step 5 response. Format:
+- If corrections made: list them as `old → new` pairs, one per line.
+- If no corrections needed: `"Refinement: no corrections needed"`.
+- If some candidates were skipped for word-count reasons: `"Refinement: skipped N corrections that would have changed word count"` followed by the list.
+The parent writes only `transcript: <filename>.json` to library.yaml — no new field needed.

data/CLAUDE.md CHANGED Viewed

@@ -40,6 +40,33 @@ You are an AI video editor assistant working with a software engineer. You gener
 Libraries are the primary abstraction in ButterCut - each library represents a video series or project and is self-contained under `/libraries/[library-name]/`. A library is conceptually similar to a Final Cut Pro library, but uses a simple file structure (YAML, JSON transcripts) optimized for AI analysis rather than FCP's proprietary format.
+### Initialize Settings
+Before any library setup, check if `libraries/settings.yaml` exists. If not, copy from template:
+```bash
+cp templates/settings_template.yaml libraries/settings.yaml
+```
+If no previous settings.yaml was present, use the AskUserQuestion tool to ask the user to confirm or change their defaults (editor and whisper_model).
+Editor Options:
+- Final Cut Pro X
+- Adobe Premiere Pro
+- DaVinci Resolve
+Model Options:
+- Small (recommended — pairs well with per-library transcript_refinement)
+- Medium
+- Turbo (Large)
+Save these options into libraries/settings.yaml.
+Note: `transcript_refinement` is a **per-library** setting (not global). Ask about it during library setup (see "Gather Project Information" below), not during initial settings setup.
+When creating a new library, read `libraries/settings.yaml` and use the `editor` value to pre-populate the library's `editor` field.
 ### Check for Existing Library
 **ALWAYS** check if a library already exists before starting setup:
@@ -63,9 +90,9 @@ ls libraries/[library-name]/library.yaml
 ### Gather Project Information
-Ask the user these questions for new libraries:
+Ask the user these questions for new libraries one at a time (never all at once):
-1. **What is the library name for this project?**
+1. **What do you want to call this project library?**
    - Examples: "bike-locking-video-series", "raiders-2025-highlights", "yo-yo-techniques"
    - Normalize the name:
      - Replace spaces with dashes
@@ -73,15 +100,20 @@ Ask the user these questions for new libraries:
      - Remove special characters (keep alphanumeric and dashes)
 2. **Where are the video files located?**
-   - Accept either a directory path (recursively find all video files inside folder) or individual file paths
+   - Ask: "Where are your video files? You can drag folders or individual files directly into the chat."
    - Verify all files exist before proceeding
    - Inform user of what was found: "Found 5 video files totaling 2.3GB"
 3. **What language is spoken in these videos?**
-   - Common options: `en` (English), `es` (Spanish), `fr` (French), `de` (German), `ja` (Japanese)
-   - Or `auto` for auto-detect (adds ~30 seconds per video during transcription)
-   - This applies to all videos in this library
-   - Save to library.yaml for use during transcription
+   - Ask using AskUserQuestion with options: "English", "Spanish" and a free-text fallback for other languages
+   - Save the language name (e.g., "English") to library.yaml
+   - Map to language code (e.g., `en`, `es`, `fr`) behind the scenes when needed for transcription
+4. **Can I proofread the transcripts after they're generated?**
+   - Ask using AskUserQuestion with this exact question: "Can I proofread the transcripts after they're generated? I'll use the video's context to fix mistakes."
+   - Options: "Yes - Recommended (Use Claude to refine video understanding)" and "No"
+   - Save the boolean to `transcript_refinement` in library.yaml (true for Yes, false for No)
+   - Default to `true` if the user skips
 ### Create Directory Structure
@@ -111,13 +143,19 @@ Progressively update the `footage_summary` field after each video is transcribed
 After library setup completes, **automatically start analyzing all footage**:
 1. Inform user: "Library setup complete. Found [N] videos ([total size]). Starting footage analysis..."
-2. Read library.yaml to get language code and find videos needing transcription
-3. Launch `transcribe-audio` agents (can run in parallel for multiple videos)
-4. As each agent completes, update library.yaml with `transcript` (filename only, not full path)
-5. After all audio transcripts complete, launch `analyze-video` agents (can run in parallel)
-6. As each agent completes, update library.yaml with `visual_transcript` (filename only, not full path)
-7. Analyze ALL videos before offering to create rough cuts
-8. **After all analysis completes, automatically create a backup** using the `backup-library` skill
+2. Read `libraries/settings.yaml` (for `whisper_model`) and the library's `library.yaml` (for `language`, `transcript_refinement`, `user_context`, `footage_summary`) ONCE in the parent thread. If any expected field is missing, run the appropriate migration first (see Critical Principles below).
+3. Launch `transcribe-audio` agents (can run in parallel for multiple videos). Pass these values inline in each agent's prompt — the sub-agent never reads `library.yaml` or `settings.yaml`:
+   - `video_path`, `transcript_output_dir`, `language_code`, `whisper_model`
+   - `transcript_refinement` (boolean). If `true`, also pass the current `user_context` and `footage_summary` strings (empty strings are fine — refinement still catches nonsense-token and self-witness fixes).
+4. As each agent completes, update library.yaml with `transcript` (filename only, not full path).
+5. After all audio transcripts complete, launch `analyze-video` agents (can run in parallel) following the same "parent passes context inline" contract. Pass inline: `video_path`, `audio_transcript_path`, `visual_transcript_path`.
+6. As each agent completes, update library.yaml with `visual_transcript` (filename only, not full path).
+7. Analyze ALL videos before offering to create rough cuts.
+8. **After all analysis completes, automatically create a backup** using the `backup-library` skill.
+**Contract: sub-agents don't read `library.yaml`.** The parent owns `library.yaml` (and `settings.yaml`) — it reads once, passes values inline, and writes results once per agent completion. Sub-agents should not even know those files exist. This keeps the context boundary clean and avoids race conditions when many agents run in parallel.
+**Note on refinement:** When `transcript_refinement: true`, each `transcribe-audio` agent reviews and corrects its transcript in place before returning, using the `user_context` and `footage_summary` the parent passed in. Empty context strings are fine — the agent still runs and catches nonsense-token and self-witness fixes. The parent still only writes `transcript: <filename>.json` to `library.yaml` after the agent completes.
 **Terminology:**
 - User-facing: Call it "footage analysis" or "analyzing footage"
@@ -133,18 +171,17 @@ After library setup completes, **automatically start analyzing all footage**:
 When processing multiple videos, use parallel agents for maximum throughput:
 1. **Parent agent responsibilities:**
-   - Read library.yaml for language code
-   - Read library.yaml to find videos needing work
-   - Launch Task agents with transcribe-audio or analyze-video skills
-   - Update library.yaml sequentially as agents complete
-   - Handle errors and retries
+   - Read `library.yaml` and `settings.yaml` once to gather: videos needing work, `language` (passed to agents as `language_code`), `whisper_model`, `transcript_refinement`, `user_context`, `footage_summary`.
+   - Launch Task agents with transcribe-audio or analyze-video skills, passing all needed values **inline in the prompt**.
+   - Update library.yaml sequentially as agents complete.
+   - Handle errors and retries.
 2. **Child agent (transcribe-audio/analyze-video) responsibilities:**
-   - Process ONE video file
-   - Run WhisperX or frame extraction
-   - Prepare and clean transcript JSON
-   - Return structured response with file paths
-   - DO NOT update library.yaml (parent handles this)
+   - Process ONE video file using only the inputs passed inline by the parent.
+   - Run WhisperX or frame extraction.
+   - Prepare and clean transcript JSON.
+   - Return structured response with file paths.
+   - DO NOT read `library.yaml` or `settings.yaml`, and DO NOT update `library.yaml` (parent handles all yaml I/O).
 3. **Benefits:**
    - Multiple videos process simultaneously
@@ -156,7 +193,19 @@ When processing multiple videos, use parallel agents for maximum throughput:
 Each library has a `library.yaml` file that serves as your persistent memory and the SOURCE OF TRUTH. This file contains all library metadata, footage descriptions, transcription status, and key learnings. Always read this file when working on a library and you need guidance for how/where to save files.
-**If library structure seems wrong, check CHANGELOG.md.** The library.yaml format has evolved over versions. If you encounter unexpected field names (like `transcript_path` instead of `transcript`), read CHANGELOG.md to understand breaking changes and available migration scripts.
+**Migrate legacy library.yaml files before doing anything else.** Every time you read a library.yaml, check it against the canonical field list in `templates/library_template.yaml`. If any expected field is missing, or any field appears under an old name, the library predates a feature and MUST be migrated before you do any further work on it — no rough cuts, sequences, transcription, exports, or anything else until the schema is current. The migrations are fast, idempotent, and safe; don't ask the user for permission and don't describe them as optional "tidying." Just run them.
+Known migration triggers (match each to a `scripts/NNN_migrate_*.rb` script via CHANGELOG.md):
+- `editor` missing (added in 0.4.0)
+- `transcript_refinement` missing (added in [Unreleased]; missing means "predates the feature, default to `false`" — NOT the template default of `true`)
+- `footage_summary` missing OR old name `footage_description` present (renamed in [Unreleased])
+- video entries with `transcript_path` / `visual_transcript_path` (renamed to `transcript` / `visual_transcript` in 0.3.0)
+- video entries with `file_size_mb` (removed in 0.3.0)
+A missing field is not the same as a field set to the template default — the template default only applies to freshly created libraries. If you see a schema issue not on this list, still check CHANGELOG.md; the list may be behind. After running migrations, re-read the library.yaml and continue with whatever the user asked for.
+**Keep main-thread context minimal.** The main thread orchestrates; sub-agents do the heavy work and return concise summaries. Don't read full transcript JSON, visual transcript JSON, or extracted frames into the main thread as part of routine workflow — across a large library this bloats context fast. Trust sub-agent return messages when updating library.yaml. Direct user requests ("show me transcript X") are fine; the rule is about automatic workflow behavior.
 **Use actual filenames.** Never use generic labels like "Video 1" or "Clip A" - always reference actual filenames like "DJI_20250423171212_0210_D.mov" for clear traceability.
@@ -171,6 +220,17 @@ Each library has a `library.yaml` file that serves as your persistent memory and
 - When you have lots of videos to process (dozens or hundreds isn't out of the ordinary), create a reasonable task list with 5 tasks and then a final task that says to check the yaml processing file to see if you need to then generate more tasks. This way users can see progress and the agent doesn't get overwhelmed.
 - Generally avoid writing one-off scripts, but if you do need to write one, write it in Ruby unless you have a very strong reason to write in another language.
 - Only run 4 parallel tasks at a time.
+- Whenever you export XML files, include a datetime timestamp in the filename so it's clear when they were generated.
+## Programming Style
+When you add a Ruby script under `.claude/scripts/` or similar, follow these conventions:
+- **One class per script; file name matches the class name.** `ScriptExtractor` lives in `script_extractor.rb`.
+- **Single high-level entry point.** Expose a class method (`Klass.extract`, `Klass.run`, etc.) that calls `new(...).extract` internally — callers shouldn't need to know about instantiation.
+- **Break the work into small private methods with clear names** (`load_transcript`, `format_script`, `write_output`, `report`). The public entry point should read like a short outline of the workflow.
+- **Required arguments are required.** Don't silently default `nil`/missing args — raise `ArgumentError` in `initialize` if a required value is missing or empty. No hidden fallback paths.
+- **Keep CLI arg parsing out of the class.** Use a bottom-of-file `if __FILE__ == $PROGRAM_NAME` block to parse `ARGV`, validate file paths, print a usage line, and delegate to the class.
 ## Project Structure
@@ -182,21 +242,23 @@ Each library has a `library.yaml` file that serves as your persistent memory and
 - `spec/` - RSpec test suite
 - `templates/` - Library and project templates
 - `libraries/` - Working directory for user's video projects (gitignored)
+- `libraries/settings.yaml` - User settings (editor, whisper_model) — created from template on first library setup
 - `backups/` - Compressed library backups (transcriptions, roughcuts, etc) (gitignored)
 ## Design Philosophy
-ButterCut is designed to be simple and automatic:
+ButterCut is designed to be simple, automatic, and geared toward working with non-technical people using ButterCut via a client, Claude Cowork, or Claude Code.
 - **Input**: Array of full file paths to video files
-- **Output**: Working FCPXML ready to import into Final Cut Pro
+- **Output**: Working XML file ready to import into the non-technical user's video editor (Final Cut, Premiere, Resolve)
 - **Automatic Metadata Extraction**: Uses FFmpeg internally to extract video properties (duration, resolution, frame rate, audio rate, etc.)
-- **No Manual Configuration Required**: Library handles all the complexity of FCPXML generation
-The user should not need to understand video codecs, frame rates, or FCPXML structure - just provide file paths and get working XML.
+The user should not need to understand video codecs, frame rates, or FCPXML structure - just provide file paths and get working XML. We should talk to the user from a video-editing perspective, not a software-engineering perspective.
 ## Development Commands
 ### Testing
+RSpec tests for the XML generation library. This doesn't include agent or end-to-end testing.
 ```bash
 # Install dependencies
 bundle install
@@ -211,17 +273,6 @@ bundle exec rspec spec/buttercut_spec.rb
 bundle exec rspec spec/buttercut_spec.rb:10
 ```
-### DTD Validation
-macOS has a built-in XML lint tool - allowing you to validate a FCPXML document against its DTD file.
-```bash
-xmllint --dtdvalid "dtd/FCPXMLv1_8.dtd" "/path/to/your/file.fcpxml"
-```
-This will check if the generated FCPXML conforms to the FCPXML 1.8 specification.
-- Whenever you export xml files, always include a datetime timestamp so it's clear when they were generated
 ## Claude Skills
 When creating new Claude skills, aim to keep them to 50 lines. Only very complicated skills (e.g. transcription and roughcuts) should be larger than that. If a skill is complicated and seems like it can't be explained in 50 lines, consider whether it should be broken up across multiple skills or whether the complexity can be contained inside a Ruby script saved adjacent to the skill.

data/README.md CHANGED Viewed

@@ -124,4 +124,8 @@ MIT
 ## Contributing
-Bug reports and pull requests welcome.
+Bug reports and pull requests are welcome. With that said...
+**Guidelines:**
+- Write the body of your pull request or GitHub issue yourself. Don't use an agent (Claude Code, etc) to generate it.
+- Keep pull requests small and limited to a single feature or bugfix at a time. It's a lot easier to write code now, but I feel like it's just as hard as before to review it.

data/lib/buttercut/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class ButterCut
-  VERSION = "0.4.0"
+  VERSION = "0.5.0"
 end

data/templates/library_template.yaml CHANGED Viewed

@@ -4,6 +4,7 @@ created_date: [YYYY-MM-DD]
 last_updated: [YYYY-MM-DD]
 language: english
 editor: # preferred video editor: fcpx, premiere, or resolve
+transcript_refinement: true # on by default; set false to skip the AI transcript review step
 user_context: ""
 # Whenever you ask the user questions about the library, save a summarized version here.
 # e.g. The man wearing the dark blue long sleeve shirt is "Andrew". The small brown dog is "Sammy". This footage was shot over one evening.

data/templates/settings_template.yaml ADDED Viewed

@@ -0,0 +1,10 @@
+# ButterCut User Settings
+# Copy this file to libraries/settings.yaml to configure defaults
+# Preferred video editor: fcpx, premiere, or resolve
+editor: fcpx
+# WhisperX model size: tiny, base, small, medium, or turbo
+# turbo is nearly as accurate as large-v3 but significantly faster
+# Recommended: `small` paired with transcript_refinement (set per-library in library.yaml)
+whisper_model: small

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: buttercut
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Andrew Ford
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-02-24 00:00:00.000000000 Z
+date: 2026-04-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -62,6 +62,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".claude/commands/worktree.md"
+- ".claude/scripts/script_extractor.rb"
 - ".claude/settings.json"
 - ".claude/settings.local.json"
 - ".claude/skills/analyze-video/SKILL.md"
@@ -78,6 +79,7 @@ files:
 - ".claude/skills/setup/verify_install.rb"
 - ".claude/skills/transcribe-audio/SKILL.md"
 - ".claude/skills/transcribe-audio/prepare_audio_script.rb"
+- ".claude/skills/transcribe-audio/refine_instructions.md"
 - ".claude/skills/update-buttercut/SKILL.md"
 - CLAUDE.md
 - LICENSE
@@ -90,6 +92,7 @@ files:
 - lib/buttercut/version.rb
 - templates/library_template.yaml
 - templates/roughcut_template.yaml
+- templates/settings_template.yaml
 homepage: https://github.com/andrewford/buttercut
 licenses:
 - MIT