buttercut 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/scripts/script_extractor.rb +66 -0
- data/.claude/settings.local.json +6 -1
- data/.claude/skills/analyze-video/SKILL.md +17 -9
- data/.claude/skills/backup-library/backup_libraries.rb +1 -1
- data/.claude/skills/release/SKILL.md +21 -11
- data/.claude/skills/roughcut/agent_instructions.md +1 -1
- data/.claude/skills/roughcut/export_to_fcpxml.rb +25 -0
- data/.claude/skills/transcribe-audio/SKILL.md +25 -18
- data/.claude/skills/transcribe-audio/refine_instructions.md +114 -0
- data/CLAUDE.md +91 -40
- data/README.md +5 -1
- data/lib/buttercut/version.rb +1 -1
- data/templates/library_template.yaml +1 -0
- data/templates/settings_template.yaml +10 -0
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 78b845e8b54d03aee93f00bdbaa96f140d0f10c94910a117352b7401cf30bf63
|
|
4
|
+
data.tar.gz: 0eb609100a9e2f367b493d6aef9f45d08e784ad573c4d033733009be40ccc525
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 33eb34693818323f40900bcea272781391c372fc5bf9adb71f11632da83aa3de5936538150c69a9d5407f5e5a6059342eed23eed5683e6b1fec36d9a55b37d2a
|
|
7
|
+
data.tar.gz: f022d15eb198cb32dde550d0120598cf5d8eb7f95145e21bfb70401df1c762bcd4474f4836966601aa46ce232e2bd364a25cc8d1187ad2c1e7edf02164ca1789
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# Extract the plain-text script from a WhisperX-style transcript JSON.
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# ruby .claude/scripts/script_extractor.rb <transcript.json> <output.txt>
|
|
6
|
+
#
|
|
7
|
+
# Output is one segment per paragraph (blank line between), trimmed, suitable
|
|
8
|
+
# for proofreading by a human or a sub-agent without the overhead of the full
|
|
9
|
+
# transcript JSON (word-level timing, scores, etc.).
|
|
10
|
+
|
|
11
|
+
require 'json'
|
|
12
|
+
|
|
13
|
+
class ScriptExtractor
|
|
14
|
+
def self.extract(transcript_path, output_path)
|
|
15
|
+
new(transcript_path, output_path).extract
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def initialize(transcript_path, output_path)
|
|
19
|
+
raise ArgumentError, "transcript_path is required" if transcript_path.nil? || transcript_path.empty?
|
|
20
|
+
raise ArgumentError, "output_path is required" if output_path.nil? || output_path.empty?
|
|
21
|
+
@transcript_path = transcript_path
|
|
22
|
+
@output_path = output_path
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def extract
|
|
26
|
+
write_output(format_script)
|
|
27
|
+
report
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
private
|
|
31
|
+
|
|
32
|
+
attr_reader :transcript_path, :output_path
|
|
33
|
+
|
|
34
|
+
def data
|
|
35
|
+
@data ||= JSON.parse(File.read(transcript_path))
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def segments
|
|
39
|
+
data["segments"] or raise "transcript JSON has no 'segments' key: #{transcript_path}"
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def format_script
|
|
43
|
+
paragraphs = segments.map { |s| s["text"].to_s.strip }.reject(&:empty?)
|
|
44
|
+
paragraphs.join("\n\n") + "\n"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def write_output(text)
|
|
48
|
+
File.write(output_path, text)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def report
|
|
52
|
+
in_kb = (File.size(transcript_path) / 1024.0).round(1)
|
|
53
|
+
out_kb = (File.size(output_path) / 1024.0).round(1)
|
|
54
|
+
puts "Extracted script: #{output_path} (#{out_kb} KB from #{in_kb} KB source, #{segments.size} segments)"
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
if __FILE__ == $PROGRAM_NAME
|
|
59
|
+
transcript_path, output_path = ARGV
|
|
60
|
+
abort("usage: script_extractor.rb <transcript.json> <output.txt>") unless transcript_path && output_path
|
|
61
|
+
abort("file not found: #{transcript_path}") unless File.file?(transcript_path)
|
|
62
|
+
if File.expand_path(output_path) == File.expand_path(transcript_path)
|
|
63
|
+
abort("output path must differ from transcript path: #{transcript_path}")
|
|
64
|
+
end
|
|
65
|
+
ScriptExtractor.extract(transcript_path, output_path)
|
|
66
|
+
end
|
data/.claude/settings.local.json
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"permissions": {
|
|
3
3
|
"allow": [
|
|
4
|
+
"Agent",
|
|
5
|
+
"Read(tmp/**)",
|
|
6
|
+
"Write(tmp/**)",
|
|
4
7
|
"Bash(./.claude/skills/roughcut/combine_visual_transcripts.rb:*)",
|
|
5
8
|
"Bash(./.claude/skills/roughcut/export_to_fcpxml.rb:*)",
|
|
6
9
|
"Skill(backup-library)",
|
|
@@ -22,7 +25,9 @@
|
|
|
22
25
|
"Bash(git worktree add:*)",
|
|
23
26
|
"Bash(cat:*)",
|
|
24
27
|
"Bash(python3:*)",
|
|
25
|
-
"Bash(gh api:*)"
|
|
28
|
+
"Bash(gh api:*)",
|
|
29
|
+
"Bash(gh pr:*)",
|
|
30
|
+
"Bash(cp *)"
|
|
26
31
|
],
|
|
27
32
|
"deny": [],
|
|
28
33
|
"ask": []
|
|
@@ -13,16 +13,24 @@ Videos must have audio transcripts. Run **transcribe-audio** skill first if need
|
|
|
13
13
|
|
|
14
14
|
## Workflow
|
|
15
15
|
|
|
16
|
-
### 1.
|
|
16
|
+
### 1. Inputs from the parent
|
|
17
|
+
|
|
18
|
+
This skill runs as a sub-agent. Do NOT read `library.yaml` or `settings.yaml` — the parent has that context and passes everything inline in your prompt. Expect these inputs:
|
|
19
|
+
|
|
20
|
+
- `video_path` — absolute path to the video file
|
|
21
|
+
- `audio_transcript_path` — absolute path to the prepared audio transcript JSON
|
|
22
|
+
- `visual_transcript_path` — absolute path to write the visual transcript JSON
|
|
23
|
+
|
|
24
|
+
### 2. Copy & Clean Audio Transcript
|
|
17
25
|
|
|
18
26
|
Don't read the audio transcript; just copy it, then prepare it with the `prepare_visual_script.rb` script. This removes word-level timing data and prettifies the JSON for easier editing:
|
|
19
27
|
|
|
20
28
|
```bash
|
|
21
|
-
cp
|
|
22
|
-
ruby .claude/skills/analyze-video/prepare_visual_script.rb
|
|
29
|
+
cp <audio_transcript_path> <visual_transcript_path>
|
|
30
|
+
ruby .claude/skills/analyze-video/prepare_visual_script.rb <visual_transcript_path>
|
|
23
31
|
```
|
|
24
32
|
|
|
25
|
-
###
|
|
33
|
+
### 3. Extract Frames (Binary Search)
|
|
26
34
|
|
|
27
35
|
Create frame directory: `mkdir -p tmp/frames/[video_name]`
|
|
28
36
|
|
|
@@ -37,11 +45,11 @@ ffmpeg -ss 00:00:02 -i video.mov -vframes 1 -vf "scale=1280:-1" tmp/frames/[vide
|
|
|
37
45
|
**Stop when:** The footage no longer seems to be changing or only has minor changes
|
|
38
46
|
**Never sample** more frequently than once per 30 seconds
|
|
39
47
|
|
|
40
|
-
###
|
|
48
|
+
### 4. Add Visual Descriptions
|
|
41
49
|
|
|
42
50
|
Read the visual video json file that you created earlier.
|
|
43
51
|
|
|
44
|
-
**Read the JPG frames** from `tmp/frames/[video_name]/` using Read tool, then **Edit**
|
|
52
|
+
**Read the JPG frames** from `tmp/frames/[video_name]/` using Read tool, then **Edit** the file at `<visual_transcript_path>`:
|
|
45
53
|
|
|
46
54
|
Do these incrementally. You don't need to create a program or script to do this, just incrementally edit the json whenever you read new frames.
|
|
47
55
|
|
|
@@ -73,7 +81,7 @@ Do these incrementally. You don't need to create a program or script to do this,
|
|
|
73
81
|
- First segment: detailed (subject, setting, shot type, lighting, camera style)
|
|
74
82
|
- Continuing shots: brief if similar, otherwise can be up to 3 sentences if drastically different.
|
|
75
83
|
|
|
76
|
-
###
|
|
84
|
+
### 5. Cleanup & Return
|
|
77
85
|
|
|
78
86
|
```bash
|
|
79
87
|
rm -rf tmp/frames/[video_name]
|
|
@@ -82,8 +90,8 @@ rm -rf tmp/frames/[video_name]
|
|
|
82
90
|
Return structured response:
|
|
83
91
|
```
|
|
84
92
|
✓ [video_filename.mov] analyzed successfully
|
|
85
|
-
Visual transcript:
|
|
86
|
-
Video path:
|
|
93
|
+
Visual transcript: <visual_transcript_path>
|
|
94
|
+
Video path: <video_path>
|
|
87
95
|
```
|
|
88
96
|
|
|
89
97
|
**DO NOT update library.yaml** - parent agent handles this to avoid race conditions in parallel execution.
|
|
@@ -29,7 +29,7 @@ class LibraryBackup
|
|
|
29
29
|
|
|
30
30
|
files = Dir.glob(File.join(@libraries_dir, '**', '*')).select { |f| File.file?(f) }
|
|
31
31
|
|
|
32
|
-
Zip::File.open(backup_path,
|
|
32
|
+
Zip::File.open(backup_path, create: true) do |zipfile|
|
|
33
33
|
files.each do |file|
|
|
34
34
|
zipfile.add(file.sub("#{File.dirname(@libraries_dir)}/", ''), file)
|
|
35
35
|
end
|
|
@@ -59,7 +59,17 @@ class ButterCut
|
|
|
59
59
|
end
|
|
60
60
|
```
|
|
61
61
|
|
|
62
|
-
### 5.
|
|
62
|
+
### 5. Update Gemfile.lock
|
|
63
|
+
|
|
64
|
+
Run `bundle install` so `Gemfile.lock` reflects the new version:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
bundle install
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Verify the version updated in `Gemfile.lock` before proceeding.
|
|
71
|
+
|
|
72
|
+
### 6. Gather Changelog Notes
|
|
63
73
|
|
|
64
74
|
Ask user for release notes. Prompt with:
|
|
65
75
|
- What changed in this release?
|
|
@@ -67,7 +77,7 @@ Ask user for release notes. Prompt with:
|
|
|
67
77
|
- Any bug fixes?
|
|
68
78
|
- Any breaking changes?
|
|
69
79
|
|
|
70
|
-
###
|
|
80
|
+
### 7. Update or Create CHANGELOG.md
|
|
71
81
|
|
|
72
82
|
If `CHANGELOG.md` exists, prepend new entry. Otherwise create it:
|
|
73
83
|
|
|
@@ -89,14 +99,14 @@ All notable changes to ButterCut will be documented in this file.
|
|
|
89
99
|
- Improved W
|
|
90
100
|
```
|
|
91
101
|
|
|
92
|
-
###
|
|
102
|
+
### 8. Commit Version Bump
|
|
93
103
|
|
|
94
104
|
```bash
|
|
95
|
-
git add lib/buttercut/version.rb CHANGELOG.md
|
|
105
|
+
git add lib/buttercut/version.rb Gemfile.lock CHANGELOG.md
|
|
96
106
|
git commit -m "Bump version to 0.2.0"
|
|
97
107
|
```
|
|
98
108
|
|
|
99
|
-
###
|
|
109
|
+
### 9. Create and Push Git Tag
|
|
100
110
|
|
|
101
111
|
```bash
|
|
102
112
|
git tag v0.2.0
|
|
@@ -104,7 +114,7 @@ git push origin main
|
|
|
104
114
|
git push origin v0.2.0
|
|
105
115
|
```
|
|
106
116
|
|
|
107
|
-
###
|
|
117
|
+
### 10. Build Gem
|
|
108
118
|
|
|
109
119
|
```bash
|
|
110
120
|
gem build buttercut.gemspec
|
|
@@ -112,7 +122,7 @@ gem build buttercut.gemspec
|
|
|
112
122
|
|
|
113
123
|
This creates `buttercut-0.2.0.gem` file.
|
|
114
124
|
|
|
115
|
-
###
|
|
125
|
+
### 11. Publish to RubyGems
|
|
116
126
|
|
|
117
127
|
**First time setup check:**
|
|
118
128
|
|
|
@@ -133,7 +143,7 @@ gem push buttercut-0.2.0.gem
|
|
|
133
143
|
|
|
134
144
|
This makes the gem available for `gem install buttercut` worldwide.
|
|
135
145
|
|
|
136
|
-
###
|
|
146
|
+
### 12. Create GitHub Release
|
|
137
147
|
|
|
138
148
|
**Using GitHub CLI:**
|
|
139
149
|
```bash
|
|
@@ -155,21 +165,21 @@ Guide user through manual release creation:
|
|
|
155
165
|
|
|
156
166
|
Then wait for user confirmation that release is created before proceeding to cleanup.
|
|
157
167
|
|
|
158
|
-
###
|
|
168
|
+
### 13. Cleanup
|
|
159
169
|
|
|
160
170
|
```bash
|
|
161
171
|
# Remove local gem file (it's on RubyGems and GitHub now)
|
|
162
172
|
rm buttercut-0.2.0.gem
|
|
163
173
|
```
|
|
164
174
|
|
|
165
|
-
###
|
|
175
|
+
### 14. Verify Release
|
|
166
176
|
|
|
167
177
|
Check that everything worked:
|
|
168
178
|
- RubyGems page: https://rubygems.org/gems/buttercut
|
|
169
179
|
- GitHub releases: https://github.com/andrewford/buttercut/releases
|
|
170
180
|
- Git tags: `git tag -l`
|
|
171
181
|
|
|
172
|
-
###
|
|
182
|
+
### 15. Return Success Response
|
|
173
183
|
|
|
174
184
|
Provide summary:
|
|
175
185
|
```
|
|
@@ -82,7 +82,7 @@ Each clip needs:
|
|
|
82
82
|
|
|
83
83
|
### 5. Export to Video Editor
|
|
84
84
|
|
|
85
|
-
Check `library.yaml` for the `editor` field. If it's set, use that value. If it's not set or empty, ask the user for their editor choice (Final Cut Pro X, Adobe Premiere Pro, or DaVinci Resolve), then save their choice back to `library.yaml`
|
|
85
|
+
Check `library.yaml` for the `editor` field. If it's set, use that value. If it's not set or empty, check `libraries/settings.yaml` for the default `editor` value and use that (also save it back to `library.yaml`). If neither has an editor set, ask the user for their editor choice (Final Cut Pro X, Adobe Premiere Pro, or DaVinci Resolve), then save their choice back to both `library.yaml` and `libraries/settings.yaml`.
|
|
86
86
|
|
|
87
87
|
Export based on choice:
|
|
88
88
|
```bash
|
|
@@ -102,6 +102,31 @@ def main
|
|
|
102
102
|
generator.save(output_path)
|
|
103
103
|
|
|
104
104
|
puts "\n✓ Rough cut exported to: #{output_path}"
|
|
105
|
+
|
|
106
|
+
validate_fcpxml(output_path) if editor_symbol == :fcpx
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def validate_fcpxml(xml_path)
|
|
110
|
+
dtd_path = File.expand_path('../../../dtd/FCPXMLv1_8.dtd', __dir__)
|
|
111
|
+
unless File.exist?(dtd_path)
|
|
112
|
+
puts "⚠ DTD not found at #{dtd_path}; skipping validation."
|
|
113
|
+
return
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
unless system('command -v xmllint > /dev/null 2>&1')
|
|
117
|
+
puts "⚠ xmllint not found; skipping validation."
|
|
118
|
+
return
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# xmllint prints errors to stderr; --noout suppresses the doc dump on success.
|
|
122
|
+
output = `xmllint --noout --dtdvalid "#{dtd_path}" "#{xml_path}" 2>&1`
|
|
123
|
+
if $?.success?
|
|
124
|
+
puts "✓ FCPXML validates against FCPXMLv1_8.dtd"
|
|
125
|
+
else
|
|
126
|
+
warn "✗ FCPXML failed DTD validation:"
|
|
127
|
+
warn output
|
|
128
|
+
exit 1
|
|
129
|
+
end
|
|
105
130
|
end
|
|
106
131
|
|
|
107
132
|
main
|
|
@@ -16,27 +16,30 @@ Use WhisperX, NOT standard Whisper. WhisperX preserves the original video timeli
|
|
|
16
16
|
|
|
17
17
|
## Workflow
|
|
18
18
|
|
|
19
|
-
### 1.
|
|
19
|
+
### 1. Inputs from the parent
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
This skill runs as a sub-agent. Do NOT read `library.yaml` or `settings.yaml` — the parent has that context and passes everything inline in your prompt. Expect these inputs:
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
23
|
+
- `video_path` — absolute path to the video file
|
|
24
|
+
- `transcript_output_dir` — where to write the transcript JSON (e.g. `libraries/<library>/transcripts`)
|
|
25
|
+
- `language_code` — ISO 639-1 code already mapped by the parent (e.g. `en`, `es`)
|
|
26
|
+
- `whisper_model` — model size from the parent (e.g. `small`, `medium`, `turbo`)
|
|
27
|
+
- `transcript_refinement` — boolean; if `true`, the parent will also pass `user_context` and `footage_summary` strings for Step 4
|
|
28
|
+
- `user_context` (only when refinement is on) — may be empty string
|
|
29
|
+
- `footage_summary` (only when refinement is on) — may be empty string
|
|
30
|
+
|
|
31
|
+
If any required input is missing from your prompt, stop and ask the parent rather than inferring it from the filesystem.
|
|
29
32
|
|
|
30
33
|
### 2. Run WhisperX
|
|
31
34
|
|
|
32
35
|
```bash
|
|
33
|
-
whisperx "
|
|
34
|
-
--language
|
|
35
|
-
--model
|
|
36
|
+
whisperx "<video_path>" \
|
|
37
|
+
--language <language_code> \
|
|
38
|
+
--model <whisper_model> \
|
|
36
39
|
--compute_type float32 \
|
|
37
40
|
--device cpu \
|
|
38
41
|
--output_format json \
|
|
39
|
-
--output_dir
|
|
42
|
+
--output_dir <transcript_output_dir>
|
|
40
43
|
```
|
|
41
44
|
|
|
42
45
|
### 3. Prepare Audio Transcript
|
|
@@ -45,8 +48,8 @@ After WhisperX completes, format the JSON using our prepare_audio_script:
|
|
|
45
48
|
|
|
46
49
|
```bash
|
|
47
50
|
ruby .claude/skills/transcribe-audio/prepare_audio_script.rb \
|
|
48
|
-
|
|
49
|
-
|
|
51
|
+
<transcript_output_dir>/<video_basename>.json \
|
|
52
|
+
<video_path>
|
|
50
53
|
```
|
|
51
54
|
|
|
52
55
|
This script:
|
|
@@ -54,14 +57,18 @@ This script:
|
|
|
54
57
|
- Removes unnecessary fields to reduce file size
|
|
55
58
|
- Prettifies JSON
|
|
56
59
|
|
|
57
|
-
### 4.
|
|
60
|
+
### 4. (Optional) Refine the transcript
|
|
61
|
+
|
|
62
|
+
If the parent passed `transcript_refinement: true`, follow `.claude/skills/transcribe-audio/refine_instructions.md` using the `user_context` and `footage_summary` strings the parent supplied inline. Do NOT open `library.yaml`. If `transcript_refinement` is not set or is `false`, skip this step.
|
|
63
|
+
|
|
64
|
+
### 5. Return Success Response
|
|
58
65
|
|
|
59
66
|
After audio preparation completes, return this structured response to the parent agent:
|
|
60
67
|
|
|
61
68
|
```
|
|
62
|
-
✓
|
|
63
|
-
Audio transcript:
|
|
64
|
-
Video path:
|
|
69
|
+
✓ <video_basename.mov> transcribed successfully
|
|
70
|
+
Audio transcript: <transcript_output_dir>/<video_basename>.json
|
|
71
|
+
Video path: <video_path>
|
|
65
72
|
```
|
|
66
73
|
|
|
67
74
|
**DO NOT update library.yaml** - the parent agent will handle this to avoid race conditions when running multiple transcriptions in parallel.
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Transcript refinement instructions
|
|
2
|
+
|
|
3
|
+
Companion file for `SKILL.md`. Invoked from SKILL.md Step 4 when the parent passed `transcript_refinement: true`. Reviews a WhisperX transcript and corrects misheard words in place, using the context strings the parent supplied.
|
|
4
|
+
|
|
5
|
+
## Step 1 — Gather inputs from the parent
|
|
6
|
+
|
|
7
|
+
The parent has already supplied these inline in your prompt:
|
|
8
|
+
|
|
9
|
+
- `transcript_path` — absolute path to the prepared transcript JSON (the `<transcript_output_dir>/<video_basename>.json` file produced in SKILL.md Step 3)
|
|
10
|
+
- `user_context` — string, may be empty
|
|
11
|
+
- `footage_summary` — string, may be empty
|
|
12
|
+
|
|
13
|
+
Do NOT open `library.yaml` or search the filesystem for additional context — if the parent didn't pass it, treat it as unavailable. If the parent invoked refinement with only empty context strings, proceed anyway: catch whatever issues you can from the transcript itself plus whatever the parent did give you.
|
|
14
|
+
|
|
15
|
+
## Step 2 — Extract a compact script view
|
|
16
|
+
|
|
17
|
+
Run the shared extractor to produce a plain-text view of the transcript (one segment per paragraph, no timing metadata). Pick a sibling `.txt` path next to the transcript and pass it explicitly:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
ruby .claude/scripts/script_extractor.rb <transcript_path> <transcript_path_with_.txt_extension>
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Read ONLY that `.txt` file for the analysis steps below. Do NOT `Read` the full transcript JSON yet — it's large and you don't need its word-level structure to identify corrections.
|
|
24
|
+
|
|
25
|
+
## Step 3 — HARD RULE: preserve word count, never change timing
|
|
26
|
+
|
|
27
|
+
WhisperX produces word-level timing. The `segments[].words[]` array is 1:1 with the space-separated tokens in `segments[].text`. Splitting or merging tokens breaks this alignment and corrupts downstream timing used by roughcut.
|
|
28
|
+
|
|
29
|
+
Allowed:
|
|
30
|
+
- **1→1 token spelling fix** (same count, different characters). Transcript: `"The bike ended up in a second-floor apartment over near the Tenderlohn, which is where the cops met us."` Fix: `Tenderlohn` → `Tenderloin` — one mangled token replaced by the correct San Francisco neighborhood spelling, same single-token slot. Surrounding words are untouched.
|
|
31
|
+
- **N→N token phrase fix** (same count across a phrase). Transcript: `"We had been planning to ride out to Walnut Creak for the weekend before the whole thing happened."` Fix: `Walnut Creak` → `Walnut Creek` — two tokens stay two tokens; only one token's spelling changes, but the phrase is treated as the unit of edit for safety.
|
|
32
|
+
|
|
33
|
+
Disallowed:
|
|
34
|
+
- **1→2 token split**. Transcript: `"Her cousin grew up in Sanjose and still lives in the same house her parents bought in the sixties."` The correct spelling is "San Jose" (two tokens), but WhisperX fused it into a single token covering the speaker's fast delivery. Splitting that one timing slot into two requires guessing where "San" ends and "Jose" begins — don't do it. (See squashing technique below for the right move.)
|
|
35
|
+
- **2→1 token merge**. Transcript: `"We walked every single block of the neighborhood looking for the stolen bike that afternoon."` If you wanted to "normalize" `every single` into a single `everysingle` token, you'd drop one entry from the words array. Same corruption in reverse. Don't.
|
|
36
|
+
|
|
37
|
+
Never modify timing fields (`start`, `end`, `duration`, `word.start`, `word.end`) for any reason.
|
|
38
|
+
|
|
39
|
+
**Squashing technique**: when the correct term is naturally multi-word but the transcript has it as a single nonsense token, squash the correction into a single-token form to preserve word count. Downstream agents (analyze-video, roughcut) care about accurate word recognition, not cosmetic spacing — prefer squashing over skipping.
|
|
40
|
+
|
|
41
|
+
- Transcript: `"Her cousin grew up in Sanjose and still lives in the same house her parents bought in the sixties."` Fix: `Sanjose` → `SanJose` (squashed single-token form). Downstream agents will still recognize the city. NOT `San Jose` — that's a disallowed 1→2 split.
|
|
42
|
+
- Transcript: `"Our rental was a tiny cottage right on the edge of Tenderknob, close to a Burmese place we ended up at every single night."` The speaker meant "Tendernob" (the informal Tenderloin/Nob Hill border). Fix: `Tenderknob` → `Tendernob` (1→1 spelling fix, stays one token).
|
|
43
|
+
- Transcript: `"She went to a little Catholic school in the Mission called Saintvincent when she was a kid, and her sister went there too."` Fix: `Saintvincent` → `SaintVincent` (squashed; preserves the one-token slot).
|
|
44
|
+
|
|
45
|
+
If even squashing won't work (genuinely requires splitting or merging tokens), do NOT edit. Note it in your return summary instead. Example: `"Skipped: 'everysingle' in segment 12 should likely be 'every single' (two words), but a 1→2 split would corrupt timing."`
|
|
46
|
+
|
|
47
|
+
## Step 4 — Identify corrections from the compact script
|
|
48
|
+
|
|
49
|
+
Scan the `.txt` view against the confidence rubric. Every candidate must also satisfy Step 3's word-count rule.
|
|
50
|
+
|
|
51
|
+
- **Context-named term match**: correct if the intended term appears in `user_context` or `footage_summary` and the transcript has a close mishearing. Example: `footage_summary` says "the couple got married at a small vineyard in Sonoma over Labor Day weekend." The transcript has `"We drove all the way up to Sanoma on Friday afternoon and the traffic was unbelievable."` "Sanoma" is a 1→1 mishearing of the context-named location — fix it.
|
|
52
|
+
- **Nonsense-token match**: correct if the transcript token is a non-word nonsense string with a clear real-world spelling implied by context. Example: transcript says `"His mother grew up in Pleasantton and worked at the little cafe downtown for twenty years."` "Pleasantton" isn't a real place — but "Pleasanton" is a real East Bay city and nothing else is phonetically close. 1→1 spelling fix.
|
|
53
|
+
- **Self-witness rule**: correct if the proposed correct form appears elsewhere in the SAME transcript AND the suspect token is phonetically close. Example: an early segment says `"Andrew and Gordon ended up getting dinner at a Thai place in Pacific Heights that night after everything calmed down."` A later segment says `"Pacific Heights has been Andrew's favorite neighborhood since he first moved to the city back in 2015."` If a third segment has `"We drove through Pasific Hites on the way to the station."`, fix it — the correct form is witnessed twice elsewhere in the same transcript.
|
|
54
|
+
- **Do NOT correct based on general world knowledge alone**. Example: transcript says `"Andrew dropped by a little market on Fillmore for snacks before we started the ride."` Even if you happen to know of a specific famous store on Fillmore, don't invent it — the generic phrasing might be exactly what was said. Require either a context naming or a self-witness. If neither exists, leave it.
|
|
55
|
+
|
|
56
|
+
Collect every authorized correction as an `old → new` pair before moving to Step 5.
|
|
57
|
+
|
|
58
|
+
## Step 5 — Apply each correction to the full JSON
|
|
59
|
+
|
|
60
|
+
Now (and only now) you need to touch the transcript JSON. For each correction, you must update three places so they stay consistent:
|
|
61
|
+
|
|
62
|
+
1. `segments[].text` — the sentence-level text
|
|
63
|
+
2. `segments[].words[].word` — the word-level array inside the owning segment
|
|
64
|
+
3. `word_segments[].word` — the top-level flat word array
|
|
65
|
+
|
|
66
|
+
Read the JSON in targeted slices, not as a whole — use `Grep` to locate each occurrence and its surrounding lines, then `Edit` with a unique anchor.
|
|
67
|
+
|
|
68
|
+
### 5a — Update `segments[].text` with phrase context
|
|
69
|
+
|
|
70
|
+
Every correction must include at least one adjacent word of surrounding context. Never `Edit` on a bare word — even nonsense tokens — because Edit does substring matching, not word-boundary matching. Bare-word replacements silently corrupt legitimate substrings. For example, if you try to fix a misheard `"car"` by running `Edit replace_all=true old="car" new="far"`, you'll also rewrite every occurrence of `"carrot"` into `"farrot"`, every `"scared"` into `"sfared"`, and so on across the whole transcript. Always anchor the edit with at least one adjacent word.
|
|
71
|
+
|
|
72
|
+
Correct form:
|
|
73
|
+
|
|
74
|
+
- `Edit replace_all=true old="second-floor apartment over near the Tenderlohn" new="second-floor apartment over near the Tenderloin"` — 1→1 spelling fix in generous phrase context.
|
|
75
|
+
- `Edit replace_all=true old="ride out to Walnut Creak for the weekend" new="ride out to Walnut Creek for the weekend"` — 2→2 phrase fix.
|
|
76
|
+
- `Edit replace_all=true old="cousin grew up in Sanjose and still lives" new="cousin grew up in SanJose and still lives"` — squashed 1→1 fix.
|
|
77
|
+
|
|
78
|
+
**Case rule**: preserve the transcript's existing case. The goal is accurate word recognition for downstream agents, not proper-noun capitalization. If the transcript has "tundraloin" (lowercase), replace with "tenderloin" (lowercase) — don't upgrade to "Tenderloin". If the transcript has "Tundraloin" at a sentence start, replace with "Tenderloin" there. Match case-for-case; don't normalize. Exception: the squashing technique (Step 3) may introduce an internal capital to mark a word boundary (e.g. `Sanjose` → `SanJose`); the first letter's case still follows this rule.
|
|
79
|
+
|
|
80
|
+
### 5b — Update the two word-level arrays, anchored by `start`
|
|
81
|
+
|
|
82
|
+
Both `segments[].words[].word` and top-level `word_segments[].word` have their own entry for each token. These arrays aren't consumed downstream yet, but they're how we'll cut a single word or phrase out of a segment later, so keeping them consistent with the corrected `segments[].text` is load-bearing — don't leave them stale.
|
|
83
|
+
|
|
84
|
+
Anchor each word-array edit on the adjacent `start` timestamp so it's unique (the token alone may appear in many slots). Only the `word` field changes; every other field (`start`, `end`, `score`, etc.) must stay untouched.
|
|
85
|
+
|
|
86
|
+
The transcript JSON is pretty-printed (`JSON.pretty_generate`), so each key sits on its own line. `Edit` does literal substring matching — your `old_string` must include the newline and indentation between `"word": "..."` and `"start": ...`. Use the exact whitespace from the file (open it with `Read` or `Grep -A` first to copy the indentation verbatim).
|
|
87
|
+
|
|
88
|
+
- Two-line anchor form (copy the real indentation from the file):
|
|
89
|
+
```
|
|
90
|
+
Edit old='"word": "Sanjose",
|
|
91
|
+
"start": 10.534' new='"word": "SanJose",
|
|
92
|
+
"start": 10.534'
|
|
93
|
+
```
|
|
94
|
+
Updates one entry; repeat for the other array.
|
|
95
|
+
- For an N→N phrase fix, update each token's word entry the same way, anchored by its own `start`.
|
|
96
|
+
- For the squashing case (e.g. `Sanjose` → `SanJose`), the word count is unchanged, so there's still exactly one word entry to update per array.
|
|
97
|
+
|
|
98
|
+
## Step 6 — Clean up the extracted script file
|
|
99
|
+
|
|
100
|
+
Delete the `.txt` file created in Step 2. It's scaffolding, not a deliverable.
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
rm <transcript_path_with_.txt_extension>
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Step 7 — Return summary to the parent
|
|
107
|
+
|
|
108
|
+
Append a refinement line to your SKILL.md Step 5 response. Format:
|
|
109
|
+
|
|
110
|
+
- If corrections made: list them as `old → new` pairs, one per line.
|
|
111
|
+
- If no corrections needed: `"Refinement: no corrections needed"`.
|
|
112
|
+
- If some candidates were skipped for word-count reasons: `"Refinement: skipped N corrections that would have changed word count"` followed by the list.
|
|
113
|
+
|
|
114
|
+
The parent writes only `transcript: <filename>.json` to library.yaml — no new field needed.
|
data/CLAUDE.md
CHANGED
|
@@ -40,6 +40,33 @@ You are an AI video editor assistant working with a software engineer. You gener
|
|
|
40
40
|
|
|
41
41
|
Libraries are the primary abstraction in ButterCut - each library represents a video series or project and is self-contained under `/libraries/[library-name]/`. A library is conceptually similar to a Final Cut Pro library, but uses a simple file structure (YAML, JSON transcripts) optimized for AI analysis rather than FCP's proprietary format.
|
|
42
42
|
|
|
43
|
+
### Initialize Settings
|
|
44
|
+
|
|
45
|
+
Before any library setup, check if `libraries/settings.yaml` exists. If not, copy from template:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
cp templates/settings_template.yaml libraries/settings.yaml
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
If no previous `settings.yaml` was present, use the AskUserQuestion tool to have the user confirm or change their defaults (`editor` and `whisper_model`).
|
|
52
|
+
|
|
53
|
+
Editor Options:
|
|
54
|
+
- Final Cut Pro X
|
|
55
|
+
- Adobe Premiere Pro
|
|
56
|
+
- DaVinci Resolve
|
|
57
|
+
|
|
58
|
+
Model Options:
|
|
59
|
+
- Small (recommended — pairs well with per-library transcript_refinement)
|
|
60
|
+
- Medium
|
|
61
|
+
- Turbo (Large)
|
|
62
|
+
|
|
63
|
+
Save the chosen values to `libraries/settings.yaml`.
|
|
64
|
+
|
|
65
|
+
Note: `transcript_refinement` is a **per-library** setting (not global). Ask about it during library setup (see "Gather Project Information" below), not during initial settings setup.
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
When creating a new library, read `libraries/settings.yaml` and use the `editor` value to pre-populate the library's `editor` field.
|
|
69
|
+
|
|
43
70
|
### Check for Existing Library
|
|
44
71
|
|
|
45
72
|
**ALWAYS** check if a library already exists before starting setup:
|
|
@@ -63,9 +90,9 @@ ls libraries/[library-name]/library.yaml
|
|
|
63
90
|
|
|
64
91
|
### Gather Project Information
|
|
65
92
|
|
|
66
|
-
Ask the user these questions for new libraries:
|
|
93
|
+
Ask the user these questions for new libraries one at a time (never all at once):
|
|
67
94
|
|
|
68
|
-
1. **What
|
|
95
|
+
1. **What do you want to call this project library?**
|
|
69
96
|
- Examples: "bike-locking-video-series", "raiders-2025-highlights", "yo-yo-techniques"
|
|
70
97
|
- Normalize the name:
|
|
71
98
|
- Replace spaces with dashes
|
|
@@ -73,15 +100,20 @@ Ask the user these questions for new libraries:
|
|
|
73
100
|
- Remove special characters (keep alphanumeric and dashes)
|
|
74
101
|
|
|
75
102
|
2. **Where are the video files located?**
|
|
76
|
-
-
|
|
103
|
+
- Ask: "Where are your video files? You can drag folders or individual files directly into the chat."
|
|
77
104
|
- Verify all files exist before proceeding
|
|
78
105
|
- Inform user of what was found: "Found 5 video files totaling 2.3GB"
|
|
79
106
|
|
|
80
107
|
3. **What language is spoken in these videos?**
|
|
81
|
-
-
|
|
82
|
-
-
|
|
83
|
-
-
|
|
84
|
-
|
|
108
|
+
- Ask using AskUserQuestion with options: "English", "Spanish" and a free-text fallback for other languages
|
|
109
|
+
- Save the language name (e.g., "English") to library.yaml
|
|
110
|
+
- Map to language code (e.g., `en`, `es`, `fr`) behind the scenes when needed for transcription
|
|
111
|
+
|
|
112
|
+
4. **Can I proofread the transcripts after they're generated?**
|
|
113
|
+
- Ask using AskUserQuestion with this exact question: "Can I proofread the transcripts after they're generated? I'll use the video's context to fix mistakes."
|
|
114
|
+
- Options: "Yes - Recommended (Use Claude to refine video understanding)" and "No"
|
|
115
|
+
- Save the boolean to `transcript_refinement` in library.yaml (true for Yes, false for No)
|
|
116
|
+
- Default to `true` if the user skips
|
|
85
117
|
|
|
86
118
|
### Create Directory Structure
|
|
87
119
|
|
|
@@ -111,13 +143,19 @@ Progressively update the `footage_summary` field after each video is transcribed
|
|
|
111
143
|
After library setup completes, **automatically start analyzing all footage**:
|
|
112
144
|
|
|
113
145
|
1. Inform user: "Library setup complete. Found [N] videos ([total size]). Starting footage analysis..."
|
|
114
|
-
2. Read library.yaml
|
|
115
|
-
3. Launch `transcribe-audio` agents (can run in parallel for multiple videos)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
146
|
+
2. Read `libraries/settings.yaml` (for `whisper_model`) and the library's `library.yaml` (for `language`, `transcript_refinement`, `user_context`, `footage_summary`) ONCE in the parent thread. If any expected field is missing, run the appropriate migration first (see Critical Principles below).
|
|
147
|
+
3. Launch `transcribe-audio` agents (can run in parallel for multiple videos). Pass these values inline in each agent's prompt — the sub-agent never reads `library.yaml` or `settings.yaml`:
|
|
148
|
+
- `video_path`, `transcript_output_dir`, `language_code`, `whisper_model`
|
|
149
|
+
- `transcript_refinement` (boolean). If `true`, also pass the current `user_context` and `footage_summary` strings (empty strings are fine — refinement still catches nonsense-token and self-witness fixes).
|
|
150
|
+
4. As each agent completes, update library.yaml with `transcript` (filename only, not full path).
|
|
151
|
+
5. After all audio transcripts complete, launch `analyze-video` agents (can run in parallel) following the same "parent passes context inline" contract. Pass inline: `video_path`, `audio_transcript_path`, `visual_transcript_path`.
|
|
152
|
+
6. As each agent completes, update library.yaml with `visual_transcript` (filename only, not full path).
|
|
153
|
+
7. Analyze ALL videos before offering to create rough cuts.
|
|
154
|
+
8. **After all analysis completes, automatically create a backup** using the `backup-library` skill.
|
|
155
|
+
|
|
156
|
+
**Contract: sub-agents don't read `library.yaml`.** The parent owns `library.yaml` (and `settings.yaml`) — it reads once, passes values inline, and writes results once per agent completion. Sub-agents should not even know those files exist. This keeps the context boundary clean and avoids race conditions when many agents run in parallel.
|
|
157
|
+
|
|
158
|
+
**Note on refinement:** When `transcript_refinement: true`, each `transcribe-audio` agent reviews and corrects its transcript in place before returning, using the `user_context` and `footage_summary` the parent passed in. Empty context strings are fine — the agent still runs and catches nonsense-token and self-witness fixes. The parent still only writes `transcript: <filename>.json` to `library.yaml` after the agent completes.
|
|
121
159
|
|
|
122
160
|
**Terminology:**
|
|
123
161
|
- User-facing: Call it "footage analysis" or "analyzing footage"
|
|
@@ -133,18 +171,17 @@ After library setup completes, **automatically start analyzing all footage**:
|
|
|
133
171
|
When processing multiple videos, use parallel agents for maximum throughput:
|
|
134
172
|
|
|
135
173
|
1. **Parent agent responsibilities:**
|
|
136
|
-
- Read library.yaml
|
|
137
|
-
-
|
|
138
|
-
-
|
|
139
|
-
-
|
|
140
|
-
- Handle errors and retries
|
|
174
|
+
- Read `library.yaml` and `settings.yaml` once to gather: videos needing work, `language_code`, `whisper_model`, `transcript_refinement`, `user_context`, `footage_summary`.
|
|
175
|
+
- Launch Task agents with transcribe-audio or analyze-video skills, passing all needed values **inline in the prompt**.
|
|
176
|
+
- Update library.yaml sequentially as agents complete.
|
|
177
|
+
- Handle errors and retries.
|
|
141
178
|
|
|
142
179
|
2. **Child agent (transcribe-audio/analyze-video) responsibilities:**
|
|
143
|
-
- Process ONE video file
|
|
144
|
-
- Run WhisperX or frame extraction
|
|
145
|
-
- Prepare and clean transcript JSON
|
|
146
|
-
- Return structured response with file paths
|
|
147
|
-
- DO NOT update library.yaml (parent handles
|
|
180
|
+
- Process ONE video file using only the inputs passed inline by the parent.
|
|
181
|
+
- Run WhisperX or frame extraction.
|
|
182
|
+
- Prepare and clean transcript JSON.
|
|
183
|
+
- Return structured response with file paths.
|
|
184
|
+
- DO NOT read `library.yaml` or `settings.yaml`, and DO NOT update `library.yaml` (parent handles all yaml I/O).
|
|
148
185
|
|
|
149
186
|
3. **Benefits:**
|
|
150
187
|
- Multiple videos process simultaneously
|
|
@@ -156,7 +193,19 @@ When processing multiple videos, use parallel agents for maximum throughput:
|
|
|
156
193
|
|
|
157
194
|
Each library has a `library.yaml` file that serves as your persistent memory and the SOURCE OF TRUTH. This file contains all library metadata, footage descriptions, transcription status, and key learnings. Always read this file when working on a library and you need guidance for how/where to save files.
|
|
158
195
|
|
|
159
|
-
**
|
|
196
|
+
**Migrate legacy library.yaml files before doing anything else.** Every time you read a library.yaml, check it against the canonical field list in `templates/library_template.yaml`. If any expected field is missing, or any field appears under an old name, the library predates a feature and MUST be migrated before you do any further work on it — no rough cuts, sequences, transcription, exports, or anything else until the schema is current. The migrations are fast, idempotent, and safe; don't ask the user for permission and don't describe them as optional "tidying." Just run them.
|
|
197
|
+
|
|
198
|
+
Known migration triggers (match each to a `scripts/NNN_migrate_*.rb` script via CHANGELOG.md):
|
|
199
|
+
|
|
200
|
+
- `editor` missing (added in 0.4.0)
|
|
201
|
+
- `transcript_refinement` missing (added in [Unreleased]; missing means "predates the feature, default to `false`" — NOT the template default of `true`)
|
|
202
|
+
- `footage_summary` missing OR old name `footage_description` present (renamed in [Unreleased])
|
|
203
|
+
- video entries with `transcript_path` / `visual_transcript_path` (renamed to `transcript` / `visual_transcript` in 0.3.0)
|
|
204
|
+
- video entries with `file_size_mb` (removed in 0.3.0)
|
|
205
|
+
|
|
206
|
+
A missing field is not the same as a field set to the template default — the template default only applies to freshly created libraries. If you see a schema issue not on this list, still check CHANGELOG.md; the list may be behind. After running migrations, re-read the library.yaml and continue with whatever the user asked for.
|
|
207
|
+
|
|
208
|
+
**Keep main-thread context minimal.** The main thread orchestrates; sub-agents do the heavy work and return concise summaries. Don't read full transcript JSON, visual transcript JSON, or extracted frames into the main thread as part of routine workflow — across a large library this bloats context fast. Trust sub-agent return messages when updating library.yaml. Direct user requests ("show me transcript X") are fine; the rule is about automatic workflow behavior.
|
|
160
209
|
|
|
161
210
|
**Use actual filenames.** Never use generic labels like "Video 1" or "Clip A" - always reference actual filenames like "DJI_20250423171212_0210_D.mov" for clear traceability.
|
|
162
211
|
|
|
@@ -171,6 +220,17 @@ Each library has a `library.yaml` file that serves as your persistent memory and
|
|
|
171
220
|
- When you have lots of videos to process (dozens or hundreds isn't out of the ordinary), create a reasonable task list with 5 tasks and then a final task that says to check the yaml processing file to see if you need to then generate more tasks. This way users can see progress and the agent doesn't get overwhelmed.
|
|
172
221
|
- Generally avoid writing one-off scripts, but if you do need to write one, write it in Ruby unless you have a very strong reason to write in another language.
|
|
173
222
|
- Only run 4 parallel tasks at a time.
|
|
223
|
+
- Whenever you export XML files, include a datetime timestamp in the filename so it's clear when they were generated.
|
|
224
|
+
|
|
225
|
+
## Programming Style
|
|
226
|
+
|
|
227
|
+
When you add a Ruby script under `.claude/scripts/` or similar, follow these conventions:
|
|
228
|
+
|
|
229
|
+
- **One class per script; file name matches the class name.** `ScriptExtractor` lives in `script_extractor.rb`.
|
|
230
|
+
- **Single high-level entry point.** Expose a class method (`Klass.extract`, `Klass.run`, etc.) that calls `new(...).extract` internally — callers shouldn't need to know about instantiation.
|
|
231
|
+
- **Break the work into small private methods with clear names** (`load_transcript`, `format_script`, `write_output`, `report`). The public entry point should read like a short outline of the workflow.
|
|
232
|
+
- **Required arguments are required.** Don't silently default `nil`/missing args — raise `ArgumentError` in `initialize` if a required value is missing or empty. No hidden fallback paths.
|
|
233
|
+
- **Keep CLI arg parsing out of the class.** Use a bottom-of-file `if __FILE__ == $PROGRAM_NAME` block to parse `ARGV`, validate file paths, print a usage line, and delegate to the class.
|
|
174
234
|
|
|
175
235
|
## Project Structure
|
|
176
236
|
|
|
@@ -182,21 +242,23 @@ Each library has a `library.yaml` file that serves as your persistent memory and
|
|
|
182
242
|
- `spec/` - RSpec test suite
|
|
183
243
|
- `templates/` - Library and project templates
|
|
184
244
|
- `libraries/` - Working directory for user's video projects (gitignored)
|
|
245
|
+
- `libraries/settings.yaml` - User settings (editor, whisper_model) — created from template on first library setup
|
|
185
246
|
- `backups/` - Compressed library backups (transcriptions, roughcuts, etc) (gitignored)
|
|
186
247
|
|
|
187
248
|
## Design Philosophy
|
|
188
249
|
|
|
189
|
-
ButterCut is designed to be simple and
|
|
250
|
+
ButterCut is designed to be simple, automatic, and geared toward working with non-technical people using ButterCut via a client, Claude Cowork or Claude Code.
|
|
251
|
+
|
|
190
252
|
- **Input**: Array of full file paths to video files
|
|
191
|
-
- **Output**: Working
|
|
253
|
+
- **Output**: Working XML file ready to import into the non-technical user's video editor (Final Cut, Premiere, Resolve)
|
|
192
254
|
- **Automatic Metadata Extraction**: Uses FFmpeg internally to extract video properties (duration, resolution, frame rate, audio rate, etc.)
|
|
193
|
-
- **No Manual Configuration Required**: Library handles all the complexity of FCPXML generation
|
|
194
255
|
|
|
195
|
-
The user should not need to understand video codecs, frame rates, or FCPXML structure - just provide file paths and get working XML.
|
|
256
|
+
The user should not need to understand video codecs, frame rates, or FCPXML structure - just provide file paths and get working XML. We should talk to the user from a video editing perspective, not from a software engineering perspective.
|
|
196
257
|
|
|
197
258
|
## Development Commands
|
|
198
259
|
|
|
199
260
|
### Testing
|
|
261
|
+
RSpec tests for the XML generation library. This doesn't include agent or end to end testing.
|
|
200
262
|
```bash
|
|
201
263
|
# Install dependencies
|
|
202
264
|
bundle install
|
|
@@ -211,17 +273,6 @@ bundle exec rspec spec/buttercut_spec.rb
|
|
|
211
273
|
bundle exec rspec spec/buttercut_spec.rb:10
|
|
212
274
|
```
|
|
213
275
|
|
|
214
|
-
### DTD Validation
|
|
215
|
-
|
|
216
|
-
macOS has a built-in XML lint tool - allowing you to validate a FCPXML document against its DTD file.
|
|
217
|
-
|
|
218
|
-
```bash
|
|
219
|
-
xmllint --dtdvalid "dtd/FCPXMLv1_8.dtd" "/path/to/your/file.fcpxml"
|
|
220
|
-
```
|
|
221
|
-
|
|
222
|
-
This will check if the generated FCPXML conforms to the FCPXML 1.8 specification.
|
|
223
|
-
- Whenever you export xml files, always include a datetime timestamp so it's clear when they were generated
|
|
224
|
-
|
|
225
276
|
## Claude Skills
|
|
226
277
|
|
|
227
278
|
When creating new Claude skills, aim to keep them to 50 lines. Only very complicated skills (e.g., transcription and roughcuts) should be larger than that. If a skill is complicated and seems like it can't be explained in 50 lines, consider whether it should be broken up across multiple skills or whether the complexity can be contained inside a Ruby script saved adjacent to the skill.
|
data/README.md
CHANGED
|
@@ -124,4 +124,8 @@ MIT
|
|
|
124
124
|
|
|
125
125
|
## Contributing
|
|
126
126
|
|
|
127
|
-
Bug reports and pull requests welcome
|
|
127
|
+
Bug reports and pull requests welcome, with that said...
|
|
128
|
+
|
|
129
|
+
**Guidelines:**
|
|
130
|
+
- Write the body of your pull request or GitHub issue yourself. Don't use an agent (Claude Code, etc) to generate it.
|
|
131
|
+
- Keep pull requests small and limited to a single feature or bugfix at a time. It's a lot easier to write code now, but I feel like it's just as hard as before to review it.
|
data/lib/buttercut/version.rb
CHANGED
|
@@ -4,6 +4,7 @@ created_date: [YYYY-MM-DD]
|
|
|
4
4
|
last_updated: [YYYY-MM-DD]
|
|
5
5
|
language: english
|
|
6
6
|
editor: # preferred video editor: fcpx, premiere, or resolve
|
|
7
|
+
transcript_refinement: true # on by default; set false to skip the AI transcript review step
|
|
7
8
|
user_context: ""
|
|
8
9
|
# Whenever you ask the user questions about the library, save a summarized version here.
|
|
9
10
|
# e.g., The man wearing the dark blue long sleeve shirt is "Andrew". The small brown dog is "Sammy". This footage was shot over one evening.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# ButterCut User Settings
|
|
2
|
+
# Copy this file to libraries/settings.yaml to configure defaults
|
|
3
|
+
|
|
4
|
+
# Preferred video editor: fcpx, premiere, or resolve
|
|
5
|
+
editor: fcpx
|
|
6
|
+
|
|
7
|
+
# WhisperX model size: tiny, base, small, medium, or turbo
|
|
8
|
+
# turbo is nearly as accurate as large-v3 but significantly faster
|
|
9
|
+
# Recommended: `small` paired with transcript_refinement (set per-library in library.yaml)
|
|
10
|
+
whisper_model: small
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: buttercut
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Ford
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-04-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -62,6 +62,7 @@ extensions: []
|
|
|
62
62
|
extra_rdoc_files: []
|
|
63
63
|
files:
|
|
64
64
|
- ".claude/commands/worktree.md"
|
|
65
|
+
- ".claude/scripts/script_extractor.rb"
|
|
65
66
|
- ".claude/settings.json"
|
|
66
67
|
- ".claude/settings.local.json"
|
|
67
68
|
- ".claude/skills/analyze-video/SKILL.md"
|
|
@@ -78,6 +79,7 @@ files:
|
|
|
78
79
|
- ".claude/skills/setup/verify_install.rb"
|
|
79
80
|
- ".claude/skills/transcribe-audio/SKILL.md"
|
|
80
81
|
- ".claude/skills/transcribe-audio/prepare_audio_script.rb"
|
|
82
|
+
- ".claude/skills/transcribe-audio/refine_instructions.md"
|
|
81
83
|
- ".claude/skills/update-buttercut/SKILL.md"
|
|
82
84
|
- CLAUDE.md
|
|
83
85
|
- LICENSE
|
|
@@ -90,6 +92,7 @@ files:
|
|
|
90
92
|
- lib/buttercut/version.rb
|
|
91
93
|
- templates/library_template.yaml
|
|
92
94
|
- templates/roughcut_template.yaml
|
|
95
|
+
- templates/settings_template.yaml
|
|
93
96
|
homepage: https://github.com/andrewford/buttercut
|
|
94
97
|
licenses:
|
|
95
98
|
- MIT
|