@kenkaiiii/gg-editor 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +64 -1
- package/dist/cli.js.map +1 -1
- package/dist/core/audio-mix.d.ts.map +1 -1
- package/dist/core/audio-mix.js +8 -1
- package/dist/core/audio-mix.js.map +1 -1
- package/dist/core/audio-mix.test.js +1 -1
- package/dist/core/audio-mix.test.js.map +1 -1
- package/dist/core/auth/api-keys.d.ts +1 -1
- package/dist/core/auth/api-keys.d.ts.map +1 -1
- package/dist/core/auth/api-keys.js +2 -1
- package/dist/core/auth/api-keys.js.map +1 -1
- package/dist/core/auth/login.d.ts.map +1 -1
- package/dist/core/auth/login.js.map +1 -1
- package/dist/core/beats.d.ts +59 -0
- package/dist/core/beats.d.ts.map +1 -0
- package/dist/core/beats.js +122 -0
- package/dist/core/beats.js.map +1 -0
- package/dist/core/beats.test.d.ts +2 -0
- package/dist/core/beats.test.d.ts.map +1 -0
- package/dist/core/beats.test.js +86 -0
- package/dist/core/beats.test.js.map +1 -0
- package/dist/core/brand-kit.d.ts +80 -0
- package/dist/core/brand-kit.d.ts.map +1 -0
- package/dist/core/brand-kit.js +96 -0
- package/dist/core/brand-kit.js.map +1 -0
- package/dist/core/brand-kit.test.d.ts +2 -0
- package/dist/core/brand-kit.test.d.ts.map +1 -0
- package/dist/core/brand-kit.test.js +76 -0
- package/dist/core/brand-kit.test.js.map +1 -0
- package/dist/core/bundled-sfx.d.ts +64 -0
- package/dist/core/bundled-sfx.d.ts.map +1 -0
- package/dist/core/bundled-sfx.js +218 -0
- package/dist/core/bundled-sfx.js.map +1 -0
- package/dist/core/bundled-sfx.test.d.ts +2 -0
- package/dist/core/bundled-sfx.test.d.ts.map +1 -0
- package/dist/core/bundled-sfx.test.js +81 -0
- package/dist/core/bundled-sfx.test.js.map +1 -0
- package/dist/core/child-abort.d.ts +57 -0
- package/dist/core/child-abort.d.ts.map +1 -0
- package/dist/core/child-abort.js +95 -0
- package/dist/core/child-abort.js.map +1 -0
- package/dist/core/child-abort.test.d.ts +2 -0
- package/dist/core/child-abort.test.d.ts.map +1 -0
- package/dist/core/child-abort.test.js +88 -0
- package/dist/core/child-abort.test.js.map +1 -0
- package/dist/core/clip-scoring.d.ts +44 -0
- package/dist/core/clip-scoring.d.ts.map +1 -0
- package/dist/core/clip-scoring.js +165 -0
- package/dist/core/clip-scoring.js.map +1 -0
- package/dist/core/clip-scoring.test.d.ts +2 -0
- package/dist/core/clip-scoring.test.d.ts.map +1 -0
- package/dist/core/clip-scoring.test.js +113 -0
- package/dist/core/clip-scoring.test.js.map +1 -0
- package/dist/core/emoji-captions.d.ts +45 -0
- package/dist/core/emoji-captions.d.ts.map +1 -0
- package/dist/core/emoji-captions.js +121 -0
- package/dist/core/emoji-captions.js.map +1 -0
- package/dist/core/face-reframe.d.ts +91 -0
- package/dist/core/face-reframe.d.ts.map +1 -0
- package/dist/core/face-reframe.js +141 -0
- package/dist/core/face-reframe.js.map +1 -0
- package/dist/core/face-reframe.test.d.ts +2 -0
- package/dist/core/face-reframe.test.d.ts.map +1 -0
- package/dist/core/face-reframe.test.js +171 -0
- package/dist/core/face-reframe.test.js.map +1 -0
- package/dist/core/filler-words.d.ts +57 -9
- package/dist/core/filler-words.d.ts.map +1 -1
- package/dist/core/filler-words.js +61 -9
- package/dist/core/filler-words.js.map +1 -1
- package/dist/core/filler-words.test.js +91 -17
- package/dist/core/filler-words.test.js.map +1 -1
- package/dist/core/hook-rewrite.d.ts +48 -0
- package/dist/core/hook-rewrite.d.ts.map +1 -0
- package/dist/core/hook-rewrite.js +151 -0
- package/dist/core/hook-rewrite.js.map +1 -0
- package/dist/core/hook-rewrite.test.d.ts +2 -0
- package/dist/core/hook-rewrite.test.d.ts.map +1 -0
- package/dist/core/hook-rewrite.test.js +58 -0
- package/dist/core/hook-rewrite.test.js.map +1 -0
- package/dist/core/hosts/lazy.d.ts.map +1 -1
- package/dist/core/hosts/lazy.js +2 -0
- package/dist/core/hosts/lazy.js.map +1 -1
- package/dist/core/hosts/premiere/adapter.d.ts +1 -0
- package/dist/core/hosts/premiere/adapter.d.ts.map +1 -1
- package/dist/core/hosts/premiere/adapter.js.map +1 -1
- package/dist/core/hosts/premiere/bridge-source.d.ts.map +1 -1
- package/dist/core/hosts/premiere/bridge-source.js +6 -3
- package/dist/core/hosts/premiere/bridge-source.js.map +1 -1
- package/dist/core/hosts/resolve/adapter.d.ts +1 -0
- package/dist/core/hosts/resolve/adapter.d.ts.map +1 -1
- package/dist/core/hosts/resolve/adapter.js.map +1 -1
- package/dist/core/hosts/resolve/bridge-source.d.ts.map +1 -1
- package/dist/core/hosts/resolve/bridge-source.js +31 -4
- package/dist/core/hosts/resolve/bridge-source.js.map +1 -1
- package/dist/core/hosts/resolve/bridge.d.ts +2 -19
- package/dist/core/hosts/resolve/bridge.d.ts.map +1 -1
- package/dist/core/hosts/resolve/bridge.js +70 -41
- package/dist/core/hosts/resolve/bridge.js.map +1 -1
- package/dist/core/hosts/resolve/bridge.test.js +130 -0
- package/dist/core/hosts/resolve/bridge.test.js.map +1 -1
- package/dist/core/hosts/types.d.ts +6 -0
- package/dist/core/hosts/types.d.ts.map +1 -1
- package/dist/core/hosts/types.js.map +1 -1
- package/dist/core/logger.d.ts +32 -0
- package/dist/core/logger.d.ts.map +1 -0
- package/dist/core/logger.js +188 -0
- package/dist/core/logger.js.map +1 -0
- package/dist/core/loop-match.d.ts +57 -0
- package/dist/core/loop-match.d.ts.map +1 -0
- package/dist/core/loop-match.js +91 -0
- package/dist/core/loop-match.js.map +1 -0
- package/dist/core/media/ffmpeg.d.ts.map +1 -1
- package/dist/core/media/ffmpeg.js +14 -3
- package/dist/core/media/ffmpeg.js.map +1 -1
- package/dist/core/multi-format.d.ts +67 -0
- package/dist/core/multi-format.d.ts.map +1 -0
- package/dist/core/multi-format.js +127 -0
- package/dist/core/multi-format.js.map +1 -0
- package/dist/core/multi-format.test.d.ts +2 -0
- package/dist/core/multi-format.test.d.ts.map +1 -0
- package/dist/core/multi-format.test.js +151 -0
- package/dist/core/multi-format.test.js.map +1 -0
- package/dist/core/python/beats.py +61 -0
- package/dist/core/python/face_reframe.py +163 -0
- package/dist/core/python/sidecar-path.d.ts +13 -0
- package/dist/core/python/sidecar-path.d.ts.map +1 -0
- package/dist/core/python/sidecar-path.js +24 -0
- package/dist/core/python/sidecar-path.js.map +1 -0
- package/dist/core/python.d.ts +57 -0
- package/dist/core/python.d.ts.map +1 -0
- package/dist/core/python.js +107 -0
- package/dist/core/python.js.map +1 -0
- package/dist/core/python.test.d.ts +2 -0
- package/dist/core/python.test.d.ts.map +1 -0
- package/dist/core/python.test.js +129 -0
- package/dist/core/python.test.js.map +1 -0
- package/dist/core/retention-structure.d.ts +81 -0
- package/dist/core/retention-structure.d.ts.map +1 -0
- package/dist/core/retention-structure.js +206 -0
- package/dist/core/retention-structure.js.map +1 -0
- package/dist/core/retention-structure.test.d.ts +2 -0
- package/dist/core/retention-structure.test.d.ts.map +1 -0
- package/dist/core/retention-structure.test.js +88 -0
- package/dist/core/retention-structure.test.js.map +1 -0
- package/dist/core/review.d.ts +17 -0
- package/dist/core/review.d.ts.map +1 -1
- package/dist/core/review.js +20 -24
- package/dist/core/review.js.map +1 -1
- package/dist/core/safe-paths.d.ts +11 -0
- package/dist/core/safe-paths.d.ts.map +1 -1
- package/dist/core/safe-paths.js +26 -10
- package/dist/core/safe-paths.js.map +1 -1
- package/dist/core/safe-paths.test.js +16 -0
- package/dist/core/safe-paths.test.js.map +1 -1
- package/dist/core/skills-loader.d.ts +48 -2
- package/dist/core/skills-loader.d.ts.map +1 -1
- package/dist/core/skills-loader.js +97 -19
- package/dist/core/skills-loader.js.map +1 -1
- package/dist/core/skills-loader.test.js +63 -1
- package/dist/core/skills-loader.test.js.map +1 -1
- package/dist/core/srt.d.ts +42 -7
- package/dist/core/srt.d.ts.map +1 -1
- package/dist/core/srt.js +101 -32
- package/dist/core/srt.js.map +1 -1
- package/dist/core/srt.test.js +54 -1
- package/dist/core/srt.test.js.map +1 -1
- package/dist/core/thumbnail-compose.d.ts +58 -0
- package/dist/core/thumbnail-compose.d.ts.map +1 -0
- package/dist/core/thumbnail-compose.js +101 -0
- package/dist/core/thumbnail-compose.js.map +1 -0
- package/dist/core/thumbnail-promise.d.ts +46 -0
- package/dist/core/thumbnail-promise.d.ts.map +1 -0
- package/dist/core/thumbnail-promise.js +133 -0
- package/dist/core/thumbnail-promise.js.map +1 -0
- package/dist/core/thumbnail-promise.test.d.ts +2 -0
- package/dist/core/thumbnail-promise.test.d.ts.map +1 -0
- package/dist/core/thumbnail-promise.test.js +52 -0
- package/dist/core/thumbnail-promise.test.js.map +1 -0
- package/dist/core/viral-moments.d.ts +70 -0
- package/dist/core/viral-moments.d.ts.map +1 -0
- package/dist/core/viral-moments.js +192 -0
- package/dist/core/viral-moments.js.map +1 -0
- package/dist/core/viral-moments.test.d.ts +2 -0
- package/dist/core/viral-moments.test.d.ts.map +1 -0
- package/dist/core/viral-moments.test.js +153 -0
- package/dist/core/viral-moments.test.js.map +1 -0
- package/dist/core/whisper.d.ts +16 -0
- package/dist/core/whisper.d.ts.map +1 -1
- package/dist/core/whisper.js +72 -5
- package/dist/core/whisper.js.map +1 -1
- package/dist/core/whisper.test.js +111 -1
- package/dist/core/whisper.test.js.map +1 -1
- package/dist/core/youtube-metadata.d.ts +44 -0
- package/dist/core/youtube-metadata.d.ts.map +1 -0
- package/dist/core/youtube-metadata.js +168 -0
- package/dist/core/youtube-metadata.js.map +1 -0
- package/dist/core/youtube-metadata.test.d.ts +2 -0
- package/dist/core/youtube-metadata.test.d.ts.map +1 -0
- package/dist/core/youtube-metadata.test.js +132 -0
- package/dist/core/youtube-metadata.test.js.map +1 -0
- package/dist/prompt-commands.d.ts +24 -0
- package/dist/prompt-commands.d.ts.map +1 -0
- package/dist/prompt-commands.js +243 -0
- package/dist/prompt-commands.js.map +1 -0
- package/dist/prompt-commands.test.d.ts +2 -0
- package/dist/prompt-commands.test.d.ts.map +1 -0
- package/dist/prompt-commands.test.js +46 -0
- package/dist/prompt-commands.test.js.map +1 -0
- package/dist/skills.d.ts +6 -6
- package/dist/skills.d.ts.map +1 -1
- package/dist/skills.js +1426 -445
- package/dist/skills.js.map +1 -1
- package/dist/system-prompt.d.ts.map +1 -1
- package/dist/system-prompt.js +108 -0
- package/dist/system-prompt.js.map +1 -1
- package/dist/tools/add-fades.d.ts.map +1 -1
- package/dist/tools/add-fades.js +2 -1
- package/dist/tools/add-fades.js.map +1 -1
- package/dist/tools/add-sfx-at-cuts.d.ts.map +1 -1
- package/dist/tools/add-sfx-at-cuts.js +36 -11
- package/dist/tools/add-sfx-at-cuts.js.map +1 -1
- package/dist/tools/add-sfx-to-timeline.d.ts +34 -0
- package/dist/tools/add-sfx-to-timeline.d.ts.map +1 -0
- package/dist/tools/add-sfx-to-timeline.js +169 -0
- package/dist/tools/add-sfx-to-timeline.js.map +1 -0
- package/dist/tools/add-sfx-to-timeline.test.d.ts +2 -0
- package/dist/tools/add-sfx-to-timeline.test.d.ts.map +1 -0
- package/dist/tools/add-sfx-to-timeline.test.js +181 -0
- package/dist/tools/add-sfx-to-timeline.test.js.map +1 -0
- package/dist/tools/audit-first-frame.d.ts +36 -0
- package/dist/tools/audit-first-frame.d.ts.map +1 -0
- package/dist/tools/audit-first-frame.js +181 -0
- package/dist/tools/audit-first-frame.js.map +1 -0
- package/dist/tools/audit-retention-structure.d.ts +20 -0
- package/dist/tools/audit-retention-structure.d.ts.map +1 -0
- package/dist/tools/audit-retention-structure.js +95 -0
- package/dist/tools/audit-retention-structure.js.map +1 -0
- package/dist/tools/audit-retention-structure.test.d.ts +2 -0
- package/dist/tools/audit-retention-structure.test.d.ts.map +1 -0
- package/dist/tools/audit-retention-structure.test.js +93 -0
- package/dist/tools/audit-retention-structure.test.js.map +1 -0
- package/dist/tools/bleep-words.d.ts +59 -0
- package/dist/tools/bleep-words.d.ts.map +1 -0
- package/dist/tools/bleep-words.js +211 -0
- package/dist/tools/bleep-words.js.map +1 -0
- package/dist/tools/bleep-words.test.d.ts +2 -0
- package/dist/tools/bleep-words.test.d.ts.map +1 -0
- package/dist/tools/bleep-words.test.js +96 -0
- package/dist/tools/bleep-words.test.js.map +1 -0
- package/dist/tools/burn-subtitles.d.ts.map +1 -1
- package/dist/tools/burn-subtitles.js +10 -5
- package/dist/tools/burn-subtitles.js.map +1 -1
- package/dist/tools/clean-audio.d.ts.map +1 -1
- package/dist/tools/clean-audio.js +2 -1
- package/dist/tools/clean-audio.js.map +1 -1
- package/dist/tools/cluster-takes.js +2 -1
- package/dist/tools/cluster-takes.js.map +1 -1
- package/dist/tools/compose-thumbnail-variants.d.ts +70 -0
- package/dist/tools/compose-thumbnail-variants.d.ts.map +1 -0
- package/dist/tools/compose-thumbnail-variants.js +274 -0
- package/dist/tools/compose-thumbnail-variants.js.map +1 -0
- package/dist/tools/compose-thumbnail.d.ts +6 -13
- package/dist/tools/compose-thumbnail.d.ts.map +1 -1
- package/dist/tools/compose-thumbnail.js +44 -81
- package/dist/tools/compose-thumbnail.js.map +1 -1
- package/dist/tools/concat-videos.d.ts.map +1 -1
- package/dist/tools/concat-videos.js +12 -5
- package/dist/tools/concat-videos.js.map +1 -1
- package/dist/tools/concat-videos.test.d.ts +2 -0
- package/dist/tools/concat-videos.test.d.ts.map +1 -0
- package/dist/tools/concat-videos.test.js +103 -0
- package/dist/tools/concat-videos.test.js.map +1 -0
- package/dist/tools/crossfade-videos.d.ts.map +1 -1
- package/dist/tools/crossfade-videos.js +2 -1
- package/dist/tools/crossfade-videos.js.map +1 -1
- package/dist/tools/cut-filler-words.d.ts.map +1 -1
- package/dist/tools/cut-filler-words.js +24 -8
- package/dist/tools/cut-filler-words.js.map +1 -1
- package/dist/tools/detect-speaker-changes.js +2 -1
- package/dist/tools/detect-speaker-changes.js.map +1 -1
- package/dist/tools/extract-audio.d.ts.map +1 -1
- package/dist/tools/extract-audio.js +13 -7
- package/dist/tools/extract-audio.js.map +1 -1
- package/dist/tools/face-reframe.d.ts +30 -0
- package/dist/tools/face-reframe.d.ts.map +1 -0
- package/dist/tools/face-reframe.js +143 -0
- package/dist/tools/face-reframe.js.map +1 -0
- package/dist/tools/face-reframe.test.d.ts +2 -0
- package/dist/tools/face-reframe.test.d.ts.map +1 -0
- package/dist/tools/face-reframe.test.js +139 -0
- package/dist/tools/face-reframe.test.js.map +1 -0
- package/dist/tools/find-viral-moments.d.ts +23 -0
- package/dist/tools/find-viral-moments.d.ts.map +1 -0
- package/dist/tools/find-viral-moments.js +176 -0
- package/dist/tools/find-viral-moments.js.map +1 -0
- package/dist/tools/find-viral-moments.test.d.ts +2 -0
- package/dist/tools/find-viral-moments.test.d.ts.map +1 -0
- package/dist/tools/find-viral-moments.test.js +144 -0
- package/dist/tools/find-viral-moments.test.js.map +1 -0
- package/dist/tools/generate-gif.d.ts.map +1 -1
- package/dist/tools/generate-gif.js +47 -40
- package/dist/tools/generate-gif.js.map +1 -1
- package/dist/tools/generate-gif.test.d.ts +2 -0
- package/dist/tools/generate-gif.test.d.ts.map +1 -0
- package/dist/tools/generate-gif.test.js +115 -0
- package/dist/tools/generate-gif.test.js.map +1 -0
- package/dist/tools/generate-outro.d.ts +18 -0
- package/dist/tools/generate-outro.d.ts.map +1 -0
- package/dist/tools/generate-outro.js +175 -0
- package/dist/tools/generate-outro.js.map +1 -0
- package/dist/tools/generate-youtube-metadata.d.ts +23 -0
- package/dist/tools/generate-youtube-metadata.d.ts.map +1 -0
- package/dist/tools/generate-youtube-metadata.js +103 -0
- package/dist/tools/generate-youtube-metadata.js.map +1 -0
- package/dist/tools/generate-youtube-metadata.test.d.ts +2 -0
- package/dist/tools/generate-youtube-metadata.test.d.ts.map +1 -0
- package/dist/tools/generate-youtube-metadata.test.js +118 -0
- package/dist/tools/generate-youtube-metadata.test.js.map +1 -0
- package/dist/tools/index.d.ts +14 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +130 -1
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/index.test.js +27 -1
- package/dist/tools/index.test.js.map +1 -1
- package/dist/tools/ken-burns.d.ts.map +1 -1
- package/dist/tools/ken-burns.js +2 -1
- package/dist/tools/ken-burns.js.map +1 -1
- package/dist/tools/loop-match-short.d.ts +22 -0
- package/dist/tools/loop-match-short.d.ts.map +1 -0
- package/dist/tools/loop-match-short.js +107 -0
- package/dist/tools/loop-match-short.js.map +1 -0
- package/dist/tools/mix-audio.d.ts.map +1 -1
- package/dist/tools/mix-audio.js +2 -1
- package/dist/tools/mix-audio.js.map +1 -1
- package/dist/tools/normalize-loudness.d.ts.map +1 -1
- package/dist/tools/normalize-loudness.js +2 -1
- package/dist/tools/normalize-loudness.js.map +1 -1
- package/dist/tools/path-traversal.test.d.ts +15 -0
- package/dist/tools/path-traversal.test.d.ts.map +1 -0
- package/dist/tools/path-traversal.test.js +223 -0
- package/dist/tools/path-traversal.test.js.map +1 -0
- package/dist/tools/pick-best-takes.js +2 -1
- package/dist/tools/pick-best-takes.js.map +1 -1
- package/dist/tools/punch-in.d.ts.map +1 -1
- package/dist/tools/punch-in.js +2 -1
- package/dist/tools/punch-in.js.map +1 -1
- package/dist/tools/read-transcript.js +2 -1
- package/dist/tools/read-transcript.js.map +1 -1
- package/dist/tools/render-multi-format.d.ts +35 -0
- package/dist/tools/render-multi-format.d.ts.map +1 -0
- package/dist/tools/render-multi-format.js +206 -0
- package/dist/tools/render-multi-format.js.map +1 -0
- package/dist/tools/render-multi-format.test.d.ts +2 -0
- package/dist/tools/render-multi-format.test.d.ts.map +1 -0
- package/dist/tools/render-multi-format.test.js +312 -0
- package/dist/tools/render-multi-format.test.js.map +1 -0
- package/dist/tools/render.d.ts.map +1 -1
- package/dist/tools/render.js +2 -2
- package/dist/tools/render.js.map +1 -1
- package/dist/tools/rewrite-hook.d.ts +32 -0
- package/dist/tools/rewrite-hook.d.ts.map +1 -0
- package/dist/tools/rewrite-hook.js +65 -0
- package/dist/tools/rewrite-hook.js.map +1 -0
- package/dist/tools/score-clip.d.ts +30 -0
- package/dist/tools/score-clip.d.ts.map +1 -0
- package/dist/tools/score-clip.js +109 -0
- package/dist/tools/score-clip.js.map +1 -0
- package/dist/tools/score-clip.test.d.ts +2 -0
- package/dist/tools/score-clip.test.d.ts.map +1 -0
- package/dist/tools/score-clip.test.js +110 -0
- package/dist/tools/score-clip.test.js.map +1 -0
- package/dist/tools/search-tools.d.ts +34 -0
- package/dist/tools/search-tools.d.ts.map +1 -0
- package/dist/tools/search-tools.js +86 -0
- package/dist/tools/search-tools.js.map +1 -0
- package/dist/tools/search-tools.test.d.ts +2 -0
- package/dist/tools/search-tools.test.d.ts.map +1 -0
- package/dist/tools/search-tools.test.js +60 -0
- package/dist/tools/search-tools.test.js.map +1 -0
- package/dist/tools/snap-cuts-to-beats.d.ts +18 -0
- package/dist/tools/snap-cuts-to-beats.d.ts.map +1 -0
- package/dist/tools/snap-cuts-to-beats.js +110 -0
- package/dist/tools/snap-cuts-to-beats.js.map +1 -0
- package/dist/tools/snap-cuts-to-beats.test.d.ts +2 -0
- package/dist/tools/snap-cuts-to-beats.test.d.ts.map +1 -0
- package/dist/tools/snap-cuts-to-beats.test.js +99 -0
- package/dist/tools/snap-cuts-to-beats.test.js.map +1 -0
- package/dist/tools/speed-ramp.d.ts.map +1 -1
- package/dist/tools/speed-ramp.js +2 -1
- package/dist/tools/speed-ramp.js.map +1 -1
- package/dist/tools/stabilize-video.d.ts.map +1 -1
- package/dist/tools/stabilize-video.js +2 -1
- package/dist/tools/stabilize-video.js.map +1 -1
- package/dist/tools/suggest-broll.d.ts +34 -0
- package/dist/tools/suggest-broll.d.ts.map +1 -0
- package/dist/tools/suggest-broll.js +367 -0
- package/dist/tools/suggest-broll.js.map +1 -0
- package/dist/tools/suggest-broll.test.d.ts +2 -0
- package/dist/tools/suggest-broll.test.d.ts.map +1 -0
- package/dist/tools/suggest-broll.test.js +217 -0
- package/dist/tools/suggest-broll.test.js.map +1 -0
- package/dist/tools/text-based-cut.d.ts +33 -0
- package/dist/tools/text-based-cut.d.ts.map +1 -0
- package/dist/tools/text-based-cut.js +172 -0
- package/dist/tools/text-based-cut.js.map +1 -0
- package/dist/tools/text-based-cut.test.d.ts +2 -0
- package/dist/tools/text-based-cut.test.d.ts.map +1 -0
- package/dist/tools/text-based-cut.test.js +32 -0
- package/dist/tools/text-based-cut.test.js.map +1 -0
- package/dist/tools/transcribe.d.ts +1 -1
- package/dist/tools/transition-videos.d.ts +1 -1
- package/dist/tools/transition-videos.d.ts.map +1 -1
- package/dist/tools/transition-videos.js +2 -1
- package/dist/tools/transition-videos.js.map +1 -1
- package/dist/tools/trim-dead-air.d.ts +59 -0
- package/dist/tools/trim-dead-air.d.ts.map +1 -0
- package/dist/tools/trim-dead-air.js +215 -0
- package/dist/tools/trim-dead-air.js.map +1 -0
- package/dist/tools/trim-dead-air.test.d.ts +2 -0
- package/dist/tools/trim-dead-air.test.d.ts.map +1 -0
- package/dist/tools/trim-dead-air.test.js +75 -0
- package/dist/tools/trim-dead-air.test.js.map +1 -0
- package/dist/tools/verify-thumbnail-promise.d.ts +33 -0
- package/dist/tools/verify-thumbnail-promise.d.ts.map +1 -0
- package/dist/tools/verify-thumbnail-promise.js +112 -0
- package/dist/tools/verify-thumbnail-promise.js.map +1 -0
- package/dist/tools/verify-thumbnail-promise.test.d.ts +2 -0
- package/dist/tools/verify-thumbnail-promise.test.d.ts.map +1 -0
- package/dist/tools/verify-thumbnail-promise.test.js +38 -0
- package/dist/tools/verify-thumbnail-promise.test.js.map +1 -0
- package/dist/tools/write-keyword-captions.d.ts +7 -0
- package/dist/tools/write-keyword-captions.d.ts.map +1 -1
- package/dist/tools/write-keyword-captions.js +35 -4
- package/dist/tools/write-keyword-captions.js.map +1 -1
- package/dist/ui/App.d.ts.map +1 -1
- package/dist/ui/App.js +75 -11
- package/dist/ui/App.js.map +1 -1
- package/dist/ui/tool-formatters.d.ts +30 -0
- package/dist/ui/tool-formatters.d.ts.map +1 -0
- package/dist/ui/tool-formatters.js +461 -0
- package/dist/ui/tool-formatters.js.map +1 -0
- package/dist/ui/tool-formatters.test.d.ts +2 -0
- package/dist/ui/tool-formatters.test.d.ts.map +1 -0
- package/dist/ui/tool-formatters.test.js +143 -0
- package/dist/ui/tool-formatters.test.js.map +1 -0
- package/package.json +10 -9
package/dist/skills.js
CHANGED
|
@@ -1,236 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Bundled skill markdowns.
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* Bundled skill markdowns. Auto-generated from src/skills/*.md by
|
|
3
|
+
* scripts/build-skills.mjs — DO NOT EDIT BY HAND. Add a new skill by
|
|
4
|
+
* dropping a .md file in src/skills/ (with optional YAML frontmatter)
|
|
5
|
+
* and re-running `node scripts/build-skills.mjs`.
|
|
6
6
|
*
|
|
7
|
-
* Skills are exposed through the read_skill tool; their descriptions live
|
|
8
|
-
* the system prompt. Pattern follows the Anthropic
|
|
7
|
+
* Skills are exposed through the read_skill tool; their descriptions live
|
|
8
|
+
* in the system prompt. Pattern follows the Anthropic skill convention:
|
|
9
9
|
* description in the prompt, full content on demand.
|
|
10
10
|
*/
|
|
11
|
-
const
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
where a person speaks for >5 minutes and the editorial work is take-selection,
|
|
15
|
-
filler removal, silence trimming, and pacing.
|
|
16
|
-
|
|
17
|
-
**Goal:** turn a raw recording into a tight, watchable cut without losing the
|
|
18
|
-
speaker's voice or the moments that matter. Captions are non-negotiable.
|
|
19
|
-
|
|
20
|
-
---
|
|
21
|
-
|
|
22
|
-
## The 5-pass method
|
|
23
|
-
|
|
24
|
-
These run in order. Each pass narrows the cut. Don't skip — passes 1–2 are
|
|
25
|
-
where 80% of the time savings live.
|
|
26
|
-
|
|
27
|
-
### Pass 1 — Utterance segmentation
|
|
28
|
-
|
|
29
|
-
\`\`\`
|
|
30
|
-
probe_media(input) → fps, duration
|
|
31
|
-
extract_audio(input, audio.wav, 16000)
|
|
32
|
-
transcribe(audio.wav, transcript.json) → segment-level transcript
|
|
33
|
-
\`\`\`
|
|
34
|
-
|
|
35
|
-
Now you have a segment list keyed by start/end seconds. Treat each segment as
|
|
36
|
-
the smallest editorial unit. Don't cut inside a segment unless the speaker
|
|
37
|
-
changes mid-segment.
|
|
38
|
-
|
|
39
|
-
### Pass 2 — Take detection
|
|
40
|
-
|
|
41
|
-
\`\`\`
|
|
42
|
-
cluster_takes(transcript.json) → groups of similar segments
|
|
43
|
-
\`\`\`
|
|
44
|
-
|
|
45
|
-
Multi-member clusters mean the speaker re-took a line. Pick the winner per
|
|
46
|
-
cluster:
|
|
47
|
-
|
|
48
|
-
- **Default to the last take** — speaker had practice.
|
|
49
|
-
- **Visual doubt** → \`score_shot(times=[mid of each member])\`, pick highest.
|
|
50
|
-
- **Audio doubt** → \`read_transcript(startSec=A, endSec=B)\` to inspect.
|
|
51
|
-
- Add a marker on each decision: \`add_marker(color="green", note="kept: take 3 of 3 — strongest delivery")\`.
|
|
52
|
-
|
|
53
|
-
### Pass 3 — Filler removal
|
|
54
|
-
|
|
55
|
-
For each kept segment, look for these and add cut markers:
|
|
56
|
-
|
|
57
|
-
- "um", "uh", "like" used as filler (not as comparison)
|
|
58
|
-
- restart phrases: "so the thing is — actually, the thing is…"
|
|
59
|
-
- mid-sentence aborts the speaker self-corrected past
|
|
60
|
-
|
|
61
|
-
Mark each one with \`add_marker(color="red", note="cut: filler 'um'")\`.
|
|
62
|
-
|
|
63
|
-
### Pass 4 — Incomplete-sentence trim
|
|
64
|
-
|
|
65
|
-
Drop segments that:
|
|
66
|
-
|
|
67
|
-
- Trail off with no point ("…and yeah, anyway")
|
|
68
|
-
- Start mid-thought because the previous take was kept
|
|
69
|
-
- Repeat content already covered in a kept take
|
|
70
|
-
|
|
71
|
-
\`add_marker(color="red", note="cut: incomplete; covered in earlier take")\`.
|
|
72
|
-
|
|
73
|
-
### Pass 5 — Silence normalization
|
|
74
|
-
|
|
75
|
-
\`\`\`
|
|
76
|
-
detect_silence(input) → frame-aligned KEEP ranges
|
|
77
|
-
\`\`\`
|
|
78
|
-
|
|
79
|
-
Use the KEEP ranges to remove dead air >1s. Don't kill all silence —
|
|
80
|
-
breathing space matters for pacing. The default threshold usually leaves
|
|
81
|
-
natural pauses intact.
|
|
82
|
-
|
|
83
|
-
---
|
|
84
|
-
|
|
85
|
-
## Final assembly
|
|
86
|
-
|
|
87
|
-
Combine pass-2 winners + pass-3/4 surviving segments into a single decision
|
|
88
|
-
list. Each entry is one EDL event.
|
|
89
|
-
|
|
90
|
-
\`\`\`
|
|
91
|
-
write_edl(events=decisions, frameRate=fps)
|
|
92
|
-
import_edl(path)
|
|
93
|
-
\`\`\`
|
|
94
|
-
|
|
95
|
-
Then captions:
|
|
96
|
-
|
|
97
|
-
\`\`\`
|
|
98
|
-
write_srt(cues=transcript.segments mapped to start/end/text)
|
|
99
|
-
import_subtitles(srtPath)
|
|
100
|
-
\`\`\`
|
|
101
|
-
|
|
102
|
-
For long-form: sidecar SRT (don't burn in) so YouTube/podcast players can
|
|
103
|
-
toggle them. Mention this to the user.
|
|
104
|
-
|
|
105
|
-
---
|
|
106
|
-
|
|
107
|
-
## Red flags — pause and ask
|
|
108
|
-
|
|
109
|
-
- Cluster has takes that are roughly equal quality — \`add_marker(color="red", note="PAUSE: which take? 1=A, 2=B")\` and stop.
|
|
110
|
-
- Segment is editorial-content-bearing but has bad audio — flag, don't drop.
|
|
111
|
-
- The user said "trim filler" but every "um" is intentional emphasis (rare but real) — confirm.
|
|
112
|
-
|
|
113
|
-
## Don't
|
|
114
|
-
|
|
115
|
-
- Don't render until the user reviews the markers.
|
|
116
|
-
- Don't read full transcript without \`startSec/endSec\` — context blow-up.
|
|
117
|
-
- Don't cut inside a segment unless the speaker changes mid-segment.
|
|
118
|
-
- Don't skip captions for long-form unless explicitly told to.
|
|
119
|
-
`;
|
|
120
|
-
const SHORT_FORM_CONTENT_EDIT = `# short-form-content-edit
|
|
121
|
-
|
|
122
|
-
**When to use:** TikTok / Reels / Shorts / vertical clips. Source is usually
|
|
123
|
-
a longer horizontal video the user wants reframed, captioned, hooked, and
|
|
124
|
-
shipped.
|
|
125
|
-
|
|
126
|
-
**Goal:** the first 2 seconds win or lose retention. The cut, the caption,
|
|
127
|
-
and the hook all serve that one number.
|
|
128
|
-
|
|
129
|
-
---
|
|
130
|
-
|
|
131
|
-
## Recipe
|
|
132
|
-
|
|
133
|
-
### 1. Find the moment
|
|
134
|
-
|
|
135
|
-
If the user gives you a horizontal video without timestamps, find the moment
|
|
136
|
-
worth clipping:
|
|
137
|
-
|
|
138
|
-
\`\`\`
|
|
139
|
-
probe_media(input)
|
|
140
|
-
extract_audio(input, audio.wav, 16000)
|
|
141
|
-
transcribe(audio.wav, transcript.json)
|
|
142
|
-
read_transcript(transcript.json, contains="<keyword from user>")
|
|
143
|
-
\`\`\`
|
|
144
|
-
|
|
145
|
-
Or for visual moments: \`score_shot(input, intervalSec=15)\` then inspect tops.
|
|
146
|
-
|
|
147
|
-
Settle on a \`[startSec, endSec]\` window. Aim for **15–60 seconds** for shorts;
|
|
148
|
-
90s max for Reels.
|
|
149
|
-
|
|
150
|
-
### 2. Reformat to vertical
|
|
151
|
-
|
|
152
|
-
Build the vertical timeline as FCPXML and import:
|
|
153
|
-
|
|
154
|
-
\`\`\`
|
|
155
|
-
reformat_timeline(
|
|
156
|
-
output="vertical.fcpxml",
|
|
157
|
-
preset="9:16",
|
|
158
|
-
title="<short name>",
|
|
159
|
-
frameRate=<source fps>,
|
|
160
|
-
events=[{ reel, sourcePath, sourceInFrame, sourceOutFrame }]
|
|
161
|
-
)
|
|
162
|
-
import_edl("vertical.fcpxml")
|
|
163
|
-
\`\`\`
|
|
164
|
-
|
|
165
|
-
Then on Resolve Studio, switch to color page and prompt the user to apply
|
|
166
|
-
Smart Reframe per clip:
|
|
167
|
-
|
|
168
|
-
\`\`\`
|
|
169
|
-
open_page("color")
|
|
170
|
-
add_marker(color="yellow", note="apply Smart Reframe per clip (Resolve Studio: right-click clip → Smart Reframe)")
|
|
171
|
-
\`\`\`
|
|
172
|
-
|
|
173
|
-
Premiere users: prompt for Auto Reframe via the captions/effects panel.
|
|
174
|
-
|
|
175
|
-
### 3. Hook the first 2 seconds
|
|
176
|
-
|
|
177
|
-
The hook lives in the first 60 frames. Options:
|
|
178
|
-
|
|
179
|
-
- **Cold-open the punchline** — start at the most attention-grabbing line,
|
|
180
|
-
not the setup. Use \`read_transcript\` to find it.
|
|
181
|
-
- **Speed-up the intro** — \`set_clip_speed(clipId, speed=1.5)\` on the opening clip.
|
|
182
|
-
- **Pre-roll text/marker** — \`add_marker(color="yellow", note="add hook text overlay: '<line from transcript>'")\` for the user to add.
|
|
183
|
-
|
|
184
|
-
### 4. Burned-in captions
|
|
185
|
-
|
|
186
|
-
Vertical = burned-in (most viewers watch muted, native captions are tiny).
|
|
187
|
-
|
|
188
|
-
\`\`\`
|
|
189
|
-
write_srt(cues=transcript.segments_in_window)
|
|
190
|
-
import_subtitles(srtPath)
|
|
191
|
-
add_marker(color="yellow", note="style captions: large, center-bottom, high-contrast — burn in via Resolve subtitle track styling")
|
|
192
|
-
\`\`\`
|
|
193
|
-
|
|
194
|
-
If the user is on Resolve Studio, they can right-click the subtitle track →
|
|
195
|
-
"Convert Subtitles to Text+" and style it. Note this to them.
|
|
196
|
-
|
|
197
|
-
### 5. Render
|
|
198
|
-
|
|
199
|
-
Don't render until the user reviews. When they say "render":
|
|
200
|
-
|
|
201
|
-
\`\`\`
|
|
202
|
-
render(preset=<host preset>, output="<name>.mp4")
|
|
203
|
-
\`\`\`
|
|
204
|
-
|
|
205
|
-
Common presets: H.264 Master, YouTube 1080p (works for Shorts too),
|
|
206
|
-
Vimeo 1080p.
|
|
207
|
-
|
|
11
|
+
const CHAPTER_MARKERS = `---
|
|
12
|
+
name: chapter-markers
|
|
13
|
+
description: Author YouTube/podcast chapter timestamps from a transcript: 5–15 chapters, first at 00:00, ≥30s apart, only at real topic shifts. Drops purple markers + emits a YouTube-formatted description block.
|
|
208
14
|
---
|
|
209
15
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
| Knob | Default | Why |
|
|
213
|
-
|---|---|---|
|
|
214
|
-
| Aspect | 9:16 (TikTok/Reels/Shorts) | Most platforms |
|
|
215
|
-
| Length | 15–60s | Algorithm sweet spot |
|
|
216
|
-
| Captions | burned-in | Watched muted |
|
|
217
|
-
| First 2s | the hook | Retention curve |
|
|
218
|
-
| Music | not added by you | Style decision; ask the user |
|
|
219
|
-
|
|
220
|
-
## Red flags — pause and ask
|
|
221
|
-
|
|
222
|
-
- User wants 9:16 but the source has critical wide-shot framing → \`add_marker(color="red", note="PAUSE: source is composed for 16:9. 9:16 will crop heads/sides. Confirm reframe vs. letterbox.")\`.
|
|
223
|
-
- Window selection is ambiguous → propose 2–3 candidates as red markers, stop.
|
|
224
|
-
- No clear hook in the chosen window → say so, suggest a different start.
|
|
225
|
-
|
|
226
|
-
## Don't
|
|
227
|
-
|
|
228
|
-
- Don't render until the user reviews markers.
|
|
229
|
-
- Don't burn captions before the user approves the SRT text.
|
|
230
|
-
- Don't pick a hook blindly — surface options.
|
|
231
|
-
- Don't leave silence >0.4s in the first 2 seconds.
|
|
232
|
-
`;
|
|
233
|
-
const CHAPTER_MARKERS = `# chapter-markers
|
|
16
|
+
# chapter-markers
|
|
234
17
|
|
|
235
18
|
**When to use:** YouTube videos, podcasts, courses, long-form interviews. The
|
|
236
19
|
user wants chapter timestamps the audience can jump to.
|
|
@@ -322,37 +105,167 @@ user verify before publishing.
|
|
|
322
105
|
- Don't burn chapters into video — markers + description block only.
|
|
323
106
|
- Don't render until the user reviews the chapters.
|
|
324
107
|
`;
|
|
325
|
-
const
|
|
108
|
+
const FUSION_LOWER_THIRD = `---
|
|
109
|
+
name: fusion-lower-third
|
|
110
|
+
description: Build a name/title chyron natively in DaVinci Resolve's Fusion via fusion_comp — Background + TextPlus + Merge node graph, wiring, styling, lower-third positioning, keyframed fade in/out. Resolve Studio only; cross-host fallback is write_lower_third + burn_subtitles.
|
|
111
|
+
---
|
|
326
112
|
|
|
327
|
-
|
|
328
|
-
zooms, add lower-thirds or title cards, build coordinated multi-track
|
|
329
|
-
B-roll compositions, or do speed ramps.
|
|
113
|
+
# fusion-lower-third
|
|
330
114
|
|
|
331
|
-
**
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
file-only ffmpeg passes for content-shape changes (speed ramps, mixing,
|
|
335
|
-
zoom-on-stills, transitions).
|
|
115
|
+
**When to use:** the user asks for a name/title chyron that should be
|
|
116
|
+
*editable inside the NLE* (not baked-in pixels), or wants a quick
|
|
117
|
+
title card built natively in DaVinci Resolve's Fusion page.
|
|
336
118
|
|
|
337
|
-
|
|
119
|
+
**Goal:** compose a Background + TextPlus + Merge graph in Fusion via
|
|
120
|
+
\`fusion_comp\`. Resolve only — Premiere has no Fusion equivalent; for
|
|
121
|
+
that, fall back to \`write_lower_third\` + \`burn_subtitles\`.
|
|
338
122
|
|
|
339
|
-
|
|
123
|
+
---
|
|
340
124
|
|
|
341
|
-
|
|
125
|
+
## When to pick which
|
|
342
126
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
127
|
+
- **fusion_comp** — Resolve, comp lives inside the project, user can
|
|
128
|
+
tweak it later. Best when the user is already on the Fusion page or
|
|
129
|
+
wants a chyron that travels with the project file.
|
|
130
|
+
- **write_lower_third + burn_subtitles** — works on any host, output
|
|
131
|
+
is a baked-in pixel layer. Faster to iterate from the agent side
|
|
132
|
+
but the user can no longer edit the text without re-running the
|
|
133
|
+
pipeline.
|
|
348
134
|
|
|
349
|
-
|
|
350
|
-
and \`import_timeline\`s it. Clips not listed in \`newOrder\` keep their
|
|
351
|
-
original relative order and append at the end.
|
|
135
|
+
If unsure, ask: "Resolve-native (editable) or baked-in?"
|
|
352
136
|
|
|
353
137
|
---
|
|
354
138
|
|
|
355
|
-
## Recipe
|
|
139
|
+
## Recipe — name + title lower-third on the active comp
|
|
140
|
+
|
|
141
|
+
Pre-flight: \`host_info\` must report \`name === "resolve"\`. If it doesn't,
|
|
142
|
+
stop and tell the user this skill is Resolve-only.
|
|
143
|
+
|
|
144
|
+
\`\`\`
|
|
145
|
+
host_info # confirm Resolve
|
|
146
|
+
|
|
147
|
+
# 1. Get to the Fusion page so the user can see the result.
|
|
148
|
+
open_page(name="fusion")
|
|
149
|
+
|
|
150
|
+
# 2. Build the graph.
|
|
151
|
+
fusion_comp(action="add_node", toolId="Background", name="LT_Strap")
|
|
152
|
+
fusion_comp(action="add_node", toolId="TextPlus", name="LT_Text")
|
|
153
|
+
fusion_comp(action="add_node", toolId="Merge", name="LT_Comp")
|
|
154
|
+
|
|
155
|
+
# 3. Wire it: strap as Background, text as Foreground.
|
|
156
|
+
fusion_comp(action="connect", fromNode="LT_Strap", toNode="LT_Comp",
|
|
157
|
+
toInput="Background")
|
|
158
|
+
fusion_comp(action="connect", fromNode="LT_Text", toNode="LT_Comp",
|
|
159
|
+
toInput="Foreground")
|
|
160
|
+
|
|
161
|
+
# 4. Set the text content + colour.
|
|
162
|
+
fusion_comp(action="set_input", node="LT_Text", input="StyledText",
|
|
163
|
+
value="<Name>\\n<Title>")
|
|
164
|
+
fusion_comp(action="set_input", node="LT_Text", input="Size", value=0.06)
|
|
165
|
+
fusion_comp(action="set_input", node="LT_Text", input="Color1Red", value=1)
|
|
166
|
+
fusion_comp(action="set_input", node="LT_Text", input="Color1Green", value=1)
|
|
167
|
+
fusion_comp(action="set_input", node="LT_Text", input="Color1Blue", value=1)
|
|
168
|
+
|
|
169
|
+
# 5. Style the strap: black at 85% opacity (the TopLeft* inputs set its colour, not its position).
|
|
170
|
+
fusion_comp(action="set_input", node="LT_Strap", input="TopLeftRed", value=0)
|
|
171
|
+
fusion_comp(action="set_input", node="LT_Strap", input="TopLeftGreen", value=0)
|
|
172
|
+
fusion_comp(action="set_input", node="LT_Strap", input="TopLeftBlue", value=0)
|
|
173
|
+
fusion_comp(action="set_input", node="LT_Strap", input="TopLeftAlpha", value=0.85)
|
|
174
|
+
\`\`\`
|
|
175
|
+
|
|
176
|
+
The Merge node is the comp's MediaOut by default; the user sees the
|
|
177
|
+
result on the active timeline clip immediately.
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Animating in / out
|
|
182
|
+
|
|
183
|
+
Use \`set_keyframe\` on the Merge's \`Blend\` input (overall opacity):
|
|
184
|
+
|
|
185
|
+
\`\`\`
|
|
186
|
+
fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
|
|
187
|
+
frame=0, value=0) # invisible at clip start
|
|
188
|
+
fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
|
|
189
|
+
frame=12, value=1) # fade in over 12f
|
|
190
|
+
fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
|
|
191
|
+
frame=72, value=1) # hold
|
|
192
|
+
fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
|
|
193
|
+
frame=84, value=0) # fade out
|
|
194
|
+
\`\`\`
|
|
195
|
+
|
|
196
|
+
Frames are relative to the comp's render range — set it explicitly if
|
|
197
|
+
the agent needs to control the in/out range:
|
|
198
|
+
|
|
199
|
+
\`\`\`
|
|
200
|
+
fusion_comp(action="set_render_range", start=0, end=120)
|
|
201
|
+
\`\`\`
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Targeting a specific clip's comp
|
|
206
|
+
|
|
207
|
+
Pass \`clipId\` to scope every action to that clip's first Fusion comp
|
|
208
|
+
(auto-created if the clip has none). Useful for batched lower-thirds
|
|
209
|
+
across multiple clips:
|
|
210
|
+
|
|
211
|
+
\`\`\`
|
|
212
|
+
get_timeline # discover clipIds
|
|
213
|
+
fusion_comp(action="add_node", toolId="TextPlus",
|
|
214
|
+
name="LT_Text", clipId="<clipId>")
|
|
215
|
+
\`\`\`
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Troubleshooting
|
|
220
|
+
|
|
221
|
+
- **\`Resolve.Fusion() unavailable\`** — Resolve build is too old or
|
|
222
|
+
user is on a free seat. Fusion is Studio-only at scriptable depth.
|
|
223
|
+
- **\`No active Fusion comp\`** — user hasn't switched to the Fusion
|
|
224
|
+
page on a clip with a comp. Either call \`open_page("fusion")\` first
|
|
225
|
+
on a known clip, or pass \`clipId\` so we operate on that clip's comp
|
|
226
|
+
directly.
|
|
227
|
+
- **\`AddTool('X') returned None\`** — \`toolId\` is wrong. The canonical
|
|
228
|
+
IDs the agent will hit: \`Background\`, \`TextPlus\`, \`Merge\`,
|
|
229
|
+
\`Transform\`, \`ColorCorrector\`, \`DeltaKeyer\`, \`Brightness\`, \`Glow\`,
|
|
230
|
+
\`Blur\`. There's no scriptable enumeration; check Fusion's docs if
|
|
231
|
+
the user names a tool not in this list.
|
|
232
|
+
`;
|
|
233
|
+
const KEYFRAMING_AND_TITLES = `---
|
|
234
|
+
name: keyframing-and-titles
|
|
235
|
+
description: Recipes for the seven scripting gaps neither Resolve nor Premiere expose: timeline reorder, multi-track lanes, lower-thirds + title cards (ASS), keyframed opacity/position/volume ramps, audio mixing chains (EQ + comp + gate + de-esser + limiter), speed ramps, Ken-Burns, named transitions (smash-cut, whip-pan, dip-to-black).
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
# keyframing-and-titles
|
|
239
|
+
|
|
240
|
+
**When to use:** the user asks to reorder clips, animate fades / pans /
|
|
241
|
+
zooms, add lower-thirds or title cards, build coordinated multi-track
|
|
242
|
+
B-roll compositions, or do speed ramps.
|
|
243
|
+
|
|
244
|
+
**Goal:** these are the seven gaps neither Resolve nor Premiere exposes
|
|
245
|
+
through their scripting APIs. The agent's path is FCPXML rebuild for
|
|
246
|
+
timeline-shape changes (reorder, lanes, keyframes, titles) and
|
|
247
|
+
file-only ffmpeg passes for content-shape changes (speed ramps, mixing,
|
|
248
|
+
zoom-on-stills, transitions).
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Recipe 1 — Reorder clips on the timeline
|
|
253
|
+
|
|
254
|
+
The user says "move clip 5 to the start" or "swap clips 2 and 3".
|
|
255
|
+
|
|
256
|
+
\`\`\`
|
|
257
|
+
get_timeline # discover clipIds in order
|
|
258
|
+
clone_timeline(newName="<original>-v2") # safety net
|
|
259
|
+
reorder_timeline(newOrder=["c5","c1","c2","c3","c4"])
|
|
260
|
+
\`\`\`
|
|
261
|
+
|
|
262
|
+
\`reorder_timeline\` reads the current timeline, emits a permuted FCPXML,
|
|
263
|
+
and \`import_timeline\`s it. Clips not listed in \`newOrder\` keep their
|
|
264
|
+
original relative order and append at the end.
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## Recipe 2 — Multi-track B-roll composition
|
|
356
269
|
|
|
357
270
|
The user wants several B-roll cutaways stacked above the main A-roll
|
|
358
271
|
with per-clip opacity and timing.
|
|
@@ -496,123 +409,360 @@ For raw xfade names beyond the preset list, use \`crossfade_videos\`.
|
|
|
496
409
|
- Don't keyframe opacity / position on Premiere via UXP — it's not
|
|
497
410
|
exposed; emit FCPXML with the keyframes baked in instead.
|
|
498
411
|
`;
|
|
499
|
-
const
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
one shot, cooler / paler in the next. Different camera, different
|
|
503
|
-
location, different white-balance setting, sun behind a cloud. The user
|
|
504
|
-
wants the faces to match.
|
|
505
|
-
|
|
506
|
-
**Goal:** bring the target clip's skin tones toward a reference clip
|
|
507
|
-
without re-grading the whole frame. Skin lives in the reds and yellows;
|
|
508
|
-
that's where we operate.
|
|
509
|
-
|
|
412
|
+
const LONG_FORM_CONTENT_EDIT = `---
|
|
413
|
+
name: long-form-content-edit
|
|
414
|
+
description: Recipe for podcasts, interviews, vlogs, courses, talking-head. Five-pass method: utterance segmentation → take detection → filler removal → incomplete-sentence trim → silence normalization. Wires transcribe, cluster_takes, detect_silence, write_edl, import_edl, write_srt, add_marker into a single workflow.
|
|
510
415
|
---
|
|
511
416
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
| Path | When | Where the grade lives |
|
|
515
|
-
|---|---|---|
|
|
516
|
-
| \`grade_skin_tones\` | works on every host (Resolve, Premiere, no-NLE) | baked into a new file |
|
|
517
|
-
| \`match_clip_color\` | Resolve only | non-destructive, in the clip's grade node |
|
|
417
|
+
# long-form-content-edit
|
|
518
418
|
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
419
|
+
**When to use:** podcasts, interviews, vlogs, courses, talking-head — anything
|
|
420
|
+
where a person speaks for >5 minutes and the editorial work is take-selection,
|
|
421
|
+
filler removal, silence trimming, and pacing.
|
|
522
422
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
\`set_primary_correction\`, so the colorist can adjust after.
|
|
423
|
+
**Goal:** turn a raw recording into a tight, watchable cut without losing the
|
|
424
|
+
speaker's voice or the moments that matter. Captions are non-negotiable.
|
|
526
425
|
|
|
527
426
|
---
|
|
528
427
|
|
|
529
|
-
##
|
|
530
|
-
|
|
531
|
-
### 1. Pick the frames (most important step)
|
|
428
|
+
## The 5-pass method
|
|
532
429
|
|
|
533
|
-
|
|
534
|
-
the
|
|
430
|
+
These run in order. Each pass narrows the cut. Don't skip — passes 1–2 are
|
|
431
|
+
where 80% of the time savings live.
|
|
535
432
|
|
|
536
|
-
|
|
537
|
-
the room).
|
|
538
|
-
- The lighting on the face must be representative (not the one frame
|
|
539
|
-
where they walked through a shadow).
|
|
540
|
-
- Eyes open, mouth not in a weird shape, no motion blur.
|
|
433
|
+
### Pass 1 — Utterance segmentation
|
|
541
434
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
435
|
+
\`\`\`
|
|
436
|
+
probe_media(input) → fps, duration
|
|
437
|
+
extract_audio(input, audio.wav, 16000)
|
|
438
|
+
transcribe(audio.wav, transcript.json) → segment-level transcript
|
|
439
|
+
\`\`\`
|
|
545
440
|
|
|
546
|
-
|
|
441
|
+
Now you have a segment list keyed by start/end seconds. Treat each segment as
|
|
442
|
+
the smallest editorial unit. Don't cut inside a segment unless the speaker
|
|
443
|
+
changes mid-segment.
|
|
547
444
|
|
|
548
|
-
|
|
445
|
+
### Pass 2 — Take detection
|
|
549
446
|
|
|
550
447
|
\`\`\`
|
|
551
|
-
|
|
552
|
-
referenceVideo="<ref.mp4>",
|
|
553
|
-
referenceAtSec=<face-forward time>,
|
|
554
|
-
targetVideo="<tgt.mp4>",
|
|
555
|
-
targetAtSec=<face-forward time>,
|
|
556
|
-
output="<tgt-graded.mp4>"
|
|
557
|
-
)
|
|
448
|
+
cluster_takes(transcript.json) → groups of similar segments
|
|
558
449
|
\`\`\`
|
|
559
450
|
|
|
560
|
-
|
|
451
|
+
Multi-member clusters mean the speaker re-took a line. Pick the winner per
|
|
452
|
+
cluster:
|
|
561
453
|
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
454
|
+
- **Default to the last take** — speaker had practice.
|
|
455
|
+
- **Visual doubt** → \`score_shot(times=[mid of each member])\`, pick highest.
|
|
456
|
+
- **Audio doubt** → \`read_transcript(startSec=A, endSec=B)\` to inspect.
|
|
457
|
+
- Add a marker on each decision: \`add_marker(color="green", note="kept: take 3 of 3 — strongest delivery")\`.
|
|
566
458
|
|
|
567
|
-
|
|
459
|
+
### Pass 3 — Filler removal
|
|
568
460
|
|
|
569
|
-
|
|
570
|
-
match_clip_color(
|
|
571
|
-
referenceVideo="<ref.mp4>",
|
|
572
|
-
referenceAtSec=<face-forward time>,
|
|
573
|
-
targetClipId="<target clip id>",
|
|
574
|
-
targetAtSec=<face-forward time>,
|
|
575
|
-
applyAutomatically=true
|
|
576
|
-
)
|
|
577
|
-
\`\`\`
|
|
461
|
+
For each kept segment, look for these and add cut markers:
|
|
578
462
|
|
|
579
|
-
|
|
580
|
-
|
|
463
|
+
- "um", "uh", "like" used as filler (not as comparison)
|
|
464
|
+
- restart phrases: "so the thing is — actually, the thing is…"
|
|
465
|
+
- mid-sentence aborts the speaker self-corrected past
|
|
581
466
|
|
|
582
|
-
|
|
467
|
+
Mark each one with \`add_marker(color="red", note="cut: filler 'um'")\`.
|
|
583
468
|
|
|
584
|
-
|
|
469
|
+
### Pass 4 — Incomplete-sentence trim
|
|
585
470
|
|
|
586
|
-
|
|
587
|
-
- \`0.4 ≤ confidence < 0.7\` — apply but flag for review:
|
|
588
|
-
\`add_marker(color="yellow", note="skin grade: review — confidence <X>")\`.
|
|
589
|
-
- \`confidence < 0.4\` — DO NOT apply. The model is guessing. Tell the
|
|
590
|
-
user what you saw, suggest they grade the shot manually or pick a
|
|
591
|
-
better reference frame.
|
|
471
|
+
Drop segments that:
|
|
592
472
|
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
because the agent asked for an output path — but you can re-run with a
|
|
597
|
-
better reference frame if confidence was low.
|
|
473
|
+
- Trail off with no point ("…and yeah, anyway")
|
|
474
|
+
- Start mid-thought because the previous take was kept
|
|
475
|
+
- Repeat content already covered in a kept take
|
|
598
476
|
|
|
599
|
-
|
|
477
|
+
\`add_marker(color="red", note="cut: incomplete; covered in earlier take")\`.
|
|
600
478
|
|
|
601
|
-
|
|
479
|
+
### Pass 5 — Silence normalization
|
|
602
480
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
481
|
+
\`\`\`
|
|
482
|
+
detect_silence(input) → frame-aligned KEEP ranges
|
|
483
|
+
\`\`\`
|
|
484
|
+
|
|
485
|
+
Use the KEEP ranges to remove dead air >1s. Don't kill all silence —
|
|
486
|
+
breathing space matters for pacing. The default threshold usually leaves
|
|
487
|
+
natural pauses intact.
|
|
609
488
|
|
|
610
489
|
---
|
|
611
490
|
|
|
612
|
-
##
|
|
491
|
+
## Final assembly
|
|
613
492
|
|
|
614
|
-
-
|
|
615
|
-
|
|
493
|
+
Combine pass-2 winners + pass-3/4 surviving segments into a single decision
|
|
494
|
+
list. Each entry is one EDL event.
|
|
495
|
+
|
|
496
|
+
\`\`\`
|
|
497
|
+
write_edl(events=decisions, frameRate=fps)
|
|
498
|
+
import_edl(path)
|
|
499
|
+
\`\`\`
|
|
500
|
+
|
|
501
|
+
Then captions:
|
|
502
|
+
|
|
503
|
+
\`\`\`
|
|
504
|
+
write_srt(cues=transcript.segments mapped to start/end/text)
|
|
505
|
+
import_subtitles(srtPath)
|
|
506
|
+
\`\`\`
|
|
507
|
+
|
|
508
|
+
For long-form: sidecar SRT (don't burn in) so YouTube/podcast players can
|
|
509
|
+
toggle the captions on and off. Mention this to the user.
|
|
510
|
+
|
|
511
|
+
---
|
|
512
|
+
|
|
513
|
+
## Red flags — pause and ask
|
|
514
|
+
|
|
515
|
+
- Cluster has takes of roughly equal quality — \`add_marker(color="red", note="PAUSE: which take? 1=A, 2=B")\` and stop.
|
|
516
|
+
- Segment is editorial-content-bearing but has bad audio — flag, don't drop.
|
|
517
|
+
- The user said "trim filler" but every "um" is intentional emphasis (rare but real) — confirm.
|
|
518
|
+
|
|
519
|
+
## Don't
|
|
520
|
+
|
|
521
|
+
- Don't render until the user reviews the markers.
|
|
522
|
+
- Don't read full transcript without \`startSec/endSec\` — context blow-up.
|
|
523
|
+
- Don't cut inside a segment unless the speaker changes mid-segment.
|
|
524
|
+
- Don't skip captions for long-form unless explicitly told to.
|
|
525
|
+
`;
|
|
526
|
+
const SHORT_FORM_CONTENT_EDIT = `---
|
|
527
|
+
name: short-form-content-edit
|
|
528
|
+
description: Recipe for TikTok / Reels / Shorts. Find the moment → reformat 9:16 → hook the first 2 seconds → burn captions → render. Uses reformat_timeline, import_edl, set_clip_speed, write_srt, import_subtitles, open_page (Resolve).
|
|
529
|
+
---
|
|
530
|
+
|
|
531
|
+
# short-form-content-edit
|
|
532
|
+
|
|
533
|
+
**When to use:** TikTok / Reels / Shorts / vertical clips. Source is usually
|
|
534
|
+
a longer horizontal video the user wants reframed, captioned, hooked, and
|
|
535
|
+
shipped.
|
|
536
|
+
|
|
537
|
+
**Goal:** the first 2 seconds win or lose retention. The cut, the caption,
|
|
538
|
+
and the hook all serve that one number.
|
|
539
|
+
|
|
540
|
+
---
|
|
541
|
+
|
|
542
|
+
## Recipe
|
|
543
|
+
|
|
544
|
+
### 1. Find the moment
|
|
545
|
+
|
|
546
|
+
If the user gives you a horizontal video without timestamps, find the moment
|
|
547
|
+
worth clipping:
|
|
548
|
+
|
|
549
|
+
\`\`\`
|
|
550
|
+
probe_media(input)
|
|
551
|
+
extract_audio(input, audio.wav, 16000)
|
|
552
|
+
transcribe(audio.wav, transcript.json)
|
|
553
|
+
read_transcript(transcript.json, contains="<keyword from user>")
|
|
554
|
+
\`\`\`
|
|
555
|
+
|
|
556
|
+
Or for visual moments: \`score_shot(input, intervalSec=15)\` then inspect tops.
|
|
557
|
+
|
|
558
|
+
Settle on a \`[startSec, endSec]\` window. Aim for **15–60 seconds** for shorts;
|
|
559
|
+
90s max for Reels.
|
|
560
|
+
|
|
561
|
+
### 2. Reformat to vertical
|
|
562
|
+
|
|
563
|
+
Build the vertical timeline as FCPXML and import:
|
|
564
|
+
|
|
565
|
+
\`\`\`
|
|
566
|
+
reformat_timeline(
|
|
567
|
+
output="vertical.fcpxml",
|
|
568
|
+
preset="9:16",
|
|
569
|
+
title="<short name>",
|
|
570
|
+
frameRate=<source fps>,
|
|
571
|
+
events=[{ reel, sourcePath, sourceInFrame, sourceOutFrame }]
|
|
572
|
+
)
|
|
573
|
+
import_edl("vertical.fcpxml")
|
|
574
|
+
\`\`\`
|
|
575
|
+
|
|
576
|
+
Then on Resolve Studio, switch to color page and prompt the user to apply
|
|
577
|
+
Smart Reframe per clip:
|
|
578
|
+
|
|
579
|
+
\`\`\`
|
|
580
|
+
open_page("color")
|
|
581
|
+
add_marker(color="yellow", note="apply Smart Reframe per clip (Resolve Studio: right-click clip → Smart Reframe)")
|
|
582
|
+
\`\`\`
|
|
583
|
+
|
|
584
|
+
Premiere users: prompt for Auto Reframe via the captions/effects panel.
|
|
585
|
+
|
|
586
|
+
### 3. Hook the first 2 seconds
|
|
587
|
+
|
|
588
|
+
The hook lives in the first 2 seconds (about 60 frames at 30 fps). Options:
|
|
589
|
+
|
|
590
|
+
- **Cold-open the punchline** — start at the most attention-grabbing line,
|
|
591
|
+
not the setup. Use \`read_transcript\` to find it.
|
|
592
|
+
- **Speed-up the intro** — \`set_clip_speed(clipId, speed=1.5)\` on the opening clip.
|
|
593
|
+
- **Pre-roll text/marker** — \`add_marker(color="yellow", note="add hook text overlay: '<line from transcript>'")\` for the user to add.
|
|
594
|
+
|
|
595
|
+
### 4. Burned-in captions
|
|
596
|
+
|
|
597
|
+
Vertical = burned-in (most viewers watch muted, native captions are tiny).
|
|
598
|
+
|
|
599
|
+
\`\`\`
|
|
600
|
+
write_srt(cues=transcript.segments_in_window)
|
|
601
|
+
import_subtitles(srtPath)
|
|
602
|
+
add_marker(color="yellow", note="style captions: large, center-bottom, high-contrast — burn in via Resolve subtitle track styling")
|
|
603
|
+
\`\`\`
|
|
604
|
+
|
|
605
|
+
If the user is on Resolve Studio, they can right-click the subtitle track →
|
|
606
|
+
"Convert Subtitles to Text+" and style it. Note this to them.
|
|
607
|
+
|
|
608
|
+
### 5. Render
|
|
609
|
+
|
|
610
|
+
Don't render until the user reviews. When they say "render":
|
|
611
|
+
|
|
612
|
+
\`\`\`
|
|
613
|
+
render(preset=<host preset>, output="<name>.mp4")
|
|
614
|
+
\`\`\`
|
|
615
|
+
|
|
616
|
+
Common presets: H.264 Master, YouTube 1080p (works for Shorts too),
|
|
617
|
+
Vimeo 1080p.
|
|
618
|
+
|
|
619
|
+
---
|
|
620
|
+
|
|
621
|
+
## Defaults for short-form
|
|
622
|
+
|
|
623
|
+
| Knob | Default | Why |
|
|
624
|
+
|---|---|---|
|
|
625
|
+
| Aspect | 9:16 (TikTok/Reels/Shorts) | Most platforms |
|
|
626
|
+
| Length | 15–60s | Algorithm sweet spot |
|
|
627
|
+
| Captions | burned-in | Watched muted |
|
|
628
|
+
| First 2s | the hook | Retention curve |
|
|
629
|
+
| Music | not added by you | Style decision; ask the user |
|
|
630
|
+
|
|
631
|
+
## Red flags — pause and ask
|
|
632
|
+
|
|
633
|
+
- User wants 9:16 but the source has critical wide-shot framing → \`add_marker(color="red", note="PAUSE: source is composed for 16:9. 9:16 will crop heads/sides. Confirm reframe vs. letterbox.")\`.
|
|
634
|
+
- Window selection is ambiguous → propose 2–3 candidates as red markers, stop.
|
|
635
|
+
- No clear hook in the chosen window → say so, suggest a different start.
|
|
636
|
+
|
|
637
|
+
## Don't
|
|
638
|
+
|
|
639
|
+
- Don't render until the user reviews markers.
|
|
640
|
+
- Don't burn captions before the user approves the SRT text.
|
|
641
|
+
- Don't pick a hook blindly — surface options.
|
|
642
|
+
- Don't leave silence >0.4s in the first 2 seconds.
|
|
643
|
+
`;
|
|
644
|
+
const SKIN_TONE_MATCHING = `---
|
|
645
|
+
name: skin-tone-matching
|
|
646
|
+
description: Match faces across clips when host scripting can't reach power windows or qualifiers. Two paths: grade_skin_tones (file-only — bakes a vision-derived colorbalance + selectivecolor + eq into a new mp4, pair with replace_clip) and match_clip_color (Resolve only — derives the same grade as a CDL via set_primary_correction).
|
|
647
|
+
---
|
|
648
|
+
|
|
649
|
+
# skin-tone-matching
|
|
650
|
+
|
|
651
|
+
**When to use:** the host's face looks different across clips — warmer in
|
|
652
|
+
one shot, cooler / paler in the next. Different camera, different
|
|
653
|
+
location, different white-balance setting, sun behind a cloud. The user
|
|
654
|
+
wants the faces to match.
|
|
655
|
+
|
|
656
|
+
**Goal:** bring the target clip's skin tones toward a reference clip
|
|
657
|
+
without re-grading the whole frame. Skin lives in the reds and yellows;
|
|
658
|
+
that's where we operate.
|
|
659
|
+
|
|
660
|
+
---
|
|
661
|
+
|
|
662
|
+
## Two paths
|
|
663
|
+
|
|
664
|
+
| Path | When | Where the grade lives |
|
|
665
|
+
|---|---|---|
|
|
666
|
+
| \`grade_skin_tones\` | works on every host (Resolve, Premiere, no-NLE) | baked into a new file |
|
|
667
|
+
| \`match_clip_color\` | Resolve only | non-destructive, in the clip's grade node |
|
|
668
|
+
|
|
669
|
+
Pick **\`grade_skin_tones\`** when the user is on Premiere, when there's no
|
|
670
|
+
NLE, or when they want a finished file they can drop anywhere. Pair with
|
|
671
|
+
\`replace_clip\` to swap it onto the timeline.
|
|
672
|
+
|
|
673
|
+
Pick **\`match_clip_color\`** when the user is on Resolve and wants to keep
|
|
674
|
+
the grade tweakable. The tool pipes the CDL through
|
|
675
|
+
\`set_primary_correction\`, so the colorist can adjust after.
|
|
676
|
+
|
|
677
|
+
---
|
|
678
|
+
|
|
679
|
+
## Recipe
|
|
680
|
+
|
|
681
|
+
### 1. Pick the frames (most important step)
|
|
682
|
+
|
|
683
|
+
Vision is only as good as what you show it. For BOTH the reference and
|
|
684
|
+
the target:
|
|
685
|
+
|
|
686
|
+
- The face must be visible and large enough (not a wide shot from across
|
|
687
|
+
the room).
|
|
688
|
+
- The lighting on the face must be representative (not the one frame
|
|
689
|
+
where they walked through a shadow).
|
|
690
|
+
- Eyes open, mouth not in a weird shape, no motion blur.
|
|
691
|
+
|
|
692
|
+
Use \`score_shot(input, intervalSec=15)\` or \`extract_frame\` to find good
|
|
693
|
+
candidates. If the user already pointed at a moment ("match shot 3 to
|
|
694
|
+
shot 1") use those timestamps directly.
|
|
695
|
+
|
|
696
|
+
### 2. Run the grade
|
|
697
|
+
|
|
698
|
+
**File-only path (works in every host):**
|
|
699
|
+
|
|
700
|
+
\`\`\`
|
|
701
|
+
grade_skin_tones(
|
|
702
|
+
referenceVideo="<ref.mp4>",
|
|
703
|
+
referenceAtSec=<face-forward time>,
|
|
704
|
+
targetVideo="<tgt.mp4>",
|
|
705
|
+
targetAtSec=<face-forward time>,
|
|
706
|
+
output="<tgt-graded.mp4>"
|
|
707
|
+
)
|
|
708
|
+
\`\`\`
|
|
709
|
+
|
|
710
|
+
Returns \`{path, confidence, why, grade}\`. Then:
|
|
711
|
+
|
|
712
|
+
\`\`\`
|
|
713
|
+
replace_clip(clipId="<target clip id>", mediaPath="<tgt-graded.mp4>")
|
|
714
|
+
add_marker(color="yellow", note="skin grade: <why>")
|
|
715
|
+
\`\`\`
|
|
716
|
+
|
|
717
|
+
**Resolve non-baked path:**
|
|
718
|
+
|
|
719
|
+
\`\`\`
|
|
720
|
+
match_clip_color(
|
|
721
|
+
referenceVideo="<ref.mp4>",
|
|
722
|
+
referenceAtSec=<face-forward time>,
|
|
723
|
+
targetClipId="<target clip id>",
|
|
724
|
+
targetAtSec=<face-forward time>,
|
|
725
|
+
applyAutomatically=true
|
|
726
|
+
)
|
|
727
|
+
\`\`\`
|
|
728
|
+
|
|
729
|
+
Returns \`{applied, confidence, why, grade}\`. The CDL goes into node 1
|
|
730
|
+
(or \`nodeIndex=N\` if you want a specific node).
|
|
731
|
+
|
|
732
|
+
### 3. Check confidence
|
|
733
|
+
|
|
734
|
+
The model's confidence is the most important field. Always inspect it:
|
|
735
|
+
|
|
736
|
+
- \`confidence ≥ 0.7\` — apply. Trust the result.
|
|
737
|
+
- \`0.4 ≤ confidence < 0.7\` — apply but flag for review:
|
|
738
|
+
\`add_marker(color="yellow", note="skin grade: review — confidence <X>")\`.
|
|
739
|
+
- \`confidence < 0.4\` — DO NOT apply. The model is guessing. Tell the
|
|
740
|
+
user what you saw, suggest they grade the shot manually or pick a
|
|
741
|
+
better reference frame.
|
|
742
|
+
|
|
743
|
+
\`match_clip_color\` enforces this: with \`applyAutomatically=true\`,
|
|
744
|
+
confidence < 0.4 returns \`{applied: false}\` and the grade is surfaced
|
|
745
|
+
without writing to the node. \`grade_skin_tones\` always bakes the file
|
|
746
|
+
because the agent asked for an output path — but you can re-run with a
|
|
747
|
+
better reference frame if confidence was low.
|
|
748
|
+
|
|
749
|
+
---
|
|
750
|
+
|
|
751
|
+
## Defaults
|
|
752
|
+
|
|
753
|
+
| Knob | Default | Why |
|
|
754
|
+
|---|---|---|
|
|
755
|
+
| Vision detail | \`low\` | cheap; skin balance doesn't need pixel-peeping |
|
|
756
|
+
| Vision model | \`gpt-4o-mini\` | well-calibrated for color comparisons |
|
|
757
|
+
| Output codec | \`libx264 crf=18\` | visually lossless |
|
|
758
|
+
| Reference frame width | 768px | enough for skin-tone discrimination |
|
|
759
|
+
|
|
760
|
+
---
|
|
761
|
+
|
|
762
|
+
## What this is NOT
|
|
763
|
+
|
|
764
|
+
- NOT a deterministic ColorChecker match. There's no chart, no
|
|
765
|
+
colorimetry — it's a vision pass.
|
|
616
766
|
- NOT a substitute for a colorist. Power windows / qualifiers / curves
|
|
617
767
|
are out of scope. If skin needs to be isolated from a colored
|
|
618
768
|
background, surface that and stop.
|
|
@@ -621,152 +771,978 @@ better reference frame if confidence was low.
|
|
|
621
771
|
|
|
622
772
|
---
|
|
623
773
|
|
|
624
|
-
## Red flags — pause and ask
|
|
774
|
+
## Red flags — pause and ask
|
|
775
|
+
|
|
776
|
+
- Reference and target are filmed under fundamentally different
|
|
777
|
+
lighting (tungsten vs daylight) → confidence will be low. Tell the
|
|
778
|
+
user and suggest a less aggressive match (or LUT-based correction
|
|
779
|
+
first).
|
|
780
|
+
- Target shot has multiple people with different skin tones → the
|
|
781
|
+
vision model averages. Pick the primary face's frame and warn the
|
|
782
|
+
user the secondary face may shift.
|
|
783
|
+
- User wants pixel-perfect match across 50 clips → run on a hero pair,
|
|
784
|
+
then \`copy_grade(sourceClipId=hero, targetClipIds=[...])\` instead of
|
|
785
|
+
re-running vision on every clip.
|
|
786
|
+
|
|
787
|
+
## Don't
|
|
788
|
+
|
|
789
|
+
- Don't pick a target frame where the face is in shadow or motion blur.
|
|
790
|
+
- Don't apply low-confidence grades silently.
|
|
791
|
+
- Don't run on top of an existing aggressive grade — clean state first
|
|
792
|
+
or expect compounding shifts.
|
|
793
|
+
- Don't bake \`grade_skin_tones\` over the original target file. Always
|
|
794
|
+
write to a new path.
|
|
795
|
+
`;
|
|
796
|
+
const VIRAL_HOOK_PATTERNS = `---
|
|
797
|
+
name: viral-hook-patterns
|
|
798
|
+
description: Hook patterns sourced from primary creators (Jenny Hoyos on the official YouTube Blog, the leaked MrBeast production manual, Paddy Galloway's data analyses) — not generic creator-folklore. Read when analyze_hook fails, when picking a find_viral_moments candidate, or when generate_youtube_metadata needs a punchier title. Each pattern names a real creator example, the primary source, and the failure mode.
|
|
799
|
+
---
|
|
800
|
+
|
|
801
|
+
# viral-hook-patterns
|
|
802
|
+
|
|
803
|
+
**When to use:** the user wants a stronger opener, a Short's hook scored < 60 in \`analyze_hook\`, or \`generate_youtube_metadata\` needs to phrase a title around a proven structure. Also useful when picking which \`find_viral_moments\` candidate to ship — the candidate's \`hookLine\` should map to one of these patterns; if it doesn't, the hook is probably weak.
|
|
804
|
+
|
|
805
|
+
**Sources used.** Most patterns below reference **at least one named creator example AND a primary source** — the leaked MrBeast production manual (authenticated by 2 former producers per Passionfruit's August 2024 reporting), Jenny Hoyos's interview on YouTube's own blog (Jan 28 2025), the My First Million ep. 580 with Hoyos (May 3 2024), Paddy Galloway's LinkedIn / X analyses, and the YouTube Creator Liaison's official commentary. Patterns 9 and 10 rest on a named creator example from a secondary source (vidIQ) and are tagged **[secondary, named example]**. Patterns without at least that level of provenance were dropped.
|
|
806
|
+
|
|
807
|
+
---
|
|
808
|
+
|
|
809
|
+
## How a hook is judged in 2025
|
|
810
|
+
|
|
811
|
+
The retention bar:
|
|
812
|
+
|
|
813
|
+
- **Shorts:** **[primary]** Jenny Hoyos on YouTube's blog (Jan 28 2025, https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/): *"I really do think you have one second to hook someone, especially on Shorts."*
|
|
814
|
+
- **Shorts continued:** **[primary]** Paddy Galloway's analysis of 3.3 billion Shorts views (Rattibha-archived X thread, 2023): the best-performing Shorts keep **70–90%** of viewers watching rather than swiping away. Below 70% view-vs-swipe = burial.
|
|
815
|
+
- **Long-form:** **[primary]** YouTube's Senior Director of Growth, Todd Beaupré, via Stan Ventures (Sept 5 2024): *"the importance of the first 30 seconds of a video, the role of thumbnails, and engaging intros in capturing the audience's attention."* Marketing Agent's recap of Feb 2025 Creator Insider: *"Establish value within 7 seconds."*
|
|
816
|
+
- **Mid-video:** **[primary]** MrBeast leaked production manual: re-engagement checkpoints at the **3-minute** and **6-minute** marks for long-form (per Cybernews Sept 16 2024).
|
|
817
|
+
|
|
818
|
+
**The retention data backdrop.** Retention Rabbit's 2025 benchmark study (75+ niches, Q1 2024 – Q1 2025): the average video retains 23.7% of viewers; only 1 in 6 surpasses 50%; 55% of viewers leave within the first minute. A working hook isn't optional — it's the difference between distribution and burial.
|
|
819
|
+
|
|
820
|
+
---
|
|
821
|
+
|
|
822
|
+
## The 12 patterns
|
|
823
|
+
|
|
824
|
+
### 1. Click-to-unpause packaging (Paddy Galloway)
|
|
825
|
+
|
|
826
|
+
**Structure:** Thumbnail captures a mid-action moment that the brain wants to resolve by clicking.
|
|
827
|
+
|
|
828
|
+
**Source.** **[primary]** Paddy Galloway, LinkedIn post March 2026 analysing four viral thumbnails (a MrBeast piece, an old man mid-conversation, two others): *"They each use a simple (yet powerful) packaging technique. Click to unpause. All four of these videos create a scene that you have to 'click' to 'unpause' and see for yourself. The thumbnail and title create an open loop in the brain we want to close."*
|
|
829
|
+
|
|
830
|
+
**Worked example (Galloway's own).** *"Imagine the opposite — MrBeast standing beside the steps pointing vs actually partaking. The old man smiling and posing for the camera instead of being mid-conversation. Dead in the water."*
|
|
831
|
+
|
|
832
|
+
**Failure mode:** posed shot, neutral expression, completed action. The loop is closed before the click. Eyes-at-camera-while-smiling is the universal signal of "nothing is about to happen."
|
|
833
|
+
|
|
834
|
+
### 2. Shock → Intrigue → Satisfy (Jenny Hoyos's three-beat)
|
|
835
|
+
|
|
836
|
+
**Structure:** Three distinct beats compressed into the first second of a Short. Shock = a visual/audio interrupt. Intrigue = a one-line setup that withholds the answer. Satisfy = the implied promise the rest of the Short will deliver.
|
|
837
|
+
|
|
838
|
+
**Source.** **[primary]** YouTube's own blog (Jan 28 2025): *"Jenny emphasises the critical importance of hooking viewers within the first second of a Short, using a three-step formula of shock, intrigue, and satisfy."*
|
|
839
|
+
|
|
840
|
+
**Worked example.** Hoyos's $1 chicken sandwich vs Chick-fil-A Short — opens with the punchier visual hit (shock), poses the value question (intrigue), promises the comparison (satisfy). Marketing Examined's breakdown of her playbook (May 16 2024): she would "even change the idea of the entire video for a strong hook."
|
|
841
|
+
|
|
842
|
+
**Failure mode.** Hook is too abstract or builds slowly. Her test: *"a good hook should be so clear that viewers understand the video even on mute."*
|
|
843
|
+
|
|
844
|
+
### 3. Foreshadow the ending (Hoyos)
|
|
845
|
+
|
|
846
|
+
**Structure:** Open on a moment from the END of the video, rewind, withhold the payoff until the end.
|
|
847
|
+
|
|
848
|
+
**Source.** **[primary]** Hoyos via vidIQ blog (Dec 2023, https://vidiq.com/blog/post/how-jenny-hoyos-gets-10m-views-per-youtube-short/): *"I started a video by giving my grandma a $5 Christmas present and showing her reaction… you don't see what the gift is until the end of the video."*
|
|
849
|
+
|
|
850
|
+
**Worked example.** Her $5 Christmas-gift Short — opens on grandma's reaction shot, hides the actual gift, makes viewers stay to find out.
|
|
851
|
+
|
|
852
|
+
**Failure mode.** Foreshadowing something the ending can't visually pay off. Average percentage viewed (AVP) collapses around the reveal point.
|
|
853
|
+
|
|
854
|
+
### 4. "But / So" escalation (Hoyos)
|
|
855
|
+
|
|
856
|
+
**Structure:** Every story beat connected by a \`but\` or \`so\`, not \`and then\`. Each \`but\` raises stakes; each \`so\` makes a consequence visible.
|
|
857
|
+
|
|
858
|
+
**Source.** **[primary]** Hoyos via vidIQ: *"You can bring this to life by using the words 'but' and 'so'… 'But the dog whined softly, so I followed him for a few miles. He led me to a dark tunnel, so I backed away in fear. But I saw a bunch of abandoned puppies at the rim of the opening.' Each 'but' stops the viewer from scrolling away as conflict rises."*
|
|
859
|
+
|
|
860
|
+
**Failure mode.** Plot progression via \`and then… and then…\` produces flat retention curves and reads as low-satisfaction.
|
|
861
|
+
|
|
862
|
+
### 5. Power-word opener (Hoyos)
|
|
863
|
+
|
|
864
|
+
**Structure:** Open with a single high-curiosity word: \`$1\`, \`banned\`, \`free\`, \`secret\`, \`cheap\`, \`nobody\`. Pair it with a concrete claim.
|
|
865
|
+
|
|
866
|
+
**Source.** **[primary]** Marketing Examined (May 2024) on Hoyos's playbook: hook should be "Concise, no more than 3 seconds, visually pleasing — power words like 'banned,' 'free,' 'one dollar,' 'secret,' or 'cheap' instantly pique curiosity."
|
|
867
|
+
|
|
868
|
+
**Failure mode.** Power word with no payoff — classic clickbait. Triggers the CTR/retention penalty described by Rene Ritchie (YouTube Creator Liaison): *"If you over-index on CTR, it could become click-bait, which could tank retention, and hurt performance."*
|
|
869
|
+
|
|
870
|
+
### 6. Crazy Progression — show, don't tell, then skip ahead (MrBeast)
|
|
871
|
+
|
|
872
|
+
**Structure:** First 3 minutes of long-form aren't a setup — they're an escalation. Don't say "we'll do X" — show X already happening at scale.
|
|
873
|
+
|
|
874
|
+
**Source.** **[primary]** Leaked MrBeast production manual (per ProTunesOne Oct 2025 https://protunesone.com/blog/leaked-mrbeast-document-on-his-youtube-strategies/): *"Minutes 1-3: Instead of telling viewers what you will do, show them. MrBeast uses a technique called 'crazy progression.' For example, if he is making a video about a guy surviving weeks in the forest, he would cover multiple days instead of making the first 3 minutes about the first day. The intention here is to hook viewers as fast as possible and get them emotionally invested in the story."*
|
|
875
|
+
|
|
876
|
+
**Worked example.** *$1 vs $1,000,000,000 Yacht!* — the manual explicitly: *"As the viewer progresses through the video, the stakes rise, first presenting the $1 yacht, then a $1 million yacht, then a $10 million yacht, and so on, until the payoff at the end."*
|
|
877
|
+
|
|
878
|
+
**Failure mode.** A first 3 minutes that explains the rules instead of showing escalation. The manual's frame: *"Match the clickbait expectations and front-load as much information about the video as possible while incorporating the maximum amount of visuals, music, effects and quick scene changes."*
|
|
879
|
+
|
|
880
|
+
### 7. Match-the-thumbnail-promise (MrBeast)
|
|
881
|
+
|
|
882
|
+
**Structure:** Whatever the thumbnail visually promises, deliver in the first 60 seconds. Not at minute 8.
|
|
883
|
+
|
|
884
|
+
**Source.** **[primary]** MrBeast manual via Creator Handbook (Sept 18 2024): *"Thumbnails must align with expectations set by the title. If a thumbnail promises a specific scene or visual spectacle, the video must deliver on that promise to keep viewers engaged."* Plus: *"CTR is what dictates what we do for videos. 'I Spent 50 Hours In My Front Yard' is lame, and you wouldn't click it. But you would click 'I Spent 50 Hours In Ketchup.'"*
|
|
885
|
+
|
|
886
|
+
**Source corroboration.** **[primary]** Rene Ritchie via vidIQ Aug 2025: *"Great thumbnails don't just get viewers to click — they also help viewers understand what the video is about, so that they can make informed decisions about what to watch."*
|
|
887
|
+
|
|
888
|
+
**Failure mode.** Thumbnail-bait. CTR spikes, retention craters, the algorithm penalises distribution.
|
|
889
|
+
|
|
890
|
+
### 8. Mid-video re-engagement at minute 3 and 6 (MrBeast)
|
|
891
|
+
|
|
892
|
+
**Structure:** A mini-hook (twist, reveal, escalation) at exactly the points where retention historically dips. Not the climax — a refresh.
|
|
893
|
+
|
|
894
|
+
**Source.** **[primary]** Leaked MrBeast manual via Cybernews (Sept 16 2024): *"Around the three-minute mark, MrBeast's team aims to include a 're-engagement,' which is highly interesting and impressive… The next most crucial segment of a video is the 3–6 minute mark, which needs to be filled with most exciting and interesting content. After another 're-engagement' at the six-minute mark, the hope is to retain the viewers till the end."*
|
|
895
|
+
|
|
896
|
+
**Failure mode.** Recapping what just happened instead of escalating. Recap-style re-engagements drop retention more sharply than having no recap at all.
|
|
897
|
+
|
|
898
|
+
### 9. The "I asked Google" / "I asked an expert" hook (Sean Andrew)
|
|
899
|
+
|
|
900
|
+
**Structure:** Open with a researched question whose answer the audience wants. The hook frames you as proxy: you found out, viewer doesn't have to.
|
|
901
|
+
|
|
902
|
+
**Source.** **[secondary, named example]** vidIQ Shorts hooks roundup (Feb 2026, https://vidiq.com/blog/post/viral-video-hooks-youtube-shorts/): *"Sean Andrew used this opener to get 478,000 views on a long-jumping video. He asked Google 'what the longest jump in history is,' and then attempted to break the long-jump record."*
|
|
903
|
+
|
|
904
|
+
**Failure mode.** The answer is in the hook. The hook works because Google's answer becomes the implicit promise *to be tested*; if you reveal the answer, the test loses tension.
|
|
905
|
+
|
|
906
|
+
### 10. Credibility + specific N (Erika Kullberg)
|
|
907
|
+
|
|
908
|
+
**Structure:** "Here are N things I do before [scenario], coming from [credential]." Combines foreshadowing (audience knows it ends on item N) with credential framing.
|
|
909
|
+
|
|
910
|
+
**Source.** **[secondary, named example]** vidIQ (Feb 2026): *"Erika Kullberg's 'Quick Travel Tips' starts: 'Here are three things I do before every flight, coming from a lawyer who travels six months out of the year.' She speaks directly to travellers and builds credibility by saying how often she travels."*
|
|
911
|
+
|
|
912
|
+
**Failure mode.** Generic credibility ("as a content creator…") or N too high. Lists of 10+ erode foreshadowing because viewers can't track them.
|
|
913
|
+
|
|
914
|
+
### 11. End-of-video cliffhanger (Rene Ritchie's underused tactic)
|
|
915
|
+
|
|
916
|
+
**Structure:** End the current video on a cliffhanger that resolves in the next upload. Pulls watch-time INTO the channel, not out of it.
|
|
917
|
+
|
|
918
|
+
**Source.** **[primary]** Rene Ritchie via Search Engine Journal (Aug 15 2023 — older but still cited canonically): *"Cliffhangers are an underused tactic on YouTube. Similar to how they're used in television and movies, implementing cliffhangers at the end of YouTube videos can make viewers eager to watch the next video to see what happens. This builds excitement and investment in the audience."*
|
|
919
|
+
|
|
920
|
+
**Failure mode.** No payoff in the next upload. The cliffhanger creates an unfulfilled loop, dragging down satisfaction-survey responses (one of the four signals YouTube weighs in 2025).
|
|
921
|
+
|
|
922
|
+
### 12. First-frame-as-thumbnail (Hoyos + Galloway)
|
|
923
|
+
|
|
924
|
+
**Structure:** The literal first frame of the video should communicate the promise without audio. Treat it like a thumbnail.
|
|
925
|
+
|
|
926
|
+
**Source.** **[primary]** Paddy Galloway's Rattibha-archived X thread (2023): *"It's important to make your first second really punchy and engaging to hook viewers early into the video. Treat your intro like a thumbnail."* **[primary]** Hoyos on My First Million ep. 580 (May 3 2024) discusses the importance of "the first frame" as a retention lever; she found that removing a single frame can change a Short's retention curve.
|
|
927
|
+
|
|
928
|
+
**Failure mode.** First frame is a logo, a black slate, a cold-open loading shot, or a face mid-blink. Mobile autoplay shows this in the feed before audio loads.
|
|
929
|
+
|
|
930
|
+
---
|
|
931
|
+
|
|
932
|
+
## Picking a pattern
|
|
933
|
+
|
|
934
|
+
Default order to try, by content type:
|
|
935
|
+
|
|
936
|
+
| Content type | First choice | Backup |
|
|
937
|
+
|---|---|---|
|
|
938
|
+
| Shorts | Pattern 2 (Shock/Intrigue/Satisfy) + Pattern 5 (power word) | Pattern 3 (foreshadow ending) |
|
|
939
|
+
| Long-form challenge / spectacle | Pattern 6 (crazy progression) + Pattern 7 (match thumbnail) | Pattern 8 (3-min re-engagement) |
|
|
940
|
+
| Educational long-form | Pattern 10 (credibility + N) | Pattern 1 (click to unpause) |
|
|
941
|
+
| Vlog / journey | Pattern 4 (but/so) | Pattern 11 (cliffhanger to next) |
|
|
942
|
+
| Reaction / opinion | Pattern 1 (click to unpause) | Pattern 9 (asked Google) |
|
|
943
|
+
| Series content | Pattern 11 (cliffhanger) | – |
|
|
944
|
+
|
|
945
|
+
**Avoid stacking patterns beyond the pairings above** — viewers can only track one promise at a time. One pattern (or one tight pairing), executed well, beats three layered patterns.
|
|
946
|
+
|
|
947
|
+
---
|
|
948
|
+
|
|
949
|
+
## Anti-patterns (don't ship these)
|
|
950
|
+
|
|
951
|
+
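- **"Hey guys what's up so today I want to talk about…"** — Creator Insider's "establish value within 7 seconds" guidance (Feb 2025, via Marketing Agent's recap) rules this out, as does Beaupré's emphasis on the first 30 seconds. \`analyze_hook\` will catch it; flag with a red marker.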
|
|
952
|
+
- **Static talking head with no visual change in 0–2 seconds** — even with a perfect line, a static frame does nothing for retention. Pair every hook with a visual change (cut, zoom, gesture). MrBeast manual: *"incorporating the maximum amount of visuals, music, effects and quick scene changes."*
|
|
953
|
+
- **Hooks that contain the answer.** "Here are 5 ways to save money: 1. budget, 2. invest, 3. …" — burns the curiosity gap immediately.
|
|
954
|
+
- **Generic music sting with no spoken content for 1+ second** — the first second is the hook on Shorts. Move the punchline forward.
|
|
955
|
+
- **Posed thumbnail mismatch.** A posed studio thumbnail paired with a candid mid-action video opener loses both audiences (no click-to-unpause AND no thumbnail-promise match).
|
|
956
|
+
|
|
957
|
+
---
|
|
958
|
+
|
|
959
|
+
## Operationalising in the agent
|
|
960
|
+
|
|
961
|
+
The agent does NOT generate footage. It can only re-cut from what was filmed, or recommend a re-shoot. Frame every hook diagnosis around that constraint.
|
|
962
|
+
|
|
963
|
+
When \`analyze_hook\` returns a low score:
|
|
964
|
+
|
|
965
|
+
1. Read the \`hookLine\` field from \`find_viral_moments\` (or the first sentence of the chosen window).
|
|
966
|
+
2. Call \`rewrite_hook(currentHook=<line>, videoTopic=<one-line>, transcriptExcerpt=<200–500 chars>, pattern="auto")\` — returns 3 candidate rewrites with the chosen pattern + rationale.
|
|
967
|
+
3. Surface the candidates to the user. **Do NOT auto-apply** — we can't speak the new line on-camera; the user has to either:
|
|
968
|
+
- **Pick an existing alternative opener from the source footage** — if so, propose a cut window via \`text_based_cut\`.
|
|
969
|
+
- **Re-shoot the opener** — if so, drop a red marker:
|
|
970
|
+
\`\`\`
|
|
971
|
+
add_marker(color="red", note="PAUSE: hook needs re-shoot. Suggested line: '[candidate]'")
|
|
972
|
+
\`\`\`
|
|
973
|
+
4. **Never silently ship a sub-60 hook.** If the user can't re-shoot and source has no better alternative, the right move is to tell them so explicitly — not to pretend the current opener is fine.
|
|
974
|
+
|
|
975
|
+
For Shorts specifically, the canonical pre-flight chain is:
|
|
976
|
+
|
|
977
|
+
\`\`\`
|
|
978
|
+
audit_first_frame(input) # is the t=0 frame thumbnail-quality?
|
|
979
|
+
analyze_hook(input) # does the spoken line earn the watch?
|
|
980
|
+
verify_thumbnail_promise(thumb, input) # does the opening deliver the thumbnail's promise?
|
|
981
|
+
\`\`\`
|
|
982
|
+
|
|
983
|
+
Gate at all three. If any returns blocking findings, pause before render.
|
|
984
|
+
|
|
985
|
+
**Operational targets** (executable today via the tools above):
|
|
986
|
+
- **Optimal duration 30–45 s** — \`find_viral_moments\` already defaults to \`[20, 45]\`.
|
|
987
|
+
- **Target ≥ 90% retention** through to last second (Hoyos's bar) — the agent can't measure this until upload, but it's the bar to rewrite toward.
|
|
988
|
+
- **Target ≥ 70% view-vs-swipe** (Galloway's 3.3B-views floor) — same: post-upload metric the user reports.
|
|
989
|
+
- **Seamless re-loop** — run \`loop_match_short\` as the last step before delivery.
|
|
990
|
+
|
|
991
|
+
For long-form retention checkpoints (Pattern 8 — 3-min and 6-min re-engagement), use \`audit_retention_structure(transcript)\`. It returns per-checkpoint scores and weakest-checkpoint suggestions; the agent then proposes \`cut_filler_words\` / \`text_based_cut\` / \`punch_in\` / \`add_sfx_at_cuts\` on the flat windows.
|
|
992
|
+
|
|
993
|
+
---
|
|
994
|
+
|
|
995
|
+
## Sources & further reading
|
|
996
|
+
|
|
997
|
+
**Primary creator sources:**
|
|
998
|
+
- Jenny Hoyos × Todd Sherman, **YouTube Creator Blog**, Jan 28 2025 — https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/
|
|
999
|
+
- Jenny Hoyos, **My First Million** ep. 580, May 3 2024 — https://www.mfmpod.com/videos/the-formula-to-break-100-million-views-on-shorts-ft-jenny-hoyos/
|
|
1000
|
+
- Jenny Hoyos × **Marketing Examined**, May 16 2024 — https://www.marketingexamined.com/blog/jenny-hoyos-short-form-video-playbook
|
|
1001
|
+
- **Leaked MrBeast production manual**, August 2024, full PDF mirrored at https://simonwillison.net/2024/Sep/15/how-to-succeed-in-mrbeast-production/
|
|
1002
|
+
- Paddy Galloway, **Creator Science Podcast #209**, Jan 27 2026 — https://podcast.creatorscience.com/paddy-galloway-2/
|
|
1003
|
+
- Paddy Galloway, **LinkedIn "Click to unpause"** post, March 2026
|
|
1004
|
+
- Paddy Galloway, **3.3 billion Shorts views** X thread, archived at https://en.rattibha.com/thread/1646898356419981315
|
|
1005
|
+
|
|
1006
|
+
**Authoritative third-party syntheses:**
|
|
1007
|
+
- vidIQ — Hoyos breakdown (Dec 2023), Shorts hooks roundup (Feb 2026)
|
|
1008
|
+
- Creator Handbook — MrBeast manual recap, Sept 18 2024
|
|
1009
|
+
- ProTunesOne — Leaked MrBeast doc breakdown, Oct 28 2025
|
|
1010
|
+
- Cybernews — MrBeast manual on retention checkpoints, Sept 16 2024
|
|
1011
|
+
- Search Engine Journal — Rene Ritchie on cliffhangers, Aug 15 2023
|
|
1012
|
+
`;
|
|
1013
|
+
const YOUTUBE_ALGORITHM_PRIMER = `---
|
|
1014
|
+
name: youtube-algorithm-primer
|
|
1015
|
+
description: How YouTube actually ranks videos in 2024–2026, sourced from Creator Insider, the YouTube Liaison (Rene Ritchie), Senior Director of Growth Todd Beaupré, Paddy Galloway, and the Retention Rabbit 2025 benchmark study. Read when generating titles/descriptions/chapters or when a video is underperforming. Numbers without a primary YouTube source are flagged as third-party heuristics.
|
|
1016
|
+
---
|
|
1017
|
+
|
|
1018
|
+
# youtube-algorithm-primer
|
|
1019
|
+
|
|
1020
|
+
**When to use:** any time a tool needs to optimise FOR the algorithm — title generation, description structure, chapter placement, render-format selection, end-screen placement, multi-format render decisions. Also when the user asks "why isn't this getting views?" — the answer usually maps to one of the four signals below.
|
|
1021
|
+
|
|
1022
|
+
**What this is:** a working model with cited sources. Where a number comes from YouTube's own staff, it's marked **[primary]**. Where it comes from third-party tooling (vidIQ, TubeBuddy, Dataslayer) or aggregator sources, it's marked **[secondary]**. Where it's creator folklore with no traceable source, it's marked **[unverified]** — surface those to the user as heuristics, not laws.
|
|
1023
|
+
|
|
1024
|
+
**Source quality up front.** Most authoritative sources for 2024–2026, in order: (1) Creator Insider, the Beaupré ↔ Ritchie video conversations, especially the Jan 23 2025 algorithm explainer; (2) Rene Ritchie's "Top Five" YouTube Blog posts and \`@YouTubeLiaison\` on X; (3) the YouTube Help Center on Test & Compare and Add Custom Thumbnails; (4) Paddy Galloway (data-driven creator strategist) — his X threads and Creator Science Podcast #209 (Jan 27 2026). Tool-vendor data (vidIQ, TubeBuddy, Dataslayer, Retention Rabbit) is useful directional signal but not platform-confirmed.
|
|
1025
|
+
|
|
1026
|
+
---
|
|
1027
|
+
|
|
1028
|
+
## The 2025 shift: satisfaction-weighted discovery
|
|
1029
|
+
|
|
1030
|
+
The biggest change creators must internalise. YouTube announced a recommendation model overhaul in early 2025; the new system layers four qualitative satisfaction signals on top of clicks and watch time:
|
|
1031
|
+
|
|
1032
|
+
1. **Surveys** — post-view "Did you enjoy this video?" prompts.
|
|
1033
|
+
2. **Sentiment modelling** — comments + like/dislike ratios.
|
|
1034
|
+
3. **Long-session retention** — time spent across multiple videos in a session.
|
|
1035
|
+
4. **Feedback suppression** — "Not Interested" / "Don't Recommend Channel" clicks.
|
|
1036
|
+
|
|
1037
|
+
**[primary]** Todd Beaupré (YouTube Sr. Director, Growth & Discovery), via Buffer's recap of the Jan 2025 Creator Insider conversation: *"We're trying to understand not just about the viewer's behavior and what they do, but how they feel about the time they're spending. What do they say about their experience watching a video."* (https://buffer.com/resources/youtube-algorithm/, 2025)
|
|
1038
|
+
|
|
1039
|
+
**[primary]** Rene Ritchie (YouTube Creator Liaison), Jan 2025 Creator Insider video, paraphrased on Lia Haberman's ICYMI newsletter: *"YouTube's Algorithm Pulls, Not Pushes: The recommendation system doesn't 'push' creator videos out to YouTube audiences but instead pulls in content based on the user's individual viewing habits — think of it as automating word of mouth. Viewer Satisfaction Matters: YouTube measures user satisfaction through engagement signals such as likes, comments, and surveys. Total watch time is not the golden standard — sometimes viewers want a video to be more efficient and just get to the point."* (https://liahaberman.substack.com/p/icymi-how-youtubes-2025-algorithm, Jan 31 2025)
|
|
1040
|
+
|
|
1041
|
+
**Editorial implication.** Stop padding videos to hit a watch-time number. The platform now reads "got to the point fast" as a positive satisfaction signal, not a missed-watch-time signal.
|
|
1042
|
+
|
|
1043
|
+
---
|
|
1044
|
+
|
|
1045
|
+
## The four metrics that move ranking
|
|
1046
|
+
|
|
1047
|
+
In rough order of importance for general distribution:
|
|
1048
|
+
|
|
1049
|
+
### 1. Click-through rate (CTR) on impressions
|
|
1050
|
+
|
|
1051
|
+
CTR is driven by the **thumbnail + title pair**. **[secondary]** Tool-vendor benchmarks roughly converge:
|
|
1052
|
+
|
|
1053
|
+
| Band | vidIQ (Nov 2025) | Dataslayer (~2026) | YTShark (Mar 2026) |
|
|
1054
|
+
|------|---|---|---|
|
|
1055
|
+
| Poor | < 4% (thumbnail/title isn't clear enough) | < 3% needs immediate fixes | – |
|
|
1056
|
+
| Average | 4–6% | 4–6% | 2–10% (most channels) |
|
|
1057
|
+
| Good | 7%+ | 7–10% | – |
|
|
1058
|
+
| Excellent | 9–10%+ | > 10% (niche channels with loyal audiences) | – |
|
|
1059
|
+
|
|
1060
|
+
Niche-specific (PostEverywhere citing vidIQ + TubeBuddy data, Jan 2026): gaming averages 8.5%, educational averages 4.5%.
|
|
1061
|
+
|
|
1062
|
+
**[unverified]** The "1,000 impressions / 10% CTR triggers expanded distribution" claim that floats in SEO blogs (Hashmeta and others) has no traceable YouTube source. Treat as folk wisdom.
|
|
1063
|
+
|
|
1064
|
+
**[primary]** What Paddy Galloway actually says about CTR — Creator Science Podcast #209, Jan 27 2026: *"CTR itself is a very fickle and in some ways infuriating metric… because the more impressions a video gets, the lower the CTR drops typically… CTR itself as a whole is not very useful. CTR in the first hour or first 24 hours can be a good predictor of success on videos. There's a very strong correlation between first-hour CTR and long-term video performance on a lot of established channels."*
|
|
1065
|
+
|
|
1066
|
+
**[primary]** What YouTube itself says about CTR's role — Rene Ritchie on the Test & Compare A/B tool, July 25 2025 (via vidIQ blog https://vidiq.com/blog/post/youtube-launches-new-title-testing-tool/): *"Thumbnail Test & Compare returns watch time rather than separate metrics on click-through rate (CTR) and retention (AVP), because watch time includes both! You have to click to watch and you have to retain to build up time. If you over-index on CTR, it could become click-bait, which could tank retention, and hurt performance."*
|
|
1067
|
+
|
|
1068
|
+
**Operational rule for the agent:** judge CTR against the channel's own first-hour baseline, not industry averages. YouTube's native A/B tool optimises Watch Time Share, not CTR — match that bias.
|
|
1069
|
+
|
|
1070
|
+
### 2. Average view duration (AVD) and average percentage viewed (AVP%)
|
|
1071
|
+
|
|
1072
|
+
The single best 2024–2026 retention dataset is **[secondary, large N]** Retention Rabbit's May 2025 audience-retention benchmark report (75+ niches; Q1 2024 – Q1 2025; https://www.retentionrabbit.com/blog/2025-youtube-audience-retention-benchmark-report):
|
|
1073
|
+
|
|
1074
|
+
- **Average YouTube video retains 23.7%** of its viewers.
|
|
1075
|
+
- **Only 1 in 6 videos (16.8%) surpass 50% retention.**
|
|
1076
|
+
- **55%+ viewer drop-off occurs in the first minute.**
|
|
1077
|
+
- Channels improving average retention by 10 percentage points see a correlated **25%+ increase in impressions**.
|
|
1078
|
+
- Educational How-Tos average **42.1% retention** — top niche.
|
|
1079
|
+
|
|
1080
|
+
**[secondary]** Threshold consensus across multiple 2025 sources (Solveigmm Aug 2025; PostEverywhere Jan 2026; Virvid Feb 2026):
|
|
1081
|
+
|
|
1082
|
+
- **50–60% AVP%** is solid.
|
|
1083
|
+
- **70%+** earns priority placement in suggested videos.
|
|
1084
|
+
- **< 40%** triggers active deprioritisation regardless of CTR.
|
|
1085
|
+
|
|
1086
|
+
**[primary]** The "50% rule" reframed — Rene Ritchie / Todd Beaupré (Jan 2025 Creator Insider, paraphrased on Hootsuite Sept 2025 https://blog.hootsuite.com/youtube-algorithm/): the platform now *"prioritises videos that provide a positive viewing experience, not just those that hold attention the longest."* Translation: a 6-minute video at 80% retention beats a 20-minute video at 30% retention even though the longer one logged more raw watch time.
|
|
1087
|
+
|
|
1088
|
+
**[primary]** Retention shape vs absolute time — YouTube's own guidance is that *relative* watch time matters more on short videos, *absolute* watch time more on long-form (cited by Virvid Feb 2026 from YouTube Help Center).
|
|
1089
|
+
|
|
1090
|
+
**The first-minute problem is the loudest signal.** Multiple converging sources:
|
|
1091
|
+
|
|
1092
|
+
- Retention Rabbit: **55%+ leave within 60 seconds**.
|
|
1093
|
+
- 1of10 (cited by PostEverywhere): *"nearly 20% of viewers drop off within the first 15 seconds — not because the video is bad, but because the intro fails to connect."*
|
|
1094
|
+
- **[primary]** Todd Beaupré, via Stan Ventures recap (Sept 5 2024): *"the importance of the first 30 seconds of a video, the role of thumbnails, and engaging intros in capturing the audience's attention."*
|
|
1095
|
+
- **[primary]** Marketing Agent Blog summarising Creator Insider Feb 2025: *"Establish value within 7 seconds (per Creator Insider, 2025)."*
|
|
1096
|
+
|
|
1097
|
+
**Diagnostic patterns on the audience-retention graph:**
|
|
1098
|
+
- Cliff in the first 30s → hook problem; rerun \`analyze_hook\` and recut opener.
|
|
1099
|
+
- Slow steady decline → pacing; rerun \`cut_filler_words\`, tighten with \`text_based_cut\`, consider \`punch_in\` / \`add_sfx_at_cuts\`.
|
|
1100
|
+
- Spike up at minute X → viewers are jumping straight to that moment (e.g. they were told to skip there); move that content earlier next time.
|
|
1101
|
+
- Steep drop at chapter boundary → chapter title oversold; rewrite the chapter title.
|
|
1102
|
+
|
|
1103
|
+
### 3. Session contribution / next-video continuation
|
|
1104
|
+
|
|
1105
|
+
**[primary]** Beaupré's framing (Jan 2025 Creator Insider): channels grow fastest when each video naturally leads viewers to watch another, creating "bingeable journeys." YoutoWire's Jan 2026 ranking-of-ranking-signals: session time (does your video lead to more YouTube watching?) sits behind only CTR and AVD in importance.
|
|
1106
|
+
|
|
1107
|
+
What extends a session:
|
|
1108
|
+
- End-screen elements pointing to your next video.
|
|
1109
|
+
- Series content / episodic structure.
|
|
1110
|
+
- Chapters + a clear "next" hook in the outro.
|
|
1111
|
+
|
|
1112
|
+
What ends sessions:
|
|
1113
|
+
- Long static outros (viewer closes tab while waiting).
|
|
1114
|
+
- Generic "subscribe" outros without a next-video pointer.
|
|
1115
|
+
|
|
1116
|
+
**Operational rule:** the brand kit's \`outro\` should chain to the next video. Description should reference previous / next uploads. \`generate_outro\` is the lever.
|
|
1117
|
+
|
|
1118
|
+
### 4. Engagement velocity (first 24–48 hours) — partial myth
|
|
1119
|
+
|
|
1120
|
+
The "first 48 hours decide everything" framing is overstated by SEO blogs.
|
|
1121
|
+
|
|
1122
|
+
**[primary]** Paddy Galloway, X thread Oct 16 2023 (still cited): *"The YouTube algorithm doesn't let you experiment. We recently tried a completely new format with a client. It started slow. 6/10. Now it's about to be our fastest ever video to hit 1 million views."*
|
|
1123
|
+
|
|
1124
|
+
**[secondary]** Dataslayer Jan 2026 directly debunks the "your video is dead if it doesn't pop in 48h" myth: *"YouTube's 2025 algorithm actively resurfaces old content when topics become relevant again. Videos about 'tax deductions for freelancers' spike in January and April."*
|
|
1125
|
+
|
|
1126
|
+
**[primary]** Rene Ritchie, YouTube Blog March 28 2024 (https://blog.youtube/culture-and-trends/renes-top-five-on-youtube-march-28-2024-edition/): *"Don't delete videos unless you have a very, very good reason. When you delete a video, you delete your channel's connection to the audience that watched that video."*
|
|
1127
|
+
|
|
1128
|
+
**Verdict for the agent:** first-hour CTR matters as a predictor for established formats. New formats and evergreen topics absolutely recover later. Don't tell users their video is dead at 48h.
|
|
1129
|
+
|
|
1130
|
+
---
|
|
1131
|
+
|
|
1132
|
+
## What YouTube has officially said it does NOT use
|
|
1133
|
+
|
|
1134
|
+
This is the most reliably citable section because it's all from YouTube's own staff.
|
|
1135
|
+
|
|
1136
|
+
- **Tags — minimal impact.** **[primary]** YouTube Liaison (\`@YouTubeLiaison\`), Aug 22 2024, summarised by Stan Ventures (https://www.stanventures.com/news/youtube-reveals-new-seo-priorities-756/): *"Liaison debunked this myth, stating that tags have a minimal impact on the algorithm. The primary recommendation was to use tags sparingly, emphasising on common misspellings of channel names or key topics related to the video."*
|
|
1137
|
+
- **Hashtags — small effect, contextual only.** **[primary]** Same Liaison statement: *"hashtags should only be employed when they align with trending topics or help contextualise a video in a way that adds value."*
|
|
1138
|
+
- **Categories — minor.** Same source: *"while categories help YouTube understand the general context of a video, they are a minor consideration in the grand scheme of things."*
|
|
1139
|
+
- **Upload time of day — not algorithmic.** **[primary]** Rene Ritchie's March 28 2024 "Mythbusters" YouTube Blog post with Beaupré: posting time matters for *your audience's habits*, not algorithmically.
|
|
1140
|
+
- **Subscriber count — weak signal.** **[secondary]** Dataslayer Jan 2026: *"In 2025, YouTube actively recommends videos from small channels. Subscriber count is one of hundreds of signals, and not a strong one. A 0-subscriber channel can appear in recommendations if the video performs well with test audiences."*
|
|
1141
|
+
- **Dislikes — barely register.** **[secondary]** YoutoWire Jan 2026: *"Dislikes barely register. Algorithm treats them as 'engagement' (not negative signal). What DOES hurt: High 'Not Interested' clicks (when viewers tell YouTube 'Don't recommend this channel')."* Consistent with all Ritchie commentary on \`Not Interested\` being the actual penalty signal.
|
|
1142
|
+
- **Subscriber-feed checkbox / unchecking notifications — no effect.** **[primary]** Rene Ritchie: *"Shorts don't trigger notifications on upload, so that part won't make a difference. For long-form, most subscribers watch from the home page."*
|
|
1143
|
+
- **Description links — fine unless spammy.** **[secondary]** Dataslayer: links to resources mentioned in the video are fine; the algorithm just favours videos that keep viewers on YouTube longer.
|
|
1144
|
+
|
|
1145
|
+
---
|
|
1146
|
+
|
|
1147
|
+
## Algorithm changes worth knowing (2024–2026)
|
|
1148
|
+
|
|
1149
|
+
Don't recite these to the user, but reflect them in tool defaults.
|
|
1150
|
+
|
|
1151
|
+
- **Oct 15 2024:** Shorts max length raised from 60 s → 3 minutes. (PPC.land timeline)
|
|
1152
|
+
- **March 31 2025:** Shorts view counting changed — view counts now register on play/replay with no minimum watch time; YPP eligibility and Shorts ad-revenue sharing remain on the renamed *Engaged Views* metric. (TubeBuddy, Pixability, PPC.land all confirm.)
|
|
1153
|
+
- **Feb 2025:** "Satisfaction-weighted" recommendation model rolled out (Creator Insider, paraphrased on Marketing Agent Blog Nov 4 2025).
|
|
1154
|
+
- **July 2025:** YouTube removed the Trending page and Trending Now list; replaced by per-vertical micro-trend tracking. (Shopify summary citing the YouTube announcement.)
|
|
1155
|
+
- **2024–2025:** Native title + thumbnail A/B testing (Test & Compare) rolled out widely. **[primary]** Rene Ritchie via vidIQ July 25 2025: *"You can pick up to 3 versions of your title… up to 3 thumbnails… YouTube doesn't use click-through rate (CTR) as the winning metric — it uses Watch Time Share. Tests typically run from 1 to 14 days, depending on how quickly statistical significance is reached. Once there's a clear winner, YouTube automatically applies it."*
|
|
1156
|
+
- **Late 2025:** Shorts and long-form recommendation surfaces partially decoupled. **[secondary, partial]** YTShark Mar 2026 says fully decoupled; **[primary]** YouTube Creator Blog July 2025 (per Marketing Agent) says short-form retention still feeds satisfaction signals back into long-form discovery. Reality is in between: ranking systems separate, but viewer-graph cross-pollination remains.
|
|
1157
|
+
|
|
1158
|
+
---
|
|
1159
|
+
|
|
1160
|
+
## Title rules (the highest-leverage lever)
|
|
1161
|
+
|
|
1162
|
+
Constraints (cross-source consensus from vidIQ Aug 2025, AmpiFire Nov 2025, multiple creator analysts):
|
|
1163
|
+
|
|
1164
|
+
- **≤ 70 characters** before mobile feed truncation; **60 is safer**.
|
|
1165
|
+
- **Front-load the hook** in the first 4–6 words (mobile crops the rest).
|
|
1166
|
+
- **One specific number** if applicable — "5 mistakes" beats "common mistakes"; "$3,000" beats "expensive."
|
|
1167
|
+
- **Curiosity gap, not spoiler** — title should make the viewer want the answer, not contain it.
|
|
1168
|
+
- **No clickbait that doesn't deliver** — see Ritchie's quote above. CTR-spike + AVP-collapse is now actively penalised.
|
|
1169
|
+
- **One emoji max** if any.
|
|
1170
|
+
|
|
1171
|
+
Patterns that consistently perform across creator data (vidIQ + TubeBuddy public analyses):
|
|
1172
|
+
|
|
1173
|
+
- **"How I [achieved] [in time] (with [twist])"** — How I built X in 3 days (without Y)
|
|
1174
|
+
- **"[Number] [things] [audience] [verb]"** — 5 mistakes new editors make
|
|
1175
|
+
- **"Why [common belief] is wrong"** — Why the 10K hour rule is wrong
|
|
1176
|
+
- **"I [extreme behaviour] for [time]. Here's what happened."** — I cooked one new dish per day for 30 days
|
|
1177
|
+
- **"The [adjective] truth about [topic]"** — The boring truth about productivity apps
|
|
1178
|
+
|
|
1179
|
+
\`generate_youtube_metadata\` should propose 3 titles using **different patterns from this list**, not three variations of one. Pattern variety lets the user pick.
|
|
1180
|
+
|
|
1181
|
+
---
|
|
1182
|
+
|
|
1183
|
+
## Description structure (sidecar SEO + AVD lift)
|
|
1184
|
+
|
|
1185
|
+
The description's job is to:
|
|
1186
|
+
|
|
1187
|
+
1. **Restate the hook in the first 2 lines** — these show above-the-fold on mobile.
|
|
1188
|
+
2. **Drop chapters** — clickable timestamps that double as table-of-contents. Required for any video > 5 minutes.
|
|
1189
|
+
3. **Link related uploads** — pulls watch-time into your channel.
|
|
1190
|
+
4. **CTA last** — subscribe/Patreon/etc. at the END, not the top.
|
|
1191
|
+
|
|
1192
|
+
Skeleton:
|
|
1193
|
+
|
|
1194
|
+
\`\`\`
|
|
1195
|
+
<one-line restated hook>
|
|
1196
|
+
<one specific question to drive comments>
|
|
1197
|
+
|
|
1198
|
+
⏱️ Chapters
|
|
1199
|
+
00:00 <chapter 1 title>
|
|
1200
|
+
01:23 <chapter 2 title>
|
|
1201
|
+
…
|
|
1202
|
+
|
|
1203
|
+
🎥 Related videos
|
|
1204
|
+
- <previous video title> → <link>
|
|
1205
|
+
- <related video title> → <link>
|
|
1206
|
+
|
|
1207
|
+
📌 About this channel
|
|
1208
|
+
<one-paragraph "what to expect" + subscribe url>
|
|
1209
|
+
\`\`\`
|
|
1210
|
+
|
|
1211
|
+
\`generate_youtube_metadata\` produces chapters and description body; the agent slots them into this skeleton.
|
|
1212
|
+
|
|
1213
|
+
---
|
|
1214
|
+
|
|
1215
|
+
## Shorts ranks differently
|
|
1216
|
+
|
|
1217
|
+
**[primary]** From Hootsuite Sept 2025 paraphrasing the official Shorts ranking explainer: *"A 30-second Short with 85% watch duration will likely rank higher than a 60-second Short with only 50% retention. Looping Shorts (where viewers rewatch part of the video) tend to get more recommendations than those with lower replay rates."*
|
|
1218
|
+
|
|
1219
|
+
**[primary]** Hootsuite continues: *"Unlike long-form videos, click-through rate (CTR) isn't a ranking factor [for Shorts], since users don't actively click Shorts — they swipe through them."*
|
|
1220
|
+
|
|
1221
|
+
**[primary]** Paddy Galloway's analysis of 3.3 billion Shorts views (Rattibha-archived X thread): *"The best-performing Shorts have between 70% and 90% of people viewing versus swiping away from them."* Operationalised: **target ≥ 70% view-vs-swipe rate** as a hard floor, ≥ 85% as the success bar.
|
|
1222
|
+
|
|
1223
|
+
**[primary]** Jenny Hoyos on YouTube's own blog (Jan 28 2025, https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/): *"I really do think you have one second to hook someone, especially on Shorts."* The official YouTube Blog summarises her three-step formula: **shock, intrigue, satisfy**.
|
|
1224
|
+
|
|
1225
|
+
**Optimal Shorts duration:** **[primary]** Hoyos via Marketing Examined (May 16 2024 https://www.marketingexamined.com/blog/jenny-hoyos-short-form-video-playbook): aim for **30–34 seconds** with **90%+ retention** in the last second. **[secondary]** Boss Wallah Sept 2025 corroborates: target 90–100% retention on Shorts under 20 seconds.
|
|
1226
|
+
|
|
1227
|
+
**Implications for the agent (all executable today):**
|
|
1228
|
+
- **Default Shorts length: 30–45 s, not 60 s.** \`find_viral_moments\` defaults to \`[20, 45]\`, so its candidates already stay under the 45 s ceiling.
|
|
1229
|
+
- **Burned captions are not optional** — sound-off mobile is the default. Use \`write_keyword_captions(autoEmoji=true)\` + \`burn_subtitles\`.
|
|
1230
|
+
- **First 0.5–1 s is the hook.** Use \`audit_first_frame\` to score the t=0 frame as a thumbnail (Galloway: 'treat your intro like a thumbnail'); pair with \`analyze_hook\` for the spoken-line check.
|
|
1231
|
+
- **Seamless re-loop** — Shorts loop rate is a confirmed ranking signal. Run \`loop_match_short\` as the last step before delivery (crossfades the last ~0.3 s into the first frame).
|
|
1232
|
+
- **Skip the outro on vertical.** \`generate_outro\` is for long-form.
|
|
1233
|
+
|
|
1234
|
+
---
|
|
1235
|
+
|
|
1236
|
+
## Operationalising this in the agent
|
|
625
1237
|
|
|
626
|
-
|
|
627
|
-
lighting (tungsten vs daylight) → confidence will be low. Tell the
|
|
628
|
-
user and suggest a less aggressive match (or LUT-based correction
|
|
629
|
-
first).
|
|
630
|
-
- Target shot has multiple people with different skin tones → the
|
|
631
|
-
vision model averages. Pick the primary face's frame and warn the
|
|
632
|
-
user the secondary face may shift.
|
|
633
|
-
- User wants pixel-perfect match across 50 clips → run on a hero pair,
|
|
634
|
-
then \`copy_grade(sourceClipId=hero, targetClipIds=[...])\` instead of
|
|
635
|
-
re-running vision on every clip.
|
|
1238
|
+
The agent does NOT have access to live YouTube Studio metrics. When the user asks "why isn't this getting views?", first **ASK the user to paste the relevant numbers from Studio** (impressions, CTR, average view duration, average percentage viewed). Don't guess; don't fabricate.
|
|
636
1239
|
|
|
637
|
-
|
|
1240
|
+
Once numbers are in hand, **diagnose in this order** and surface the FIRST failing metric — don't dump all five:
|
|
638
1241
|
|
|
639
|
-
-
|
|
640
|
-
|
|
641
|
-
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
1242
|
+
1. **CTR < 4% (vs channel baseline)?** → Re-thumbnail + re-title. Run \`compose_thumbnail_variants(strategy="expression")\` for 3 face/expression variants and \`generate_youtube_metadata\` for 3 title candidates. Then: tell the user to upload all three thumbnails + one title per variant to YouTube Studio's **Test & Compare** — we cannot trigger that test from the agent; it lives only in Studio. Test & Compare optimises Watch Time Share (per Ritchie July 2025), so let YouTube pick the winner over 1–14 days.
|
|
1243
|
+
2. **CTR ok but AVP% < 30%?** → Hook problem. Run \`analyze_hook\` for the t<3s check; if Shorts, also \`audit_first_frame\`. If hook scores low, run \`rewrite_hook(currentHook=..., pattern="auto", videoTopic=...)\` to generate 3 candidate rewrites — surface them to the user. The agent CANNOT re-record the spoken line; it can only (a) recut the opener from existing source footage via \`text_based_cut\`, or (b) recommend a re-shoot.
|
|
1244
|
+
3. **AVP% ok but AVD low?** → Pacing. Run \`audit_retention_structure(transcript)\` to find the flat stretches between the 3-min and 6-min checkpoints. For each weak checkpoint, propose \`cut_filler_words\`, \`text_based_cut\`, \`punch_in\`, or \`add_sfx_at_cuts\` on the surrounding window.
|
|
1245
|
+
4. **AVD ok but session contribution low?** → End-screen / outro / next-video pointer missing. Use \`generate_outro\` with the brand-kit chain (set \`brand.outro\` and the agent inherits it).
|
|
1246
|
+
5. **Engagement velocity 0?** → No question in description (fix via \`generate_youtube_metadata\`'s description block) or tiny channel — the second case has no algorithmic fix; it's a community-size problem, not a tool problem. Be honest about this.
|
|
1247
|
+
|
|
1248
|
+
Surface ONE concrete fix per diagnosis, not the full menu.
|
|
1249
|
+
|
|
1250
|
+
**For pre-flight (before render):** the canonical audit chain is \`audit_first_frame\` → \`analyze_hook\` → \`verify_thumbnail_promise\` → \`audit_retention_structure\` (long-form only). If any returns a blocking finding, surface a red marker and pause.
|
|
1251
|
+
|
|
1252
|
+
---
|
|
1253
|
+
|
|
1254
|
+
## Sources & further reading
|
|
1255
|
+
|
|
1256
|
+
**Primary (cite these first):**
|
|
1257
|
+
- Creator Insider — Beaupré + Ritchie videos, especially Jan 23 2025 algorithm explainer (https://www.youtube.com/watch?v=dhYIb72L1hU)
|
|
1258
|
+
- Rene Ritchie — \`@YouTubeLiaison\` on X; weekly "Top Five" YouTube Blog posts at https://blog.youtube/
|
|
1259
|
+
- YouTube Help Center — Test & Compare, Add Custom Thumbnails
|
|
1260
|
+
- YouTube Blog Jan 28 2025 — Jenny Hoyos Shorts deep dive (https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/)
|
|
1261
|
+
|
|
1262
|
+
**Strong secondary:**
|
|
1263
|
+
- Paddy Galloway — Creator Science Podcast #209 (Jan 27 2026); X threads at twitter.com/PaddyGalloway1
|
|
1264
|
+
- Retention Rabbit 2025 Audience Retention Benchmark Report (May 2025) — https://www.retentionrabbit.com/blog/2025-youtube-audience-retention-benchmark-report
|
|
1265
|
+
- Hootsuite YouTube algorithm guide (Sept 2025)
|
|
1266
|
+
- Buffer YouTube algorithm guide (2025)
|
|
1267
|
+
|
|
1268
|
+
**Vendor benchmarks (treat as directional, not gospel):** vidIQ, TubeBuddy, Dataslayer, YTShark, AmpiFire.
|
|
645
1269
|
`;
|
|
646
|
-
const
|
|
1270
|
+
const YOUTUBE_END_TO_END = `---
|
|
1271
|
+
name: youtube-end-to-end
|
|
1272
|
+
description: Orchestrator for "make me a YouTube video from this footage" using a TIMELINE-FIRST workflow. The agent edits the live Resolve/Premiere timeline so the user can scrub, tweak, and undo at every stage. Renders only happen at the end on explicit user intent ("render" / "export" / "ship it"). When host=none, falls back to file-only delivery and says so up front. Covers long-form, Shorts, captions, retention pipeline, and the metadata bundle.
|
|
1273
|
+
---
|
|
647
1274
|
|
|
648
|
-
|
|
649
|
-
*editable inside the NLE* (not baked-in pixels), or wants a quick
|
|
650
|
-
title card built natively in DaVinci Resolve's Fusion page.
|
|
1275
|
+
# youtube-end-to-end
|
|
651
1276
|
|
|
652
|
-
**
|
|
653
|
-
|
|
654
|
-
|
|
1277
|
+
**When to use:** the user gives a single broad ask like *"make me a YouTube video from this footage"*, *"turn this recording into something I can ship"*, or *"give me a YouTube cut and a Shorts cut"*. This is the orchestrator skill — it composes the per-pass skills (long-form, short-form, chapter-markers, retention) into a single end-to-end run that **edits the user's timeline live** and produces metadata, captions, and SFX they can review before exporting.
|
|
1278
|
+
|
|
1279
|
+
**Core posture: you are an EDITOR, not an export pipeline.** Read the system prompt's "You are an editor, not an export pipeline" section. It overrides everything else here. Render only when the user says so.
|
|
1280
|
+
|
|
1281
|
+
**Goal:** the user watches the agent build the cut on their timeline. Cuts appear, SFX clips land on A3, captions attach as a sidecar, markers document each decision, the brand-kit outro splices onto the end. The user plays back, scrubs, asks for a tweak. Then says "ship it." Then the agent renders.
|
|
655
1282
|
|
|
656
1283
|
---
|
|
657
1284
|
|
|
658
|
-
##
|
|
1285
|
+
## Step 0 — Intent triage (ONE question max)
|
|
659
1286
|
|
|
660
|
-
|
|
661
|
-
tweak it later. Best when the user is already on the Fusion page or
|
|
662
|
-
wants a chyron that travels with the project file.
|
|
663
|
-
- **write_lower_third + burn_subtitles** — works on any host, output
|
|
664
|
-
is a baked-in pixel layer. Faster to iterate from the agent side
|
|
665
|
-
but the user can no longer edit the text without re-running the
|
|
666
|
-
pipeline.
|
|
1287
|
+
Look at the input and the user's prompt:
|
|
667
1288
|
|
|
668
|
-
|
|
1289
|
+
- **Input duration** via \`probe_media\`. Source > 5 minutes → assume long-form; when the prompt is silent on format, produce both a long-form cut and Shorts. Source ≤ 5 minutes → assume short-form.
|
|
1290
|
+
- **Brand kit:** read \`<cwd>/.gg/brand.json\` silently. All render-time tools inherit; don't ask about typography or logos.
|
|
1291
|
+
- **Host check:** call \`host_info\`. If host=none, tell the user *"No NLE attached — I'll produce standalone mp4s. Open Resolve / Premiere if you want a timeline-native edit you can keep tweaking."* Then proceed with the file-only fallback path (skip steps 2-5 timeline ops; jump to render).
|
|
1292
|
+
|
|
1293
|
+
If duration is 4–6 min AND prompt is silent on format, ASK once: *"Long-form, Shorts, or both?"*. One question, then run.
|
|
669
1294
|
|
|
670
1295
|
---
|
|
671
1296
|
|
|
672
|
-
##
|
|
1297
|
+
## Step 1 — Foundation pass (timeline-safe; runs once)
|
|
673
1298
|
|
|
674
|
-
|
|
675
|
-
|
|
1299
|
+
\`\`\`
|
|
1300
|
+
host_info → confirm host + caps
|
|
1301
|
+
get_timeline → fps, duration, existing markers
|
|
1302
|
+
get_markers → prior decisions / session resume
|
|
1303
|
+
clone_timeline(name="…-edit-v1") → SAFETY NET before destructive ops
|
|
1304
|
+
save_project → checkpoint
|
|
1305
|
+
probe_media(input) → fps, duration, codecs
|
|
1306
|
+
extract_audio(input, audio.wav, sampleRate=16000)
|
|
1307
|
+
transcribe(audio.wav, transcript.json,
|
|
1308
|
+
wordTimestamps=true) → word-level transcript
|
|
1309
|
+
\`\`\`
|
|
1310
|
+
|
|
1311
|
+
Word timings are mandatory — every retention multiplier downstream needs them. If the source is multi-cam, also run \`multicam_sync\` first and pick the alignment.
|
|
1312
|
+
|
|
1313
|
+
**No render in step 1. No file-baking. The user's timeline is now the working copy.**
|
|
1314
|
+
|
|
1315
|
+
---
|
|
1316
|
+
|
|
1317
|
+
## Step 2 — Long-form edits, ON THE TIMELINE (when long-form is in the brief)
|
|
1318
|
+
|
|
1319
|
+
Each of these MODIFIES THE TIMELINE the user is watching. The user can play back, scrub, and ask for changes between any of them.
|
|
676
1320
|
|
|
677
1321
|
\`\`\`
|
|
678
|
-
|
|
1322
|
+
# Filler removal (transcript-driven; lands as EDL on the timeline)
|
|
1323
|
+
cut_filler_words(transcript, sourceVideo) → emits EDL of keep ranges
|
|
1324
|
+
import_edl(path) → cuts appear on timeline ✓
|
|
1325
|
+
add_marker(color="green", note="filler-cut: removed N (Ms)")
|
|
1326
|
+
|
|
1327
|
+
# Chapters as markers (visible in Resolve marker pane immediately)
|
|
1328
|
+
read_skill(name="chapter-markers") → recipe
|
|
1329
|
+
# … per the recipe: read_transcript in 90s windows, identify topic shifts,
|
|
1330
|
+
# add_marker(color="purple", note="00:00 — Intro") at each boundary
|
|
1331
|
+
|
|
1332
|
+
# Captions as sidecar SRT (attached to timeline; not baked)
|
|
1333
|
+
write_srt(transcript, output="captions.srt", cues=...)
|
|
1334
|
+
import_subtitles(srtPath="captions.srt") → SRT attached to subtitle track ✓
|
|
1335
|
+
|
|
1336
|
+
# B-roll over flat stretches (live insert on V2)
|
|
1337
|
+
suggest_broll(transcript, topN=5) → ranked candidates from Pexels
|
|
1338
|
+
# for each: insert_broll(mediaPath=..., track=2, recordFrame=...) ✓
|
|
1339
|
+
|
|
1340
|
+
# Audit retention structure — SHOW the user the weak checkpoints, propose fixes
|
|
1341
|
+
audit_retention_structure(transcript) → weak spots + suggestions
|
|
1342
|
+
# DON'T silently rewrite. Surface to user, propose punch_in / cut_filler_words /
|
|
1343
|
+
# add_sfx_to_timeline on the surrounding window. Wait for their OK or tweak.
|
|
1344
|
+
|
|
1345
|
+
# Outro splice (from brand kit if available, otherwise generate)
|
|
1346
|
+
generate_outro(output="outro.mp4") → produces outro card mp4
|
|
1347
|
+
import_to_media_pool(path="outro.mp4")
|
|
1348
|
+
append_clip(track=1, mediaPath="outro.mp4") → outro lands at end of timeline ✓
|
|
1349
|
+
\`\`\`
|
|
679
1350
|
|
|
680
|
-
|
|
681
|
-
open_page(name="fusion")
|
|
1351
|
+
After step 2 the user has a fully-edited LONG-FORM TIMELINE in Resolve/Premiere. They can play it. Scrub to any point. Watch the b-roll cutaways. Read the chapter markers. **No mp4 has been rendered yet.**
|
|
682
1352
|
|
|
683
|
-
|
|
684
|
-
fusion_comp(action="add_node", toolId="Background", name="LT_Strap")
|
|
685
|
-
fusion_comp(action="add_node", toolId="TextPlus", name="LT_Text")
|
|
686
|
-
fusion_comp(action="add_node", toolId="Merge", name="LT_Comp")
|
|
1353
|
+
---
|
|
687
1354
|
|
|
688
|
-
|
|
689
|
-
fusion_comp(action="connect", fromNode="LT_Strap", toNode="LT_Comp",
|
|
690
|
-
toInput="Background")
|
|
691
|
-
fusion_comp(action="connect", fromNode="LT_Text", toNode="LT_Comp",
|
|
692
|
-
toInput="Foreground")
|
|
1355
|
+
## Step 3 — Shorts pass, ALSO timeline-first
|
|
693
1356
|
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
fusion_comp(action="set_input", node="LT_Text", input="Color1Red", value=1)
|
|
699
|
-
fusion_comp(action="set_input", node="LT_Text", input="Color1Green", value=1)
|
|
700
|
-
fusion_comp(action="set_input", node="LT_Text", input="Color1Blue", value=1)
|
|
1357
|
+
\`\`\`
|
|
1358
|
+
find_viral_moments(transcript, maxClips=3,
|
|
1359
|
+
durationRange=[20, 45]) → ranked candidate windows
|
|
1360
|
+
\`\`\`
|
|
701
1361
|
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
1362
|
+
For each candidate (top score first):
|
|
1363
|
+
|
|
1364
|
+
\`\`\`
|
|
1365
|
+
analyze_hook(input, startSec=startSec, endSec=startSec+3)
|
|
1366
|
+
→ score 0-100 + findings
|
|
707
1367
|
\`\`\`
|
|
708
1368
|
|
|
709
|
-
|
|
710
|
-
|
|
1369
|
+
If \`score < 60\`, drop a red marker and skip — bad hook = bad short. Don't ship a sub-60 hook silently; either run \`rewrite_hook\` to surface candidates and let the user decide, or move on to the next moment.
|
|
1370
|
+
|
|
1371
|
+
Otherwise, **build the short on a NEW Resolve timeline so the long-form timeline isn't disturbed:**
|
|
1372
|
+
|
|
1373
|
+
\`\`\`
|
|
1374
|
+
clone_timeline(name="short-\${i}") # New timeline for this short
|
|
1375
|
+
# Trim to the candidate window via EDL:
|
|
1376
|
+
text_based_cut(sourceVideo,
|
|
1377
|
+
cuts=[{startSec: 0, endSec: candidate.startSec},
|
|
1378
|
+
{startSec: candidate.endSec, endSec: totalSec}])
|
|
1379
|
+
import_edl(path) # Window appears on the new timeline ✓
|
|
1380
|
+
|
|
1381
|
+
# Captions burned (vertical Shorts; sidecar isn't standard for Shorts)
|
|
1382
|
+
write_keyword_captions(transcript, output="short-\${i}.ass",
|
|
1383
|
+
startSec=candidate.startSec,
|
|
1384
|
+
endSec=candidate.endSec,
|
|
1385
|
+
autoEmoji=true, groupSize=2)
|
|
1386
|
+
import_subtitles(srtPath="short-\${i}.ass") # Attached to subtitle track ✓
|
|
1387
|
+
# (Final pixel-burn happens at render time, not here.)
|
|
1388
|
+
|
|
1389
|
+
# Punch-ins at the candidate's internal cut points (timeline-native — coming;
|
|
1390
|
+
# for now, surface to user with a marker so they apply manually OR queue
|
|
1391
|
+
# for the file-bake step at render time)
|
|
1392
|
+
|
|
1393
|
+
# SFX on cuts — TIMELINE-NATIVE
|
|
1394
|
+
add_sfx_to_timeline(sfx="whoosh", cutPoints=[…internal cuts…], track=3) ✓
|
|
1395
|
+
|
|
1396
|
+
add_marker(color="green",
|
|
1397
|
+
note="short \${i}: hook=\${analyzeHook.score}, virality=\${candidate.score}")
|
|
1398
|
+
\`\`\`
|
|
1399
|
+
|
|
1400
|
+
User can now switch between long-form timeline and each \`short-\${i}\` timeline in Resolve, play back, scrub, tweak.
|
|
711
1401
|
|
|
712
1402
|
---
|
|
713
1403
|
|
|
714
|
-
##
|
|
1404
|
+
## Step 4 — Pre-flight audit (still no render)
|
|
715
1405
|
|
|
716
|
-
|
|
1406
|
+
\`\`\`
|
|
1407
|
+
audit_first_frame(sourceClipPath) # Galloway: "intro = thumbnail"
|
|
1408
|
+
analyze_hook(sourceClipPath) # spoken-line check
|
|
1409
|
+
verify_thumbnail_promise(thumb, video, 60) # MrBeast: deliver in first 60s
|
|
1410
|
+
audit_retention_structure(transcript, [180,360]) # mid-video checkpoints
|
|
1411
|
+
\`\`\`
|
|
1412
|
+
|
|
1413
|
+
Surface every finding with score + suggestion. **Don't render past a blocker.** If the user says "fix the weak hook," go back to step 2/3 with \`rewrite_hook\` candidates and propose them — DON'T silently re-cut.
|
|
1414
|
+
|
|
1415
|
+
---
|
|
1416
|
+
|
|
1417
|
+
## Step 5 — Metadata bundle (REQUIRED before declaring "ready to ship")
|
|
717
1418
|
|
|
718
1419
|
\`\`\`
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
1420
|
+
generate_youtube_metadata(transcript) # titles[3], description, tags[15],
|
|
1421
|
+
# chapters[], hashtags[]
|
|
1422
|
+
|
|
1423
|
+
compose_thumbnail_variants(input=long-form-render-OR-source-frame,
|
|
1424
|
+
outputDir="./thumbs",
|
|
1425
|
+
text="<distill best title to 2–4 words>",
|
|
1426
|
+
count=3,
|
|
1427
|
+
strategy="expression")
|
|
727
1428
|
\`\`\`
|
|
728
1429
|
|
|
729
|
-
|
|
730
|
-
|
|
1430
|
+
Surface the 3 candidate titles + 3 thumbnail variants + the description to the user. Tell them to upload all three thumbnails to YouTube Studio's **Test & Compare** (no API for this — must be manual).
|
|
1431
|
+
|
|
1432
|
+
---
|
|
1433
|
+
|
|
1434
|
+
## Step 6 — STOP HERE
|
|
1435
|
+
|
|
1436
|
+
This is the natural pause point. The user has:
|
|
1437
|
+
- A fully-edited long-form timeline in their NLE
|
|
1438
|
+
- 1–3 Shorts timelines in their NLE
|
|
1439
|
+
- 3 thumbnail variants on disk
|
|
1440
|
+
- A metadata bundle (titles, description, chapters, tags, hashtags)
|
|
1441
|
+
|
|
1442
|
+
Tell the user:
|
|
1443
|
+
|
|
1444
|
+
> ✅ Long-form ready on timeline \`<name>\` (12:34, captions attached, brand-kit outro)
|
|
1445
|
+
> ✅ Shorts ready on timelines \`short-1\`, \`short-2\`, \`short-3\` (hooks: 82, 76, 71)
|
|
1446
|
+
> ✅ Thumbnail variants: \`./thumbs/long-form.{1,2,3}.jpg\`
|
|
1447
|
+
> ✅ Metadata bundle written to chat above
|
|
1448
|
+
>
|
|
1449
|
+
> Play them back, scrub, tell me what to tweak. When you're happy, say **"render"** / **"export"** / **"ship it"** and I'll:
|
|
1450
|
+
> 1. Run \`pre_render_check\` on each timeline
|
|
1451
|
+
> 2. \`render(...)\` the long-form via Resolve's deliver page
|
|
1452
|
+
> 3. \`render_multi_format\` the shorts to 9:16 / 1:1 / 4:5
|
|
1453
|
+
>
|
|
1454
|
+
> ⚠️ N candidate(s) dropped (<reason>): …
|
|
1455
|
+
|
|
1456
|
+
**Wait for the user's go-ahead. Do not call \`render(...)\` or \`render_multi_format(...)\` until they explicitly ask.**
|
|
1457
|
+
|
|
1458
|
+
---
|
|
1459
|
+
|
|
1460
|
+
## Step 7 — Render (only on "ship it" / "render" / "export")
|
|
1461
|
+
|
|
1462
|
+
When the user explicitly asks to render:
|
|
731
1463
|
|
|
732
1464
|
\`\`\`
|
|
733
|
-
|
|
1465
|
+
# Long-form
|
|
1466
|
+
list_render_presets() # see what's installed in Resolve
|
|
1467
|
+
pre_render_check(timelineEmpty=false,
|
|
1468
|
+
expectCaptions=true,
|
|
1469
|
+
loudnessSource=...,
|
|
1470
|
+
loudnessTarget="youtube")
|
|
1471
|
+
render(preset="<from list>",
|
|
1472
|
+
output="./out/long-form.mp4") # Resolve's deliver page ✓
|
|
1473
|
+
|
|
1474
|
+
# Per Short
|
|
1475
|
+
render_multi_format(input="<short-mp4-from-Resolve-or-file>",
|
|
1476
|
+
outputDir="./out/shorts",
|
|
1477
|
+
formats=["shorts-9x16"]) # 9:16 deliverable
|
|
1478
|
+
|
|
1479
|
+
# Audio finalisation (these MUST bake — Fairlight is closed)
|
|
1480
|
+
normalize_loudness(input="./out/long-form.mp4",
|
|
1481
|
+
output="./out/long-form.delivery.mp4",
|
|
1482
|
+
platform="youtube") # -14 LUFS / -1 dBTP
|
|
1483
|
+
# Then auto-import the normalized file back so the user has the final on hand:
|
|
1484
|
+
import_to_media_pool(path="./out/long-form.delivery.mp4")
|
|
1485
|
+
add_marker(color="green", note="DELIVERY: long-form.delivery.mp4 (-14 LUFS)")
|
|
734
1486
|
\`\`\`
|
|
735
1487
|
|
|
736
1488
|
---
|
|
737
1489
|
|
|
738
|
-
##
|
|
1490
|
+
## What CHANGED vs the old export-everything flow
|
|
739
1491
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
1492
|
+
- **No file-baking mid-edit.** Captions are sidecar SRT, SFX are real audio clips on track A3, b-roll lands on V2 — all live in the user's NLE.
|
|
1493
|
+
- **\`burn_subtitles\`, \`add_sfx_at_cuts\`, \`face_reframe\`, \`mix_audio\`, \`clean_audio\`, \`duck_audio\`, \`loop_match_short\`, \`bleep_words\`, \`speed_ramp\`, \`stabilize_video\`** are the file-only tools the agent does NOT chain mid-edit. They're for the final delivery pass on user request.
|
|
1494
|
+
- **\`render(...)\` / \`render_multi_format(...)\`** only fire after explicit user intent.
|
|
1495
|
+
- **Each step modifies the live timeline** — user plays back, scrubs, asks for tweaks. The session is iterative, not a one-shot pipeline.
|
|
1496
|
+
|
|
1497
|
+
---
|
|
1498
|
+
|
|
1499
|
+
## What the agent CANNOT do (be honest with the user)
|
|
1500
|
+
|
|
1501
|
+
- Generate new footage. No re-shoots, no AI scenes. Only re-cut from existing source.
|
|
1502
|
+
- Trigger YouTube Studio Test & Compare. No public API. Agent produces 3 variants; user uploads them.
|
|
1503
|
+
- Read live channel metrics. No public CTR / AVD feed. ASK the user to paste from Studio.
|
|
1504
|
+
- Re-record a hook line. \`rewrite_hook\` proposes 3 rewrites; user picks an existing alternative opener via \`text_based_cut\` or re-shoots.
|
|
1505
|
+
- Render anything until the user says so. Even if you think it's done.
|
|
1506
|
+
|
|
1507
|
+
---
|
|
1508
|
+
|
|
1509
|
+
## Defaults & gates
|
|
1510
|
+
|
|
1511
|
+
- **Hook gate**: 60 (\`analyze_hook\`).
|
|
1512
|
+
- **Virality gate**: 50 (\`score_clip\` total).
|
|
1513
|
+
- **First-frame gate**: 60 (\`audit_first_frame\`).
|
|
1514
|
+
- **Thumbnail-promise gate**: 0.6 (\`verify_thumbnail_promise\`).
|
|
1515
|
+
- **Retention-checkpoint gate**: 0.5 per checkpoint (\`audit_retention_structure\`).
|
|
1516
|
+
- **Short duration range**: 20–45 s — \`find_viral_moments\` default.
|
|
1517
|
+
- **Loudness target**: -14 LUFS / -1 dBTP for YouTube + every short-form platform.
|
|
1518
|
+
- **Caption style** (vertical): yellow keyword pop on white default, lower-third margin 220, \`autoEmoji=true\`.
|
|
1519
|
+
- **SFX track**: A3 — keeps A1 dialogue / A2 music free.
|
|
1520
|
+
- **Render**: only on explicit user intent — never automatic.
|
|
1521
|
+
`;
|
|
1522
|
+
const YOUTUBE_THUMBNAIL_DESIGN = `---
|
|
1523
|
+
name: youtube-thumbnail-design
|
|
1524
|
+
description: Thumbnail design rules sourced from a 300K-video study (1of10 Media via Search Engine Journal Dec 2025), the official YouTube Test & Compare guidance from Rene Ritchie (July 2025), and creator strategists. Read before composing thumbnails or picking variants from compose_thumbnail_variants. Numbers are tagged with their source so the agent doesn't misquote.
|
|
1525
|
+
---
|
|
1526
|
+
|
|
1527
|
+
# youtube-thumbnail-design
|
|
1528
|
+
|
|
1529
|
+
**When to use:** any time you compose a thumbnail (\`compose_thumbnail\`, \`compose_thumbnail_variants\`) or rank candidate hero frames (\`score_shot\`). Read this BEFORE writing the headline text — getting the headline wrong is the most common reason creator thumbnails underperform, more than any single visual choice.
|
|
1530
|
+
|
|
1531
|
+
**Source authority.** The strongest 2025 evidence on what actually works in thumbnails comes from: (1) **1of10 Media's 300,000-video viral study**, reported on Search Engine Journal (Dec 22 2025); (2) **YouTube's own Test & Compare tool** + Rene Ritchie's July 2025 commentary on what it optimises; (3) creator A/B data from **vidIQ, TubeBuddy, AmpiFire**. Tags \`[primary]\`, \`[secondary, large-N]\`, \`[secondary, vendor]\` mark provenance.
|
|
1532
|
+
|
|
1533
|
+
---
|
|
1534
|
+
|
|
1535
|
+
## The viewing context (this is everything)
|
|
1536
|
+
|
|
1537
|
+
Most thumbnails are first seen at:
|
|
1538
|
+
- **120 × 67 px** — mobile feed
|
|
1539
|
+
- **246 × 138 px** — desktop home feed
|
|
1540
|
+
- **360 × 202 px** — sidebar suggestions
|
|
1541
|
+
|
|
1542
|
+
Anything finer than ~3 pixels is invisible at the smallest size. **Design for 100 × 56 first** — slightly smaller than the 120 × 67 mobile size above, which leaves a safety margin. If it works there, it works everywhere.
|
|
1543
|
+
|
|
1544
|
+
Sanity test: render the thumbnail, scale it to 100 × 56, look at it. If you can't tell the subject + topic in 1 second, it fails.
|
|
1545
|
+
|
|
1546
|
+
---
|
|
1547
|
+
|
|
1548
|
+
## Faces vs. no-faces — the data is more nuanced than blogs claim
|
|
1549
|
+
|
|
1550
|
+
The headline question every creator asks. The clearest answer comes from the largest 2025 study:
|
|
1551
|
+
|
|
1552
|
+
**[secondary, large-N]** Search Engine Journal Dec 22 2025 (https://www.searchenginejournal.com/do-faces-help-youtube-thumbnails-heres-what-the-data-says/563944/), reporting 1of10 Media's analysis of 300,000 viral 2025 YouTube videos: *"thumbnails with faces and thumbnails without faces perform similarly, even though faces appear on a large share of videos in the sample."* Niche-level: **Finance benefits from faces; Business performs better without.** Channel-size: faces helped larger channels more than smaller ones. Multi-face thumbnails outperform single-face in their dataset.
|
|
1553
|
+
|
|
1554
|
+
**[secondary, vendor — flagged]** Tool-vendor counter-claim: vidIQ has reported that thumbnails with faces showing strong emotion can lift CTR by 20–30%, with surprise expressions specifically lifting CTR by ~49% (per Banana Thumbnail's March 2026 summary citing vidIQ data). AmpiFire's Nov 2025 synthesis: human-face videos receive 921,000 more views on average than faceless ones; sad faces appear in only 1.8% of thumbnails yet achieve the highest average views at 2.3 million.
|
|
1555
|
+
|
|
1556
|
+
**Disagreement called out.** The 1of10 dataset (300K videos) is the larger N and methodologically the most defensible. vidIQ's 20–30% number is not dataset-anchored in the public version. Use 1of10's "depends on niche and channel size" framing as the primary truth; use vidIQ's expression-specific lifts as supporting evidence.
|
|
1557
|
+
|
|
1558
|
+
**Operational rule:** assume faces help **for talking-head / vlog / finance** content, but DON'T force a face into product / screen-recording / B-roll-heavy thumbnails. If \`score_shot\`'s ranked frames don't surface a strong expressive face within the top 5, that's diagnostic — pick a strong product / screen frame instead.
|
|
1559
|
+
|
|
1560
|
+
When \`compose_thumbnail_variants\` does pick face frames, prefer:
|
|
743
1561
|
|
|
1562
|
+
- **Face fills ≥35% of frame area.** Half a face is fine if the visible half is expressive.
|
|
1563
|
+
- **Clear emotion** — surprise, delight, focus, mild anger, fear. Neutral does NOT work; the eyes do most of the work.
|
|
1564
|
+
- **Eyes look at the camera** OR at the label / subject.
|
|
1565
|
+
- **Surprise specifically** — wide eyes, open mouth — reportedly the strongest single emotion (vidIQ).
|
|
1566
|
+
|
|
1567
|
+
---
|
|
1568
|
+
|
|
1569
|
+
## Text in the thumbnail
|
|
1570
|
+
|
|
1571
|
+
YouTube's own guidance is "minimal, high-impact words" — supported by one primary and one secondary source:
|
|
1572
|
+
|
|
1573
|
+
- **[primary]** YouTube's Test & Compare commentary, via vidIQ (July 25 2025, citing Rene Ritchie): *"Great thumbnails don't just get viewers to click. They also help viewers understand what the video is about, so that they can make informed decisions about what to watch."*
|
|
1574
|
+
- **[secondary]** Influencer Marketing Hub paraphrasing YouTube guidance: *"Text on thumbnails should clarify the promise of the video, but there's a fine balance between brevity and context. YouTube recommends using minimal, high-impact words rather than full sentences. For example, 'Best Budget Camera' will often outperform 'Here Are the Best Budget Cameras for 2025'."*
|
|
1575
|
+
|
|
1576
|
+
**Operational constraints at 100 × 56:**
|
|
1577
|
+
|
|
1578
|
+
- **2–4 words MAX.** "How I built this in a weekend" is 7 words — unreadable. **"WEEKEND BUILD"** wins.
|
|
1579
|
+
- **One font, two weights at most.** Bold for the headline, regular for any subtitle. Picking a third "fun" font cheapens the thumbnail every time.
|
|
1580
|
+
- **Heavy outlines/stroke** — 4–8 px on a 1280 × 720 thumbnail. Without an outline the text disappears against any non-uniform background.
|
|
1581
|
+
- **Avoid serifs at thumbnail size.** They blur. Use sans-serif (Bebas, Impact, Inter Black, similar).
|
|
1582
|
+
- **Hard-cap the label length.** A 30-character ceiling on the thumbnail text forces the discipline.
|
|
1583
|
+
|
|
1584
|
+
**Don't use the video title as the thumbnail text.** They're different jobs:
|
|
1585
|
+
- **Title** — SEO + curiosity (8–10 words, optimised for search)
|
|
1586
|
+
- **Thumbnail text** — visual punch (2–4 words, optimised for scan)
|
|
1587
|
+
|
|
1588
|
+
\`compose_thumbnail_variants(text=...)\` should NOT receive the YouTube title verbatim. Pass a 2–4 word distillation. Often this is the **hook line shortened**.
|
|
1589
|
+
|
|
1590
|
+
---
|
|
1591
|
+
|
|
1592
|
+
## Colour budget
|
|
1593
|
+
|
|
1594
|
+
**[secondary, common-practice]** Use **3 colours maximum** in the thumbnail (excluding skin tones, which are free).
|
|
1595
|
+
|
|
1596
|
+
Classic creator palette:
|
|
1597
|
+
- **High-contrast hero colour** — saturated yellow, red, or cyan, used for text outline OR a single accent
|
|
1598
|
+
- **Background fill** — solid or near-solid; dark or light enough to make the subject pop
|
|
1599
|
+
- **Subject's natural colours** — skin, clothing
|
|
1600
|
+
|
|
1601
|
+
At 100 × 56 every additional colour is one fewer "lock-on" point for the eye.
|
|
1602
|
+
|
|
1603
|
+
**[primary, brand kit hook]** If \`<cwd>/.gg/brand.json\` defines \`colors.primary\`, USE IT for the text outline or the accent. Channel-level colour identity drives recognition in a feed (the viewer recognises the channel's palette before reading the text). Don't pick a new colour every video.
|
|
1604
|
+
|
|
1605
|
+
---
|
|
1606
|
+
|
|
1607
|
+
## Composition / layout
|
|
1608
|
+
|
|
1609
|
+
The dominant compositions creators converge on:
|
|
1610
|
+
|
|
1611
|
+
### A. Rule-of-thirds: face left + label right (default for talking-head)
|
|
744
1612
|
\`\`\`
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
1613
|
+
+----------------------+
|
|
1614
|
+
| | |
|
|
1615
|
+
| FACE | LABEL |
|
|
1616
|
+
| | TWO LINES |
|
|
1617
|
+
| | |
|
|
1618
|
+
+----------------------+
|
|
1619
|
+
\`\`\`
|
|
1620
|
+
Face takes left third or two-thirds; label sits in negative space. Vlogs, tutorials, reactions.
|
|
1621
|
+
|
|
1622
|
+
### B. Centred subject + arc text (products / builds)
|
|
1623
|
+
\`\`\`
|
|
1624
|
+
+----------------------+
|
|
1625
|
+
| LABEL ABOVE |
|
|
1626
|
+
| (PRODUCT) |
|
|
1627
|
+
| LABEL BELOW |
|
|
1628
|
+
+----------------------+
|
|
1629
|
+
\`\`\`
|
|
1630
|
+
Object centred; label arcs above and below or just above. Eye locks on the centred object first.
|
|
1631
|
+
|
|
1632
|
+
### C. Before / after split (transformations)
|
|
1633
|
+
\`\`\`
|
|
1634
|
+
+----------+----------+
|
|
1635
|
+
| BEFORE | AFTER |
|
|
1636
|
+
| -- ARROW -- |
|
|
1637
|
+
| WORD |
|
|
1638
|
+
+----------+----------+
|
|
1639
|
+
\`\`\`
|
|
1640
|
+
Vertical or horizontal split, an arrow, a single labelling word. Fitness, builds, redesigns, makeovers.
|
|
1641
|
+
|
|
1642
|
+
### D. Tight close-up + circle / red zone (tutorials, especially software)
|
|
748
1643
|
\`\`\`
|
|
1644
|
+
+----------------------+
|
|
1645
|
+
| LABEL ABOVE |
|
|
1646
|
+
| [⊙ ZOOMED-IN |
|
|
1647
|
+
| DETAIL] |
|
|
1648
|
+
+----------------------+
|
|
1649
|
+
\`\`\`
|
|
1650
|
+
Red circle or arrow on a specific detail. Universal in tech / software niches.
|
|
1651
|
+
|
|
1652
|
+
**One focal point.** The viewer's eye should know where to look in 0.3 seconds. Pick one composition; stick to it.
|
|
749
1653
|
|
|
750
1654
|
---
|
|
751
1655
|
|
|
752
|
-
##
|
|
1656
|
+
## YouTube's native A/B testing — Test & Compare
|
|
753
1657
|
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
1658
|
+
Critical change in 2024–2025: YouTube rolled out native thumbnail (and title) A/B testing. **The agent should default to producing 3 variants and recommend Test & Compare to the user.**
|
|
1659
|
+
|
|
1660
|
+
**[primary]** Rene Ritchie via vidIQ (July 25 2025, https://vidiq.com/blog/post/youtube-launches-new-title-testing-tool/):
|
|
1661
|
+
|
|
1662
|
+
> *"Pick up to 3 versions of your title. You can also select up to 3 thumbnails. Mix and match if you want. YouTube will randomly serve each variation to viewers… YouTube doesn't use click-through rate (CTR) as the winning metric — it uses Watch Time Share. That means the title that leads to more sustained viewing wins, not necessarily the one that gets the fastest clicks. Tests typically run from 1 to 14 days, depending on how quickly statistical significance is reached. Once there's a clear winner, YouTube automatically applies it to your video."*
|
|
1663
|
+
|
|
1664
|
+
**[primary]** Same source on why CTR isn't the winning metric: *"If you over-index on CTR, it could become click-bait, which could tank retention, and hurt performance."*
|
|
1665
|
+
|
|
1666
|
+
**Operational implication — the agent CANNOT trigger Test & Compare itself** (no public API; the test lives only in YouTube Studio). The agent's job is to PRODUCE the right 3 variants and tell the user to upload them.
|
|
1667
|
+
|
|
1668
|
+
**Single-variable A/B is built into \`compose_thumbnail_variants\` via the \`strategy\` param:**
|
|
1669
|
+
|
|
1670
|
+
- **\`strategy="expression"\`** — picks 3 distinct face/expression frames; same label on all three. Use when source has multiple expressive faces.
|
|
1671
|
+
- **\`strategy="label"\`** — picks ONE strong frame; LLM generates 3 distinct 2–4-word label variants; renders the same frame three times with different labels. Use when source has only one usable face / product / screen.
|
|
1672
|
+
- **\`strategy="mixed"\`** (default) — 3 distinct frames + same label. Use when neither single-variable mode applies cleanly.
|
|
1673
|
+
|
|
1674
|
+
Don't ship a single thumbnail. Always 3 variants.
|
|
1675
|
+
|
|
1676
|
+
---
|
|
1677
|
+
|
|
1678
|
+
## What NOT to do
|
|
1679
|
+
|
|
1680
|
+
- **All-caps shouty SEVEN-WORD HEADLINES.** Unreadable.
|
|
1681
|
+
- **Rainbow gradient text.** Wins zero A/B tests across the public datasets.
|
|
1682
|
+
- **Stock arrow templates.** Identifies the channel as "first month on YouTube" instantly.
|
|
1683
|
+
- **Watermarks on top of the subject.** If you must brand, place the watermark in a corner outside the focal area.
|
|
1684
|
+
- **Repeating the title word-for-word.** Wastes the second hook surface.
|
|
1685
|
+
- **Last week's expression, last week's composition.** Channels stagnate when every thumbnail looks identical. Vary expression and composition while keeping colour identity.
|
|
1686
|
+
- **Clickbait that doesn't deliver.** Ritchie's quote above — Watch Time Share is the metric Test & Compare uses; CTR-spike + AVP-collapse is now actively penalised.
|
|
1687
|
+
|
|
1688
|
+
---
|
|
1689
|
+
|
|
1690
|
+
## Operationalising in the agent
|
|
1691
|
+
|
|
1692
|
+
The default \`compose_thumbnail_variants\` flow:
|
|
1693
|
+
|
|
1694
|
+
1. **Pre-call \`generate_youtube_metadata\`** to get the candidate titles. Pick the strongest one.
|
|
1695
|
+
2. **Distill to 2–4 words** for the thumbnail label. Usually the hook line shortened, NOT the title verbatim.
|
|
1696
|
+
3. **Call \`compose_thumbnail_variants(input, count=3, text="<distilled label>", strategy="...")\`**.
|
|
1697
|
+
4. **Surface 3 outputs** to the user with the per-variant rationale the tool returns.
|
|
1698
|
+
5. **Verify the thumbnail's promise** with \`verify_thumbnail_promise(thumbnail=variants[0].path, video=...)\` — if the opening 60s doesn't show what the thumbnail promises, surface a red marker and don't ship until the user picks a different frame or recuts the opener.
|
|
1699
|
+
6. **Tell the user to run Test & Compare manually.** Suggested copy: *"Upload all three thumbnails to YouTube Studio's Test & Compare. YouTube picks the winner by Watch Time Share over 1–14 days. The agent can't trigger this for you — there's no API."*
|
|
1700
|
+
|
|
1701
|
+
**Brand kit integration (auto-applied).** When \`<cwd>/.gg/brand.json\` exists, \`compose_thumbnail\` and \`compose_thumbnail_variants\` already inherit:
|
|
1702
|
+
- \`fonts.heading\` → used as \`fontFile\` if not overridden
|
|
1703
|
+
- \`colors.primary\` → used as \`outlineColor\` if not overridden
|
|
1704
|
+
|
|
1705
|
+
The agent does not need to pass these explicitly. Each tool's output reports \`brandKitLoaded: true\` so the agent can confirm the kit was used.
|
|
1706
|
+
|
|
1707
|
+
---
|
|
1708
|
+
|
|
1709
|
+
## Sources & further reading
|
|
1710
|
+
|
|
1711
|
+
**Primary and large-N secondary:**
|
|
1712
|
+
- Search Engine Journal, *"Do Faces Help YouTube Thumbnails? Here's What the Data Says"*, Dec 22 2025 (1of10 Media's 300K viral video study) — https://www.searchenginejournal.com/do-faces-help-youtube-thumbnails-heres-what-the-data-says/563944/
|
|
1713
|
+
- vidIQ, *"YouTube Launches New Title Testing Tool"*, July 25 2025 (Rene Ritchie quotes) — https://vidiq.com/blog/post/youtube-launches-new-title-testing-tool/
|
|
1714
|
+
- YouTube Help Center — Test & Compare; Add Custom Thumbnails
|
|
1715
|
+
|
|
1716
|
+
**Secondary (vendor data, treat as directional):**
|
|
1717
|
+
- AmpiFire, thumbnail face research, Nov 2025
|
|
1718
|
+
- vidIQ, thumbnail psychology / face emotion lift, 2024–2025
|
|
1719
|
+
- Banana Thumbnail, summary of vidIQ data, March 2026
|
|
1720
|
+
- Influencer Marketing Hub, YouTube thumbnail guide, 2025
|
|
1721
|
+
|
|
1722
|
+
**Creator strategists worth following:**
|
|
1723
|
+
- Paddy Galloway — paddygalloway.com, X threads
|
|
1724
|
+
- Roberto Blake — YouTube channel + blog
|
|
1725
|
+
- MrBeast leaked production manual, Aug 2024 (mirrored at simonwillison.net)
|
|
765
1726
|
`;
|
|
766
1727
|
export const SKILLS = {
|
|
1728
|
+
"chapter-markers": {
|
|
1729
|
+
name: "chapter-markers",
|
|
1730
|
+
description: "Author YouTube/podcast chapter timestamps from a transcript: 5–15 chapters, first at 00:00, ≥30s apart, only at real topic shifts. Drops purple markers + emits a YouTube-formatted description block.",
|
|
1731
|
+
content: CHAPTER_MARKERS,
|
|
1732
|
+
},
|
|
1733
|
+
"fusion-lower-third": {
|
|
1734
|
+
name: "fusion-lower-third",
|
|
1735
|
+
description: "Build a name/title chyron natively in DaVinci Resolve's Fusion via fusion_comp — Background + TextPlus + Merge node graph, wiring, styling, lower-third positioning, keyframed fade in/out. Resolve Studio only; cross-host fallback is write_lower_third + burn_subtitles.",
|
|
1736
|
+
content: FUSION_LOWER_THIRD,
|
|
1737
|
+
},
|
|
1738
|
+
"keyframing-and-titles": {
|
|
1739
|
+
name: "keyframing-and-titles",
|
|
1740
|
+
description: "Recipes for the seven scripting gaps neither Resolve nor Premiere expose: timeline reorder, multi-track lanes, lower-thirds + title cards (ASS), keyframed opacity/position/volume ramps, audio mixing chains (EQ + comp + gate + de-esser + limiter), speed ramps, Ken-Burns, named transitions (smash-cut, whip-pan, dip-to-black).",
|
|
1741
|
+
content: KEYFRAMING_AND_TITLES,
|
|
1742
|
+
},
|
|
767
1743
|
"long-form-content-edit": {
|
|
768
1744
|
name: "long-form-content-edit",
|
|
769
|
-
description: "Recipe for podcasts, interviews, vlogs, courses, talking-head. Five-pass method: utterance segmentation → take detection → filler removal → incomplete-sentence trim → silence normalization. Wires
|
|
1745
|
+
description: "Recipe for podcasts, interviews, vlogs, courses, talking-head. Five-pass method: utterance segmentation → take detection → filler removal → incomplete-sentence trim → silence normalization. Wires transcribe, cluster_takes, detect_silence, write_edl, import_edl, write_srt, add_marker into a single workflow.",
|
|
770
1746
|
content: LONG_FORM_CONTENT_EDIT,
|
|
771
1747
|
},
|
|
772
1748
|
"short-form-content-edit": {
|
|
@@ -774,25 +1750,30 @@ export const SKILLS = {
|
|
|
774
1750
|
description: "Recipe for TikTok / Reels / Shorts. Find the moment → reformat 9:16 → hook the first 2 seconds → burn captions → render. Uses reformat_timeline, import_edl, set_clip_speed, write_srt, import_subtitles, open_page (Resolve).",
|
|
775
1751
|
content: SHORT_FORM_CONTENT_EDIT,
|
|
776
1752
|
},
|
|
777
|
-
"chapter-markers": {
|
|
778
|
-
name: "chapter-markers",
|
|
779
|
-
description: "Recipe for YouTube/podcast chapter timestamps. Reads transcript in 90s windows, identifies topic shifts, drops purple markers, and emits a YouTube-formatted description block. Constraints: first chapter at 00:00, 5–15 chapters, ≥30s apart.",
|
|
780
|
-
content: CHAPTER_MARKERS,
|
|
781
|
-
},
|
|
782
|
-
"keyframing-and-titles": {
|
|
783
|
-
name: "keyframing-and-titles",
|
|
784
|
-
description: "Recipes for the seven gaps neither Resolve nor Premiere expose via scripting: timeline reordering, multi-track / lane composition, lower-thirds and title cards (via ASS), keyframed opacity / position / volume ramps, audio mixing chains (EQ + comp + gate + de-esser + limiter), speed ramps, Ken-Burns on stills, and named transitions (smash-cut, whip-pan, dip-to-black). Wires reorder_timeline, compose_layered, write_lower_third, write_title_card, mix_audio, speed_ramp, ken_burns, transition_videos.",
|
|
785
|
-
content: KEYFRAMING_AND_TITLES,
|
|
786
|
-
},
|
|
787
1753
|
"skin-tone-matching": {
|
|
788
1754
|
name: "skin-tone-matching",
|
|
789
|
-
description: "
|
|
1755
|
+
description: "Match faces across clips when host scripting can't reach power windows or qualifiers. Two paths: grade_skin_tones (file-only — bakes a vision-derived colorbalance + selectivecolor + eq into a new mp4, pair with replace_clip) and match_clip_color (Resolve only — derives the same grade as a CDL via set_primary_correction).",
|
|
790
1756
|
content: SKIN_TONE_MATCHING,
|
|
791
1757
|
},
|
|
792
|
-
"
|
|
793
|
-
name: "
|
|
794
|
-
description: "
|
|
795
|
-
content:
|
|
1758
|
+
"viral-hook-patterns": {
|
|
1759
|
+
name: "viral-hook-patterns",
|
|
1760
|
+
description: "Hook patterns sourced from primary creators (Jenny Hoyos on the official YouTube Blog, the leaked MrBeast production manual, Paddy Galloway's data analyses) — not generic creator-folklore. Read when analyze_hook fails, when picking a find_viral_moments candidate, or when generate_youtube_metadata needs a punchier title. Each pattern names a real creator example, the primary source, and the failure mode.",
|
|
1761
|
+
content: VIRAL_HOOK_PATTERNS,
|
|
1762
|
+
},
|
|
1763
|
+
"youtube-algorithm-primer": {
|
|
1764
|
+
name: "youtube-algorithm-primer",
|
|
1765
|
+
description: "How YouTube actually ranks videos in 2024–2026, sourced from Creator Insider, the YouTube Liaison (Rene Ritchie), Senior Director of Growth Todd Beaupré, Paddy Galloway, and the Retention Rabbit 2025 benchmark study. Read when generating titles/descriptions/chapters or when a video is underperforming. Numbers without a primary YouTube source are flagged as third-party heuristics.",
|
|
1766
|
+
content: YOUTUBE_ALGORITHM_PRIMER,
|
|
1767
|
+
},
|
|
1768
|
+
"youtube-end-to-end": {
|
|
1769
|
+
name: "youtube-end-to-end",
|
|
1770
|
+
description: "Orchestrator for \"make me a YouTube video from this footage\" using a TIMELINE-FIRST workflow. The agent edits the live Resolve/Premiere timeline so the user can scrub, tweak, and undo at every stage. Renders only happen at the end on explicit user intent (\"render\" / \"export\" / \"ship it\"). When host=none, falls back to file-only delivery and says so up front. Covers long-form, Shorts, captions, retention pipeline, and the metadata bundle.",
|
|
1771
|
+
content: YOUTUBE_END_TO_END,
|
|
1772
|
+
},
|
|
1773
|
+
"youtube-thumbnail-design": {
|
|
1774
|
+
name: "youtube-thumbnail-design",
|
|
1775
|
+
description: "Thumbnail design rules sourced from a 300K-video study (1of10 Media via Search Engine Journal Dec 2025), the official YouTube Test & Compare guidance from Rene Ritchie (July 2025), and creator strategists. Read before composing thumbnails or picking variants from compose_thumbnail_variants. Numbers are tagged with their source so the agent doesn't misquote.",
|
|
1776
|
+
content: YOUTUBE_THUMBNAIL_DESIGN,
|
|
796
1777
|
},
|
|
797
1778
|
};
|
|
798
1779
|
export const SKILL_NAMES = Object.keys(SKILLS);
|