@kenkaiiii/gg-editor 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (446)
  1. package/dist/cli.js +64 -1
  2. package/dist/cli.js.map +1 -1
  3. package/dist/core/audio-mix.d.ts.map +1 -1
  4. package/dist/core/audio-mix.js +8 -1
  5. package/dist/core/audio-mix.js.map +1 -1
  6. package/dist/core/audio-mix.test.js +1 -1
  7. package/dist/core/audio-mix.test.js.map +1 -1
  8. package/dist/core/auth/api-keys.d.ts +1 -1
  9. package/dist/core/auth/api-keys.d.ts.map +1 -1
  10. package/dist/core/auth/api-keys.js +2 -1
  11. package/dist/core/auth/api-keys.js.map +1 -1
  12. package/dist/core/auth/login.d.ts.map +1 -1
  13. package/dist/core/auth/login.js.map +1 -1
  14. package/dist/core/beats.d.ts +59 -0
  15. package/dist/core/beats.d.ts.map +1 -0
  16. package/dist/core/beats.js +122 -0
  17. package/dist/core/beats.js.map +1 -0
  18. package/dist/core/beats.test.d.ts +2 -0
  19. package/dist/core/beats.test.d.ts.map +1 -0
  20. package/dist/core/beats.test.js +86 -0
  21. package/dist/core/beats.test.js.map +1 -0
  22. package/dist/core/brand-kit.d.ts +80 -0
  23. package/dist/core/brand-kit.d.ts.map +1 -0
  24. package/dist/core/brand-kit.js +96 -0
  25. package/dist/core/brand-kit.js.map +1 -0
  26. package/dist/core/brand-kit.test.d.ts +2 -0
  27. package/dist/core/brand-kit.test.d.ts.map +1 -0
  28. package/dist/core/brand-kit.test.js +76 -0
  29. package/dist/core/brand-kit.test.js.map +1 -0
  30. package/dist/core/bundled-sfx.d.ts +64 -0
  31. package/dist/core/bundled-sfx.d.ts.map +1 -0
  32. package/dist/core/bundled-sfx.js +218 -0
  33. package/dist/core/bundled-sfx.js.map +1 -0
  34. package/dist/core/bundled-sfx.test.d.ts +2 -0
  35. package/dist/core/bundled-sfx.test.d.ts.map +1 -0
  36. package/dist/core/bundled-sfx.test.js +81 -0
  37. package/dist/core/bundled-sfx.test.js.map +1 -0
  38. package/dist/core/child-abort.d.ts +57 -0
  39. package/dist/core/child-abort.d.ts.map +1 -0
  40. package/dist/core/child-abort.js +95 -0
  41. package/dist/core/child-abort.js.map +1 -0
  42. package/dist/core/child-abort.test.d.ts +2 -0
  43. package/dist/core/child-abort.test.d.ts.map +1 -0
  44. package/dist/core/child-abort.test.js +88 -0
  45. package/dist/core/child-abort.test.js.map +1 -0
  46. package/dist/core/clip-scoring.d.ts +44 -0
  47. package/dist/core/clip-scoring.d.ts.map +1 -0
  48. package/dist/core/clip-scoring.js +165 -0
  49. package/dist/core/clip-scoring.js.map +1 -0
  50. package/dist/core/clip-scoring.test.d.ts +2 -0
  51. package/dist/core/clip-scoring.test.d.ts.map +1 -0
  52. package/dist/core/clip-scoring.test.js +113 -0
  53. package/dist/core/clip-scoring.test.js.map +1 -0
  54. package/dist/core/emoji-captions.d.ts +45 -0
  55. package/dist/core/emoji-captions.d.ts.map +1 -0
  56. package/dist/core/emoji-captions.js +121 -0
  57. package/dist/core/emoji-captions.js.map +1 -0
  58. package/dist/core/face-reframe.d.ts +91 -0
  59. package/dist/core/face-reframe.d.ts.map +1 -0
  60. package/dist/core/face-reframe.js +141 -0
  61. package/dist/core/face-reframe.js.map +1 -0
  62. package/dist/core/face-reframe.test.d.ts +2 -0
  63. package/dist/core/face-reframe.test.d.ts.map +1 -0
  64. package/dist/core/face-reframe.test.js +171 -0
  65. package/dist/core/face-reframe.test.js.map +1 -0
  66. package/dist/core/filler-words.d.ts +57 -9
  67. package/dist/core/filler-words.d.ts.map +1 -1
  68. package/dist/core/filler-words.js +61 -9
  69. package/dist/core/filler-words.js.map +1 -1
  70. package/dist/core/filler-words.test.js +91 -17
  71. package/dist/core/filler-words.test.js.map +1 -1
  72. package/dist/core/hook-rewrite.d.ts +48 -0
  73. package/dist/core/hook-rewrite.d.ts.map +1 -0
  74. package/dist/core/hook-rewrite.js +151 -0
  75. package/dist/core/hook-rewrite.js.map +1 -0
  76. package/dist/core/hook-rewrite.test.d.ts +2 -0
  77. package/dist/core/hook-rewrite.test.d.ts.map +1 -0
  78. package/dist/core/hook-rewrite.test.js +58 -0
  79. package/dist/core/hook-rewrite.test.js.map +1 -0
  80. package/dist/core/hosts/lazy.d.ts.map +1 -1
  81. package/dist/core/hosts/lazy.js +2 -0
  82. package/dist/core/hosts/lazy.js.map +1 -1
  83. package/dist/core/hosts/premiere/adapter.d.ts +1 -0
  84. package/dist/core/hosts/premiere/adapter.d.ts.map +1 -1
  85. package/dist/core/hosts/premiere/adapter.js.map +1 -1
  86. package/dist/core/hosts/premiere/bridge-source.d.ts.map +1 -1
  87. package/dist/core/hosts/premiere/bridge-source.js +6 -3
  88. package/dist/core/hosts/premiere/bridge-source.js.map +1 -1
  89. package/dist/core/hosts/resolve/adapter.d.ts +1 -0
  90. package/dist/core/hosts/resolve/adapter.d.ts.map +1 -1
  91. package/dist/core/hosts/resolve/adapter.js.map +1 -1
  92. package/dist/core/hosts/resolve/bridge-source.d.ts.map +1 -1
  93. package/dist/core/hosts/resolve/bridge-source.js +31 -4
  94. package/dist/core/hosts/resolve/bridge-source.js.map +1 -1
  95. package/dist/core/hosts/resolve/bridge.d.ts +2 -19
  96. package/dist/core/hosts/resolve/bridge.d.ts.map +1 -1
  97. package/dist/core/hosts/resolve/bridge.js +70 -41
  98. package/dist/core/hosts/resolve/bridge.js.map +1 -1
  99. package/dist/core/hosts/resolve/bridge.test.js +130 -0
  100. package/dist/core/hosts/resolve/bridge.test.js.map +1 -1
  101. package/dist/core/hosts/types.d.ts +6 -0
  102. package/dist/core/hosts/types.d.ts.map +1 -1
  103. package/dist/core/hosts/types.js.map +1 -1
  104. package/dist/core/logger.d.ts +32 -0
  105. package/dist/core/logger.d.ts.map +1 -0
  106. package/dist/core/logger.js +188 -0
  107. package/dist/core/logger.js.map +1 -0
  108. package/dist/core/loop-match.d.ts +57 -0
  109. package/dist/core/loop-match.d.ts.map +1 -0
  110. package/dist/core/loop-match.js +91 -0
  111. package/dist/core/loop-match.js.map +1 -0
  112. package/dist/core/media/ffmpeg.d.ts.map +1 -1
  113. package/dist/core/media/ffmpeg.js +14 -3
  114. package/dist/core/media/ffmpeg.js.map +1 -1
  115. package/dist/core/multi-format.d.ts +67 -0
  116. package/dist/core/multi-format.d.ts.map +1 -0
  117. package/dist/core/multi-format.js +127 -0
  118. package/dist/core/multi-format.js.map +1 -0
  119. package/dist/core/multi-format.test.d.ts +2 -0
  120. package/dist/core/multi-format.test.d.ts.map +1 -0
  121. package/dist/core/multi-format.test.js +151 -0
  122. package/dist/core/multi-format.test.js.map +1 -0
  123. package/dist/core/python/beats.py +61 -0
  124. package/dist/core/python/face_reframe.py +163 -0
  125. package/dist/core/python/sidecar-path.d.ts +13 -0
  126. package/dist/core/python/sidecar-path.d.ts.map +1 -0
  127. package/dist/core/python/sidecar-path.js +24 -0
  128. package/dist/core/python/sidecar-path.js.map +1 -0
  129. package/dist/core/python.d.ts +57 -0
  130. package/dist/core/python.d.ts.map +1 -0
  131. package/dist/core/python.js +107 -0
  132. package/dist/core/python.js.map +1 -0
  133. package/dist/core/python.test.d.ts +2 -0
  134. package/dist/core/python.test.d.ts.map +1 -0
  135. package/dist/core/python.test.js +129 -0
  136. package/dist/core/python.test.js.map +1 -0
  137. package/dist/core/retention-structure.d.ts +81 -0
  138. package/dist/core/retention-structure.d.ts.map +1 -0
  139. package/dist/core/retention-structure.js +206 -0
  140. package/dist/core/retention-structure.js.map +1 -0
  141. package/dist/core/retention-structure.test.d.ts +2 -0
  142. package/dist/core/retention-structure.test.d.ts.map +1 -0
  143. package/dist/core/retention-structure.test.js +88 -0
  144. package/dist/core/retention-structure.test.js.map +1 -0
  145. package/dist/core/review.d.ts +17 -0
  146. package/dist/core/review.d.ts.map +1 -1
  147. package/dist/core/review.js +20 -24
  148. package/dist/core/review.js.map +1 -1
  149. package/dist/core/safe-paths.d.ts +11 -0
  150. package/dist/core/safe-paths.d.ts.map +1 -1
  151. package/dist/core/safe-paths.js +26 -10
  152. package/dist/core/safe-paths.js.map +1 -1
  153. package/dist/core/safe-paths.test.js +16 -0
  154. package/dist/core/safe-paths.test.js.map +1 -1
  155. package/dist/core/skills-loader.d.ts +48 -2
  156. package/dist/core/skills-loader.d.ts.map +1 -1
  157. package/dist/core/skills-loader.js +97 -19
  158. package/dist/core/skills-loader.js.map +1 -1
  159. package/dist/core/skills-loader.test.js +63 -1
  160. package/dist/core/skills-loader.test.js.map +1 -1
  161. package/dist/core/srt.d.ts +42 -7
  162. package/dist/core/srt.d.ts.map +1 -1
  163. package/dist/core/srt.js +101 -32
  164. package/dist/core/srt.js.map +1 -1
  165. package/dist/core/srt.test.js +54 -1
  166. package/dist/core/srt.test.js.map +1 -1
  167. package/dist/core/thumbnail-compose.d.ts +58 -0
  168. package/dist/core/thumbnail-compose.d.ts.map +1 -0
  169. package/dist/core/thumbnail-compose.js +101 -0
  170. package/dist/core/thumbnail-compose.js.map +1 -0
  171. package/dist/core/thumbnail-promise.d.ts +46 -0
  172. package/dist/core/thumbnail-promise.d.ts.map +1 -0
  173. package/dist/core/thumbnail-promise.js +133 -0
  174. package/dist/core/thumbnail-promise.js.map +1 -0
  175. package/dist/core/thumbnail-promise.test.d.ts +2 -0
  176. package/dist/core/thumbnail-promise.test.d.ts.map +1 -0
  177. package/dist/core/thumbnail-promise.test.js +52 -0
  178. package/dist/core/thumbnail-promise.test.js.map +1 -0
  179. package/dist/core/viral-moments.d.ts +70 -0
  180. package/dist/core/viral-moments.d.ts.map +1 -0
  181. package/dist/core/viral-moments.js +192 -0
  182. package/dist/core/viral-moments.js.map +1 -0
  183. package/dist/core/viral-moments.test.d.ts +2 -0
  184. package/dist/core/viral-moments.test.d.ts.map +1 -0
  185. package/dist/core/viral-moments.test.js +153 -0
  186. package/dist/core/viral-moments.test.js.map +1 -0
  187. package/dist/core/whisper.d.ts +16 -0
  188. package/dist/core/whisper.d.ts.map +1 -1
  189. package/dist/core/whisper.js +72 -5
  190. package/dist/core/whisper.js.map +1 -1
  191. package/dist/core/whisper.test.js +111 -1
  192. package/dist/core/whisper.test.js.map +1 -1
  193. package/dist/core/youtube-metadata.d.ts +44 -0
  194. package/dist/core/youtube-metadata.d.ts.map +1 -0
  195. package/dist/core/youtube-metadata.js +168 -0
  196. package/dist/core/youtube-metadata.js.map +1 -0
  197. package/dist/core/youtube-metadata.test.d.ts +2 -0
  198. package/dist/core/youtube-metadata.test.d.ts.map +1 -0
  199. package/dist/core/youtube-metadata.test.js +132 -0
  200. package/dist/core/youtube-metadata.test.js.map +1 -0
  201. package/dist/prompt-commands.d.ts +24 -0
  202. package/dist/prompt-commands.d.ts.map +1 -0
  203. package/dist/prompt-commands.js +243 -0
  204. package/dist/prompt-commands.js.map +1 -0
  205. package/dist/prompt-commands.test.d.ts +2 -0
  206. package/dist/prompt-commands.test.d.ts.map +1 -0
  207. package/dist/prompt-commands.test.js +46 -0
  208. package/dist/prompt-commands.test.js.map +1 -0
  209. package/dist/skills.d.ts +6 -6
  210. package/dist/skills.d.ts.map +1 -1
  211. package/dist/skills.js +1426 -445
  212. package/dist/skills.js.map +1 -1
  213. package/dist/system-prompt.d.ts.map +1 -1
  214. package/dist/system-prompt.js +108 -0
  215. package/dist/system-prompt.js.map +1 -1
  216. package/dist/tools/add-fades.d.ts.map +1 -1
  217. package/dist/tools/add-fades.js +2 -1
  218. package/dist/tools/add-fades.js.map +1 -1
  219. package/dist/tools/add-sfx-at-cuts.d.ts.map +1 -1
  220. package/dist/tools/add-sfx-at-cuts.js +36 -11
  221. package/dist/tools/add-sfx-at-cuts.js.map +1 -1
  222. package/dist/tools/add-sfx-to-timeline.d.ts +34 -0
  223. package/dist/tools/add-sfx-to-timeline.d.ts.map +1 -0
  224. package/dist/tools/add-sfx-to-timeline.js +169 -0
  225. package/dist/tools/add-sfx-to-timeline.js.map +1 -0
  226. package/dist/tools/add-sfx-to-timeline.test.d.ts +2 -0
  227. package/dist/tools/add-sfx-to-timeline.test.d.ts.map +1 -0
  228. package/dist/tools/add-sfx-to-timeline.test.js +181 -0
  229. package/dist/tools/add-sfx-to-timeline.test.js.map +1 -0
  230. package/dist/tools/audit-first-frame.d.ts +36 -0
  231. package/dist/tools/audit-first-frame.d.ts.map +1 -0
  232. package/dist/tools/audit-first-frame.js +181 -0
  233. package/dist/tools/audit-first-frame.js.map +1 -0
  234. package/dist/tools/audit-retention-structure.d.ts +20 -0
  235. package/dist/tools/audit-retention-structure.d.ts.map +1 -0
  236. package/dist/tools/audit-retention-structure.js +95 -0
  237. package/dist/tools/audit-retention-structure.js.map +1 -0
  238. package/dist/tools/audit-retention-structure.test.d.ts +2 -0
  239. package/dist/tools/audit-retention-structure.test.d.ts.map +1 -0
  240. package/dist/tools/audit-retention-structure.test.js +93 -0
  241. package/dist/tools/audit-retention-structure.test.js.map +1 -0
  242. package/dist/tools/bleep-words.d.ts +59 -0
  243. package/dist/tools/bleep-words.d.ts.map +1 -0
  244. package/dist/tools/bleep-words.js +211 -0
  245. package/dist/tools/bleep-words.js.map +1 -0
  246. package/dist/tools/bleep-words.test.d.ts +2 -0
  247. package/dist/tools/bleep-words.test.d.ts.map +1 -0
  248. package/dist/tools/bleep-words.test.js +96 -0
  249. package/dist/tools/bleep-words.test.js.map +1 -0
  250. package/dist/tools/burn-subtitles.d.ts.map +1 -1
  251. package/dist/tools/burn-subtitles.js +10 -5
  252. package/dist/tools/burn-subtitles.js.map +1 -1
  253. package/dist/tools/clean-audio.d.ts.map +1 -1
  254. package/dist/tools/clean-audio.js +2 -1
  255. package/dist/tools/clean-audio.js.map +1 -1
  256. package/dist/tools/cluster-takes.js +2 -1
  257. package/dist/tools/cluster-takes.js.map +1 -1
  258. package/dist/tools/compose-thumbnail-variants.d.ts +70 -0
  259. package/dist/tools/compose-thumbnail-variants.d.ts.map +1 -0
  260. package/dist/tools/compose-thumbnail-variants.js +274 -0
  261. package/dist/tools/compose-thumbnail-variants.js.map +1 -0
  262. package/dist/tools/compose-thumbnail.d.ts +6 -13
  263. package/dist/tools/compose-thumbnail.d.ts.map +1 -1
  264. package/dist/tools/compose-thumbnail.js +44 -81
  265. package/dist/tools/compose-thumbnail.js.map +1 -1
  266. package/dist/tools/concat-videos.d.ts.map +1 -1
  267. package/dist/tools/concat-videos.js +12 -5
  268. package/dist/tools/concat-videos.js.map +1 -1
  269. package/dist/tools/concat-videos.test.d.ts +2 -0
  270. package/dist/tools/concat-videos.test.d.ts.map +1 -0
  271. package/dist/tools/concat-videos.test.js +103 -0
  272. package/dist/tools/concat-videos.test.js.map +1 -0
  273. package/dist/tools/crossfade-videos.d.ts.map +1 -1
  274. package/dist/tools/crossfade-videos.js +2 -1
  275. package/dist/tools/crossfade-videos.js.map +1 -1
  276. package/dist/tools/cut-filler-words.d.ts.map +1 -1
  277. package/dist/tools/cut-filler-words.js +24 -8
  278. package/dist/tools/cut-filler-words.js.map +1 -1
  279. package/dist/tools/detect-speaker-changes.js +2 -1
  280. package/dist/tools/detect-speaker-changes.js.map +1 -1
  281. package/dist/tools/extract-audio.d.ts.map +1 -1
  282. package/dist/tools/extract-audio.js +13 -7
  283. package/dist/tools/extract-audio.js.map +1 -1
  284. package/dist/tools/face-reframe.d.ts +30 -0
  285. package/dist/tools/face-reframe.d.ts.map +1 -0
  286. package/dist/tools/face-reframe.js +143 -0
  287. package/dist/tools/face-reframe.js.map +1 -0
  288. package/dist/tools/face-reframe.test.d.ts +2 -0
  289. package/dist/tools/face-reframe.test.d.ts.map +1 -0
  290. package/dist/tools/face-reframe.test.js +139 -0
  291. package/dist/tools/face-reframe.test.js.map +1 -0
  292. package/dist/tools/find-viral-moments.d.ts +23 -0
  293. package/dist/tools/find-viral-moments.d.ts.map +1 -0
  294. package/dist/tools/find-viral-moments.js +176 -0
  295. package/dist/tools/find-viral-moments.js.map +1 -0
  296. package/dist/tools/find-viral-moments.test.d.ts +2 -0
  297. package/dist/tools/find-viral-moments.test.d.ts.map +1 -0
  298. package/dist/tools/find-viral-moments.test.js +144 -0
  299. package/dist/tools/find-viral-moments.test.js.map +1 -0
  300. package/dist/tools/generate-gif.d.ts.map +1 -1
  301. package/dist/tools/generate-gif.js +47 -40
  302. package/dist/tools/generate-gif.js.map +1 -1
  303. package/dist/tools/generate-gif.test.d.ts +2 -0
  304. package/dist/tools/generate-gif.test.d.ts.map +1 -0
  305. package/dist/tools/generate-gif.test.js +115 -0
  306. package/dist/tools/generate-gif.test.js.map +1 -0
  307. package/dist/tools/generate-outro.d.ts +18 -0
  308. package/dist/tools/generate-outro.d.ts.map +1 -0
  309. package/dist/tools/generate-outro.js +175 -0
  310. package/dist/tools/generate-outro.js.map +1 -0
  311. package/dist/tools/generate-youtube-metadata.d.ts +23 -0
  312. package/dist/tools/generate-youtube-metadata.d.ts.map +1 -0
  313. package/dist/tools/generate-youtube-metadata.js +103 -0
  314. package/dist/tools/generate-youtube-metadata.js.map +1 -0
  315. package/dist/tools/generate-youtube-metadata.test.d.ts +2 -0
  316. package/dist/tools/generate-youtube-metadata.test.d.ts.map +1 -0
  317. package/dist/tools/generate-youtube-metadata.test.js +118 -0
  318. package/dist/tools/generate-youtube-metadata.test.js.map +1 -0
  319. package/dist/tools/index.d.ts +14 -0
  320. package/dist/tools/index.d.ts.map +1 -1
  321. package/dist/tools/index.js +130 -1
  322. package/dist/tools/index.js.map +1 -1
  323. package/dist/tools/index.test.js +27 -1
  324. package/dist/tools/index.test.js.map +1 -1
  325. package/dist/tools/ken-burns.d.ts.map +1 -1
  326. package/dist/tools/ken-burns.js +2 -1
  327. package/dist/tools/ken-burns.js.map +1 -1
  328. package/dist/tools/loop-match-short.d.ts +22 -0
  329. package/dist/tools/loop-match-short.d.ts.map +1 -0
  330. package/dist/tools/loop-match-short.js +107 -0
  331. package/dist/tools/loop-match-short.js.map +1 -0
  332. package/dist/tools/mix-audio.d.ts.map +1 -1
  333. package/dist/tools/mix-audio.js +2 -1
  334. package/dist/tools/mix-audio.js.map +1 -1
  335. package/dist/tools/normalize-loudness.d.ts.map +1 -1
  336. package/dist/tools/normalize-loudness.js +2 -1
  337. package/dist/tools/normalize-loudness.js.map +1 -1
  338. package/dist/tools/path-traversal.test.d.ts +15 -0
  339. package/dist/tools/path-traversal.test.d.ts.map +1 -0
  340. package/dist/tools/path-traversal.test.js +223 -0
  341. package/dist/tools/path-traversal.test.js.map +1 -0
  342. package/dist/tools/pick-best-takes.js +2 -1
  343. package/dist/tools/pick-best-takes.js.map +1 -1
  344. package/dist/tools/punch-in.d.ts.map +1 -1
  345. package/dist/tools/punch-in.js +2 -1
  346. package/dist/tools/punch-in.js.map +1 -1
  347. package/dist/tools/read-transcript.js +2 -1
  348. package/dist/tools/read-transcript.js.map +1 -1
  349. package/dist/tools/render-multi-format.d.ts +35 -0
  350. package/dist/tools/render-multi-format.d.ts.map +1 -0
  351. package/dist/tools/render-multi-format.js +206 -0
  352. package/dist/tools/render-multi-format.js.map +1 -0
  353. package/dist/tools/render-multi-format.test.d.ts +2 -0
  354. package/dist/tools/render-multi-format.test.d.ts.map +1 -0
  355. package/dist/tools/render-multi-format.test.js +312 -0
  356. package/dist/tools/render-multi-format.test.js.map +1 -0
  357. package/dist/tools/render.d.ts.map +1 -1
  358. package/dist/tools/render.js +2 -2
  359. package/dist/tools/render.js.map +1 -1
  360. package/dist/tools/rewrite-hook.d.ts +32 -0
  361. package/dist/tools/rewrite-hook.d.ts.map +1 -0
  362. package/dist/tools/rewrite-hook.js +65 -0
  363. package/dist/tools/rewrite-hook.js.map +1 -0
  364. package/dist/tools/score-clip.d.ts +30 -0
  365. package/dist/tools/score-clip.d.ts.map +1 -0
  366. package/dist/tools/score-clip.js +109 -0
  367. package/dist/tools/score-clip.js.map +1 -0
  368. package/dist/tools/score-clip.test.d.ts +2 -0
  369. package/dist/tools/score-clip.test.d.ts.map +1 -0
  370. package/dist/tools/score-clip.test.js +110 -0
  371. package/dist/tools/score-clip.test.js.map +1 -0
  372. package/dist/tools/search-tools.d.ts +34 -0
  373. package/dist/tools/search-tools.d.ts.map +1 -0
  374. package/dist/tools/search-tools.js +86 -0
  375. package/dist/tools/search-tools.js.map +1 -0
  376. package/dist/tools/search-tools.test.d.ts +2 -0
  377. package/dist/tools/search-tools.test.d.ts.map +1 -0
  378. package/dist/tools/search-tools.test.js +60 -0
  379. package/dist/tools/search-tools.test.js.map +1 -0
  380. package/dist/tools/snap-cuts-to-beats.d.ts +18 -0
  381. package/dist/tools/snap-cuts-to-beats.d.ts.map +1 -0
  382. package/dist/tools/snap-cuts-to-beats.js +110 -0
  383. package/dist/tools/snap-cuts-to-beats.js.map +1 -0
  384. package/dist/tools/snap-cuts-to-beats.test.d.ts +2 -0
  385. package/dist/tools/snap-cuts-to-beats.test.d.ts.map +1 -0
  386. package/dist/tools/snap-cuts-to-beats.test.js +99 -0
  387. package/dist/tools/snap-cuts-to-beats.test.js.map +1 -0
  388. package/dist/tools/speed-ramp.d.ts.map +1 -1
  389. package/dist/tools/speed-ramp.js +2 -1
  390. package/dist/tools/speed-ramp.js.map +1 -1
  391. package/dist/tools/stabilize-video.d.ts.map +1 -1
  392. package/dist/tools/stabilize-video.js +2 -1
  393. package/dist/tools/stabilize-video.js.map +1 -1
  394. package/dist/tools/suggest-broll.d.ts +34 -0
  395. package/dist/tools/suggest-broll.d.ts.map +1 -0
  396. package/dist/tools/suggest-broll.js +367 -0
  397. package/dist/tools/suggest-broll.js.map +1 -0
  398. package/dist/tools/suggest-broll.test.d.ts +2 -0
  399. package/dist/tools/suggest-broll.test.d.ts.map +1 -0
  400. package/dist/tools/suggest-broll.test.js +217 -0
  401. package/dist/tools/suggest-broll.test.js.map +1 -0
  402. package/dist/tools/text-based-cut.d.ts +33 -0
  403. package/dist/tools/text-based-cut.d.ts.map +1 -0
  404. package/dist/tools/text-based-cut.js +172 -0
  405. package/dist/tools/text-based-cut.js.map +1 -0
  406. package/dist/tools/text-based-cut.test.d.ts +2 -0
  407. package/dist/tools/text-based-cut.test.d.ts.map +1 -0
  408. package/dist/tools/text-based-cut.test.js +32 -0
  409. package/dist/tools/text-based-cut.test.js.map +1 -0
  410. package/dist/tools/transcribe.d.ts +1 -1
  411. package/dist/tools/transition-videos.d.ts +1 -1
  412. package/dist/tools/transition-videos.d.ts.map +1 -1
  413. package/dist/tools/transition-videos.js +2 -1
  414. package/dist/tools/transition-videos.js.map +1 -1
  415. package/dist/tools/trim-dead-air.d.ts +59 -0
  416. package/dist/tools/trim-dead-air.d.ts.map +1 -0
  417. package/dist/tools/trim-dead-air.js +215 -0
  418. package/dist/tools/trim-dead-air.js.map +1 -0
  419. package/dist/tools/trim-dead-air.test.d.ts +2 -0
  420. package/dist/tools/trim-dead-air.test.d.ts.map +1 -0
  421. package/dist/tools/trim-dead-air.test.js +75 -0
  422. package/dist/tools/trim-dead-air.test.js.map +1 -0
  423. package/dist/tools/verify-thumbnail-promise.d.ts +33 -0
  424. package/dist/tools/verify-thumbnail-promise.d.ts.map +1 -0
  425. package/dist/tools/verify-thumbnail-promise.js +112 -0
  426. package/dist/tools/verify-thumbnail-promise.js.map +1 -0
  427. package/dist/tools/verify-thumbnail-promise.test.d.ts +2 -0
  428. package/dist/tools/verify-thumbnail-promise.test.d.ts.map +1 -0
  429. package/dist/tools/verify-thumbnail-promise.test.js +38 -0
  430. package/dist/tools/verify-thumbnail-promise.test.js.map +1 -0
  431. package/dist/tools/write-keyword-captions.d.ts +7 -0
  432. package/dist/tools/write-keyword-captions.d.ts.map +1 -1
  433. package/dist/tools/write-keyword-captions.js +35 -4
  434. package/dist/tools/write-keyword-captions.js.map +1 -1
  435. package/dist/ui/App.d.ts.map +1 -1
  436. package/dist/ui/App.js +75 -11
  437. package/dist/ui/App.js.map +1 -1
  438. package/dist/ui/tool-formatters.d.ts +30 -0
  439. package/dist/ui/tool-formatters.d.ts.map +1 -0
  440. package/dist/ui/tool-formatters.js +461 -0
  441. package/dist/ui/tool-formatters.js.map +1 -0
  442. package/dist/ui/tool-formatters.test.d.ts +2 -0
  443. package/dist/ui/tool-formatters.test.d.ts.map +1 -0
  444. package/dist/ui/tool-formatters.test.js +143 -0
  445. package/dist/ui/tool-formatters.test.js.map +1 -0
  446. package/package.json +10 -9
package/dist/skills.js CHANGED
@@ -1,236 +1,19 @@
1
1
  /**
2
- * Bundled skill markdowns. Embedded as TS string constants so they ship in the
3
- * compiled package without depending on disk layout. Authored in
4
- * src/skills/*.md regenerate this file by running `node build-skills.mjs`
5
- * from the package root if you edit the source markdowns.
2
+ * Bundled skill markdowns. Auto-generated from src/skills/*.md by
3
+ * scripts/build-skills.mjs — DO NOT EDIT BY HAND. Add a new skill by
4
+ * dropping a .md file in src/skills/ (with optional YAML frontmatter)
5
+ * and re-running `node scripts/build-skills.mjs`.
6
6
  *
7
- * Skills are exposed through the read_skill tool; their descriptions live in
8
- * the system prompt. Pattern follows the Anthropic skills convention:
7
+ * Skills are exposed through the read_skill tool; their descriptions live
8
+ * in the system prompt. Pattern follows the Anthropic skill convention:
9
9
  * description in the prompt, full content on demand.
10
10
  */
11
- const LONG_FORM_CONTENT_EDIT = `# long-form-content-edit
12
-
13
- **When to use:** podcasts, interviews, vlogs, courses, talking-head anything
14
- where a person speaks for >5 minutes and the editorial work is take-selection,
15
- filler removal, silence trimming, and pacing.
16
-
17
- **Goal:** turn a raw recording into a tight, watchable cut without losing the
18
- speaker's voice or the moments that matter. Captions are non-negotiable.
19
-
20
- ---
21
-
22
- ## The 5-pass method
23
-
24
- These run in order. Each pass narrows the cut. Don't skip — passes 1–2 are
25
- where 80% of the time savings live.
26
-
27
- ### Pass 1 — Utterance segmentation
28
-
29
- \`\`\`
30
- probe_media(input) → fps, duration
31
- extract_audio(input, audio.wav, 16000)
32
- transcribe(audio.wav, transcript.json) → segment-level transcript
33
- \`\`\`
34
-
35
- Now you have a segment list keyed by start/end seconds. Treat each segment as
36
- the smallest editorial unit. Don't cut inside a segment unless the speaker
37
- changes mid-segment.
38
-
39
- ### Pass 2 — Take detection
40
-
41
- \`\`\`
42
- cluster_takes(transcript.json) → groups of similar segments
43
- \`\`\`
44
-
45
- Multi-member clusters mean the speaker re-took a line. Pick the winner per
46
- cluster:
47
-
48
- - **Default to the last take** — speaker had practice.
49
- - **Visual doubt** → \`score_shot(times=[mid of each member])\`, pick highest.
50
- - **Audio doubt** → \`read_transcript(startSec=A, endSec=B)\` to inspect.
51
- - Add a marker on each decision: \`add_marker(color="green", note="kept: take 3 of 3 — strongest delivery")\`.
52
-
53
- ### Pass 3 — Filler removal
54
-
55
- For each kept segment, look for these and add cut markers:
56
-
57
- - "um", "uh", "like" used as filler (not as comparison)
58
- - restart phrases: "so the thing is — actually, the thing is…"
59
- - mid-sentence aborts the speaker self-corrected past
60
-
61
- Mark each one with \`add_marker(color="red", note="cut: filler 'um'")\`.
62
-
63
- ### Pass 4 — Incomplete-sentence trim
64
-
65
- Drop segments that:
66
-
67
- - Trail off with no point ("…and yeah, anyway")
68
- - Start mid-thought because the previous take was kept
69
- - Repeat content already covered in a kept take
70
-
71
- \`add_marker(color="red", note="cut: incomplete; covered in earlier take")\`.
72
-
73
- ### Pass 5 — Silence normalization
74
-
75
- \`\`\`
76
- detect_silence(input) → frame-aligned KEEP ranges
77
- \`\`\`
78
-
79
- Use the KEEP ranges to remove dead air >1s. Don't kill all silence —
80
- breathing space matters for pacing. The default threshold usually leaves
81
- natural pauses intact.
82
-
83
- ---
84
-
85
- ## Final assembly
86
-
87
- Combine pass-2 winners + pass-3/4 surviving segments into a single decision
88
- list. Each entry is one EDL event.
89
-
90
- \`\`\`
91
- write_edl(events=decisions, frameRate=fps)
92
- import_edl(path)
93
- \`\`\`
94
-
95
- Then captions:
96
-
97
- \`\`\`
98
- write_srt(cues=transcript.segments mapped to start/end/text)
99
- import_subtitles(srtPath)
100
- \`\`\`
101
-
102
- For long-form: sidecar SRT (don't burn in) so YouTube/podcast players can
103
- toggle them. Mention this to the user.
104
-
105
- ---
106
-
107
- ## Red flags — pause and ask
108
-
109
- - Cluster has takes that are roughly equal quality — \`add_marker(color="red", note="PAUSE: which take? 1=A, 2=B")\` and stop.
110
- - Segment is editorial-content-bearing but has bad audio — flag, don't drop.
111
- - The user said "trim filler" but every "um" is intentional emphasis (rare but real) — confirm.
112
-
113
- ## Don't
114
-
115
- - Don't render until the user reviews the markers.
116
- - Don't read full transcript without \`startSec/endSec\` — context blow-up.
117
- - Don't cut inside a segment unless the speaker changes mid-segment.
118
- - Don't skip captions for long-form unless explicitly told to.
119
- `;
120
- const SHORT_FORM_CONTENT_EDIT = `# short-form-content-edit
121
-
122
- **When to use:** TikTok / Reels / Shorts / vertical clips. Source is usually
123
- a longer horizontal video the user wants reframed, captioned, hooked, and
124
- shipped.
125
-
126
- **Goal:** the first 2 seconds win or lose retention. The cut, the caption,
127
- and the hook all serve that one number.
128
-
129
- ---
130
-
131
- ## Recipe
132
-
133
- ### 1. Find the moment
134
-
135
- If the user gives you a horizontal video without timestamps, find the moment
136
- worth clipping:
137
-
138
- \`\`\`
139
- probe_media(input)
140
- extract_audio(input, audio.wav, 16000)
141
- transcribe(audio.wav, transcript.json)
142
- read_transcript(transcript.json, contains="<keyword from user>")
143
- \`\`\`
144
-
145
- Or for visual moments: \`score_shot(input, intervalSec=15)\` then inspect tops.
146
-
147
- Settle on a \`[startSec, endSec]\` window. Aim for **15–60 seconds** for shorts;
148
- 90s max for Reels.
149
-
150
- ### 2. Reformat to vertical
151
-
152
- Build the vertical timeline as FCPXML and import:
153
-
154
- \`\`\`
155
- reformat_timeline(
156
- output="vertical.fcpxml",
157
- preset="9:16",
158
- title="<short name>",
159
- frameRate=<source fps>,
160
- events=[{ reel, sourcePath, sourceInFrame, sourceOutFrame }]
161
- )
162
- import_edl("vertical.fcpxml")
163
- \`\`\`
164
-
165
- Then on Resolve Studio, switch to color page and prompt the user to apply
166
- Smart Reframe per clip:
167
-
168
- \`\`\`
169
- open_page("color")
170
- add_marker(color="yellow", note="apply Smart Reframe per clip (Resolve Studio: right-click clip → Smart Reframe)")
171
- \`\`\`
172
-
173
- Premiere users: prompt for Auto Reframe via the captions/effects panel.
174
-
175
- ### 3. Hook the first 2 seconds
176
-
177
- The hook lives in the first 60 frames. Options:
178
-
179
- - **Cold-open the punchline** — start at the most attention-grabbing line,
180
- not the setup. Use \`read_transcript\` to find it.
181
- - **Speed-up the intro** — \`set_clip_speed(clipId, speed=1.5)\` on the opening clip.
182
- - **Pre-roll text/marker** — \`add_marker(color="yellow", note="add hook text overlay: '<line from transcript>'")\` for the user to add.
183
-
184
- ### 4. Burned-in captions
185
-
186
- Vertical = burned-in (most viewers watch muted, native captions are tiny).
187
-
188
- \`\`\`
189
- write_srt(cues=transcript.segments_in_window)
190
- import_subtitles(srtPath)
191
- add_marker(color="yellow", note="style captions: large, center-bottom, high-contrast — burn in via Resolve subtitle track styling")
192
- \`\`\`
193
-
194
- If the user is on Resolve Studio, they can right-click the subtitle track →
195
- "Convert Subtitles to Text+" and style it. Note this to them.
196
-
197
- ### 5. Render
198
-
199
- Don't render until the user reviews. When they say "render":
200
-
201
- \`\`\`
202
- render(preset=<host preset>, output="<name>.mp4")
203
- \`\`\`
204
-
205
- Common presets: H.264 Master, YouTube 1080p (works for Shorts too),
206
- Vimeo 1080p.
207
-
11
+ const CHAPTER_MARKERS = `---
12
+ name: chapter-markers
13
+ description: Author YouTube/podcast chapter timestamps from a transcript — 5–15 chapters, first at 00:00, ≥30s apart, only at real topic shifts. Drops purple markers + emits a YouTube-formatted description block.
208
14
  ---
209
15
 
210
- ## Defaults for short-form
211
-
212
- | Knob | Default | Why |
213
- |---|---|---|
214
- | Aspect | 9:16 (TikTok/Reels/Shorts) | Most platforms |
215
- | Length | 15–60s | Algorithm sweet spot |
216
- | Captions | burned-in | Watched muted |
217
- | First 2s | the hook | Retention curve |
218
- | Music | not added by you | Style decision; ask the user |
219
-
220
- ## Red flags — pause and ask
221
-
222
- - User wants 9:16 but the source has critical wide-shot framing → \`add_marker(color="red", note="PAUSE: source is composed for 16:9. 9:16 will crop heads/sides. Confirm reframe vs. letterbox.")\`.
223
- - Window selection is ambiguous → propose 2–3 candidates as red markers, stop.
224
- - No clear hook in the chosen window → say so, suggest a different start.
225
-
226
- ## Don't
227
-
228
- - Don't render until the user reviews markers.
229
- - Don't burn captions before the user approves the SRT text.
230
- - Don't pick a hook blindly — surface options.
231
- - Don't leave silence >0.4s in the first 2 seconds.
232
- `;
233
- const CHAPTER_MARKERS = `# chapter-markers
16
+ # chapter-markers
234
17
 
235
18
  **When to use:** YouTube videos, podcasts, courses, long-form interviews. The
236
19
  user wants chapter timestamps the audience can jump to.
@@ -322,37 +105,167 @@ user verify before publishing.
322
105
  - Don't burn chapters into video — markers + description block only.
323
106
  - Don't render until the user reviews the chapters.
324
107
  `;
325
- const KEYFRAMING_AND_TITLES = `# keyframing-and-titles
108
+ const FUSION_LOWER_THIRD = `---
109
+ name: fusion-lower-third
110
+ description: Build a name/title chyron natively in DaVinci Resolve's Fusion via fusion_comp — Background + TextPlus + Merge node graph, wiring, styling, lower-third positioning, keyframed fade in/out. Resolve Studio only; cross-host fallback is write_lower_third + burn_subtitles.
111
+ ---
326
112
 
327
- **When to use:** the user asks to reorder clips, animate fades / pans /
328
- zooms, add lower-thirds or title cards, build coordinated multi-track
329
- B-roll compositions, or do speed ramps.
113
+ # fusion-lower-third
330
114
 
331
- **Goal:** these are the seven gaps neither Resolve nor Premiere exposes
332
- through their scripting APIs. The agent's path is FCPXML rebuild for
333
- timeline-shape changes (reorder, lanes, keyframes, titles) and
334
- file-only ffmpeg passes for content-shape changes (speed ramps, mixing,
335
- zoom-on-stills, transitions).
115
+ **When to use:** the user asks for a name/title chyron that should be
116
+ *editable inside the NLE* (not baked-in pixels), or wants a quick
117
+ title card built natively in DaVinci Resolve's Fusion page.
336
118
 
337
- ---
119
+ **Goal:** compose a Background + TextPlus + Merge graph in Fusion via
120
+ \`fusion_comp\`. Resolve only — Premiere has no Fusion equivalent; for
121
+ that, fall back to \`write_lower_third\` + \`burn_subtitles\`.
338
122
 
339
- ## Recipe 1 — Reorder clips on the timeline
123
+ ---
340
124
 
341
- The user says "move clip 5 to the start" or "swap clips 2 and 3".
125
+ ## When to pick which
342
126
 
343
- \`\`\`
344
- get_timeline # discover clipIds in order
345
- clone_timeline(newName="<original>-v2") # safety net
346
- reorder_timeline(newOrder=["c5","c1","c2","c3","c4"])
347
- \`\`\`
127
+ - **fusion_comp** — Resolve, comp lives inside the project, user can
128
+ tweak it later. Best when the user is already on the Fusion page or
129
+ wants a chyron that travels with the project file.
130
+ - **write_lower_third + burn_subtitles** — works on any host, output
131
+ is a baked-in pixel layer. Faster to iterate from the agent side
132
+ but the user can no longer edit the text without re-running the
133
+ pipeline.
348
134
 
349
- \`reorder_timeline\` reads the current timeline, emits a permuted FCPXML,
350
- and \`import_timeline\`s it. Clips not listed in \`newOrder\` keep their
351
- original relative order and append at the end.
135
+ If unsure, ask: "Resolve-native (editable) or baked-in?"
352
136
 
353
137
  ---
354
138
 
355
- ## Recipe 2 — Multi-track B-roll composition
139
+ ## Recipe — name + title lower-third on the active comp
140
+
141
+ Pre-flight: \`host_info\` must report \`name === "resolve"\`. If it doesn't,
142
+ stop and tell the user this skill is Resolve-only.
143
+
144
+ \`\`\`
145
+ host_info # confirm Resolve
146
+
147
+ # 1. Get to the Fusion page so the user can see the result.
148
+ open_page(name="fusion")
149
+
150
+ # 2. Build the graph.
151
+ fusion_comp(action="add_node", toolId="Background", name="LT_Strap")
152
+ fusion_comp(action="add_node", toolId="TextPlus", name="LT_Text")
153
+ fusion_comp(action="add_node", toolId="Merge", name="LT_Comp")
154
+
155
+ # 3. Wire it: strap as Background, text as Foreground.
156
+ fusion_comp(action="connect", fromNode="LT_Strap", toNode="LT_Comp",
157
+ toInput="Background")
158
+ fusion_comp(action="connect", fromNode="LT_Text", toNode="LT_Comp",
159
+ toInput="Foreground")
160
+
161
+ # 4. Set the text content + colour.
162
+ fusion_comp(action="set_input", node="LT_Text", input="StyledText",
163
+ value="<Name>\\n<Title>")
164
+ fusion_comp(action="set_input", node="LT_Text", input="Size", value=0.06)
165
+ fusion_comp(action="set_input", node="LT_Text", input="Color1Red", value=1)
166
+ fusion_comp(action="set_input", node="LT_Text", input="Color1Green", value=1)
167
+ fusion_comp(action="set_input", node="LT_Text", input="Color1Blue", value=1)
168
+
169
+ # 5. Park the strap in the lower-left third.
170
+ fusion_comp(action="set_input", node="LT_Strap", input="TopLeftRed", value=0)
171
+ fusion_comp(action="set_input", node="LT_Strap", input="TopLeftGreen", value=0)
172
+ fusion_comp(action="set_input", node="LT_Strap", input="TopLeftBlue", value=0)
173
+ fusion_comp(action="set_input", node="LT_Strap", input="TopLeftAlpha", value=0.85)
174
+ \`\`\`
175
+
176
+ The Merge node is the comp's MediaOut by default; the user sees the
177
+ result on the active timeline clip immediately.
178
+
179
+ ---
180
+
181
+ ## Animating in / out
182
+
183
+ Use \`set_keyframe\` on the Merge's \`Blend\` input (overall opacity):
184
+
185
+ \`\`\`
186
+ fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
187
+ frame=0, value=0) # invisible at clip start
188
+ fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
189
+ frame=12, value=1) # fade in over 12f
190
+ fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
191
+ frame=72, value=1) # hold
192
+ fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
193
+ frame=84, value=0) # fade out
194
+ \`\`\`
195
+
196
+ Frames are relative to the comp's render range — set it explicitly if
197
+ the agent needs to control the in/out range:
198
+
199
+ \`\`\`
200
+ fusion_comp(action="set_render_range", start=0, end=120)
201
+ \`\`\`
202
+
203
+ ---
204
+
205
+ ## Targeting a specific clip's comp
206
+
207
+ Pass \`clipId\` to scope every action to that clip's first Fusion comp
208
+ (auto-created if the clip has none). Useful for batched lower-thirds
209
+ across multiple clips:
210
+
211
+ \`\`\`
212
+ get_timeline # discover clipIds
213
+ fusion_comp(action="add_node", toolId="TextPlus",
214
+ name="LT_Text", clipId="<clipId>")
215
+ \`\`\`
216
+
217
+ ---
218
+
219
+ ## Troubleshooting
220
+
221
+ - **\`Resolve.Fusion() unavailable\`** — Resolve build is too old or
222
+ user is on a free seat. Fusion is Studio-only at scriptable depth.
223
+ - **\`No active Fusion comp\`** — user hasn't switched to the Fusion
224
+ page on a clip with a comp. Either call \`open_page("fusion")\` first
225
+ on a known clip, or pass \`clipId\` so we operate on that clip's comp
226
+ directly.
227
+ - **\`AddTool('X') returned None\`** — \`toolId\` is wrong. The canonical
228
+ IDs the agent will hit: \`Background\`, \`TextPlus\`, \`Merge\`,
229
+ \`Transform\`, \`ColorCorrector\`, \`DeltaKeyer\`, \`Brightness\`, \`Glow\`,
230
+ \`Blur\`. There's no scriptable enumeration; check Fusion's docs if
231
+ the user names a tool not in this list.
232
+ `;
233
+ const KEYFRAMING_AND_TITLES = `---
234
+ name: keyframing-and-titles
235
+ description: Recipes for the seven scripting gaps neither Resolve nor Premiere expose: timeline reorder, multi-track lanes, lower-thirds + title cards (ASS), keyframed opacity/position/volume ramps, audio mixing chains (EQ + comp + gate + de-esser + limiter), speed ramps, Ken-Burns, named transitions (smash-cut, whip-pan, dip-to-black).
236
+ ---
237
+
238
+ # keyframing-and-titles
239
+
240
+ **When to use:** the user asks to reorder clips, animate fades / pans /
241
+ zooms, add lower-thirds or title cards, build coordinated multi-track
242
+ B-roll compositions, or do speed ramps.
243
+
244
+ **Goal:** these are the seven gaps neither Resolve nor Premiere exposes
245
+ through their scripting APIs. The agent's path is FCPXML rebuild for
246
+ timeline-shape changes (reorder, lanes, keyframes, titles) and
247
+ file-only ffmpeg passes for content-shape changes (speed ramps, mixing,
248
+ zoom-on-stills, transitions).
249
+
250
+ ---
251
+
252
+ ## Recipe 1 — Reorder clips on the timeline
253
+
254
+ The user says "move clip 5 to the start" or "swap clips 2 and 3".
255
+
256
+ \`\`\`
257
+ get_timeline # discover clipIds in order
258
+ clone_timeline(newName="<original>-v2") # safety net
259
+ reorder_timeline(newOrder=["c5","c1","c2","c3","c4"])
260
+ \`\`\`
261
+
262
+ \`reorder_timeline\` reads the current timeline, emits a permuted FCPXML,
263
+ and \`import_timeline\`s it. Clips not listed in \`newOrder\` keep their
264
+ original relative order and append at the end.
265
+
266
+ ---
267
+
268
+ ## Recipe 2 — Multi-track B-roll composition
356
269
 
357
270
  The user wants several B-roll cutaways stacked above the main A-roll
358
271
  with per-clip opacity and timing.
@@ -496,123 +409,360 @@ For raw xfade names beyond the preset list, use \`crossfade_videos\`.
496
409
  - Don't keyframe opacity / position on Premiere via UXP — it's not
497
410
  exposed; emit FCPXML with the keyframes baked in instead.
498
411
  `;
499
- const SKIN_TONE_MATCHING = `# skin-tone-matching
500
-
501
- **When to use:** the host's face looks different across clips — warmer in
502
- one shot, cooler / paler in the next. Different camera, different
503
- location, different white-balance setting, sun behind a cloud. The user
504
- wants the faces to match.
505
-
506
- **Goal:** bring the target clip's skin tones toward a reference clip
507
- without re-grading the whole frame. Skin lives in the reds and yellows;
508
- that's where we operate.
509
-
412
+ const LONG_FORM_CONTENT_EDIT = `---
413
+ name: long-form-content-edit
414
+ description: Recipe for podcasts, interviews, vlogs, courses, talking-head. Five-pass method: utterance segmentation → take detection → filler removal → incomplete-sentence trim → silence normalization. Wires transcribe, cluster_takes, detect_silence, write_edl, import_edl, write_srt, add_marker into a single workflow.
510
415
  ---
511
416
 
512
- ## Two paths
513
-
514
- | Path | When | Where the grade lives |
515
- |---|---|---|
516
- | \`grade_skin_tones\` | works on every host (Resolve, Premiere, no-NLE) | baked into a new file |
517
- | \`match_clip_color\` | Resolve only | non-destructive, in the clip's grade node |
417
+ # long-form-content-edit
518
418
 
519
- Pick **\`grade_skin_tones\`** when the user is on Premiere, when there's no
520
- NLE, or when they want a finished file they can drop anywhere. Pair with
521
- \`replace_clip\` to swap it onto the timeline.
419
+ **When to use:** podcasts, interviews, vlogs, courses, talking-head — anything
420
+ where a person speaks for >5 minutes and the editorial work is take-selection,
421
+ filler removal, silence trimming, and pacing.
522
422
 
523
- Pick **\`match_clip_color\`** when the user is on Resolve and wants to keep
524
- the grade tweakable. The tool pipes the CDL through
525
- \`set_primary_correction\`, so the colorist can adjust after.
423
+ **Goal:** turn a raw recording into a tight, watchable cut without losing the
424
+ speaker's voice or the moments that matter. Captions are non-negotiable.
526
425
 
527
426
  ---
528
427
 
529
- ## Recipe
530
-
531
- ### 1. Pick the frames (most important step)
428
+ ## The 5-pass method
532
429
 
533
- Vision is only as good as what you show it. For BOTH the reference and
534
- the target:
430
+ These run in order. Each pass narrows the cut. Don't skip — passes 1–2 are
431
+ where 80% of the time savings live.
535
432
 
536
- - The face must be visible and large enough (not a wide shot from across
537
- the room).
538
- - The lighting on the face must be representative (not the one frame
539
- where they walked through a shadow).
540
- - Eyes open, mouth not in a weird shape, no motion blur.
433
+ ### Pass 1 — Utterance segmentation
541
434
 
542
- Use \`score_shot(input, intervalSec=15)\` or \`extract_frame\` to find good
543
- candidates. If the user already pointed at a moment ("match shot 3 to
544
- shot 1") use those timestamps directly.
435
+ \`\`\`
436
+ probe_media(input) → fps, duration
437
+ extract_audio(input, audio.wav, 16000)
438
+ transcribe(audio.wav, transcript.json) → segment-level transcript
439
+ \`\`\`
545
440
 
546
- ### 2. Run the grade
441
+ Now you have a segment list keyed by start/end seconds. Treat each segment as
442
+ the smallest editorial unit. Don't cut inside a segment unless the speaker
443
+ changes mid-segment.
547
444
 
548
- **File-only path (works in every host):**
445
+ ### Pass 2 — Take detection
549
446
 
550
447
  \`\`\`
551
- grade_skin_tones(
552
- referenceVideo="<ref.mp4>",
553
- referenceAtSec=<face-forward time>,
554
- targetVideo="<tgt.mp4>",
555
- targetAtSec=<face-forward time>,
556
- output="<tgt-graded.mp4>"
557
- )
448
+ cluster_takes(transcript.json) → groups of similar segments
558
449
  \`\`\`
559
450
 
560
- Returns \`{path, confidence, why, grade}\`. Then:
451
+ Multi-member clusters mean the speaker re-took a line. Pick the winner per
452
+ cluster:
561
453
 
562
- \`\`\`
563
- replace_clip(clipId="<target clip id>", mediaPath="<tgt-graded.mp4>")
564
- add_marker(color="yellow", note="skin grade: <why>")
565
- \`\`\`
454
+ - **Default to the last take** — speaker had practice.
455
+ - **Visual doubt** → \`score_shot(times=[mid of each member])\`, pick highest.
456
+ - **Audio doubt** → \`read_transcript(startSec=A, endSec=B)\` to inspect.
457
+ - Add a marker on each decision: \`add_marker(color="green", note="kept: take 3 of 3 — strongest delivery")\`.
566
458
 
567
- **Resolve non-baked path:**
459
+ ### Pass 3 — Filler removal
568
460
 
569
- \`\`\`
570
- match_clip_color(
571
- referenceVideo="<ref.mp4>",
572
- referenceAtSec=<face-forward time>,
573
- targetClipId="<target clip id>",
574
- targetAtSec=<face-forward time>,
575
- applyAutomatically=true
576
- )
577
- \`\`\`
461
+ For each kept segment, look for these and add cut markers:
578
462
 
579
- Returns \`{applied, confidence, why, grade}\`. The CDL goes into node 1
580
- (or \`nodeIndex=N\` if you want a specific node).
463
+ - "um", "uh", "like" used as filler (not as comparison)
464
+ - restart phrases: "so the thing is — actually, the thing is…"
465
+ - mid-sentence aborts the speaker self-corrected past
581
466
 
582
- ### 3. Check confidence
467
+ Mark each one with \`add_marker(color="red", note="cut: filler 'um'")\`.
583
468
 
584
- The model's confidence is the most important field. Always inspect it:
469
+ ### Pass 4 — Incomplete-sentence trim
585
470
 
586
- - \`confidence ≥ 0.7\` — apply. Trust the result.
587
- - \`0.4 ≤ confidence < 0.7\` — apply but flag for review:
588
- \`add_marker(color="yellow", note="skin grade: review — confidence <X>")\`.
589
- - \`confidence < 0.4\` — DO NOT apply. The model is guessing. Tell the
590
- user what you saw, suggest they grade the shot manually or pick a
591
- better reference frame.
471
+ Drop segments that:
592
472
 
593
- \`match_clip_color\` enforces this: with \`applyAutomatically=true\`,
594
- confidence < 0.4 returns \`{applied: false}\` and the grade is surfaced
595
- without writing to the node. \`grade_skin_tones\` always bakes the file
596
- because the agent asked for an output path — but you can re-run with a
597
- better reference frame if confidence was low.
473
+ - Trail off with no point ("…and yeah, anyway")
474
+ - Start mid-thought because the previous take was kept
475
+ - Repeat content already covered in a kept take
598
476
 
599
- ---
477
+ \`add_marker(color="red", note="cut: incomplete; covered in earlier take")\`.
600
478
 
601
- ## Defaults
479
+ ### Pass 5 — Silence normalization
602
480
 
603
- | Knob | Default | Why |
604
- |---|---|---|
605
- | Vision detail | \`low\` | cheap; skin balance doesn't need pixel-peeping |
606
- | Vision model | \`gpt-4o-mini\` | well-calibrated for color comparisons |
607
- | Output codec | \`libx264 crf=18\` | visually lossless |
608
- | Reference frame width | 768px | enough for skin-tone discrimination |
481
+ \`\`\`
482
+ detect_silence(input) → frame-aligned KEEP ranges
483
+ \`\`\`
484
+
485
+ Use the KEEP ranges to remove dead air >1s. Don't kill all silence —
486
+ breathing space matters for pacing. The default threshold usually leaves
487
+ natural pauses intact.
609
488
 
610
489
  ---
611
490
 
612
- ## What this is NOT
491
+ ## Final assembly
613
492
 
614
- - NOT a deterministic ColorChecker match. There's no chart, no
615
- colorimetry — it's a vision pass.
493
+ Combine pass-2 winners + pass-3/4 surviving segments into a single decision
494
+ list. Each entry is one EDL event.
495
+
496
+ \`\`\`
497
+ write_edl(events=decisions, frameRate=fps)
498
+ import_edl(path)
499
+ \`\`\`
500
+
501
+ Then captions:
502
+
503
+ \`\`\`
504
+ write_srt(cues=transcript.segments mapped to start/end/text)
505
+ import_subtitles(srtPath)
506
+ \`\`\`
507
+
508
+ For long-form: sidecar SRT (don't burn in) so YouTube/podcast players can
509
+ toggle them. Mention this to the user.
510
+
511
+ ---
512
+
513
+ ## Red flags — pause and ask
514
+
515
+ - Cluster has takes that are roughly equal quality — \`add_marker(color="red", note="PAUSE: which take? 1=A, 2=B")\` and stop.
516
+ - Segment is editorial-content-bearing but has bad audio — flag, don't drop.
517
+ - The user said "trim filler" but every "um" is intentional emphasis (rare but real) — confirm.
518
+
519
+ ## Don't
520
+
521
+ - Don't render until the user reviews the markers.
522
+ - Don't read full transcript without \`startSec/endSec\` — context blow-up.
523
+ - Don't cut inside a segment unless the speaker changes mid-segment.
524
+ - Don't skip captions for long-form unless explicitly told to.
525
+ `;
526
+ const SHORT_FORM_CONTENT_EDIT = `---
527
+ name: short-form-content-edit
528
+ description: Recipe for TikTok / Reels / Shorts. Find the moment → reformat 9:16 → hook the first 2 seconds → burn captions → render. Uses reformat_timeline, import_edl, set_clip_speed, write_srt, import_subtitles, open_page (Resolve).
529
+ ---
530
+
531
+ # short-form-content-edit
532
+
533
+ **When to use:** TikTok / Reels / Shorts / vertical clips. Source is usually
534
+ a longer horizontal video the user wants reframed, captioned, hooked, and
535
+ shipped.
536
+
537
+ **Goal:** the first 2 seconds win or lose retention. The cut, the caption,
538
+ and the hook all serve that one number.
539
+
540
+ ---
541
+
542
+ ## Recipe
543
+
544
+ ### 1. Find the moment
545
+
546
+ If the user gives you a horizontal video without timestamps, find the moment
547
+ worth clipping:
548
+
549
+ \`\`\`
550
+ probe_media(input)
551
+ extract_audio(input, audio.wav, 16000)
552
+ transcribe(audio.wav, transcript.json)
553
+ read_transcript(transcript.json, contains="<keyword from user>")
554
+ \`\`\`
555
+
556
+ Or for visual moments: \`score_shot(input, intervalSec=15)\` then inspect tops.
557
+
558
+ Settle on a \`[startSec, endSec]\` window. Aim for **15–60 seconds** for shorts;
559
+ 90s max for Reels.
560
+
561
+ ### 2. Reformat to vertical
562
+
563
+ Build the vertical timeline as FCPXML and import:
564
+
565
+ \`\`\`
566
+ reformat_timeline(
567
+ output="vertical.fcpxml",
568
+ preset="9:16",
569
+ title="<short name>",
570
+ frameRate=<source fps>,
571
+ events=[{ reel, sourcePath, sourceInFrame, sourceOutFrame }]
572
+ )
573
+ import_edl("vertical.fcpxml")
574
+ \`\`\`
575
+
576
+ Then on Resolve Studio, switch to color page and prompt the user to apply
577
+ Smart Reframe per clip:
578
+
579
+ \`\`\`
580
+ open_page("color")
581
+ add_marker(color="yellow", note="apply Smart Reframe per clip (Resolve Studio: right-click clip → Smart Reframe)")
582
+ \`\`\`
583
+
584
+ Premiere users: prompt for Auto Reframe via the captions/effects panel.
585
+
586
+ ### 3. Hook the first 2 seconds
587
+
588
+ The hook lives in the first 60 frames. Options:
589
+
590
+ - **Cold-open the punchline** — start at the most attention-grabbing line,
591
+ not the setup. Use \`read_transcript\` to find it.
592
+ - **Speed-up the intro** — \`set_clip_speed(clipId, speed=1.5)\` on the opening clip.
593
+ - **Pre-roll text/marker** — \`add_marker(color="yellow", note="add hook text overlay: '<line from transcript>'")\` for the user to add.
594
+
595
+ ### 4. Burned-in captions
596
+
597
+ Vertical = burned-in (most viewers watch muted, native captions are tiny).
598
+
599
+ \`\`\`
600
+ write_srt(cues=transcript.segments_in_window)
601
+ import_subtitles(srtPath)
602
+ add_marker(color="yellow", note="style captions: large, center-bottom, high-contrast — burn in via Resolve subtitle track styling")
603
+ \`\`\`
604
+
605
+ If the user is on Resolve Studio, they can right-click the subtitle track →
606
+ "Convert Subtitles to Text+" and style it. Note this to them.
607
+
608
+ ### 5. Render
609
+
610
+ Don't render until the user reviews. When they say "render":
611
+
612
+ \`\`\`
613
+ render(preset=<host preset>, output="<name>.mp4")
614
+ \`\`\`
615
+
616
+ Common presets: H.264 Master, YouTube 1080p (works for Shorts too),
617
+ Vimeo 1080p.
618
+
619
+ ---
620
+
621
+ ## Defaults for short-form
622
+
623
+ | Knob | Default | Why |
624
+ |---|---|---|
625
+ | Aspect | 9:16 (TikTok/Reels/Shorts) | Most platforms |
626
+ | Length | 15–60s | Algorithm sweet spot |
627
+ | Captions | burned-in | Watched muted |
628
+ | First 2s | the hook | Retention curve |
629
+ | Music | not added by you | Style decision; ask the user |
630
+
631
+ ## Red flags — pause and ask
632
+
633
+ - User wants 9:16 but the source has critical wide-shot framing → \`add_marker(color="red", note="PAUSE: source is composed for 16:9. 9:16 will crop heads/sides. Confirm reframe vs. letterbox.")\`.
634
+ - Window selection is ambiguous → propose 2–3 candidates as red markers, stop.
635
+ - No clear hook in the chosen window → say so, suggest a different start.
636
+
637
+ ## Don't
638
+
639
+ - Don't render until the user reviews markers.
640
+ - Don't burn captions before the user approves the SRT text.
641
+ - Don't pick a hook blindly — surface options.
642
+ - Don't leave silence >0.4s in the first 2 seconds.
643
+ `;
644
+ const SKIN_TONE_MATCHING = `---
645
+ name: skin-tone-matching
646
+ description: Match faces across clips when host scripting can't reach power windows or qualifiers. Two paths: grade_skin_tones (file-only — bakes a vision-derived colorbalance + selectivecolor + eq into a new mp4, pair with replace_clip) and match_clip_color (Resolve only — derives the same grade as a CDL via set_primary_correction).
647
+ ---
648
+
649
+ # skin-tone-matching
650
+
651
+ **When to use:** the host's face looks different across clips — warmer in
652
+ one shot, cooler / paler in the next. Different camera, different
653
+ location, different white-balance setting, sun behind a cloud. The user
654
+ wants the faces to match.
655
+
656
+ **Goal:** bring the target clip's skin tones toward a reference clip
657
+ without re-grading the whole frame. Skin lives in the reds and yellows;
658
+ that's where we operate.
659
+
660
+ ---
661
+
662
+ ## Two paths
663
+
664
+ | Path | When | Where the grade lives |
665
+ |---|---|---|
666
+ | \`grade_skin_tones\` | works on every host (Resolve, Premiere, no-NLE) | baked into a new file |
667
+ | \`match_clip_color\` | Resolve only | non-destructive, in the clip's grade node |
668
+
669
+ Pick **\`grade_skin_tones\`** when the user is on Premiere, when there's no
670
+ NLE, or when they want a finished file they can drop anywhere. Pair with
671
+ \`replace_clip\` to swap it onto the timeline.
672
+
673
+ Pick **\`match_clip_color\`** when the user is on Resolve and wants to keep
674
+ the grade tweakable. The tool pipes the CDL through
675
+ \`set_primary_correction\`, so the colorist can adjust after.
676
+
677
+ ---
678
+
679
+ ## Recipe
680
+
681
+ ### 1. Pick the frames (most important step)
682
+
683
+ Vision is only as good as what you show it. For BOTH the reference and
684
+ the target:
685
+
686
+ - The face must be visible and large enough (not a wide shot from across
687
+ the room).
688
+ - The lighting on the face must be representative (not the one frame
689
+ where they walked through a shadow).
690
+ - Eyes open, mouth not in a weird shape, no motion blur.
691
+
692
+ Use \`score_shot(input, intervalSec=15)\` or \`extract_frame\` to find good
693
+ candidates. If the user already pointed at a moment ("match shot 3 to
694
+ shot 1") use those timestamps directly.
695
+
696
+ ### 2. Run the grade
697
+
698
+ **File-only path (works in every host):**
699
+
700
+ \`\`\`
701
+ grade_skin_tones(
702
+ referenceVideo="<ref.mp4>",
703
+ referenceAtSec=<face-forward time>,
704
+ targetVideo="<tgt.mp4>",
705
+ targetAtSec=<face-forward time>,
706
+ output="<tgt-graded.mp4>"
707
+ )
708
+ \`\`\`
709
+
710
+ Returns \`{path, confidence, why, grade}\`. Then:
711
+
712
+ \`\`\`
713
+ replace_clip(clipId="<target clip id>", mediaPath="<tgt-graded.mp4>")
714
+ add_marker(color="yellow", note="skin grade: <why>")
715
+ \`\`\`
716
+
717
+ **Resolve non-baked path:**
718
+
719
+ \`\`\`
720
+ match_clip_color(
721
+ referenceVideo="<ref.mp4>",
722
+ referenceAtSec=<face-forward time>,
723
+ targetClipId="<target clip id>",
724
+ targetAtSec=<face-forward time>,
725
+ applyAutomatically=true
726
+ )
727
+ \`\`\`
728
+
729
+ Returns \`{applied, confidence, why, grade}\`. The CDL goes into node 1
730
+ (or \`nodeIndex=N\` if you want a specific node).
731
+
732
+ ### 3. Check confidence
733
+
734
+ The model's confidence is the most important field. Always inspect it:
735
+
736
+ - \`confidence ≥ 0.7\` — apply. Trust the result.
737
+ - \`0.4 ≤ confidence < 0.7\` — apply but flag for review:
738
+ \`add_marker(color="yellow", note="skin grade: review — confidence <X>")\`.
739
+ - \`confidence < 0.4\` — DO NOT apply. The model is guessing. Tell the
740
+ user what you saw, suggest they grade the shot manually or pick a
741
+ better reference frame.
742
+
743
+ \`match_clip_color\` enforces this: with \`applyAutomatically=true\`,
744
+ confidence < 0.4 returns \`{applied: false}\` and the grade is surfaced
745
+ without writing to the node. \`grade_skin_tones\` always bakes the file
746
+ because the agent asked for an output path — but you can re-run with a
747
+ better reference frame if confidence was low.
748
+
749
+ ---
750
+
751
+ ## Defaults
752
+
753
+ | Knob | Default | Why |
754
+ |---|---|---|
755
+ | Vision detail | \`low\` | cheap; skin balance doesn't need pixel-peeping |
756
+ | Vision model | \`gpt-4o-mini\` | well-calibrated for color comparisons |
757
+ | Output codec | \`libx264 crf=18\` | visually lossless |
758
+ | Reference frame width | 768px | enough for skin-tone discrimination |
759
+
760
+ ---
761
+
762
+ ## What this is NOT
763
+
764
+ - NOT a deterministic ColorChecker match. There's no chart, no
765
+ colorimetry — it's a vision pass.
616
766
  - NOT a substitute for a colorist. Power windows / qualifiers / curves
617
767
  are out of scope. If skin needs to be isolated from a colored
618
768
  background, surface that and stop.
@@ -621,152 +771,978 @@ better reference frame if confidence was low.
621
771
 
622
772
  ---
623
773
 
624
- ## Red flags — pause and ask
774
+ ## Red flags — pause and ask
775
+
776
+ - Reference and target are filmed under fundamentally different
777
+ lighting (tungsten vs daylight) → confidence will be low. Tell the
778
+ user and suggest a less aggressive match (or LUT-based correction
779
+ first).
780
+ - Target shot has multiple people with different skin tones → the
781
+ vision model averages. Pick the primary face's frame and warn the
782
+ user the secondary face may shift.
783
+ - User wants pixel-perfect match across 50 clips → run on a hero pair,
784
+ then \`copy_grade(sourceClipId=hero, targetClipIds=[...])\` instead of
785
+ re-running vision on every clip.
786
+
787
+ ## Don't
788
+
789
+ - Don't pick a target frame where the face is in shadow or motion blur.
790
+ - Don't apply low-confidence grades silently.
791
+ - Don't run on top of an existing aggressive grade — clean state first
792
+ or expect compounding shifts.
793
+ - Don't bake \`grade_skin_tones\` over the original target file. Always
794
+ write to a new path.
795
+ `;
796
+ const VIRAL_HOOK_PATTERNS = `---
797
+ name: viral-hook-patterns
798
+ description: Hook patterns sourced from primary creators (Jenny Hoyos on the official YouTube Blog, the leaked MrBeast production manual, Paddy Galloway's data analyses) — not generic creator-folklore. Read when analyze_hook fails, when picking a find_viral_moments candidate, or when generate_youtube_metadata needs a punchier title. Each pattern names a real creator example, the primary source, and the failure mode.
799
+ ---
800
+
801
+ # viral-hook-patterns
802
+
803
+ **When to use:** the user wants a stronger opener, a Short's hook scored < 60 in \`analyze_hook\`, or \`generate_youtube_metadata\` needs to phrase a title around a proven structure. Also useful when picking which \`find_viral_moments\` candidate to ship — the candidate's \`hookLine\` should map to one of these patterns; if it doesn't, the hook is probably weak.
804
+
805
+ **Sources used.** Every pattern below references **at least one named creator example AND a cited source** — a primary source wherever one exists (patterns 9 and 10 rest on secondary sources with named examples and are labelled as such). The primary sources are the leaked MrBeast production manual (authenticated by 2 former producers per Passionfruit's August 2024 reporting), Jenny Hoyos's interview on YouTube's own blog (Jan 28 2025), the My First Million ep. 580 with Hoyos (May 3 2024), Paddy Galloway's LinkedIn / X analyses, and the YouTube Creator Liaison's official commentary. Patterns without that level of provenance were dropped.
806
+
807
+ ---
808
+
809
+ ## How a hook is judged in 2025
810
+
811
+ The retention bar:
812
+
813
+ - **Shorts:** **[primary]** Jenny Hoyos on YouTube's blog (Jan 28 2025, https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/): *"I really do think you have one second to hook someone, especially on Shorts."*
814
+ - **Shorts continued:** **[primary]** Paddy Galloway's analysis of 3.3 billion Shorts views (Rattibha-archived X thread, 2023): on the best-performing Shorts, **70–90%** of viewers choose to view rather than swipe away. Below 70% view-vs-swipe = burial.
815
+ - **Long-form:** **[primary]** YouTube's Senior Director of Growth, Todd Beaupré, via Stan Ventures (Sept 5 2024): *"the importance of the first 30 seconds of a video, the role of thumbnails, and engaging intros in capturing the audience's attention."* Marketing Agent's recap of Feb 2025 Creator Insider: *"Establish value within 7 seconds."*
816
+ - **Mid-video:** **[primary]** MrBeast leaked production manual: re-engagement checkpoints at the **3-minute** and **6-minute** marks for long-form (per Cybernews Sept 16 2024).
817
+
818
+ **The retention data backdrop.** Retention Rabbit's 2025 benchmark study (75+ niches, Q1 2024 – Q1 2025): the average video retains 23.7% of viewers; only 1 in 6 surpasses 50%; 55% of viewers leave within the first minute. A working hook isn't optional — it's the difference between distribution and burial.
819
+
820
+ ---
821
+
822
+ ## The 12 patterns
823
+
824
+ ### 1. Click-to-unpause packaging (Paddy Galloway)
825
+
826
+ **Structure:** Thumbnail captures a mid-action moment that the brain wants to resolve by clicking.
827
+
828
+ **Source.** **[primary]** Paddy Galloway, LinkedIn post March 2026 analysing four viral thumbnails (a MrBeast piece, an old man mid-conversation, two others): *"They each use a simple (yet powerful) packaging technique. Click to unpause. All four of these videos create a scene that you have to 'click' to 'unpause' and see for yourself. The thumbnail and title create an open loop in the brain we want to close."*
829
+
830
+ **Worked example (Galloway's own).** *"Imagine the opposite — MrBeast standing beside the steps pointing vs actually partaking. The old man smiling and posing for the camera instead of being mid-conversation. Dead in the water."*
831
+
832
+ **Failure mode:** posed shot, neutral expression, completed action. The loop is closed before the click. Eyes-at-camera-while-smiling is the universal signal of "nothing is about to happen."
833
+
834
+ ### 2. Shock → Intrigue → Satisfy (Jenny Hoyos's three-beat)
835
+
836
+ **Structure:** Three distinct beats compressed into the first second of a Short. Shock = a visual/audio interrupt. Intrigue = a one-line setup that withholds the answer. Satisfy = the implied promise the rest of the Short will deliver.
837
+
838
+ **Source.** **[primary]** YouTube's own blog (Jan 28 2025): *"Jenny emphasises the critical importance of hooking viewers within the first second of a Short, using a three-step formula of shock, intrigue, and satisfy."*
839
+
840
+ **Worked example.** Hoyos's $1 chicken sandwich vs Chick-fil-A Short — opens with the punchier visual hit (shock), poses the value question (intrigue), promises the comparison (satisfy). Marketing Examined's breakdown of her playbook (May 16 2024): she would "even change the idea of the entire video for a strong hook."
841
+
842
+ **Failure mode.** Hook is too abstract or builds slowly. Her test: *"a good hook should be so clear that viewers understand the video even on mute."*
843
+
844
+ ### 3. Foreshadow the ending (Hoyos)
845
+
846
+ **Structure:** Open on a moment from the END of the video, rewind, withhold the payoff until the end.
847
+
848
+ **Source.** **[primary]** Hoyos via vidIQ blog (Dec 2023, https://vidiq.com/blog/post/how-jenny-hoyos-gets-10m-views-per-youtube-short/): *"I started a video by giving my grandma a $5 Christmas present and showing her reaction… you don't see what the gift is until the end of the video."*
849
+
850
+ **Worked example.** Her $5 Christmas-gift Short — opens on grandma's reaction shot, hides the actual gift, makes viewers stay to find out.
851
+
852
+ **Failure mode.** Foreshadowing something the ending can't visually pay off. AVP (average percentage viewed) collapses around the reveal point.
853
+
854
+ ### 4. "But / So" escalation (Hoyos)
855
+
856
+ **Structure:** Every story beat connected by a \`but\` or \`so\`, not \`and then\`. Each \`but\` raises stakes; each \`so\` makes a consequence visible.
857
+
858
+ **Source.** **[primary]** Hoyos via vidIQ: *"You can bring this to life by using the words 'but' and 'so'… 'But the dog whined softly, so I followed him for a few miles. He led me to a dark tunnel, so I backed away in fear. But I saw a bunch of abandoned puppies at the rim of the opening.' Each 'but' stops the viewer from scrolling away as conflict rises."*
859
+
860
+ **Failure mode.** Plot progression via \`and then… and then…\` produces flat retention curves and reads as low-satisfaction.
861
+
862
+ ### 5. Power-word opener (Hoyos)
863
+
864
+ **Structure:** Open with a single high-curiosity word: \`$1\`, \`banned\`, \`free\`, \`secret\`, \`cheap\`, \`nobody\`. Pair it with a concrete claim.
865
+
866
+ **Source.** **[primary]** Marketing Examined (May 2024) on Hoyos's playbook: hook should be "Concise, no more than 3 seconds, visually pleasing — power words like 'banned,' 'free,' 'one dollar,' 'secret,' or 'cheap' instantly pique curiosity."
867
+
868
+ **Failure mode.** Power word with no payoff — classic clickbait. Triggers the CTR/retention penalty described by Rene Ritchie (YouTube Creator Liaison): *"If you over-index on CTR, it could become click-bait, which could tank retention, and hurt performance."*
869
+
870
+ ### 6. Crazy Progression — show, don't tell, then skip ahead (MrBeast)
871
+
872
+ **Structure:** First 3 minutes of long-form aren't a setup — they're an escalation. Don't say "we'll do X" — show X already happening at scale.
873
+
874
+ **Source.** **[primary]** Leaked MrBeast production manual (per ProTunesOne Oct 2025 https://protunesone.com/blog/leaked-mrbeast-document-on-his-youtube-strategies/): *"Minutes 1-3: Instead of telling viewers what you will do, show them. MrBeast uses a technique called 'crazy progression.' For example, if he is making a video about a guy surviving weeks in the forest, he would cover multiple days instead of making the first 3 minutes about the first day. The intention here is to hook viewers as fast as possible and get them emotionally invested in the story."*
875
+
876
+ **Worked example.** *$1 vs $1,000,000,000 Yacht!* — the manual explicitly: *"As the viewer progresses through the video, the stakes rise, first presenting the $1 yacht, then a $1 million yacht, then a $10 million yacht, and so on, until the payoff at the end."*
877
+
878
+ **Failure mode.** A first 3 minutes that explains the rules instead of showing escalation. The manual's frame: *"Match the clickbait expectations and front-load as much information about the video as possible while incorporating the maximum amount of visuals, music, effects and quick scene changes."*
879
+
880
+ ### 7. Match-the-thumbnail-promise (MrBeast)
881
+
882
+ **Structure:** Whatever the thumbnail visually promises, deliver in the first 60 seconds. Not at minute 8.
883
+
884
+ **Source.** **[primary]** MrBeast manual via Creator Handbook (Sept 18 2024): *"Thumbnails must align with expectations set by the title. If a thumbnail promises a specific scene or visual spectacle, the video must deliver on that promise to keep viewers engaged."* Plus: *"CTR is what dictates what we do for videos. 'I Spent 50 Hours In My Front Yard' is lame, and you wouldn't click it. But you would click 'I Spent 50 Hours In Ketchup.'"*
885
+
886
+ **Source corroboration.** **[primary]** Rene Ritchie via vidIQ Aug 2025: *"Great thumbnails don't just get viewers to click — they also help viewers understand what the video is about, so that they can make informed decisions about what to watch."*
887
+
888
+ **Failure mode.** Thumbnail-bait. CTR spikes, retention craters, the algorithm penalises distribution.
889
+
890
+ ### 8. Mid-video re-engagement at minute 3 and 6 (MrBeast)
891
+
892
+ **Structure:** A mini-hook (twist, reveal, escalation) at exactly the points where retention historically dips. Not the climax — a refresh.
893
+
894
+ **Source.** **[primary]** Leaked MrBeast manual via Cybernews (Sept 16 2024): *"Around the three-minute mark, MrBeast's team aims to include a 're-engagement,' which is highly interesting and impressive… The next most crucial segment of a video is the 3–6 minute mark, which needs to be filled with most exciting and interesting content. After another 're-engagement' at the six-minute mark, the hope is to retain the viewers till the end."*
895
+
896
+ **Failure mode.** Recapping what just happened instead of escalating. Recap-style re-engagements drop retention more sharply than having no recap at all.
897
+
898
+ ### 9. The "I asked Google" / "I asked an expert" hook (Sean Andrew)
899
+
900
+ **Structure:** Open with a researched question whose answer the audience wants. The hook frames you as proxy: you found out, viewer doesn't have to.
901
+
902
+ **Source.** **[secondary, named example]** vidIQ Shorts hooks roundup (Feb 2026, https://vidiq.com/blog/post/viral-video-hooks-youtube-shorts/): *"Sean Andrew used this opener to get 478,000 views on a long-jumping video. He asked Google 'what the longest jump in history is,' and then attempted to break the long-jump record."*
903
+
904
+ **Failure mode.** The answer is in the hook. The hook works because Google's answer becomes the implicit promise *to be tested*; if you reveal the answer, the test loses tension.
905
+
906
+ ### 10. Credibility + specific N (Erika Kullberg)
907
+
908
+ **Structure:** "Here are N things I do before [scenario], coming from [credential]." Combines foreshadowing (audience knows it ends on item N) with credential framing.
909
+
910
+ **Source.** **[secondary, named example]** vidIQ (Feb 2026): *"Erika Kullberg's 'Quick Travel Tips' starts: 'Here are three things I do before every flight, coming from a lawyer who travels six months out of the year.' She speaks directly to travellers and builds credibility by saying how often she travels."*
911
+
912
+ **Failure mode.** Generic credibility ("as a content creator…") or N too high. Lists of 10+ erode foreshadowing because viewers can't track them.
913
+
914
+ ### 11. End-of-video cliffhanger (Rene Ritchie's underused tactic)
915
+
916
+ **Structure:** End the current video on a cliffhanger that resolves in the next upload. Pulls watch-time INTO the channel, not out of it.
917
+
918
+ **Source.** **[primary]** Rene Ritchie via Search Engine Journal (Aug 15 2023 — older but still cited canonically): *"Cliffhangers are an underused tactic on YouTube. Similar to how they're used in television and movies, implementing cliffhangers at the end of YouTube videos can make viewers eager to watch the next video to see what happens. This builds excitement and investment in the audience."*
919
+
920
+ **Failure mode.** No payoff in the next upload. The cliffhanger creates an unfulfilled loop, dropping satisfaction surveys (one of the four signals YouTube weighs in 2025).
921
+
922
+ ### 12. First-frame-as-thumbnail (Hoyos + Galloway)
923
+
924
+ **Structure:** The literal first frame of the video should communicate the promise without audio. Treat it like a thumbnail.
925
+
926
+ **Source.** **[primary]** Paddy Galloway's Rattibha-archived X thread (2023): *"It's important to make your first second really punchy and engaging to hook viewers early into the video. Treat your intro like a thumbnail."* **[primary]** Hoyos on My First Million ep. 580 (May 3 2024) discusses the importance of "the first frame" as a retention lever; she found that removing a single frame can change a Short's retention curve.
927
+
928
+ **Failure mode.** First frame is a logo, a black slate, a cold-open loading shot, or a face mid-blink. Mobile autoplay shows this in the feed before audio loads.
929
+
930
+ ---
931
+
932
+ ## Picking a pattern
933
+
934
+ Default order to try, by content type:
935
+
936
+ | Content type | First choice | Backup |
937
+ |---|---|---|
938
+ | Shorts | Pattern 2 (Shock/Intrigue/Satisfy) + Pattern 5 (power word) | Pattern 3 (foreshadow ending) |
939
+ | Long-form challenge / spectacle | Pattern 6 (crazy progression) + Pattern 7 (match thumbnail) | Pattern 8 (3-min re-engagement) |
940
+ | Educational long-form | Pattern 10 (credibility + N) | Pattern 1 (click to unpause) |
941
+ | Vlog / journey | Pattern 4 (but/so) | Pattern 11 (cliffhanger to next) |
942
+ | Reaction / opinion | Pattern 1 (click to unpause) | Pattern 9 (asked Google) |
943
+ | Series content | Pattern 11 (cliffhanger) | – |
944
+
945
+ **Avoid combining patterns** — viewers can only track one promise at a time. One pattern, executed well, beats three layered patterns.
946
+
947
+ ---
948
+
949
+ ## Anti-patterns (don't ship these)
950
+
951
+ - **"Hey guys what's up so today I want to talk about…"** — Creator Insider's "establish value within 7 seconds" guidance (and Beaupré's emphasis on the first 30 seconds) rules this out. \`analyze_hook\` will catch it; flag with a red marker.
952
+ - **Static talking head with no visual change in 0–2 seconds** — even a perfect line adds nothing to retention without one. Pair every hook with a visual change (cut, zoom, gesture). MrBeast manual: *"incorporating the maximum amount of visuals, music, effects and quick scene changes."*
953
+ - **Hooks that contain the answer.** "Here are 5 ways to save money: 1. budget, 2. invest, 3. …" — burns the curiosity gap immediately.
954
+ - **Generic music sting with no spoken content for 1+ second** — the first second is the hook on Shorts. Move the punchline forward.
955
+ - **Posed thumbnail mismatch.** A posed studio thumbnail paired with a candid mid-action video opener loses both audiences (no click-to-unpause AND no thumbnail-promise match).
956
+
957
+ ---
958
+
959
+ ## Operationalising in the agent
960
+
961
+ The agent does NOT generate footage. It can only re-cut from what was filmed, or recommend a re-shoot. Frame every hook diagnosis around that constraint.
962
+
963
+ When \`analyze_hook\` returns a low score:
964
+
965
+ 1. Read the \`hookLine\` field from \`find_viral_moments\` (or the first sentence of the chosen window).
966
+ 2. Call \`rewrite_hook(currentHook=<line>, videoTopic=<one-line>, transcriptExcerpt=<200–500 chars>, pattern="auto")\` — returns 3 candidate rewrites with the chosen pattern + rationale.
967
+ 3. Surface the candidates to the user. **Do NOT auto-apply** — we can't speak the new line on-camera; the user has to either:
968
+ - **Pick an existing alternative opener from the source footage** — if so, propose a cut window via \`text_based_cut\`.
969
+ - **Re-shoot the opener** — if so, drop a red marker:
970
+ \`\`\`
971
+ add_marker(color="red", note="PAUSE: hook needs re-shoot. Suggested line: '[candidate]'")
972
+ \`\`\`
973
+ 4. **Never silently ship a sub-60 hook.** If the user can't re-shoot and source has no better alternative, the right move is to tell them so explicitly — not to pretend the current opener is fine.
974
+
975
+ For Shorts specifically, the canonical pre-flight chain is:
976
+
977
+ \`\`\`
978
+ audit_first_frame(input) # is the t=0 frame thumbnail-quality?
979
+ analyze_hook(input) # does the spoken line earn the watch?
980
+ verify_thumbnail_promise(thumb, input) # does the opening deliver the thumbnail's promise?
981
+ \`\`\`
982
+
983
+ Gate at all three. If any returns blocking findings, pause before render.
984
+
985
+ **Operational targets** (executable today via the tools above):
986
+ - **Optimal duration 30–45 s** — \`find_viral_moments\` already defaults to \`[20, 45]\`.
987
+ - **Target ≥ 90% retention** through to last second (Hoyos's bar) — the agent can't measure this until upload, but it's the bar to rewrite toward.
988
+ - **Target ≥ 70% view-vs-swipe** (Galloway's 3.3B-views floor) — same: post-upload metric the user reports.
989
+ - **Seamless re-loop** — run \`loop_match_short\` as the last step before delivery.
990
+
991
+ For long-form retention checkpoints (Pattern 8 — 3-min and 6-min re-engagement), use \`audit_retention_structure(transcript)\`. It returns per-checkpoint scores and weakest-checkpoint suggestions; the agent then proposes \`cut_filler_words\` / \`text_based_cut\` / \`punch_in\` / \`add_sfx_at_cuts\` on the flat windows.
992
+
993
+ ---
994
+
995
+ ## Sources & further reading
996
+
997
+ **Primary creator sources:**
998
+ - Jenny Hoyos × Todd Sherman, **YouTube Creator Blog**, Jan 28 2025 — https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/
999
+ - Jenny Hoyos, **My First Million** ep. 580, May 3 2024 — https://www.mfmpod.com/videos/the-formula-to-break-100-million-views-on-shorts-ft-jenny-hoyos/
1000
+ - Jenny Hoyos × **Marketing Examined**, May 16 2024 — https://www.marketingexamined.com/blog/jenny-hoyos-short-form-video-playbook
1001
+ - **Leaked MrBeast production manual**, August 2024, full PDF mirrored at https://simonwillison.net/2024/Sep/15/how-to-succeed-in-mrbeast-production/
1002
+ - Paddy Galloway, **Creator Science Podcast #209**, Jan 27 2026 — https://podcast.creatorscience.com/paddy-galloway-2/
1003
+ - Paddy Galloway, **LinkedIn "Click to unpause"** post, March 2026
1004
+ - Paddy Galloway, **3.3 billion Shorts views** X thread, archived at https://en.rattibha.com/thread/1646898356419981315
1005
+
1006
+ **Authoritative third-party syntheses:**
1007
+ - vidIQ — Hoyos breakdown (Dec 2023), Shorts hooks roundup (Feb 2026)
1008
+ - Creator Handbook — MrBeast manual recap, Sept 18 2024
1009
+ - ProTunesOne — Leaked MrBeast doc breakdown, Oct 28 2025
1010
+ - Cybernews — MrBeast manual on retention checkpoints, Sept 16 2024
1011
+ - Search Engine Journal — Rene Ritchie on cliffhangers, Aug 15 2023
1012
+ `;
1013
+ const YOUTUBE_ALGORITHM_PRIMER = `---
1014
+ name: youtube-algorithm-primer
1015
+ description: How YouTube actually ranks videos in 2024–2026, sourced from Creator Insider, the YouTube Liaison (Rene Ritchie), Senior Director of Growth Todd Beaupré, Paddy Galloway, and the Retention Rabbit 2025 benchmark study. Read when generating titles/descriptions/chapters or when a video is underperforming. Numbers without a primary YouTube source are flagged as third-party heuristics.
1016
+ ---
1017
+
1018
+ # youtube-algorithm-primer
1019
+
1020
+ **When to use:** any time a tool needs to optimise FOR the algorithm — title generation, description structure, chapter placement, render-format selection, end-screen placement, multi-format render decisions. Also when the user asks "why isn't this getting views?" — the answer usually maps to one of the four signals below.
1021
+
1022
+ **What this is:** a working model with cited sources. Where a number comes from YouTube's own staff, it's marked **[primary]**. Where it comes from third-party tooling (vidIQ, TubeBuddy, Dataslayer) or aggregator sources, it's marked **[secondary]**. Where it's creator folklore with no traceable source, it's marked **[unverified]** — surface those to the user as heuristics, not laws.
1023
+
1024
+ **Source quality up front.** Most authoritative sources for 2024–2026, in order: (1) Creator Insider, the Beaupré ↔ Ritchie video conversations, especially the Jan 23 2025 algorithm explainer; (2) Rene Ritchie's "Top Five" YouTube Blog posts and \`@YouTubeLiaison\` on X; (3) the YouTube Help Center on Test & Compare and Add Custom Thumbnails; (4) Paddy Galloway (data-driven creator strategist) — his X threads and Creator Science Podcast #209 (Jan 27 2026). Tool-vendor data (vidIQ, TubeBuddy, Dataslayer, Retention Rabbit) is useful directional signal but not platform-confirmed.
1025
+
1026
+ ---
1027
+
1028
+ ## The 2025 shift: satisfaction-weighted discovery
1029
+
1030
+ The biggest change creators must internalise. YouTube announced a recommendation model overhaul in early 2025; the new system layers four qualitative satisfaction signals on top of clicks and watch time:
1031
+
1032
+ 1. **Surveys** — post-view "Did you enjoy this video?" prompts.
1033
+ 2. **Sentiment modelling** — comments + like/dislike ratios.
1034
+ 3. **Long-session retention** — time spent across multiple videos in a session.
1035
+ 4. **Feedback suppression** — "Not Interested" / "Don't Recommend Channel" clicks.
1036
+
1037
+ **[primary]** Todd Beaupré (YouTube Sr. Director, Growth & Discovery), via Buffer's recap of the Jan 2025 Creator Insider conversation: *"We're trying to understand not just about the viewer's behavior and what they do, but how they feel about the time they're spending. What do they say about their experience watching a video."* (https://buffer.com/resources/youtube-algorithm/, 2025)
1038
+
1039
+ **[primary]** Rene Ritchie (YouTube Creator Liaison), Jan 2025 Creator Insider video, paraphrased on Lia Haberman's ICYMI newsletter: *"YouTube's Algorithm Pulls, Not Pushes: The recommendation system doesn't 'push' creator videos out to YouTube audiences but instead pulls in content based on the user's individual viewing habits — think of it as automating word of mouth. Viewer Satisfaction Matters: YouTube measures user satisfaction through engagement signals such as likes, comments, and surveys. Total watch time is not the golden standard — sometimes viewers want a video to be more efficient and just get to the point."* (https://liahaberman.substack.com/p/icymi-how-youtubes-2025-algorithm, Jan 31 2025)
1040
+
1041
+ **Editorial implication.** Stop padding videos to hit a watch-time number. The platform now reads "got to the point fast" as a positive satisfaction signal, not a missed-watch-time signal.
1042
+
1043
+ ---
1044
+
1045
+ ## The four metrics that move ranking
1046
+
1047
+ In rough order of importance for general distribution:
1048
+
1049
+ ### 1. Click-through rate (CTR) on impressions
1050
+
1051
+ CTR is driven by the **thumbnail + title pair**. **[secondary]** Tool-vendor benchmarks roughly converge:
1052
+
1053
+ | Band | vidIQ (Nov 2025) | Dataslayer (~2026) | YTShark (Mar 2026) |
1054
+ |------|---|---|---|
1055
+ | Poor | < 4% (thumbnail/title isn't clear enough) | < 3% needs immediate fixes | – |
1056
+ | Average | 4–6% | 4–6% | 2–10% (most channels) |
1057
+ | Good | 7%+ | 7–10% | – |
1058
+ | Excellent | 9–10%+ | > 10% (niche channels with loyal audiences) | – |
1059
+
1060
+ Niche-specific (PostEverywhere citing vidIQ + TubeBuddy data, Jan 2026): gaming averages 8.5%, educational averages 4.5%.
1061
+
1062
+ **[unverified]** The "1,000 impressions / 10% CTR triggers expanded distribution" claim that floats in SEO blogs (Hashmeta and others) has no traceable YouTube source. Treat as folk wisdom.
1063
+
1064
+ **[primary]** What Paddy Galloway actually says about CTR — Creator Science Podcast #209, Jan 27 2026: *"CTR itself is a very fickle and in some ways infuriating metric… because the more impressions a video gets, the lower the CTR drops typically… CTR itself as a whole is not very useful. CTR in the first hour or first 24 hours can be a good predictor of success on videos. There's a very strong correlation between first-hour CTR and long-term video performance on a lot of established channels."*
1065
+
1066
+ **[primary]** What YouTube itself says about CTR's role — Rene Ritchie on the Test & Compare A/B tool, July 25 2025 (via vidIQ blog https://vidiq.com/blog/post/youtube-launches-new-title-testing-tool/): *"Thumbnail Test & Compare returns watch time rather than separate metrics on click-through rate (CTR) and retention (AVP), because watch time includes both! You have to click to watch and you have to retain to build up time. If you over-index on CTR, it could become click-bait, which could tank retention, and hurt performance."*
1067
+
1068
+ **Operational rule for the agent:** judge CTR against the channel's own first-hour baseline, not industry averages. YouTube's native A/B tool optimises Watch Time Share, not CTR — match that bias.
1069
+
1070
+ ### 2. Average view duration (AVD) and average percentage viewed (AVP%)
1071
+
1072
+ The single best 2024–2026 retention dataset is **[secondary, large N]** Retention Rabbit's May 2025 audience-retention benchmark report (75+ niches; Q1 2024 – Q1 2025; https://www.retentionrabbit.com/blog/2025-youtube-audience-retention-benchmark-report):
1073
+
1074
+ - **Average YouTube video retains 23.7%** of its viewers.
1075
+ - **Only 1 in 6 videos (16.8%) surpass 50% retention.**
1076
+ - **55%+ viewer drop-off occurs in the first minute.**
1077
+ - Channels improving average retention by 10 percentage points see a correlated **25%+ increase in impressions**.
1078
+ - Educational How-Tos average **42.1% retention** — top niche.
1079
+
1080
+ **[secondary]** Threshold consensus across multiple 2025 sources (Solveigmm Aug 2025; PostEverywhere Jan 2026; Virvid Feb 2026):
1081
+
1082
+ - **50–60% AVP%** is solid.
1083
+ - **70%+** earns priority placement in suggested videos.
1084
+ - **< 40%** triggers active deprioritisation regardless of CTR.
1085
+
1086
+ **[primary]** The "50% rule" reframed — Rene Ritchie / Todd Beaupré (Jan 2025 Creator Insider, paraphrased on Hootsuite Sept 2025 https://blog.hootsuite.com/youtube-algorithm/): the platform now *"prioritises videos that provide a positive viewing experience, not just those that hold attention the longest."* Translation: a 6-minute video at 80% retention beats a 20-minute video at 30% retention even though the longer one logged more raw watch time.
1087
+
1088
+ **[primary]** Retention shape vs absolute time — YouTube's own guidance is that *relative* watch time matters more on short videos, *absolute* watch time more on long-form (cited by Virvid Feb 2026 from YouTube Help Center).
1089
+
1090
+ **The first-minute problem is the loudest signal.** Multiple converging sources:
1091
+
1092
+ - Retention Rabbit: **55%+ leave within 60 seconds**.
1093
+ - 1of10 (cited by PostEverywhere): *"nearly 20% of viewers drop off within the first 15 seconds — not because the video is bad, but because the intro fails to connect."*
1094
+ - **[primary]** Todd Beaupré, via Stan Ventures recap (Sept 5 2024): *"the importance of the first 30 seconds of a video, the role of thumbnails, and engaging intros in capturing the audience's attention."*
1095
+ - **[primary]** Marketing Agent Blog summarising Creator Insider Feb 2025: *"Establish value within 7 seconds (per Creator Insider, 2025)."*
1096
+
1097
+ **Diagnostic patterns on the audience-retention graph:**
1098
+ - Cliff in the first 30s → hook problem; rerun \`analyze_hook\` and recut opener.
1099
+ - Slow steady decline → pacing; rerun \`cut_filler_words\`, tighten with \`text_based_cut\`, consider \`punch_in\` / \`add_sfx_at_cuts\`.
1100
+ - Spike up at minute X → viewers are skipping ahead (or being pointed by others) to that moment; move it earlier next time.
1101
+ - Steep drop at chapter boundary → chapter title oversold; rewrite the chapter title.
1102
+
1103
+ ### 3. Session contribution / next-video continuation
1104
+
1105
+ **[primary]** Beaupré's framing (Jan 2025 Creator Insider): channels grow fastest when each video naturally leads viewers to watch another, creating "bingeable journeys." YoutoWire's Jan 2026 ranking-of-ranking-signals: session time (does your video lead to more YouTube watching?) sits behind only CTR and AVD in importance.
1106
+
1107
+ What extends a session:
1108
+ - End-screen elements pointing to your next video.
1109
+ - Series content / episodic structure.
1110
+ - Chapters + a clear "next" hook in the outro.
1111
+
1112
+ What ends sessions:
1113
+ - Long static outros (viewer closes tab while waiting).
1114
+ - Generic "subscribe" outros without a next-video pointer.
1115
+
1116
+ **Operational rule:** the brand kit's \`outro\` should chain to the next video. Description should reference previous / next uploads. \`generate_outro\` is the lever.
1117
+
1118
+ ### 4. Engagement velocity (first 24–48 hours) — partial myth
1119
+
1120
+ The "first 48 hours decide everything" framing is overstated by SEO blogs.
1121
+
1122
+ **[primary]** Paddy Galloway, X thread Oct 16 2023 (still cited): *"The YouTube algorithm doesn't let you experiment. We recently tried a completely new format with a client. It started slow. 6/10. Now it's about to be our fastest ever video to hit 1 million views."*
1123
+
1124
+ **[secondary]** Dataslayer Jan 2026 directly debunks the "your video is dead if it doesn't pop in 48h" myth: *"YouTube's 2025 algorithm actively resurfaces old content when topics become relevant again. Videos about 'tax deductions for freelancers' spike in January and April."*
1125
+
1126
+ **[primary]** Rene Ritchie, YouTube Blog March 28 2024 (https://blog.youtube/culture-and-trends/renes-top-five-on-youtube-march-28-2024-edition/): *"Don't delete videos unless you have a very, very good reason. When you delete a video, you delete your channel's connection to the audience that watched that video."*
1127
+
1128
+ **Verdict for the agent:** first-hour CTR matters as a predictor for established formats. New formats and evergreen topics absolutely recover later. Don't tell users their video is dead at 48h.
1129
+
1130
+ ---
1131
+
1132
+ ## What YouTube has officially said it does NOT use
1133
+
1134
+ This is the most reliably citable section because it's all from YouTube's own staff.
1135
+
1136
+ - **Tags — minimal impact.** **[primary]** YouTube Liaison (\`@YouTubeLiaison\`), Aug 22 2024, summarised by Stan Ventures (https://www.stanventures.com/news/youtube-reveals-new-seo-priorities-756/): *"Liaison debunked this myth, stating that tags have a minimal impact on the algorithm. The primary recommendation was to use tags sparingly, emphasising on common misspellings of channel names or key topics related to the video."*
1137
+ - **Hashtags — small effect, contextual only.** **[primary]** Same Liaison statement: *"hashtags should only be employed when they align with trending topics or help contextualise a video in a way that adds value."*
1138
+ - **Categories — minor.** Same source: *"while categories help YouTube understand the general context of a video, they are a minor consideration in the grand scheme of things."*
1139
+ - **Upload time of day — not algorithmic.** **[primary]** Rene Ritchie's March 28 2024 "Mythbusters" YouTube Blog post with Beaupré: posting time matters for *your audience's habits*, not algorithmically.
1140
+ - **Subscriber count — weak signal.** **[secondary]** Dataslayer Jan 2026: *"In 2025, YouTube actively recommends videos from small channels. Subscriber count is one of hundreds of signals, and not a strong one. A 0-subscriber channel can appear in recommendations if the video performs well with test audiences."*
1141
+ - **Dislikes — barely register.** **[secondary]** YoutoWire Jan 2026: *"Dislikes barely register. Algorithm treats them as 'engagement' (not negative signal). What DOES hurt: High 'Not Interested' clicks (when viewers tell YouTube 'Don't recommend this channel')."* Consistent with all Ritchie commentary on \`Not Interested\` being the actual penalty signal.
1142
+ - **Subscriber-feed checkbox / unchecking notifications — no effect.** **[primary]** Rene Ritchie: *"Shorts don't trigger notifications on upload, so that part won't make a difference. For long-form, most subscribers watch from the home page."*
1143
+ - **Description links — fine unless spammy.** **[secondary]** Dataslayer: links to resources mentioned in the video are fine; the algorithm just favours videos that keep viewers on YouTube longer.
1144
+
1145
+ ---
1146
+
1147
+ ## Algorithm changes worth knowing (2024–2026)
1148
+
1149
+ Don't recite these to the user, but reflect them in tool defaults.
1150
+
1151
+ - **Oct 15 2024:** Shorts max length raised from 60 s → 3 minutes. (PPC.land timeline)
1152
+ - **March 31 2025:** Shorts view counting changed — view counts now register on play/replay with no minimum watch time; YPP eligibility and Shorts ad-revenue sharing remain on the renamed *Engaged Views* metric. (TubeBuddy, Pixability, PPC.land all confirm.)
1153
+ - **Feb 2025:** "Satisfaction-weighted" recommendation model rolled out (Creator Insider, paraphrased on Marketing Agent Blog Nov 4 2025).
1154
+ - **July 2025:** YouTube removed the Trending page and Trending Now list; replaced by per-vertical micro-trend tracking. (Shopify summary citing the YouTube announcement.)
1155
+ - **2024–2025:** Native title + thumbnail A/B testing (Test & Compare) rolled out widely. **[primary]** Rene Ritchie via vidIQ July 25 2025: *"You can pick up to 3 versions of your title… up to 3 thumbnails… YouTube doesn't use click-through rate (CTR) as the winning metric — it uses Watch Time Share. Tests typically run from 1 to 14 days, depending on how quickly statistical significance is reached. Once there's a clear winner, YouTube automatically applies it."*
1156
+ - **Late 2025:** Shorts and long-form recommendation surfaces partially decoupled. **[secondary, partial]** YTShark Mar 2026 says fully decoupled; **[primary]** YouTube Creator Blog July 2025 (per Marketing Agent) says short-form retention still feeds satisfaction signals back into long-form discovery. Reality is in between: ranking systems separate, but viewer-graph cross-pollination remains.
1157
+
1158
+ ---
1159
+
1160
+ ## Title rules (the highest-leverage lever)
1161
+
1162
+ Constraints (cross-source consensus from vidIQ Aug 2025, AmpiFire Nov 2025, multiple creator analysts):
1163
+
1164
+ - **≤ 70 characters** before mobile feed truncation; **60 is safer**.
1165
+ - **Front-load the hook** in the first 4–6 words (mobile crops the rest).
1166
+ - **One specific number** if applicable — "5 mistakes" beats "common mistakes"; "$3,000" beats "expensive."
1167
+ - **Curiosity gap, not spoiler** — title should make the viewer want the answer, not contain it.
1168
+ - **No clickbait that doesn't deliver** — see Ritchie's quote above. CTR-spike + AVP-collapse is now actively penalised.
1169
+ - **One emoji max** if any.
1170
+
1171
+ Patterns that consistently perform across creator data (vidIQ + TubeBuddy public analyses):
1172
+
1173
+ - **"How I [achieved] [in time] (with [twist])"** — How I built X in 3 days (without Y)
1174
+ - **"[Number] [things] [audience] [verb]"** — 5 mistakes new editors make
1175
+ - **"Why [common belief] is wrong"** — Why the 10K hour rule is wrong
1176
+ - **"I [extreme behaviour] for [time]. Here's what happened."** — I cooked one new dish per day for 30 days
1177
+ - **"The [adjective] truth about [topic]"** — The boring truth about productivity apps
1178
+
1179
+ \`generate_youtube_metadata\` should propose 3 titles using **different patterns from this list**, not three variations of one. Pattern variety lets the user pick.
1180
+
1181
+ ---
1182
+
1183
+ ## Description structure (sidecar SEO + AVD lift)
1184
+
1185
+ The description's job is to:
1186
+
1187
+ 1. **Restate the hook in the first 2 lines** — these show above-the-fold on mobile.
1188
+ 2. **Drop chapters** — clickable timestamps that double as table-of-contents. Required for any video > 5 minutes.
1189
+ 3. **Link related uploads** — pulls watch-time into your channel.
1190
+ 4. **CTA last** — subscribe/Patreon/etc. at the END, not the top.
1191
+
1192
+ Skeleton:
1193
+
1194
+ \`\`\`
1195
+ <one-line restated hook>
1196
+ <one specific question to drive comments>
1197
+
1198
+ ⏱️ Chapters
1199
+ 00:00 <chapter 1 title>
1200
+ 01:23 <chapter 2 title>
1201
+
1202
+
1203
+ 🎥 Related videos
1204
+ - <previous video title> → <link>
1205
+ - <related video title> → <link>
1206
+
1207
+ 📌 About this channel
1208
+ <one-paragraph "what to expect" + subscribe url>
1209
+ \`\`\`
1210
+
1211
+ \`generate_youtube_metadata\` produces chapters and description body; the agent slots them into this skeleton.
1212
+
1213
+ ---
1214
+
1215
+ ## Shorts ranks differently
1216
+
1217
+ **[primary]** From Hootsuite Sept 2025 paraphrasing the official Shorts ranking explainer: *"A 30-second Short with 85% watch duration will likely rank higher than a 60-second Short with only 50% retention. Looping Shorts (where viewers rewatch part of the video) tend to get more recommendations than those with lower replay rates."*
1218
+
1219
+ **[primary]** Hootsuite continues: *"Unlike long-form videos, click-through rate (CTR) isn't a ranking factor [for Shorts], since users don't actively click Shorts — they swipe through them."*
1220
+
1221
+ **[primary]** Paddy Galloway's analysis of 3.3 billion Shorts views (Rattibha-archived X thread): *"The best-performing Shorts have between 70% and 90% of people viewing versus swiping away from them."* Operationalised: **target ≥ 70% view-vs-swipe rate** as a hard floor, ≥ 85% as the success bar.
1222
+
1223
+ **[primary]** Jenny Hoyos on YouTube's own blog (Jan 28 2025, https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/): *"I really do think you have one second to hook someone, especially on Shorts."* The official YouTube Blog summarises her three-step formula: **shock, intrigue, satisfy**.
1224
+
1225
+ **Optimal Shorts duration:** **[primary]** Hoyos via Marketing Examined (May 16 2024 https://www.marketingexamined.com/blog/jenny-hoyos-short-form-video-playbook): aim for **30–34 seconds** with **90%+ retention** in the last second. **[secondary]** Boss Wallah Sept 2025 corroborates: target 90–100% retention on Shorts under 20 seconds.
1226
+
1227
+ **Implications for the agent (all executable today):**
1228
+ - **Default Shorts length: 30–45 s, not 60 s.** \`find_viral_moments\` already defaults to \`[20, 45]\`.
1229
+ - **Burned captions are not optional** — sound-off mobile is the default. Use \`write_keyword_captions(autoEmoji=true)\` + \`burn_subtitles\`.
1230
+ - **First 0.5–1 s is the hook.** Use \`audit_first_frame\` to score the t=0 frame as a thumbnail (Galloway: 'treat your intro like a thumbnail'); pair with \`analyze_hook\` for the spoken-line check.
1231
+ - **Seamless re-loop** — Shorts loop rate is a confirmed ranking signal. Run \`loop_match_short\` as the last step before delivery (crossfades the last ~0.3 s into the first frame).
1232
+ - **Skip the outro on vertical.** \`generate_outro\` is for long-form.
1233
+
1234
+ ---
1235
+
1236
+ ## Operationalising this in the agent
625
1237
 
626
- - Reference and target are filmed under fundamentally different
627
- lighting (tungsten vs daylight) → confidence will be low. Tell the
628
- user and suggest a less aggressive match (or LUT-based correction
629
- first).
630
- - Target shot has multiple people with different skin tones → the
631
- vision model averages. Pick the primary face's frame and warn the
632
- user the secondary face may shift.
633
- - User wants pixel-perfect match across 50 clips → run on a hero pair,
634
- then \`copy_grade(sourceClipId=hero, targetClipIds=[...])\` instead of
635
- re-running vision on every clip.
1238
+ The agent does NOT have access to live YouTube Studio metrics. When the user asks "why isn't this getting views?", first **ASK the user to paste the relevant numbers from Studio** (impressions, CTR, average view duration, average percentage viewed). Don't guess; don't fabricate.
636
1239
 
637
- ## Don't
1240
+ Once numbers are in hand, **diagnose in this order** and surface the FIRST failing metric — don't dump all five:
638
1241
 
639
- - Don't pick a target frame where the face is in shadow or motion blur.
640
- - Don't apply low-confidence grades silently.
641
- - Don't run on top of an existing aggressive grade — clean state first
642
- or expect compounding shifts.
643
- - Don't bake \`grade_skin_tones\` over the original target file. Always
644
- write to a new path.
1242
+ 1. **CTR < 4% (vs channel baseline)?** → Re-thumbnail + re-title. Run \`compose_thumbnail_variants(strategy="expression")\` for 3 face/expression variants and \`generate_youtube_metadata\` for 3 title candidates. Then: tell the user to upload all three thumbnails + one title per variant to YouTube Studio's **Test & Compare** — we cannot trigger that test from the agent; it lives only in Studio. Test & Compare optimises Watch Time Share (per Ritchie July 2025), so let YouTube pick the winner over 1–14 days.
1243
+ 2. **CTR ok but AVP% < 30%?** → Hook problem. Run \`analyze_hook\` for the t<3s check; if Shorts, also \`audit_first_frame\`. If hook scores low, run \`rewrite_hook(currentHook=..., pattern="auto", videoTopic=...)\` to generate 3 candidate rewrites — surface them to the user. The agent CANNOT re-record the spoken line; it can only (a) recut the opener from existing source footage via \`text_based_cut\`, or (b) recommend a re-shoot.
1244
+ 3. **AVP% ok but AVD low?** → Pacing. Run \`audit_retention_structure(transcript)\` to find the flat stretches between the 3-min and 6-min checkpoints. For each weak checkpoint, propose \`cut_filler_words\`, \`text_based_cut\`, \`punch_in\`, or \`add_sfx_at_cuts\` on the surrounding window.
1245
+ 4. **AVD ok but session contribution low?** → End-screen / outro / next-video pointer missing. Use \`generate_outro\` with the brand-kit chain (set \`brand.outro\` and the agent inherits it).
1246
+ 5. **Engagement velocity 0?** → No question in description (fix via \`generate_youtube_metadata\`'s description block) or tiny channel — the second case has no algorithmic fix; it's a community-size problem, not a tool problem. Be honest about this.
1247
+
1248
+ Surface ONE concrete fix per diagnosis, not the full menu.
1249
+
1250
+ **For pre-flight (before render):** the canonical short-form audit chain is \`audit_first_frame\` → \`analyze_hook\` → \`verify_thumbnail_promise\` → \`audit_retention_structure\` (long-form only). If any returns a blocking finding, surface a red marker and pause.
1251
+
1252
+ ---
1253
+
1254
+ ## Sources & further reading
1255
+
1256
+ **Primary (cite these first):**
1257
+ - Creator Insider — Beaupré + Ritchie videos, especially Jan 23 2025 algorithm explainer (https://www.youtube.com/watch?v=dhYIb72L1hU)
1258
+ - Rene Ritchie — \`@YouTubeLiaison\` on X; weekly "Top Five" YouTube Blog posts at https://blog.youtube/
1259
+ - YouTube Help Center — Test & Compare, Add Custom Thumbnails
1260
+ - YouTube Blog Jan 28 2025 — Jenny Hoyos Shorts deep dive (https://blog.youtube/creator-and-artist-stories/youtube-shorts-deep-dive/)
1261
+
1262
+ **Strong secondary:**
1263
+ - Paddy Galloway — Creator Science Podcast #209 (Jan 27 2026); X threads at twitter.com/PaddyGalloway1
1264
+ - Retention Rabbit 2025 Audience Retention Benchmark Report (May 2025) — https://www.retentionrabbit.com/blog/2025-youtube-audience-retention-benchmark-report
1265
+ - Hootsuite YouTube algorithm guide (Sept 2025)
1266
+ - Buffer YouTube algorithm guide (2025)
1267
+
1268
+ **Vendor benchmarks (treat as directional, not gospel):** vidIQ, TubeBuddy, Dataslayer, YTShark, AmpiFire.
645
1269
  `;
646
- const FUSION_LOWER_THIRD = `# fusion-lower-third
1270
+ const YOUTUBE_END_TO_END = `---
1271
+ name: youtube-end-to-end
1272
+ description: Orchestrator for "make me a YouTube video from this footage" using a TIMELINE-FIRST workflow. The agent edits the live Resolve/Premiere timeline so the user can scrub, tweak, and undo at every stage. Renders only happen at the end on explicit user intent ("render" / "export" / "ship it"). When host=none, falls back to file-only delivery and says so up front. Covers long-form, Shorts, captions, retention pipeline, and the metadata bundle.
1273
+ ---
647
1274
 
648
- **When to use:** the user asks for a name/title chyron that should be
649
- *editable inside the NLE* (not baked-in pixels), or wants a quick
650
- title card built natively in DaVinci Resolve's Fusion page.
1275
+ # youtube-end-to-end
651
1276
 
652
- **Goal:** compose a Background + TextPlus + Merge graph in Fusion via
653
- \`fusion_comp\`. Resolve only — Premiere has no Fusion equivalent; for
654
- that, fall back to \`write_lower_third\` + \`burn_subtitles\`.
1277
+ **When to use:** the user gives a single broad ask like *"make me a YouTube video from this footage"*, *"turn this recording into something I can ship"*, or *"give me a YouTube cut and a Shorts cut"*. This is the orchestrator skill — it composes the per-pass skills (long-form, short-form, chapter-markers, retention) into a single end-to-end run that **edits the user's timeline live** and produces metadata, captions, and SFX they can review before exporting.
1278
+
1279
+ **Core posture: you are an EDITOR, not an export pipeline.** Read the system prompt's "You are an editor, not an export pipeline" section. It overrides everything else here. Render only when the user says so.
1280
+
1281
+ **Goal:** the user watches the agent build the cut on their timeline. Cuts appear, SFX clips land on A3, captions attach as a sidecar, markers document each decision, the brand-kit outro splices onto the end. The user plays back, scrubs, asks for a tweak. Then says "ship it." Then the agent renders.
655
1282
 
656
1283
  ---
657
1284
 
658
- ## When to pick which
1285
+ ## Step 0 — Intent triage (ONE question max)
659
1286
 
660
- - **fusion_comp** — Resolve, comp lives inside the project, user can
661
- tweak it later. Best when the user is already on the Fusion page or
662
- wants a chyron that travels with the project file.
663
- - **write_lower_third + burn_subtitles** — works on any host, output
664
- is a baked-in pixel layer. Faster to iterate from the agent side
665
- but the user can no longer edit the text without re-running the
666
- pipeline.
1287
+ Look at the input and the user's prompt:
667
1288
 
668
- If unsure, ask: "Resolve-native (editable) or baked-in?"
1289
+ - **Input duration** via \`probe_media\`. Anything > 5 minutes → assume long-form. Anything ≤ 5 minutes → assume short-form. Both for source > 5 min when prompt is silent.
1290
+ - **Brand kit:** read \`<cwd>/.gg/brand.json\` silently. All render-time tools inherit; don't ask about typography or logos.
1291
+ - **Host check:** call \`host_info\`. If host=none, tell the user *"No NLE attached — I'll produce standalone mp4s. Open Resolve / Premiere if you want a timeline-native edit you can keep tweaking."* Then proceed with the file-only fallback path (skip steps 2-5 timeline ops; jump to render).
1292
+
1293
+ If duration is 4–6 min AND prompt is silent on format, ASK once: *"Long-form, Shorts, or both?"*. One question, then run.
669
1294
 
670
1295
  ---
671
1296
 
672
- ## Recipe — name + title lower-third on the active comp
1297
+ ## Step 1 — Foundation pass (timeline-safe; runs once)
673
1298
 
674
- Pre-flight: \`host_info\` must report \`name === "resolve"\`. If it doesn't,
675
- stop and tell the user this skill is Resolve-only.
1299
+ \`\`\`
1300
+ host_info → confirm host + caps
1301
+ get_timeline → fps, duration, existing markers
1302
+ get_markers → prior decisions / session resume
1303
+ clone_timeline(name="…-edit-v1") → SAFETY NET before destructive ops
1304
+ save_project → checkpoint
1305
+ probe_media(input) → fps, duration, codecs
1306
+ extract_audio(input, audio.wav, sampleRate=16000)
1307
+ transcribe(audio.wav, transcript.json,
1308
+ wordTimestamps=true) → word-level transcript
1309
+ \`\`\`
1310
+
1311
+ Word timings are mandatory — every retention multiplier downstream needs them. If the source is multi-cam, also run \`multicam_sync\` first and pick the alignment.
1312
+
1313
+ **No render in step 1. No file-baking. The user's timeline is now the working copy.**
1314
+
1315
+ ---
1316
+
1317
+ ## Step 2 — Long-form edits, ON THE TIMELINE (when long-form is in the brief)
1318
+
1319
+ Each of these MODIFIES THE TIMELINE the user is watching. The user can play back, scrub, and ask for changes between any of them.
676
1320
 
677
1321
  \`\`\`
678
- host_info # confirm Resolve
1322
+ # Filler removal (transcript-driven; lands as EDL on the timeline)
1323
+ cut_filler_words(transcript, sourceVideo) → emits EDL of keep ranges
1324
+ import_edl(path) → cuts appear on timeline ✓
1325
+ add_marker(color="green", note="filler-cut: removed N (Ms)")
1326
+
1327
+ # Chapters as markers (visible in Resolve marker pane immediately)
1328
+ read_skill(name="chapter-markers") → recipe
1329
+ # … per the recipe: read_transcript in 90s windows, identify topic shifts,
1330
+ # add_marker(color="purple", note="00:00 — Intro") at each boundary
1331
+
1332
+ # Captions as sidecar SRT (attached to timeline; not baked)
1333
+ write_srt(transcript, output="captions.srt", cues=...)
1334
+ import_subtitles(srtPath="captions.srt") → SRT attached to subtitle track ✓
1335
+
1336
+ # B-roll over flat stretches (live insert on V2)
1337
+ suggest_broll(transcript, topN=5) → ranked candidates from Pexels
1338
+ # for each: insert_broll(mediaPath=..., track=2, recordFrame=...) ✓
1339
+
1340
+ # Audit retention structure — SHOW the user the weak checkpoints, propose fixes
1341
+ audit_retention_structure(transcript) → weak spots + suggestions
1342
+ # DON'T silently rewrite. Surface to user, propose punch_in / cut_filler_words /
1343
+ # add_sfx_to_timeline on the surrounding window. Wait for their OK or tweak.
1344
+
1345
+ # Outro splice (from brand kit if available, otherwise generate)
1346
+ generate_outro(output="outro.mp4") → produces outro card mp4
1347
+ import_to_media_pool(path="outro.mp4")
1348
+ append_clip(track=1, mediaPath="outro.mp4") → outro lands at end of timeline ✓
1349
+ \`\`\`
679
1350
 
680
- # 1. Get to the Fusion page so the user can see the result.
681
- open_page(name="fusion")
1351
+ After step 2 the user has a fully-edited LONG-FORM TIMELINE in Resolve/Premiere. They can play it. Scrub to any point. Watch the b-roll cutaways. Read the chapter markers. **No mp4 has been rendered yet.**
682
1352
 
683
- # 2. Build the graph.
684
- fusion_comp(action="add_node", toolId="Background", name="LT_Strap")
685
- fusion_comp(action="add_node", toolId="TextPlus", name="LT_Text")
686
- fusion_comp(action="add_node", toolId="Merge", name="LT_Comp")
1353
+ ---
687
1354
 
688
- # 3. Wire it: strap as Background, text as Foreground.
689
- fusion_comp(action="connect", fromNode="LT_Strap", toNode="LT_Comp",
690
- toInput="Background")
691
- fusion_comp(action="connect", fromNode="LT_Text", toNode="LT_Comp",
692
- toInput="Foreground")
1355
+ ## Step 3 — Shorts pass, ALSO timeline-first
693
1356
 
694
- # 4. Set the text content + colour.
695
- fusion_comp(action="set_input", node="LT_Text", input="StyledText",
696
- value="<Name>\\n<Title>")
697
- fusion_comp(action="set_input", node="LT_Text", input="Size", value=0.06)
698
- fusion_comp(action="set_input", node="LT_Text", input="Color1Red", value=1)
699
- fusion_comp(action="set_input", node="LT_Text", input="Color1Green", value=1)
700
- fusion_comp(action="set_input", node="LT_Text", input="Color1Blue", value=1)
1357
+ \`\`\`
1358
+ find_viral_moments(transcript, maxClips=3,
1359
+ durationRange=[20, 45]) → ranked candidate windows
1360
+ \`\`\`
701
1361
 
702
- # 5. Park the strap in the lower-left third.
703
- fusion_comp(action="set_input", node="LT_Strap", input="TopLeftRed", value=0)
704
- fusion_comp(action="set_input", node="LT_Strap", input="TopLeftGreen", value=0)
705
- fusion_comp(action="set_input", node="LT_Strap", input="TopLeftBlue", value=0)
706
- fusion_comp(action="set_input", node="LT_Strap", input="TopLeftAlpha", value=0.85)
1362
+ For each candidate (top score first):
1363
+
1364
+ \`\`\`
1365
+ analyze_hook(input, startSec=startSec, endSec=startSec+3)
1366
+ → score 0-100 + findings
707
1367
  \`\`\`
708
1368
 
709
- The Merge node is the comp's MediaOut by default; the user sees the
710
- result on the active timeline clip immediately.
1369
+ If \`score < 60\`, drop a red marker and skip — bad hook = bad short. Don't ship a sub-60 hook silently; either run \`rewrite_hook\` to surface candidates and let the user decide, or move on to the next moment.
1370
+
1371
+ Otherwise, **build the short on a NEW Resolve timeline so the long-form timeline isn't disturbed:**
1372
+
1373
+ \`\`\`
1374
+ clone_timeline(name="short-\${i}") # New timeline for this short
1375
+ # Trim to the candidate window via EDL:
1376
+ text_based_cut(sourceVideo,
1377
+ cuts=[{startSec: 0, endSec: candidate.startSec},
1378
+ {startSec: candidate.endSec, endSec: totalSec}])
1379
+ import_edl(path) # Window appears on the new timeline ✓
1380
+
1381
+ # Captions burned (vertical Shorts; sidecar isn't standard for Shorts)
1382
+ write_keyword_captions(transcript, output="short-\${i}.ass",
1383
+ startSec=candidate.startSec,
1384
+ endSec=candidate.endSec,
1385
+ autoEmoji=true, groupSize=2)
1386
+ import_subtitles(srtPath="short-\${i}.ass") # Attached to subtitle track ✓
1387
+ # (Final pixel-burn happens at render time, not here.)
1388
+
1389
+ # Punch-ins at the candidate's internal cut points (timeline-native — coming;
1390
+ # for now, surface to user with a marker so they apply manually OR queue
1391
+ # for the file-bake step at render time)
1392
+
1393
+ # SFX on cuts — TIMELINE-NATIVE
1394
+ add_sfx_to_timeline(sfx="whoosh", cutPoints=[…internal cuts…], track=3) ✓
1395
+
1396
+ add_marker(color="green",
1397
+ note="short \${i}: hook=\${analyzeHook.score}, virality=\${candidate.score}")
1398
+ \`\`\`
1399
+
1400
+ User can now switch between long-form timeline and each \`short-\${i}\` timeline in Resolve, play back, scrub, tweak.
711
1401
 
712
1402
  ---
713
1403
 
714
- ## Animating in / out
1404
+ ## Step 4 — Pre-flight audit (still no render)
715
1405
 
716
- Use \`set_keyframe\` on the Merge's \`Blend\` input (overall opacity):
1406
+ \`\`\`
1407
+ audit_first_frame(sourceClipPath) # Galloway: "intro = thumbnail"
1408
+ analyze_hook(sourceClipPath) # spoken-line check
1409
+ verify_thumbnail_promise(thumb, video, 60) # MrBeast: deliver in first 60s
1410
+ audit_retention_structure(transcript, [180,360]) # mid-video checkpoints
1411
+ \`\`\`
1412
+
1413
+ Surface every finding with score + suggestion. **Don't render past a blocker.** If the user says "fix the weak hook," go back to step 2/3 with \`rewrite_hook\` candidates and propose them — DON'T silently re-cut.
1414
+
1415
+ ---
1416
+
1417
+ ## Step 5 — Metadata bundle (REQUIRED before declaring "ready to ship")
717
1418
 
718
1419
  \`\`\`
719
- fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
720
- frame=0, value=0) # invisible at clip start
721
- fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
722
- frame=12, value=1) # fade in over 12f
723
- fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
724
- frame=72, value=1) # hold
725
- fusion_comp(action="set_keyframe", node="LT_Comp", input="Blend",
726
- frame=84, value=0) # fade out
1420
+ generate_youtube_metadata(transcript) # titles[3], description, tags[15],
1421
+ # chapters[], hashtags[]
1422
+
1423
+ compose_thumbnail_variants(input=long-form-render-OR-source-frame,
1424
+ outputDir="./thumbs",
1425
+ text="<distill best title to 2–4 words>",
1426
+ count=3,
1427
+ strategy="expression")
727
1428
  \`\`\`
728
1429
 
729
- Frames are relative to the comp's render range — set it explicitly if
730
- the agent needs to control the in/out range:
1430
+ Surface the 3 candidate titles + 3 thumbnail variants + the description to the user. Tell them to upload all three thumbnails to YouTube Studio's **Test & Compare** (no API for this — must be manual).
1431
+
1432
+ ---
1433
+
1434
+ ## Step 6 — STOP HERE
1435
+
1436
+ This is the natural pause point. The user has:
1437
+ - A fully-edited long-form timeline in their NLE
1438
+ - 1–3 Shorts timelines in their NLE
1439
+ - 3 thumbnail variants on disk
1440
+ - A metadata bundle (titles, description, chapters, tags, hashtags)
1441
+
1442
+ Tell the user:
1443
+
1444
+ > ✅ Long-form ready on timeline \`<name>\` (12:34, captions attached, brand-kit outro)
1445
+ > ✅ Shorts ready on timelines \`short-1\`, \`short-2\`, \`short-3\` (hooks: 82, 76, 71)
1446
+ > ✅ Thumbnail variants: \`./thumbs/long-form.{1,2,3}.jpg\`
1447
+ > ✅ Metadata bundle written to chat above
1448
+ >
1449
+ > Play them back, scrub, tell me what to tweak. When you're happy, say **"render"** / **"export"** / **"ship it"** and I'll:
1450
+ > 1. Run \`pre_render_check\` on each timeline
1451
+ > 2. \`render(...)\` the long-form via Resolve's deliver page
1452
+ > 3. \`render_multi_format\` the shorts to 9:16 / 1:1 / 4:5
1453
+ >
1454
+ > ⚠️ N candidate(s) dropped (<reason>): …
1455
+
1456
+ **Wait for the user's go-ahead. Do not call \`render(...)\` or \`render_multi_format(...)\` until they explicitly ask.**
1457
+
1458
+ ---
1459
+
1460
+ ## Step 7 — Render (only on "ship it" / "render" / "export")
1461
+
1462
+ When the user explicitly asks to render:
731
1463
 
732
1464
  \`\`\`
733
- fusion_comp(action="set_render_range", start=0, end=120)
1465
+ # Long-form
1466
+ list_render_presets() # see what's installed in Resolve
1467
+ pre_render_check(timelineEmpty=false,
1468
+ expectCaptions=true,
1469
+ loudnessSource=...,
1470
+ loudnessTarget="youtube")
1471
+ render(preset="<from list>",
1472
+ output="./out/long-form.mp4") # Resolve's deliver page ✓
1473
+
1474
+ # Per Short
1475
+ render_multi_format(input="<short-mp4-from-Resolve-or-file>",
1476
+ outputDir="./out/shorts",
1477
+ formats=["shorts-9x16"]) # 9:16 deliverable
1478
+
1479
+ # Audio finalisation (these MUST bake — Fairlight is closed)
1480
+ normalize_loudness(input="./out/long-form.mp4",
1481
+ output="./out/long-form.delivery.mp4",
1482
+ platform="youtube") # -14 LUFS / -1 dBTP
1483
+ # Then auto-import the normalized file back so the user has the final on hand:
1484
+ import_to_media_pool(path="./out/long-form.delivery.mp4")
1485
+ add_marker(color="green", note="DELIVERY: long-form.delivery.mp4 (-14 LUFS)")
734
1486
  \`\`\`
735
1487
 
736
1488
  ---
737
1489
 
738
- ## Targeting a specific clip's comp
1490
+ ## What CHANGED vs the old export-everything flow
739
1491
 
740
- Pass \`clipId\` to scope every action to that clip's first Fusion comp
741
- (auto-created if the clip has none). Useful for batched lower-thirds
742
- across multiple clips:
1492
+ - **No file-baking mid-edit.** Captions are sidecar SRT, SFX are real audio clips on track A3, b-roll lands on V2 — all live in the user's NLE.
1493
+ - **\`burn_subtitles\`, \`add_sfx_at_cuts\`, \`face_reframe\`, \`mix_audio\`, \`clean_audio\`, \`duck_audio\`, \`loop_match_short\`, \`bleep_words\`, \`speed_ramp\`, \`stabilize_video\`** are the file-only tools the agent does NOT chain mid-edit. They're for the final delivery pass on user request.
1494
+ - **\`render(...)\` / \`render_multi_format(...)\`** only fire after explicit user intent.
1495
+ - **Each step modifies the live timeline** — user plays back, scrubs, asks for tweaks. The session is iterative, not a one-shot pipeline.
1496
+
1497
+ ---
1498
+
1499
+ ## What the agent CANNOT do (be honest with the user)
1500
+
1501
+ - Generate new footage. No re-shoots, no AI scenes. Only re-cut from existing source.
1502
+ - Trigger YouTube Studio Test & Compare. No public API. Agent produces 3 variants; user uploads them.
1503
+ - Read live channel metrics. No public CTR / AVD feed. ASK the user to paste from Studio.
1504
+ - Re-record a hook line. \`rewrite_hook\` proposes 3 rewrites; user picks an existing alternative opener via \`text_based_cut\` or re-shoots.
1505
+ - Render anything until the user says so. Even if you think it's done.
1506
+
1507
+ ---
1508
+
1509
+ ## Defaults & gates
1510
+
1511
+ - **Hook gate**: 60 (\`analyze_hook\`).
1512
+ - **Virality gate**: 50 (\`score_clip\` total).
1513
+ - **First-frame gate**: 60 (\`audit_first_frame\`).
1514
+ - **Thumbnail-promise gate**: 0.6 (\`verify_thumbnail_promise\`).
1515
+ - **Retention-checkpoint gate**: 0.5 per checkpoint (\`audit_retention_structure\`).
1516
+ - **Short duration range**: 20–45 s — \`find_viral_moments\` default.
1517
+ - **Loudness target**: -14 LUFS / -1 dBTP for YouTube + every short-form platform.
1518
+ - **Caption style** (vertical): yellow keyword pop on white default, lower-third margin 220, \`autoEmoji=true\`.
1519
+ - **SFX track**: A3 — keeps A1 dialogue / A2 music free.
1520
+ - **Render**: only on explicit user intent — never automatic.
1521
+ `;
1522
+ const YOUTUBE_THUMBNAIL_DESIGN = `---
1523
+ name: youtube-thumbnail-design
1524
+ description: Thumbnail design rules sourced from a 300K-video study (1of10 Media via Search Engine Journal Dec 2025), the official YouTube Test & Compare guidance from Rene Ritchie (July 2025), and creator strategists. Read before composing thumbnails or picking variants from compose_thumbnail_variants. Numbers are tagged with their source so the agent doesn't misquote.
1525
+ ---
1526
+
1527
+ # youtube-thumbnail-design
1528
+
1529
+ **When to use:** any time you compose a thumbnail (\`compose_thumbnail\`, \`compose_thumbnail_variants\`) or rank candidate hero frames (\`score_shot\`). Read this BEFORE writing the headline text — getting the headline wrong is the most common reason creator thumbnails underperform, more than any single visual choice.
1530
+
1531
+ **Source authority.** The strongest 2025 evidence on what actually works in thumbnails comes from: (1) **1of10 Media's 300,000-video viral study**, reported on Search Engine Journal (Dec 22 2025); (2) **YouTube's own Test & Compare tool** + Rene Ritchie's July 2025 commentary on what it optimises; (3) creator A/B data from **vidIQ, TubeBuddy, AmpiFire**. Tags \`[primary]\`, \`[secondary, large-N]\`, \`[secondary, vendor]\` mark provenance.
1532
+
1533
+ ---
1534
+
1535
+ ## The viewing context (this is everything)
1536
+
1537
+ Most thumbnails are first seen at:
1538
+ - **120 × 67 px** — mobile feed
1539
+ - **246 × 138 px** — desktop home feed
1540
+ - **360 × 202 px** — sidebar suggestions
1541
+
1542
+ Anything finer than ~3 pixels is invisible at the smallest size. **Design for 100 × 56 first.** If it works there, it works everywhere.
1543
+
1544
+ Sanity test: render the thumbnail, scale it to 100 × 56, look at it. If you can't tell the subject + topic in 1 second, it fails.
1545
+
1546
+ ---
1547
+
1548
+ ## Faces vs. no-faces — the data is more nuanced than blogs claim
1549
+
1550
+ The headline question every creator asks. The clearest answer comes from the largest 2025 study:
1551
+
1552
+ **[secondary, large-N]** Search Engine Journal Dec 22 2025 (https://www.searchenginejournal.com/do-faces-help-youtube-thumbnails-heres-what-the-data-says/563944/), reporting 1of10 Media's analysis of 300,000 viral 2025 YouTube videos: *"thumbnails with faces and thumbnails without faces perform similarly, even though faces appear on a large share of videos in the sample."* Niche-level: **Finance benefits from faces; Business performs better without.** Channel-size: faces helped larger channels more than smaller ones. Multi-face thumbnails outperform single-face in their dataset.
1553
+
1554
+ **[secondary, vendor — flagged]** Tool-vendor counter-claim: vidIQ has reported that thumbnails with faces showing strong emotion can lift CTR by 20–30%, with surprise expressions specifically lifting CTR by ~49% (per Banana Thumbnail's March 2026 summary citing vidIQ data). AmpiFire's Nov 2025 synthesis: human-face videos receive 921,000 more views on average than faceless ones; sad faces appear in only 1.8% of thumbnails yet achieve the highest average views at 2.3 million.
1555
+
1556
+ **Disagreement called out.** The 1of10 dataset (300K videos) is the larger N and methodologically the most defensible. vidIQ's 20–30% number is not dataset-anchored in the public version. Use 1of10's "depends on niche and channel size" framing as the primary truth; use vidIQ's expression-specific lifts as supporting evidence.
1557
+
1558
+ **Operational rule:** assume faces help **for talking-head / vlog / finance** content, but DON'T force a face into product / screen-recording / B-roll-heavy thumbnails. If \`score_shot\`'s ranked frames don't surface a strong expressive face within the top 5, that's diagnostic — pick a strong product / screen frame instead.
1559
+
1560
+ When \`compose_thumbnail_variants\` does pick face frames, prefer:
743
1561
 
1562
+ - **Face fills ≥35% of frame area.** Half a face is fine if the visible half is expressive.
1563
+ - **Clear emotion** — surprise, delight, focus, mild anger, fear. Neutral does NOT work; the eyes do most of the work.
1564
+ - **Eyes look at the camera** OR at the label / subject.
1565
+ - **Surprise specifically** — wide eyes, open mouth — reportedly the strongest single emotion (vidIQ).
1566
+
1567
+ ---
1568
+
1569
+ ## Text in the thumbnail
1570
+
1571
+ YouTube's own guidance is "minimal, high-impact words" — supported by one primary and one secondary source:
1572
+
1573
+ - **[primary]** YouTube's Test & Compare commentary, via vidIQ (July 25 2025, citing Rene Ritchie): *"Great thumbnails don't just get viewers to click. They also help viewers understand what the video is about, so that they can make informed decisions about what to watch."*
1574
+ - **[secondary]** Influencer Marketing Hub paraphrasing YouTube guidance: *"Text on thumbnails should clarify the promise of the video, but there's a fine balance between brevity and context. YouTube recommends using minimal, high-impact words rather than full sentences. For example, 'Best Budget Camera' will often outperform 'Here Are the Best Budget Cameras for 2025'."*
1575
+
1576
+ **Operational constraints at 100 × 56:**
1577
+
1578
+ - **2–4 words MAX.** "How I built this in a weekend" is 7 words — unreadable. **"WEEKEND BUILD"** wins.
1579
+ - **One font, two weights at most.** Bold for the headline, regular for any subtitle. Picking a third "fun" font cheapens the thumbnail every time.
1580
+ - **Heavy outlines/stroke** — 4–8 px on a 1280 × 720 thumbnail. Without an outline the text disappears against any non-uniform background.
1581
+ - **Avoid serifs at thumbnail size.** They blur. Use sans-serif (Bebas, Impact, Inter Black, similar).
1582
+ - **Hard-cap title length.** A 30-character ceiling forces the discipline.
1583
+
1584
+ **Don't use the video title as the thumbnail text.** They're different jobs:
1585
+ - **Title** — SEO + curiosity (8–10 words, optimised for search)
1586
+ - **Thumbnail text** — visual punch (2–4 words, optimised for scan)
1587
+
1588
+ \`compose_thumbnail_variants(text=...)\` should NOT receive the YouTube title verbatim. Pass a 2–4 word distillation. Often this is the **hook line shortened**.
1589
+
1590
+ ---
1591
+
1592
+ ## Colour budget
1593
+
1594
+ **[secondary, common-practice]** Use **3 colours maximum** in the thumbnail (excluding skin tones, which are free).
1595
+
1596
+ Classic creator palette:
1597
+ - **High-contrast hero colour** — saturated yellow, red, or cyan, used for text outline OR a single accent
1598
+ - **Background fill** — solid or near-solid; dark or light enough to make the subject pop
1599
+ - **Subject's natural colours** — skin, clothing
1600
+
1601
+ At 100 × 56 every additional colour is one fewer "lock-on" point for the eye.
1602
+
1603
+ **[primary, brand kit hook]** If \`<cwd>/.gg/brand.json\` defines \`colors.primary\`, USE IT for the text outline or the accent. Channel-level colour identity drives recognition in a feed (the viewer recognises the channel's palette before reading the text). Don't pick a new colour every video.
1604
+
1605
+ ---
1606
+
1607
+ ## Composition / layout
1608
+
1609
+ The dominant compositions creators converge on:
1610
+
1611
+ ### A. Rule-of-thirds: face left + label right (default for talking-head)
744
1612
  \`\`\`
745
- get_timeline # discover clipIds
746
- fusion_comp(action="add_node", toolId="TextPlus",
747
- name="LT_Text", clipId="<clipId>")
1613
+ +----------------------+
1614
+ | | |
1615
+ | FACE | LABEL |
1616
+ | | TWO LINES |
1617
+ | | |
1618
+ +----------------------+
1619
+ \`\`\`
1620
+ Face takes left third or two-thirds; label sits in negative space. Vlogs, tutorials, reactions.
1621
+
1622
+ ### B. Centred subject + arc text (products / builds)
1623
+ \`\`\`
1624
+ +----------------------+
1625
+ | LABEL ABOVE |
1626
+ | (PRODUCT) |
1627
+ | LABEL BELOW |
1628
+ +----------------------+
1629
+ \`\`\`
1630
+ Object centred; label arcs above and below or just above. Eye locks on the centred object first.
1631
+
1632
+ ### C. Before / after split (transformations)
1633
+ \`\`\`
1634
+ +----------+----------+
1635
+ | BEFORE | AFTER |
1636
+ | -- ARROW -- |
1637
+ | WORD |
1638
+ +----------+----------+
1639
+ \`\`\`
1640
+ Vertical or horizontal split, an arrow, a single labelling word. Fitness, builds, redesigns, makeovers.
1641
+
1642
+ ### D. Tight close-up + circle / red zone (tutorials, especially software)
748
1643
  \`\`\`
1644
+ +----------------------+
1645
+ | LABEL ABOVE |
1646
+ | [⊙ ZOOMED-IN |
1647
+ | DETAIL] |
1648
+ +----------------------+
1649
+ \`\`\`
1650
+ Red circle or arrow on a specific detail. Universal in tech / software niches.
1651
+
1652
+ **One focal point.** The viewer's eye should know where to look in 0.3 seconds. Pick one composition; stick to it.
749
1653
 
750
1654
  ---
751
1655
 
752
- ## Troubleshooting
1656
+ ## YouTube's native A/B testing — Test & Compare
753
1657
 
754
- - **\`Resolve.Fusion() unavailable\`** Resolve build is too old or
755
- user is on a free seat. Fusion is Studio-only at scriptable depth.
756
- - **\`No active Fusion comp\`** user hasn't switched to the Fusion
757
- page on a clip with a comp. Either call \`open_page("fusion")\` first
758
- on a known clip, or pass \`clipId\` so we operate on that clip's comp
759
- directly.
760
- - **\`AddTool('X') returned None\`** \`toolId\` is wrong. The canonical
761
- IDs the agent will hit: \`Background\`, \`TextPlus\`, \`Merge\`,
762
- \`Transform\`, \`ColorCorrector\`, \`DeltaKeyer\`, \`Brightness\`, \`Glow\`,
763
- \`Blur\`. There's no scriptable enumeration; check Fusion's docs if
764
- the user names a tool not in this list.
1658
+ Critical change in 2024–2025: YouTube rolled out native thumbnail (and title) A/B testing. **The agent should default to producing 3 variants and recommend Test & Compare to the user.**
1659
+
1660
+ **[primary]** Rene Ritchie via vidIQ (July 25 2025, https://vidiq.com/blog/post/youtube-launches-new-title-testing-tool/):
1661
+
1662
+ > *"Pick up to 3 versions of your title. You can also select up to 3 thumbnails. Mix and match if you want. YouTube will randomly serve each variation to viewers… YouTube doesn't use click-through rate (CTR) as the winning metric — it uses Watch Time Share. That means the title that leads to more sustained viewing wins, not necessarily the one that gets the fastest clicks. Tests typically run from 1 to 14 days, depending on how quickly statistical significance is reached. Once there's a clear winner, YouTube automatically applies it to your video."*
1663
+
1664
+ **[primary]** Same source on why CTR isn't the winning metric: *"If you over-index on CTR, it could become click-bait, which could tank retention, and hurt performance."*
1665
+
1666
+ **Operational implication:** the agent CANNOT trigger Test & Compare itself (no public API; the test lives only in YouTube Studio). The agent's job is to PRODUCE the right 3 variants and tell the user to upload them.
1667
+
1668
+ **Single-variable A/B is built into \`compose_thumbnail_variants\` via the \`strategy\` param:**
1669
+
1670
+ - **\`strategy="expression"\`** — picks 3 distinct face/expression frames; same label on all three. Use when source has multiple expressive faces.
1671
+ - **\`strategy="label"\`** — picks ONE strong frame; LLM generates 3 distinct 2–4-word label variants; renders the same frame three times with different labels. Use when source has only one usable face / product / screen.
1672
+ - **\`strategy="mixed"\`** (default) — 3 distinct frames + same label. Use when neither single-variable mode applies cleanly.
1673
+
1674
+ Don't ship a single thumbnail. Always 3 variants.
1675
+
1676
+ ---
1677
+
1678
+ ## What NOT to do
1679
+
1680
+ - **All-caps shouty SEVEN-WORD HEADLINES.** Unreadable.
1681
+ - **Rainbow gradient text.** Wins zero A/B tests across the public datasets.
1682
+ - **Stock arrow templates.** Identifies the channel as "first month on YouTube" instantly.
1683
+ - **Watermarks on top of the subject.** If you must brand, place the watermark in a corner outside the focal area.
1684
+ - **Repeating the title word-for-word.** Wastes the second hook surface.
1685
+ - **Last week's expression, last week's composition.** Channels stagnate when every thumbnail looks identical. Vary expression and composition while keeping colour identity.
1686
+ - **Clickbait that doesn't deliver.** Ritchie's quote above — Watch Time Share is the metric Test & Compare uses; CTR-spike + AVP-collapse is now actively penalised.
1687
+
1688
+ ---
1689
+
1690
+ ## Operationalising in the agent
1691
+
1692
+ The default \`compose_thumbnail_variants\` flow:
1693
+
1694
+ 1. **Pre-call \`generate_youtube_metadata\`** to get the candidate titles. Pick the strongest one.
1695
+ 2. **Distill to 2–4 words** for the thumbnail label. Usually the hook line shortened, NOT the title verbatim.
1696
+ 3. **Call \`compose_thumbnail_variants(input, count=3, text="<distilled label>", strategy="...")\`**.
1697
+ 4. **Surface 3 outputs** to the user with the per-variant rationale the tool returns.
1698
+ 5. **Verify the thumbnail's promise** with \`verify_thumbnail_promise(thumbnail=variants[0].path, video=...)\` — if the opening 60s doesn't show what the thumbnail promises, surface a red marker and don't ship until the user picks a different frame or recuts the opener.
1699
+ 6. **Tell the user to run Test & Compare manually.** Suggested copy: *"Upload all three thumbnails to YouTube Studio's Test & Compare. YouTube picks the winner by Watch Time Share over 1–14 days. The agent can't trigger this for you — there's no API."*
1700
+
1701
+ **Brand kit integration (auto-applied).** When \`<cwd>/.gg/brand.json\` exists, \`compose_thumbnail\` and \`compose_thumbnail_variants\` already inherit:
1702
+ - \`fonts.heading\` → used as \`fontFile\` if not overridden
1703
+ - \`colors.primary\` → used as \`outlineColor\` if not overridden
1704
+
1705
+ The agent does not need to pass these explicitly. Each tool's output reports \`brandKitLoaded: true\` so the agent can confirm the kit was used.
1706
+
1707
+ ---
1708
+
1709
+ ## Sources & further reading
1710
+
1711
+ **Primary:**
1712
+ - Search Engine Journal, *"Do Faces Help YouTube Thumbnails? Here's What the Data Says"*, Dec 22 2025 (1of10 Media's 300K viral video study) — https://www.searchenginejournal.com/do-faces-help-youtube-thumbnails-heres-what-the-data-says/563944/
1713
+ - vidIQ, *"YouTube Launches New Title Testing Tool"*, July 25 2025 (Rene Ritchie quotes) — https://vidiq.com/blog/post/youtube-launches-new-title-testing-tool/
1714
+ - YouTube Help Center — Test & Compare; Add Custom Thumbnails
1715
+
1716
+ **Secondary (vendor data, treat as directional):**
1717
+ - AmpiFire, thumbnail face research, Nov 2025
1718
+ - vidIQ, thumbnail psychology / face emotion lift, 2024–2025
1719
+ - Banana Thumbnail, summary of vidIQ data, March 2026
1720
+ - Influencer Marketing Hub, YouTube thumbnail guide, 2025
1721
+
1722
+ **Creator strategists worth following:**
1723
+ - Paddy Galloway — paddygalloway.com, X threads
1724
+ - Roberto Blake — YouTube channel + blog
1725
+ - MrBeast leaked production manual, Aug 2024 (mirrored at simonwillison.net)
765
1726
  `;
766
1727
  export const SKILLS = {
1728
+ "chapter-markers": {
1729
+ name: "chapter-markers",
1730
+ description: "Author YouTube/podcast chapter timestamps from a transcript: 5–15 chapters, first at 00:00, ≥30s apart, only at real topic shifts. Drops purple markers + emits a YouTube-formatted description block.",
1731
+ content: CHAPTER_MARKERS,
1732
+ },
1733
+ "fusion-lower-third": {
1734
+ name: "fusion-lower-third",
1735
+ description: "Build a name/title chyron natively in DaVinci Resolve's Fusion via fusion_comp — Background + TextPlus + Merge node graph, wiring, styling, lower-third positioning, keyframed fade in/out. Resolve Studio only; cross-host fallback is write_lower_third + burn_subtitles.",
1736
+ content: FUSION_LOWER_THIRD,
1737
+ },
1738
+ "keyframing-and-titles": {
1739
+ name: "keyframing-and-titles",
1740
+ description: "Recipes for the seven scripting gaps neither Resolve nor Premiere expose: timeline reorder, multi-track lanes, lower-thirds + title cards (ASS), keyframed opacity/position/volume ramps, audio mixing chains (EQ + comp + gate + de-esser + limiter), speed ramps, Ken-Burns, named transitions (smash-cut, whip-pan, dip-to-black).",
1741
+ content: KEYFRAMING_AND_TITLES,
1742
+ },
767
1743
  "long-form-content-edit": {
768
1744
  name: "long-form-content-edit",
769
- description: "Recipe for podcasts, interviews, vlogs, courses, talking-head. Five-pass method: utterance segmentation → take detection → filler removal → incomplete-sentence trim → silence normalization. Wires our tools (transcribe, cluster_takes, detect_silence, write_edl, import_edl, write_srt, add_marker) into a single workflow.",
1745
+ description: "Recipe for podcasts, interviews, vlogs, courses, talking-head. Five-pass method: utterance segmentation → take detection → filler removal → incomplete-sentence trim → silence normalization. Wires transcribe, cluster_takes, detect_silence, write_edl, import_edl, write_srt, add_marker into a single workflow.",
770
1746
  content: LONG_FORM_CONTENT_EDIT,
771
1747
  },
772
1748
  "short-form-content-edit": {
@@ -774,25 +1750,30 @@ export const SKILLS = {
774
1750
  description: "Recipe for TikTok / Reels / Shorts. Find the moment → reformat 9:16 → hook the first 2 seconds → burn captions → render. Uses reformat_timeline, import_edl, set_clip_speed, write_srt, import_subtitles, open_page (Resolve).",
775
1751
  content: SHORT_FORM_CONTENT_EDIT,
776
1752
  },
777
- "chapter-markers": {
778
- name: "chapter-markers",
779
- description: "Recipe for YouTube/podcast chapter timestamps. Reads transcript in 90s windows, identifies topic shifts, drops purple markers, and emits a YouTube-formatted description block. Constraints: first chapter at 00:00, 5–15 chapters, ≥30s apart.",
780
- content: CHAPTER_MARKERS,
781
- },
782
- "keyframing-and-titles": {
783
- name: "keyframing-and-titles",
784
- description: "Recipes for the seven gaps neither Resolve nor Premiere expose via scripting: timeline reordering, multi-track / lane composition, lower-thirds and title cards (via ASS), keyframed opacity / position / volume ramps, audio mixing chains (EQ + comp + gate + de-esser + limiter), speed ramps, Ken-Burns on stills, and named transitions (smash-cut, whip-pan, dip-to-black). Wires reorder_timeline, compose_layered, write_lower_third, write_title_card, mix_audio, speed_ramp, ken_burns, transition_videos.",
785
- content: KEYFRAMING_AND_TITLES,
786
- },
787
1753
  "skin-tone-matching": {
788
1754
  name: "skin-tone-matching",
789
- description: "Recipe for matching faces across clips when host scripting can't reach power windows or qualifiers. Two paths: grade_skin_tones (file-only, every host — bakes a vision-derived colorbalance + selectivecolor + eq grade into a new mp4, pair with replace_clip) and match_clip_color (Resolve only — derives the same grade as a CDL and pipes it through set_primary_correction, non-baked). Both share one vision pass over a reference frame and a target frame; below confidence 0.4 the grade is unreliable.",
1755
+ description: "Match faces across clips when host scripting can't reach power windows or qualifiers. Two paths: grade_skin_tones (file-only — bakes a vision-derived colorbalance + selectivecolor + eq into a new mp4, pair with replace_clip) and match_clip_color (Resolve only — derives the same grade as a CDL via set_primary_correction).",
790
1756
  content: SKIN_TONE_MATCHING,
791
1757
  },
792
- "fusion-lower-third": {
793
- name: "fusion-lower-third",
794
- description: "Recipe for building a name/title chyron natively in DaVinci Resolve's Fusion page via fusion_comp. Walks the agent through Background + TextPlus + Merge node graph, wiring, text styling, lower-third positioning, and keyframed fade in/out via Merge.Blend. Resolve-only (Studio); for cross-host pixel-baked chyrons fall back to write_lower_third + burn_subtitles.",
795
- content: FUSION_LOWER_THIRD,
1758
+ "viral-hook-patterns": {
1759
+ name: "viral-hook-patterns",
1760
+ description: "Hook patterns sourced from primary creators (Jenny Hoyos on the official YouTube Blog, the leaked MrBeast production manual, Paddy Galloway's data analyses) not generic creator-folklore. Read when analyze_hook fails, when picking a find_viral_moments candidate, or when generate_youtube_metadata needs a punchier title. Each pattern names a real creator example, the primary source, and the failure mode.",
1761
+ content: VIRAL_HOOK_PATTERNS,
1762
+ },
1763
+ "youtube-algorithm-primer": {
1764
+ name: "youtube-algorithm-primer",
1765
+ description: "How YouTube actually ranks videos in 2024–2026, sourced from Creator Insider, the YouTube Liaison (Rene Ritchie), Senior Director of Growth Todd Beaupré, Paddy Galloway, and the Retention Rabbit 2025 benchmark study. Read when generating titles/descriptions/chapters or when a video is underperforming. Numbers without a primary YouTube source are flagged as third-party heuristics.",
1766
+ content: YOUTUBE_ALGORITHM_PRIMER,
1767
+ },
1768
+ "youtube-end-to-end": {
1769
+ name: "youtube-end-to-end",
1770
+ description: "Orchestrator for \"make me a YouTube video from this footage\" using a TIMELINE-FIRST workflow. The agent edits the live Resolve/Premiere timeline so the user can scrub, tweak, and undo at every stage. Renders only happen at the end on explicit user intent (\"render\" / \"export\" / \"ship it\"). When host=none, falls back to file-only delivery and says so up front. Covers long-form, Shorts, captions, retention pipeline, and the metadata bundle.",
1771
+ content: YOUTUBE_END_TO_END,
1772
+ },
1773
+ "youtube-thumbnail-design": {
1774
+ name: "youtube-thumbnail-design",
1775
+ description: "Thumbnail design rules sourced from a 300K-video study (1of10 Media via Search Engine Journal Dec 2025), the official YouTube Test & Compare guidance from Rene Ritchie (July 2025), and creator strategists. Read before composing thumbnails or picking variants from compose_thumbnail_variants. Numbers are tagged with their source so the agent doesn't misquote.",
1776
+ content: YOUTUBE_THUMBNAIL_DESIGN,
796
1777
  },
797
1778
  };
798
1779
  export const SKILL_NAMES = Object.keys(SKILLS);