videowright 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. package/README.md +91 -0
  2. package/dist/cli/argv.d.ts +28 -0
  3. package/dist/cli/argv.d.ts.map +1 -0
  4. package/dist/cli/argv.js +115 -0
  5. package/dist/cli/argv.js.map +1 -0
  6. package/dist/cli/bin.d.ts +7 -0
  7. package/dist/cli/bin.d.ts.map +1 -0
  8. package/dist/cli/bin.js +10 -0
  9. package/dist/cli/bin.js.map +1 -0
  10. package/dist/cli/dev.d.ts +19 -0
  11. package/dist/cli/dev.d.ts.map +1 -0
  12. package/dist/cli/dev.js +104 -0
  13. package/dist/cli/dev.js.map +1 -0
  14. package/dist/cli/discover.d.ts +29 -0
  15. package/dist/cli/discover.d.ts.map +1 -0
  16. package/dist/cli/discover.js +104 -0
  17. package/dist/cli/discover.js.map +1 -0
  18. package/dist/cli/discover_project.d.ts +29 -0
  19. package/dist/cli/discover_project.d.ts.map +1 -0
  20. package/dist/cli/discover_project.js +108 -0
  21. package/dist/cli/discover_project.js.map +1 -0
  22. package/dist/cli/errors.d.ts +10 -0
  23. package/dist/cli/errors.d.ts.map +1 -0
  24. package/dist/cli/errors.js +13 -0
  25. package/dist/cli/errors.js.map +1 -0
  26. package/dist/cli/ffmpeg.d.ts +57 -0
  27. package/dist/cli/ffmpeg.d.ts.map +1 -0
  28. package/dist/cli/ffmpeg.js +122 -0
  29. package/dist/cli/ffmpeg.js.map +1 -0
  30. package/dist/cli/index.d.ts +7 -0
  31. package/dist/cli/index.d.ts.map +1 -0
  32. package/dist/cli/index.js +152 -0
  33. package/dist/cli/index.js.map +1 -0
  34. package/dist/cli/playwright_check.d.ts +44 -0
  35. package/dist/cli/playwright_check.d.ts.map +1 -0
  36. package/dist/cli/playwright_check.js +20 -0
  37. package/dist/cli/playwright_check.js.map +1 -0
  38. package/dist/cli/prompt.d.ts +13 -0
  39. package/dist/cli/prompt.d.ts.map +1 -0
  40. package/dist/cli/prompt.js +47 -0
  41. package/dist/cli/prompt.js.map +1 -0
  42. package/dist/cli/render.d.ts +60 -0
  43. package/dist/cli/render.d.ts.map +1 -0
  44. package/dist/cli/render.js +471 -0
  45. package/dist/cli/render.js.map +1 -0
  46. package/dist/cli/script_cmd.d.ts +26 -0
  47. package/dist/cli/script_cmd.d.ts.map +1 -0
  48. package/dist/cli/script_cmd.js +88 -0
  49. package/dist/cli/script_cmd.js.map +1 -0
  50. package/dist/cli/time_shim.d.ts +44 -0
  51. package/dist/cli/time_shim.d.ts.map +1 -0
  52. package/dist/cli/time_shim.js +390 -0
  53. package/dist/cli/time_shim.js.map +1 -0
  54. package/dist/cli/ts_loader.d.ts +28 -0
  55. package/dist/cli/ts_loader.d.ts.map +1 -0
  56. package/dist/cli/ts_loader.js +95 -0
  57. package/dist/cli/ts_loader.js.map +1 -0
  58. package/dist/cli/vite_helpers.d.ts +62 -0
  59. package/dist/cli/vite_helpers.d.ts.map +1 -0
  60. package/dist/cli/vite_helpers.js +273 -0
  61. package/dist/cli/vite_helpers.js.map +1 -0
  62. package/dist/index.d.ts +11 -0
  63. package/dist/index.d.ts.map +1 -0
  64. package/dist/index.js +14 -0
  65. package/dist/index.js.map +1 -0
  66. package/dist/player/hash_router.d.ts +23 -0
  67. package/dist/player/hash_router.d.ts.map +1 -0
  68. package/dist/player/hash_router.js +49 -0
  69. package/dist/player/hash_router.js.map +1 -0
  70. package/dist/player/hud.d.ts +33 -0
  71. package/dist/player/hud.d.ts.map +1 -0
  72. package/dist/player/hud.js +357 -0
  73. package/dist/player/hud.js.map +1 -0
  74. package/dist/player/index.d.ts +123 -0
  75. package/dist/player/index.d.ts.map +1 -0
  76. package/dist/player/index.js +848 -0
  77. package/dist/player/index.js.map +1 -0
  78. package/dist/player/input.d.ts +14 -0
  79. package/dist/player/input.d.ts.map +1 -0
  80. package/dist/player/input.js +90 -0
  81. package/dist/player/input.js.map +1 -0
  82. package/dist/player/slot.d.ts +22 -0
  83. package/dist/player/slot.d.ts.map +1 -0
  84. package/dist/player/slot.js +43 -0
  85. package/dist/player/slot.js.map +1 -0
  86. package/dist/player/transitions/cut.d.ts +7 -0
  87. package/dist/player/transitions/cut.d.ts.map +1 -0
  88. package/dist/player/transitions/cut.js +9 -0
  89. package/dist/player/transitions/cut.js.map +1 -0
  90. package/dist/player/transitions/fade.d.ts +7 -0
  91. package/dist/player/transitions/fade.d.ts.map +1 -0
  92. package/dist/player/transitions/fade.js +18 -0
  93. package/dist/player/transitions/fade.js.map +1 -0
  94. package/dist/player/transitions/index.d.ts +4 -0
  95. package/dist/player/transitions/index.d.ts.map +1 -0
  96. package/dist/player/transitions/index.js +4 -0
  97. package/dist/player/transitions/index.js.map +1 -0
  98. package/dist/player/transitions/slide.d.ts +6 -0
  99. package/dist/player/transitions/slide.d.ts.map +1 -0
  100. package/dist/player/transitions/slide.js +35 -0
  101. package/dist/player/transitions/slide.js.map +1 -0
  102. package/dist/script/index.d.ts +2 -0
  103. package/dist/script/index.d.ts.map +1 -0
  104. package/dist/script/index.js +2 -0
  105. package/dist/script/index.js.map +1 -0
  106. package/dist/script/script.d.ts +10 -0
  107. package/dist/script/script.d.ts.map +1 -0
  108. package/dist/script/script.js +41 -0
  109. package/dist/script/script.js.map +1 -0
  110. package/dist/segment/SegmentRunner.d.ts +52 -0
  111. package/dist/segment/SegmentRunner.d.ts.map +1 -0
  112. package/dist/segment/SegmentRunner.js +187 -0
  113. package/dist/segment/SegmentRunner.js.map +1 -0
  114. package/dist/segment/defineConfig.d.ts +6 -0
  115. package/dist/segment/defineConfig.d.ts.map +1 -0
  116. package/dist/segment/defineConfig.js +7 -0
  117. package/dist/segment/defineConfig.js.map +1 -0
  118. package/dist/segment/defineSegment.d.ts +7 -0
  119. package/dist/segment/defineSegment.d.ts.map +1 -0
  120. package/dist/segment/defineSegment.js +25 -0
  121. package/dist/segment/defineSegment.js.map +1 -0
  122. package/dist/segment/index.d.ts +5 -0
  123. package/dist/segment/index.d.ts.map +1 -0
  124. package/dist/segment/index.js +4 -0
  125. package/dist/segment/index.js.map +1 -0
  126. package/dist/timeline/index.d.ts +73 -0
  127. package/dist/timeline/index.d.ts.map +1 -0
  128. package/dist/timeline/index.js +142 -0
  129. package/dist/timeline/index.js.map +1 -0
  130. package/dist/timeline/loadAudioTrack.d.ts +18 -0
  131. package/dist/timeline/loadAudioTrack.d.ts.map +1 -0
  132. package/dist/timeline/loadAudioTrack.js +44 -0
  133. package/dist/timeline/loadAudioTrack.js.map +1 -0
  134. package/dist/timeline/loadVoiceover.d.ts +18 -0
  135. package/dist/timeline/loadVoiceover.d.ts.map +1 -0
  136. package/dist/timeline/loadVoiceover.js +38 -0
  137. package/dist/timeline/loadVoiceover.js.map +1 -0
  138. package/dist/timeline/resolveTiming.d.ts +28 -0
  139. package/dist/timeline/resolveTiming.d.ts.map +1 -0
  140. package/dist/timeline/resolveTiming.js +63 -0
  141. package/dist/timeline/resolveTiming.js.map +1 -0
  142. package/dist/timeline/validateTiming.d.ts +29 -0
  143. package/dist/timeline/validateTiming.d.ts.map +1 -0
  144. package/dist/timeline/validateTiming.js +62 -0
  145. package/dist/timeline/validateTiming.js.map +1 -0
  146. package/dist/types.d.ts +216 -0
  147. package/dist/types.d.ts.map +1 -0
  148. package/dist/types.js +6 -0
  149. package/dist/types.js.map +1 -0
  150. package/package.json +47 -0
  151. package/skill/SKILL.md +64 -0
  152. package/skill/assets/hello_world/PLAN.md +31 -0
  153. package/skill/assets/hello_world/README.md +27 -0
  154. package/skill/assets/hello_world/audio/audio_plan.md +14 -0
  155. package/skill/assets/hello_world/segments/hello_intro.ts +69 -0
  156. package/skill/assets/hello_world/segments/hello_outro.ts +71 -0
  157. package/skill/assets/hello_world/timeline.ts +15 -0
  158. package/skill/assets/hello_world/voiceover_script/script.md +10 -0
  159. package/skill/assets/install/package.json +10 -0
  160. package/skill/assets/install/tsconfig.json +23 -0
  161. package/skill/assets/styles/editorial-mono/STYLE.md +124 -0
  162. package/skill/assets/styles/editorial-mono/brand.md +85 -0
  163. package/skill/assets/styles/editorial-mono/reference/animations.jsx +752 -0
  164. package/skill/assets/styles/editorial-mono/reference/scenes.html +563 -0
  165. package/skill/assets/styles/editorial-mono/sample/bullet.ts +101 -0
  166. package/skill/assets/styles/editorial-mono/sample/content.ts +104 -0
  167. package/skill/assets/styles/editorial-mono/sample/cta.ts +113 -0
  168. package/skill/assets/styles/editorial-mono/sample/feature.ts +111 -0
  169. package/skill/assets/styles/editorial-mono/sample/grid.ts +97 -0
  170. package/skill/assets/styles/editorial-mono/sample/kinetic.ts +96 -0
  171. package/skill/assets/styles/editorial-mono/sample/section.ts +101 -0
  172. package/skill/assets/styles/editorial-mono/sample/stat.ts +128 -0
  173. package/skill/assets/styles/editorial-mono/sample/title.ts +97 -0
  174. package/skill/assets/styles/editorial-mono/sample/ui-showcase.ts +159 -0
  175. package/skill/assets/styles/editorial-mono/tokens.css +44 -0
  176. package/skill/assets/styles/iso-diagram/STYLE.md +109 -0
  177. package/skill/assets/styles/iso-diagram/brand.md +32 -0
  178. package/skill/assets/styles/iso-diagram/reference/animations.jsx +673 -0
  179. package/skill/assets/styles/iso-diagram/reference/scenes.html +427 -0
  180. package/skill/assets/styles/iso-diagram/sample/bullet.ts +144 -0
  181. package/skill/assets/styles/iso-diagram/sample/content.ts +192 -0
  182. package/skill/assets/styles/iso-diagram/sample/cta.ts +162 -0
  183. package/skill/assets/styles/iso-diagram/sample/feature.ts +205 -0
  184. package/skill/assets/styles/iso-diagram/sample/grid.ts +181 -0
  185. package/skill/assets/styles/iso-diagram/sample/kinetic.ts +102 -0
  186. package/skill/assets/styles/iso-diagram/sample/section.ts +149 -0
  187. package/skill/assets/styles/iso-diagram/sample/stat.ts +164 -0
  188. package/skill/assets/styles/iso-diagram/sample/title.ts +173 -0
  189. package/skill/assets/styles/iso-diagram/sample/ui-showcase.ts +162 -0
  190. package/skill/assets/styles/iso-diagram/tokens.css +40 -0
  191. package/skill/assets/styles/motion-engineering/STYLE.md +106 -0
  192. package/skill/assets/styles/motion-engineering/brand.md +29 -0
  193. package/skill/assets/styles/motion-engineering/reference/animations.jsx +673 -0
  194. package/skill/assets/styles/motion-engineering/reference/scenes.html +513 -0
  195. package/skill/assets/styles/motion-engineering/sample/bullet.ts +176 -0
  196. package/skill/assets/styles/motion-engineering/sample/content.ts +228 -0
  197. package/skill/assets/styles/motion-engineering/sample/cta.ts +209 -0
  198. package/skill/assets/styles/motion-engineering/sample/feature.ts +299 -0
  199. package/skill/assets/styles/motion-engineering/sample/grid.ts +190 -0
  200. package/skill/assets/styles/motion-engineering/sample/kinetic.ts +159 -0
  201. package/skill/assets/styles/motion-engineering/sample/section.ts +196 -0
  202. package/skill/assets/styles/motion-engineering/sample/stat.ts +230 -0
  203. package/skill/assets/styles/motion-engineering/sample/title.ts +219 -0
  204. package/skill/assets/styles/motion-engineering/sample/ui-showcase.ts +267 -0
  205. package/skill/assets/styles/motion-engineering/tokens.css +40 -0
  206. package/skill/assets/styles/neon-terminal/STYLE.md +105 -0
  207. package/skill/assets/styles/neon-terminal/brand.md +27 -0
  208. package/skill/assets/styles/neon-terminal/reference/animations.jsx +673 -0
  209. package/skill/assets/styles/neon-terminal/reference/scenes.html +387 -0
  210. package/skill/assets/styles/neon-terminal/sample/bullet.ts +113 -0
  211. package/skill/assets/styles/neon-terminal/sample/content.ts +117 -0
  212. package/skill/assets/styles/neon-terminal/sample/cta.ts +131 -0
  213. package/skill/assets/styles/neon-terminal/sample/feature.ts +112 -0
  214. package/skill/assets/styles/neon-terminal/sample/grid.ts +128 -0
  215. package/skill/assets/styles/neon-terminal/sample/kinetic.ts +105 -0
  216. package/skill/assets/styles/neon-terminal/sample/section.ts +96 -0
  217. package/skill/assets/styles/neon-terminal/sample/stat.ts +123 -0
  218. package/skill/assets/styles/neon-terminal/sample/title.ts +122 -0
  219. package/skill/assets/styles/neon-terminal/sample/ui-showcase.ts +127 -0
  220. package/skill/assets/styles/neon-terminal/tokens.css +39 -0
  221. package/skill/assets/styles/risograph/STYLE.md +110 -0
  222. package/skill/assets/styles/risograph/brand.md +26 -0
  223. package/skill/assets/styles/risograph/reference/animations.jsx +673 -0
  224. package/skill/assets/styles/risograph/reference/scenes.html +403 -0
  225. package/skill/assets/styles/risograph/sample/bullet.ts +124 -0
  226. package/skill/assets/styles/risograph/sample/content.ts +135 -0
  227. package/skill/assets/styles/risograph/sample/cta.ts +149 -0
  228. package/skill/assets/styles/risograph/sample/feature.ts +152 -0
  229. package/skill/assets/styles/risograph/sample/grid.ts +123 -0
  230. package/skill/assets/styles/risograph/sample/kinetic.ts +125 -0
  231. package/skill/assets/styles/risograph/sample/section.ts +130 -0
  232. package/skill/assets/styles/risograph/sample/stat.ts +145 -0
  233. package/skill/assets/styles/risograph/sample/title.ts +132 -0
  234. package/skill/assets/styles/risograph/sample/ui-showcase.ts +147 -0
  235. package/skill/assets/styles/risograph/tokens.css +39 -0
  236. package/skill/assets/styles/swiss-console/STYLE.md +107 -0
  237. package/skill/assets/styles/swiss-console/brand.md +37 -0
  238. package/skill/assets/styles/swiss-console/reference/animations.jsx +673 -0
  239. package/skill/assets/styles/swiss-console/reference/scenes.html +420 -0
  240. package/skill/assets/styles/swiss-console/sample/bullet.ts +122 -0
  241. package/skill/assets/styles/swiss-console/sample/content.ts +137 -0
  242. package/skill/assets/styles/swiss-console/sample/cta.ts +109 -0
  243. package/skill/assets/styles/swiss-console/sample/feature.ts +163 -0
  244. package/skill/assets/styles/swiss-console/sample/grid.ts +145 -0
  245. package/skill/assets/styles/swiss-console/sample/kinetic.ts +117 -0
  246. package/skill/assets/styles/swiss-console/sample/section.ts +127 -0
  247. package/skill/assets/styles/swiss-console/sample/stat.ts +148 -0
  248. package/skill/assets/styles/swiss-console/sample/title.ts +148 -0
  249. package/skill/assets/styles/swiss-console/sample/ui-showcase.ts +198 -0
  250. package/skill/assets/styles/swiss-console/tokens.css +39 -0
  251. package/skill/install/INSTALL.md +400 -0
  252. package/skill/references/audio/audio_plan.md +199 -0
  253. package/skill/references/audio/build.md +208 -0
  254. package/skill/references/audio/cue_template.md +219 -0
  255. package/skill/references/audio/ffmpeg_cookbook.md +267 -0
  256. package/skill/references/audio/music/music.md +171 -0
  257. package/skill/references/audio/music/providers/elevenlabs.md +170 -0
  258. package/skill/references/audio/music/providers/manual.md +140 -0
  259. package/skill/references/audio/music/providers/openverse.md +265 -0
  260. package/skill/references/audio/sfx/providers/elevenlabs.md +152 -0
  261. package/skill/references/audio/sfx/providers/manual.md +117 -0
  262. package/skill/references/audio/sfx/providers/openverse.md +243 -0
  263. package/skill/references/audio/sfx/sfx.md +149 -0
  264. package/skill/references/audio/styles.md +102 -0
  265. package/skill/references/audio/sync.md +237 -0
  266. package/skill/references/audio/voiceover/animation_sync.md +142 -0
  267. package/skill/references/audio/voiceover/provider_script.md +153 -0
  268. package/skill/references/audio/voiceover/providers/elevenlabs.md +288 -0
  269. package/skill/references/audio/voiceover/providers/manual.md +100 -0
  270. package/skill/references/audio/voiceover/script_writing.md +100 -0
  271. package/skill/references/audio/voiceover/style_intake.md +56 -0
  272. package/skill/references/audio/voiceover/sync_algorithm.md +167 -0
  273. package/skill/references/audio/voiceover.md +296 -0
  274. package/skill/references/audio.md +135 -0
  275. package/skill/references/authoring_segment.md +446 -0
  276. package/skill/references/create_or_edit_video.md +232 -0
  277. package/skill/references/dev_server.md +157 -0
  278. package/skill/references/export.md +145 -0
  279. package/skill/references/new_video.md +117 -0
  280. package/skill/references/project_structure.md +144 -0
  281. package/skill/references/setup.md +109 -0
  282. package/skill/references/setup_new_style.md +158 -0
  283. package/skill/references/styles.md +154 -0
  284. package/skill/references/testing.md +115 -0
  285. package/skill/references/types.md +240 -0
  286. package/src/cli/entry/components/copy_button.ts +42 -0
  287. package/src/cli/entry/components/download_modal.ts +204 -0
  288. package/src/cli/entry/components/empty_state.ts +55 -0
  289. package/src/cli/entry/components/hide_hud_tab.ts +37 -0
  290. package/src/cli/entry/components/icons.ts +31 -0
  291. package/src/cli/entry/components/top_bar.ts +69 -0
  292. package/src/cli/entry/components/video_card.ts +57 -0
  293. package/src/cli/entry/dev_frame.ts +189 -0
  294. package/src/cli/entry/entry_index.ts +16 -0
  295. package/src/cli/entry/entry_video.ts +24 -0
  296. package/src/cli/entry/index.html +12 -0
  297. package/src/cli/entry/parse_slug.ts +14 -0
  298. package/src/cli/entry/render.html +17 -0
  299. package/src/cli/entry/render_entry.ts +121 -0
  300. package/src/cli/entry/styles/base.css +45 -0
  301. package/src/cli/entry/styles/components.css +605 -0
  302. package/src/cli/entry/styles/tokens.css +44 -0
  303. package/src/cli/entry/video.html +22 -0
  304. package/src/cli/entry/views/homepage.ts +66 -0
  305. package/src/cli/entry/views/video_view.ts +286 -0
  306. package/src/cli/entry/virtual.d.ts +8 -0
@@ -0,0 +1,167 @@
1
+ # Sync Algorithm
2
+
3
+ ## When this is loaded
4
+
5
+ You have a voiceover audio file and provider timing data, and you need to compute a `Timing` object that syncs segment advances to the audio.
6
+
7
+ ## Overview
8
+
9
+ This is an agent reasoning step, not a deterministic function. You read the per-word timing data from the provider, walk the per-segment script from PLAN.md, and produce a `Timing` object with advance times for each segment.
10
+
11
+ ## Inputs
12
+
13
+ 1. **Per-segment script** from PLAN.md (the `## Script` section with subsections per segment id).
14
+ 2. **Provider timing JSON** at `audio/originals/voiceovers/<slug>/timing.json`. This contains per-word or per-character timestamps from the TTS provider or STT transcription.
15
+ 3. **Segment ids** in timeline order, plus each segment's `notes` and `voiceover` hint string.
16
+ 4. **Each segment's `advances` array** -- the current timing. You will be replacing these values in the `Timing`, but the array length tells you how many advances each segment needs.
17
+
18
+ ## Output
19
+
20
+ A `Timing` object written into `voiceover.ts`:
21
+
22
+ ```ts
23
+ timing: {
24
+ perSegment: {
25
+ 'intro': [4.2],
26
+ 'feature-cards': [2.1, 5.8, 9.3, 12.0],
27
+ 'outro': [3.5],
28
+ },
29
+ },
30
+ ```
31
+
32
+ Each value array has the same length as the segment's `advances` array. Values are segment-relative seconds (same units as `SegmentSpec.advances`).
33
+
34
+ ## Parsing provider timing JSON
35
+
36
+ ### ElevenLabs TTS timing
37
+
38
+ ElevenLabs TTS can output per-word timing (via the API's with-timestamps endpoint, or extracted via STT after portal generation). The JSON format contains an array of word entries with start and end timestamps:
39
+
40
+ ```json
41
+ {
42
+ "words": [
43
+ { "word": "Welcome", "start": 0.0, "end": 0.45 },
44
+ { "word": "to", "start": 0.47, "end": 0.55 },
45
+ { "word": "Acme", "start": 0.58, "end": 0.92 },
46
+ ...
47
+ ]
48
+ }
49
+ ```
50
+
51
+ Timestamps are in seconds from the start of the audio file. The `end` of the last word in a segment's script section gives you the boundary for that segment's audio content.
52
+
53
+ ### ElevenLabs Speech-to-Text timing
54
+
55
+ ElevenLabs STT output has a similar structure with word-level timestamps. The format may include additional fields like confidence scores -- ignore those. Focus on the word text (`word`, or `text` in some responses), `start`, and `end`.
56
+
57
+ If the JSON structure differs from the above (ElevenLabs may update their format), adapt by looking for word-level entries with start/end time fields. The core need is: which word was spoken at which timestamp.
58
+
59
+ ## The sync procedure
60
+
61
+ ### Step 1: Map script text to timing words
62
+
63
+ Walk through the provider timing JSON word by word. For each segment's script section in PLAN.md, find the corresponding words in the timing data by text matching.
64
+
65
+ - Match is case-insensitive and ignores punctuation.
66
+ - Provider timing may include words from pause markers or annotations that were in the provider script but not the PLAN script -- skip those.
67
+ - If the provider timing has significantly different text (indicating the TTS changed wording), flag this to the user and ask which text to use.
68
+
69
+ ### Step 2: Find segment boundaries
70
+
71
+ For each segment, identify the timestamp where the segment should transition to the next:
72
+
73
+ - **Align to the next segment's VO onset, not the current segment's VO offset.** Each segment's final advance should land just *before* the next segment's first spoken word, so the voiceover starts right after the transition -- not after a dead-air pause. The transition is a lead-in to the next VO, not a tail-out from the previous one.
74
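+ - Find the first word of the *next* segment's script section. Place the segment boundary 0.1-0.3 seconds before that word's `start` timestamp, so the transition finishes right as the new narration begins. If the gap between the two segments' narration is shorter than that, place the boundary no earlier than the `end` of the current segment's last word, so the visuals do not change while the current segment's narration is still playing.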
75
+ - For the **last segment** (no next segment), find the last word in the segment's script section and add a small buffer (0.3-0.5 seconds) after its `end` timestamp.
76
+
77
+ ### Step 3: Convert to segment-relative advances
78
+
79
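+ The `Timing` uses segment-relative seconds (time since the segment started, not since the audio started). A segment ends at its final advance, so its duration is the value of its last advance and the next segment starts at that point in the audio. To convert an absolute audio timestamp: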
80
+
81
+ ```
82
+ segment_start = sum of all previous segments' durations
83
+ advance_time = absolute_timestamp - segment_start
84
+ ```
85
+
86
+ For the last advance of each segment (the one that transitions to the next segment), set it so the transition lands just before the next segment's first VO word (per Step 2).
87
+
88
+ ### Step 4: Handle multi-advance segments
89
+
90
+ Segments with multiple advances have internal beats (`waitForNext()` calls in their `play()` function). For these:
91
+
92
+ 1. **Count the advances.** The segment's `advances` array length tells you how many beats are needed.
93
+ 2. **Identify beat positions.** Look for natural break points in the segment's script:
94
+    - `[pause for animation]` markers in the PLAN.md script.
95
+    - Sentence boundaries that align with visual transitions (check the segment's `notes` or code).
96
+    - Content transition cues: "Next,...", "And now,...", "Finally,...", "Moving on,...".
97
+ 3. **Place internal advances** at the timestamps corresponding to these break points. Each internal advance should land at the end of the narration chunk before the next visual beat.
98
+ 4. **Place the final advance** (the segment transition) just before the next segment's first VO word, per Step 2.
99
+
100
+ Example for a segment with 4 advances:
101
+
102
+ ```
103
+ Script: "First feature. [pause] Second feature. [pause] Third feature."
104
+ Advances array length: 4 (3 internal beats + 1 final transition)
105
+
106
+ advance[0] = end of "First feature" + buffer (first waitForNext resolves)
107
+ advance[1] = end of "Second feature" + buffer (second waitForNext resolves)
108
+ advance[2] = end of "Third feature" + buffer (third waitForNext resolves)
109
+ advance[3] = just before next segment's first VO word (transition to next segment)
110
+ ```
111
+
112
+ ### Step 5: Apply the "audio always wins" rule
113
+
114
+ The video duration adapts to match the audio duration:
115
+
116
+ - If the audio for a segment is shorter than the segment's current `advances` suggest, compress the advances.
117
+ - If the audio is longer, stretch the advances.
118
+ - The last advance of the last segment should land at (or very near) the end of the audio file.
119
+ - Never truncate audio. Never pad with silence.
120
+
121
+ ## Presenting the timing to the user
122
+
123
+ After computing the `Timing`, present it with annotations:
124
+
125
+ ```
126
+ Proposed timing:
127
+
128
+ intro (1 advance):
129
+ [0] 4.2s -- "...set us apart." (end of intro narration)
130
+
131
+ feature-cards (4 advances):
132
+ [0] 2.1s -- "...across devices." (end of collaboration section)
133
+ [1] 5.8s -- "...in one view." (end of analytics section)
134
+ [2] 9.3s -- "...and more." (end of integrations section)
135
+ [3] 12.0s -- transition (next VO starts at ~12.2s, segment-relative)
136
+
137
+ outro (1 advance):
138
+ [0] 3.5s -- "...Thanks for watching." (end of video)
139
+
140
+ Total audio duration: 19.7s
141
+ ```
142
+
143
+ For each advance, show:
144
+
145
+ - The segment-relative time in seconds.
146
+ - A snippet of the script text that the advance lands on.
147
+ - What the advance does (internal beat vs. segment transition).
148
+
149
+ ## Iteration
150
+
151
+ The user may request adjustments:
152
+
153
+ - "Move the second beat in feature-cards 0.5 seconds later" -- adjust that advance value.
154
+ - "The intro feels rushed" -- extend the intro's advance time by adding more buffer.
155
+ - "Combine the first two beats in feature-cards into one" -- this changes the number of advances, which means the segment's `play()` function needs a `waitForNext()` removed. Flag this as a code change.
156
+
157
+ After each adjustment, re-present the timing. When the user confirms, write it into `voiceover.ts`.
158
+
159
+ ## Edge cases
160
+
161
+ | Situation | Behavior |
162
+ |---|---|
163
+ | Provider timing JSON is missing | Error. The user must download it from the provider portal. Direct them to the provider walkthrough. |
164
+ | Words in timing do not match the script | Likely the TTS changed wording. Flag specific mismatches, ask user whether to use TTS text or original script text for alignment. |
165
+ | Segment has no script (silent segment) | Use the segment's existing `advances` values. The segment passes through without voiceover. |
166
+ | Audio is significantly shorter/longer than expected | Apply "audio always wins" -- compress or stretch. Flag the discrepancy so the user can decide if they want to re-record or adjust the video. |
167
+ | Provider timing has character-level rather than word-level data | Aggregate characters into words by grouping on whitespace boundaries. Use the first character's start time as the word's `start` and the last character's end time as the word's `end`. |
@@ -0,0 +1,296 @@
1
+ # Voiceover
2
+
3
+ ## When this is loaded
4
+
5
+ You were routed here from [audio.md](../audio.md) or from another workflow that needs to work with voiceover content. This is the top-level reference for all voiceover functionality.
6
+
7
+ ## Overview
8
+
9
+ Videowright supports voiceover audio integrated into video playback. A voiceover consists of an audio file (mp3 or wav), a `Timing` that syncs segment advances to the audio, and metadata stored in a typed `voiceover.ts` file. Audio plays in the dev server via an HTML `<audio>` element and is muxed into MP4 output by `render` via ffmpeg.
10
+
11
+ Two production flows are supported:
12
+
13
+ - **AI-generated** -- write a script, transform it with v2-targeted provider annotations, generate audio via ElevenLabs (API key or web portal), and import the audio and per-word timing JSON.
14
+ - **Manual** -- the user provides their own audio file, then runs it through ElevenLabs Speech-to-Text to get per-word timing data for sync.
15
+
16
+ Both flows produce the same output: a `voiceover.ts` file with a `Voiceover` object that includes the audio path and a `Timing` object.
17
+
18
+ ## Flow entry point
19
+
20
+ When the user asks to "add a voiceover" or "generate a voiceover", ask:
21
+
22
+ > Do you have an audio file already, or would you like to generate one with AI text-to-speech?
23
+
24
+ - **AI generation** -- follow Flow A below.
25
+ - **User-provided audio** -- follow Flow B below.
26
+
27
+ ### Flow A: AI generation (ElevenLabs)
28
+
29
+ 1. **Approach and voice selection.** Ask API key vs. portal, then (API only) which voice from the curated catalog. See [voiceover/providers/elevenlabs.md](voiceover/providers/elevenlabs.md) for the mode selection prompt and the [curated voice catalog](#curated-voice-catalog) below.
30
+ 2. **Style intake.** Ask the user about tone and emotional arc preferences. See [voiceover/style_intake.md](voiceover/style_intake.md).
31
+ 3. **Script.** Write or integrate the VO script into PLAN.md. See [voiceover/script_writing.md](voiceover/script_writing.md).
32
+ 4. **Provider script.** Transform the PLAN script into `provider_script.md` with v2-targeted annotations (SSML `<break>` tags, punctuation-driven prosody -- no v3 emotion tags). See [voiceover/provider_script.md](voiceover/provider_script.md).
33
+ 5. **Audio generation.** Follow the sub-flow for the approach chosen in step 1. See [voiceover/providers/elevenlabs.md](voiceover/providers/elevenlabs.md).
34
+ 6. **Sync timing.** Read the provider timing JSON and compute a `Timing` object. See [voiceover/sync_algorithm.md](voiceover/sync_algorithm.md).
35
+ 7. **Write `voiceover.ts`.** Create the typed module exporting a `Voiceover` object.
36
+ 8. **Audio plan and build.** Create or update `audio/audio_plan.md` with a VO cue pointing at this voiceover. For VO-only videos, the plan is minimal (single cue, full file, placed at 0s -- see [audio_plan.md](audio_plan.md) for the VO-only shortcut). Then build the track via [build.md](build.md). The build workflow handles approval, timeline.ts update, and sync.
37
+
38
+ ### Curated voice catalog
39
+
40
+ When the user picks the **API key** approach in step 1, immediately present this catalog (default is **Asher** if no preference):
41
+
42
+ | # | Voice | Description | Preview |
43
+ |---|---|---|---|
44
+ | 1 | **Asher** | Warm, clear, and conversational male voice with confident, grounded delivery. Natural pacing and friendly authority give him an engaging presence that holds attention without feeling forced. Ideal for podcasts, narration, explainers, and authentic commercial reads. Works especially well as a default voice because of his versatility across different content types and tones. | [Listen](https://elevenlabs.io/app/voice-library?voiceId=tMvyQtpCVQ0DkixuYm6J) |
45
+ | 2 | **Cecily** | Warm, versatile female voice from the West Coast with an engaging, approachable delivery. Her natural warmth and conversational style make her equally effective for advertisements, social media content, and brand storytelling. She can shift between polished and casual registers without losing authenticity. A strong choice when you want a voice that feels relatable and trustworthy across a range of content. | [Listen](https://elevenlabs.io/app/voice-library?voiceId=Uc7anshoV8mdBhDnEZEX) |
46
+ | 3 | **Don** | Young American male voice with a casual, approachable tone that feels natural and engaging. Light, clear, and expressive -- perfect for speaking to listeners in a relaxed, conversational way. This style works especially well for social media content, storytelling, and audiobooks, where relatability and flow are key. The voice carries warmth and clarity, making it easy to listen to over long sessions, from narration to digital campaigns. | [Listen](https://elevenlabs.io/app/voice-library?voiceId=8IbUB2LiiCZ85IJAHNnZ) |
47
+ | 4 | **Hanna** | Professional American female voice with a polished, authoritative delivery. Clear articulation and steady pacing make her an excellent choice for informative narration, e-learning modules, and corporate voiceover. She conveys competence and credibility without sounding stiff or robotic. Best when you need a voice that commands attention while remaining approachable in instructional or business contexts. | [Listen](https://elevenlabs.io/app/voice-library?voiceId=Hh0rE70WfnSFN80K8uJC) |
48
+ | 5 | **Other** | Provide any ElevenLabs voice ID. Browse voices at the [ElevenLabs Voice Library](https://elevenlabs.io/app/voice-library) to find one that fits your project. | -- |
49
+
50
+ If the user does not pick, default to **Asher**. Save the selected voice ID to the `eleven_labs_voice_id` field in the `voiceover.ts` file (not as an env var). If the user picks "Other", ask them to provide the voice ID.
51
+
52
+ Portal users skip this catalog -- they pick a voice visually in the ElevenLabs UI during audio generation (step 5).
53
+
54
+ ### Flow B: Manual (user-provided audio)
55
+
56
+ 1. **Get the audio.** Ask the user to provide or drop an audio file into `audio/originals/voiceovers/<slug>/`.
57
+ 2. **Generate transcript and timing.** Walk the user through ElevenLabs Speech-to-Text to get per-word timing data. See [voiceover/providers/manual.md](voiceover/providers/manual.md).
58
+ 3. **Sync timing.** Same as Flow A step 6.
59
+ 4. **Write `voiceover.ts`.** Same as Flow A step 7.
60
+ 5. **Audio plan and build.** Same as Flow A step 8.
61
+
62
+ ## File and folder conventions
63
+
64
+ Voiceover originals live per-video under `audio/originals/voiceovers/`:
65
+
66
+ ```
67
+ videos/<video-slug>/
68
+ timeline.ts
69
+ PLAN.md
70
+ voiceover_script/
71
+ script.md
72
+ audio/
73
+ originals/
74
+ voiceovers/
75
+ <vo-slug>/
76
+ voiceover.ts # typed Voiceover object (default export)
77
+ audio.mp3 # audio file (mp3 or wav; any name works, referenced from voiceover.ts)
78
+ timing.json # provider-supplied per-word timings (optional)
79
+ provider_script.md # provider-annotated script (AI flow only)
80
+ generate.sh # API generation script (AI flow only)
81
+ tracks/
82
+ v1/
83
+ track.ts # typed AudioTrack object (default export)
84
+ track.mp3 # rendered audio
85
+ plan_snapshot.md # point-in-time copy of audio plan
86
+ ```
87
+
88
+ **Slug naming.** Both auto-versioned (`v1`, `v2`) and user-named (`narrator-warm`, `take-3`) are valid. The slug is the folder name under `audio/originals/voiceovers/`.
89
+
90
+ **Multiple voiceovers.** Stored as separate sibling folders under `audio/originals/voiceovers/`. Each is independent and self-contained. The active audio is determined by the audio track referenced in `timeline.ts` via `default_audio_track`.
91
+
92
+ ## Types
93
+
94
+ ### `Voiceover`
95
+
96
+ ```ts
97
+ type Voiceover = {
98
+ audio_file: string; // path relative to the voiceover.ts file
99
+ provider: "elevenlabs" | "manual";
100
+ provider_timing_file?: string; // path relative to the voiceover.ts file
101
+ timing: Timing;
102
+ notes?: string;
103
+ eleven_labs_voice_id?: string; // ElevenLabs voice ID; defaults to Asher if omitted
104
+ };
105
+ ```
106
+
107
+ ### `Timing`
108
+
109
+ ```ts
110
+ type Timing = {
111
+ perSegment: Partial<Record<string, number[]>>;
112
+ };
113
+ ```
114
+
115
+ A `Timing` overrides segment `advances` for any segments it lists. Segments not listed fall back to their own `advances` array.
116
+
117
+ ### `Timeline` extensions
118
+
119
+ ```ts
120
+ interface Timeline {
121
+ meta: TimelineMeta;
122
+ segments: TimelineEntry[];
123
+ default_timing?: Timing; // standalone timing overrides
124
+ default_audio_track?: AudioTrack; // default audio track for this video
125
+ }
126
+ ```
127
+
128
+ ## Writing `voiceover.ts`
129
+
130
+ A voiceover module default-exports a `Voiceover` object:
131
+
132
+ ```ts
133
+ import type { Voiceover } from 'videowright';
134
+
135
+ const voiceover: Voiceover = {
136
+ audio_file: './audio.mp3',
137
+ provider: 'elevenlabs',
138
+ provider_timing_file: './timing.json',
139
+ eleven_labs_voice_id: 'tMvyQtpCVQ0DkixuYm6J', // Asher
140
+ timing: {
141
+ perSegment: {
142
+ 'intro': [4.2],
143
+ 'feature-cards': [2.1, 5.8, 9.3, 12.0],
144
+ 'outro': [3.5],
145
+ },
146
+ },
147
+ notes: 'Warm male voice, conversational tone',
148
+ };
149
+
150
+ export default voiceover;
151
+ ```
152
+
153
+ ## Setting the default audio track
154
+
155
+ After generating a voiceover, it is combined into an audio track (see [../audio.md](../audio.md) for the full audio workflow). The active audio track is set in `timeline.ts`:
156
+
157
+ ```ts
158
+ import '../../styles/editorial-mono/tokens.css';
159
+ import type { Timeline } from 'videowright';
160
+ import defaultAudioTrack from './audio/tracks/v1/track.js';
161
+
162
+ const timeline: Timeline = {
163
+ meta: { title: 'My Video' },
164
+ segments: [
165
+ { id: 'intro' },
166
+ { id: 'feature-cards', transition: 'fade' },
167
+ { id: 'outro', transition: 'fade' },
168
+ ],
169
+ default_audio_track: defaultAudioTrack,
170
+ };
171
+
172
+ export default timeline;
173
+ ```
174
+
175
+ The `default_audio_track` import is the single source of truth for which audio track is active. Switching tracks means updating the import path.
176
+
177
+ ## CLI usage
178
+
179
+ `render` accepts `--audio-track`:
180
+
181
+ ```bash
182
+ # Use a specific audio track
183
+ npx videowright render --audio-track v1
184
+
185
+ # Suppress audio (ignore default_audio_track, use default_timing or segment advances)
186
+ npx videowright render --audio-track none
187
+
188
+ # No flag: use default_audio_track from timeline.ts if set, otherwise no audio
189
+ npx videowright render
190
+ ```
191
+
192
+ `dev` does not accept `--audio-track`. It uses `default_audio_track` from `timeline.ts` if set, otherwise no audio.
193
+
194
+ ## Audio playback by mode
195
+
196
+ | Mode | Audio mechanism | Behavior |
197
+ |---|---|---|
198
+ | `dev` | HTML `<audio>` element | Play button in HUD starts auto-advance with synced audio. Manual nav pauses audio. |
199
+ | `render` | ffmpeg audio mux | Audio file is muxed into the output MP4 as a second input to ffmpeg. No `<audio>` element. |
200
+
201
+ ## Timing precedence
202
+
203
+ When determining advance schedules:
204
+
205
+ 1. **Active audio track's `timing`** -- if an audio track is active (via `--audio-track <id>` or `default_audio_track`).
206
+ 2. **`default_timing`** on `timeline.ts` -- if no audio track is active.
207
+ 3. **`SegmentSpec.advances`** -- per-segment fallback.
208
+
209
+ `--audio-track none` suppresses level 1 (audio tracks) but preserves `default_timing` (level 2) and per-segment advances (level 3).
210
+
211
+ ## The `voiceover` field on segments
212
+
213
+ Each segment can declare a `voiceover` string in `defineSegment`:
214
+
215
+ ```ts
216
+ export default defineSegment({
217
+ id: 'intro',
218
+ advances: [3.0],
219
+ voiceover: 'Welcome to the product demo.',
220
+ async play(ctx) { await ctx.hold(3000); },
221
+ });
222
+ ```
223
+
224
+ This field is:
225
+
226
+ - **Shown in the HUD** during dev mode.
227
+ - **Collected by `videowright script`** into a single markdown document.
228
+ - **Used by the agent** to understand the segment's narrative purpose when editing.
229
+
230
+ It is a display hint, not the canonical voiceover audio source. The canonical audio comes from the `Voiceover` object in `voiceover.ts`.
231
+
232
+ ## VO-first authoring
233
+
234
+ The default authoring pattern for new videos with voiceover intent:
235
+
236
+ 1. **Write the script first.** Draft the full VO copy organized by segment in PLAN.md.
237
+ 2. **Scaffold segments from the script.** Each segment's content and timing follow from its VO text. A 30-word section suggests ~12s; a 100-word section suggests ~40s (based on ~150 WPM).
238
+ 3. **Use `waitForNext()` for every VO-aligned beat.** Each content reveal that a voiceover line should cue must be gated by `waitForNext()`, not `hold()`. This is what makes voiceover-swapping possible — different voiceovers supply different advance timings, and segments respond by advancing at the right moment without code changes. Use `hold()` only for animation lead-in or fixed internal pauses within a beat.
239
+ 4. **Set `voiceover` on each segment** to its section of the script.
240
+ 5. **Generate the audio** using one of the two flows above.
241
+ 6. **Sync timing** to align segment advances with the audio.
242
+
243
+ ## VO-alignment smell
244
+
245
+ If you are adjusting `hold()` values inside a segment to make an animation line up with a specific voiceover recording, that is a code smell. It means the segment is coupled to one narration — any change to the voiceover (different voice, different pacing, re-recorded take) will require re-tuning those timers.
246
+
247
+ The fix is structural: content that needs to sync with the voiceover should be gated by `waitForNext()`, so timing comes from the `advances` / `Timing` data rather than from hardcoded milliseconds in the segment code. Add a new advance at the sync point, and let the timers within each beat use percentage-based durations so they scale when beat lengths shift. See [authoring_segment.md § Percentage-based timing within beats](../authoring_segment.md#percentage-based-timing-within-beats) for the pattern.
248
+
249
+ ## `videowright script` CLI
250
+
251
+ The `script` command reads segments' `voiceover` fields and assembles them into markdown:
252
+
253
+ ```bash
254
+ npx videowright script # print to stdout
255
+ npx videowright script --write # write to voiceover_script/script.md
256
+ ```
257
+
258
+ See the subsections below for the output format and `--write` behavior.
259
+
260
+ ### Output format
261
+
262
+ ```markdown
263
+ # Video Title
264
+
265
+ ## segment-id-1
266
+ Voiceover text for the first segment.
267
+
268
+ ## segment-id-2
269
+ Voiceover text for the second segment.
270
+
271
+ ---
272
+
273
+ *No voiceover: segment-id-3, segment-id-4*
274
+ ```
275
+
276
+ ### `--write` flag
277
+
278
+ With `--write`, the script is written to `videos/<name>/voiceover_script/script.md`. Without `--write`, it prints to stdout.
279
+
280
+ ## Keeping things in sync
281
+
282
+ The `voiceover` field on each segment and `voiceover_script/script.md` are two representations of the same content:
283
+
284
+ - **After editing `voiceover` fields** on segments, run `npx videowright script --write` to regenerate `script.md`.
285
+ - **After editing `script.md`** directly, update each segment's `voiceover` field to match.
286
+
287
+ ## Edge cases
288
+
289
+ | Situation | Behavior |
290
+ |---|---|
291
+ | User wants VO but has no script yet | Draft one during the build phase based on the video's purpose and segment outline. |
292
+ | User changes audio intent from silent to voiceover mid-project | Add `voiceover` fields to existing segments. Run `videowright script --write`. Follow the voiceover flow to generate audio and timing. |
293
+ | Audio file missing on disk | CLI errors before playback or render starts with a clear message and path. |
294
+ | `--audio-track <id>` with non-existent track | CLI errors with a hint to check the `audio/tracks/` folder. |
295
+ | Browser autoplay blocked | Audio is silent until the user clicks the play button (which counts as a user gesture). |
296
+ | Default audio track set but user switches via `--audio-track <other-id>` | Advance timing updates automatically. In-segment animations remain tuned to the original default -- the user can re-run the animation sync pass if needed. |
@@ -0,0 +1,135 @@
1
+ # Audio
2
+
3
+ ## When this is loaded
4
+
5
+ You were routed here from the intent dispatch table because the user wants to work with audio -- voiceover, sound effects, or background music.
6
+
7
+ ## Overview
8
+
9
+ Videowright supports multi-source audio tracks: voice-over, sound effects, and music combined into a single rendered audio file. Videos reference an audio track (not a voice-over directly); the audio track drives video timing and is muxed into the final MP4.
10
+
11
+ The audio workflow is progressive: start with what the video needs, source the assets, then build and sync the track.
12
+
13
+ ## Audio intent questions
14
+
15
+ Ask these three questions upfront. Skip any whose answer is already clear from the user's input.
16
+
17
+ > 1. Will this video have a **voice-over**? (yes / no)
18
+ > 2. Will it have **sound effects**? (yes / no)
19
+ > 3. Will it have **background music**? (yes / no)
20
+
21
+ Based on answers, load only the relevant sub-references below.
22
+
23
+ ## Routing
24
+
25
+ ### Voice-over
26
+
27
+ If the user wants a voice-over, load [audio/voiceover.md](audio/voiceover.md). This covers:
28
+
29
+ - AI-generated (ElevenLabs) and manual (user-provided audio) flows
30
+ - Script writing, provider script transformation
31
+ - Voice selection and style intake
32
+ - Sync timing computation
33
+
34
+ ### Sound effects
35
+
36
+ If the user wants sound effects, load [audio/sfx/sfx.md](audio/sfx/sfx.md). This covers:
37
+
38
+ - BYO (user-provided audio), ElevenLabs (AI-generated), and Openverse (free search) sourcing flows
39
+ - `sfx.ts` metadata authoring
40
+ - Per-asset approval UX (Approve / Discard and request changes)
41
+ - Integration into the audio plan as cues
42
+
43
+ SFX assets live in `audio/originals/sfx/<slug>/` and are referenced by cues in the audio plan.
44
+
45
+ ### Background music
46
+
47
+ If the user wants background music, load [audio/music/music.md](audio/music/music.md). This covers:
48
+
49
+ - BYO (user-provided audio), ElevenLabs (AI-generated), and Openverse (free search) sourcing flows
50
+ - `music.ts` metadata authoring (rich free-text notes for BPM, key, mood, structure)
51
+ - Per-asset approval UX (Approve / Discard and request changes)
52
+ - Integration into the audio plan as cues with volume curves and ducking
53
+
54
+ Music assets live in `audio/originals/music/<slug>/` and are referenced by cues in the audio plan.
55
+
56
+ ### Audio plan, build, and sync
57
+
58
+ If any audio is present (VO, SFX, or music), the audio plan/build/sync workflow applies:
59
+
60
+ 1. **Audio plan** -- author `audio/audio_plan.md` describing the mix composition. See [audio/audio_plan.md](audio/audio_plan.md) for the format spec, [audio/cue_template.md](audio/cue_template.md) for the per-cue field template, and [audio/styles.md](audio/styles.md) for mix-level guidance.
61
+ 2. **Build** -- render the plan into an audio track via ffmpeg. See [audio/build.md](audio/build.md). Uses recipes from [audio/ffmpeg_cookbook.md](audio/ffmpeg_cookbook.md).
62
+ 3. **Sync** -- compute per-segment timing from the track. See [audio/sync.md](audio/sync.md).
63
+
64
+ For **VO-only videos** (no SFX, no music), the plan is minimal (single cue, full file, placed at 0s) and is auto-emitted during the voiceover flow. The user does not need to understand the plan format -- it is created transparently and consumed by build and sync.
65
+
66
+ For **multi-source mixes** (VO + SFX, VO + music, or all three), the plan is authored explicitly with per-cue volume curves, fades, and a full ffmpeg mix command.
67
+
68
+ ## File layout
69
+
70
+ Each video has an `audio/` directory:
71
+
72
+ ```
73
+ videos/<video-slug>/
74
+ voiceover_script/
75
+ script.md # assembled VO script
76
+ audio/
77
+ audio_plan.md # audio composition plan + log
78
+ originals/ # source files (immutable after use)
79
+ voiceovers/
80
+ v1/
81
+ voiceover.ts
82
+ audio.mp3
83
+ timing.json
84
+ provider_script.md
85
+ generate.sh
86
+ sfx/ # slug-named subfolders
87
+ keyboard_typing/
88
+ audio.mp3
89
+ sfx.ts
90
+ generate.sh
91
+ music/ # slug-named subfolders
92
+ uplift_piano/
93
+ audio.mp3
94
+ music.ts
95
+ generate.sh
96
+ tracks/ # rendered audio tracks
97
+ v1/
98
+ track.ts # typed AudioTrack object
99
+ track.mp3 # rendered audio
100
+ plan_snapshot.md # point-in-time plan copy
101
+ ```
102
+
103
+ ### Active track
104
+
105
+ `timeline.ts` imports the active track via `default_audio_track`. The import is the single source of truth for which track is live. Switching tracks means updating the import.
106
+
107
+ ```ts
108
+ import defaultAudioTrack from './audio/tracks/v1/track.js';
109
+
110
+ const timeline: Timeline = {
111
+ // ...
112
+ default_audio_track: defaultAudioTrack,
113
+ };
114
+ ```
115
+
116
+ ## CLI usage
117
+
118
+ ```bash
119
+ # Use a specific audio track
120
+ npx videowright render --audio-track v1
121
+
122
+ # Suppress audio (use default_timing or segment advances)
123
+ npx videowright render --audio-track none
124
+
125
+ # No flag: use default_audio_track from timeline.ts if set
126
+ npx videowright render
127
+ ```
128
+
129
+ ## Timing precedence
130
+
131
+ 1. **Active audio track's `timing`** -- via `--audio-track <id>` or `default_audio_track`
132
+ 2. **`default_timing`** on `timeline.ts`
133
+ 3. **`SegmentSpec.advances`** -- per-segment fallback
134
+
135
+ `--audio-track none` suppresses level 1 (audio tracks) but preserves levels 2 and 3.