vargai 0.4.0-alpha74 → 0.4.0-alpha76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -199,7 +199,7 @@ await render(
199
199
  | `<Music>` | background music | `prompt`, `src`, `model`, `volume`, `loop`, `ducking` |
200
200
  | `<Title>` | text overlay | `position`, `color`, `start`, `end` |
201
201
  | `<Subtitle>` | subtitle text | `backgroundColor` |
202
- | `<Captions>` | auto-generated subs | `src`, `srt`, `style`, `color`, `activeColor` |
202
+ | `<Captions>` | auto-generated subs | `src`, `srt`, `style`, `color`, `activeColor`, `withAudio` |
203
203
  | `<Overlay>` | positioned layer | `left`, `top`, `width`, `height`, `keepAudio` |
204
204
  | `<Split>` | side-by-side | `direction` |
205
205
  | `<Slider>` | before/after reveal | `direction` |
@@ -282,7 +282,7 @@ await render(
282
282
  />
283
283
  </Clip>
284
284
 
285
- <Captions src={voiceover} style="tiktok" color="#ffffff" />
285
+ <Captions src={voiceover} style="tiktok" color="#ffffff" withAudio />
286
286
  </Render>,
287
287
  { output: "output/talking-head.mp4" }
288
288
  );
@@ -337,7 +337,7 @@ await render(
337
337
  <Title position="top" color="#ffffff">My 3-Month Transformation</Title>
338
338
  </Clip>
339
339
 
340
- <Captions src={voiceover} style="tiktok" color="#ffffff" />
340
+ <Captions src={voiceover} style="tiktok" color="#ffffff" withAudio />
341
341
  </Render>,
342
342
  { output: "output/transformation.mp4" }
343
343
  );
package/docs/react.md CHANGED
@@ -328,10 +328,13 @@ or feed it a speech element directly:
328
328
  <Captions
329
329
  src={ralph-speech}
330
330
  style="tiktok"
331
+ withAudio
331
332
  />
332
333
  </Clip>
333
334
  ```
334
335
 
336
+ > by default, `<Captions src={speech} />` renders captions only (no audio). add `withAudio` to include the speech audio track in the video.
337
+
335
338
  ### caption styles
336
339
 
337
340
  ```tsx
@@ -123,6 +123,11 @@ export default (
123
123
  </Clip>
124
124
 
125
125
  {/* TikTok-style captions with voiceover */}
126
- <Captions src={voiceover} style={CAPTIONS_STYLE} color={CAPTIONS_COLOR} />
126
+ <Captions
127
+ src={voiceover}
128
+ style={CAPTIONS_STYLE}
129
+ color={CAPTIONS_COLOR}
130
+ withAudio
131
+ />
127
132
  </Render>
128
133
  );
@@ -102,6 +102,6 @@ export default (
102
102
  </Clip>
103
103
 
104
104
  {/* TikTok-style captions */}
105
- <Captions src={voiceover} style="tiktok" color="#ffffff" />
105
+ <Captions src={voiceover} style="tiktok" color="#ffffff" withAudio />
106
106
  </Render>
107
107
  );
package/package.json CHANGED
@@ -71,7 +71,7 @@
71
71
  "zod": "^4.2.1"
72
72
  },
73
73
  "sideEffects": false,
74
- "version": "0.4.0-alpha74",
74
+ "version": "0.4.0-alpha76",
75
75
  "exports": {
76
76
  ".": "./src/index.ts",
77
77
  "./ai": "./src/ai-sdk/index.ts",
@@ -143,7 +143,7 @@ export default (
143
143
  model={fal.videoModel("wan-2.5")}
144
144
  />
145
145
  </Clip>
146
- <Captions src={voiceover} style="tiktok" />
146
+ <Captions src={voiceover} style="tiktok" withAudio />
147
147
  </Render>
148
148
  );
149
149
  ```
@@ -226,13 +226,13 @@ export default (
226
226
  <Clip duration={21}>
227
227
  <Image src={character} />
228
228
  </Clip>
229
- <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
229
+ <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" withAudio />
230
230
  </Render>
231
231
  );
232
232
  \`\`\`
233
233
  This file can be both rendered directly (\`bunx vargai render file.tsx\`) and imported by other files (\`import { character } from "./file.tsx"\`).
234
234
 
235
- 2. **Captions include audio** - \`<Captions src={voiceover} />\` already plays the audio. No need for separate \`<Speech>\` in the clip.
235
+ 2. **Captions and audio** - \`<Captions src={voiceover} />\` renders captions only (no audio). Add \`withAudio\` to also play the speech audio: \`<Captions src={voiceover} withAudio />\`.
236
236
 
237
237
  3. **Clip duration** - Omit \`duration\` to auto-fit content. Set explicit \`duration={N}\` to lock length. If duration is shorter than content, you get black screen while audio continues.
238
238
 
@@ -305,7 +305,7 @@ export default (
305
305
  aspectRatio="9:16"
306
306
  />
307
307
  </Clip>
308
- <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
308
+ <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" withAudio />
309
309
  </Render>
310
310
  );
311
311
  \`\`\`
@@ -249,6 +249,7 @@ function extractElementInfo(element: VargElement): StoryboardElement {
249
249
  color: props.color,
250
250
  activeColor: props.activeColor,
251
251
  fontSize: props.fontSize,
252
+ withAudio: props.withAudio,
252
253
  };
253
254
  break;
254
255
  }
@@ -190,6 +190,7 @@ describe("ResolvedElement in composition tree", () => {
190
190
  const captions = Captions({
191
191
  src: audio as unknown as VargElement<"speech">,
192
192
  style: "tiktok",
193
+ withAudio: true,
193
194
  });
194
195
 
195
196
  expect(captions.type).toBe("captions");
@@ -458,6 +459,7 @@ describe("nested clips (container clip pattern)", () => {
458
459
  Captions({
459
460
  src: audio as unknown as VargElement<"speech">,
460
461
  style: "tiktok",
462
+ withAudio: true,
461
463
  }),
462
464
  ],
463
465
  }),
@@ -585,6 +587,7 @@ describe("nested clips (container clip pattern)", () => {
585
587
  Captions({
586
588
  src: audio as unknown as VargElement<"speech">,
587
589
  style: "tiktok",
590
+ withAudio: true,
588
591
  }),
589
592
  ],
590
593
  });
@@ -50,7 +50,7 @@ async function Scene({
50
50
  </Clip>
51
51
  );
52
52
  })}
53
- <Captions src={audio} style="tiktok" position="bottom" />
53
+ <Captions src={audio} style="tiktok" position="bottom" withAudio />
54
54
  </Clip>
55
55
  );
56
56
  }
@@ -17,6 +17,6 @@ export default (
17
17
  zoom="in"
18
18
  />
19
19
  </Clip>
20
- <Captions src={audio} style="tiktok" />
20
+ <Captions src={audio} style="tiktok" withAudio />
21
21
  </Render>
22
22
  );
@@ -19,7 +19,7 @@
19
19
 
20
20
  import { elevenlabs } from "../../../ai-sdk/providers/elevenlabs";
21
21
  import { fal } from "../../../ai-sdk/providers/fal";
22
- import { Clip, Image, Render, render, Speech, Video } from "../..";
22
+ import { Captions, Clip, Image, Render, render, Speech, Video } from "../..";
23
23
 
24
24
  // --- One speech call, three segments ---
25
25
  const { audio, segments } = await Speech({
@@ -84,6 +84,9 @@ const demo = (
84
84
 
85
85
  {/* Full continuous voiceover — smooth, no splicing */}
86
86
  {audio}
87
+
88
+ {/* Captions from the voiceover — no withAudio since audio is already included above */}
89
+ <Captions src={audio} style="tiktok" />
87
90
  </Render>
88
91
  );
89
92
 
@@ -37,7 +37,7 @@ export default (
37
37
  {/* Scene 1: talking head — lipsync via VEED, audio baked in */}
38
38
  <Clip duration={audio1.duration}>
39
39
  {talkingHead}
40
- <Captions src={audio1} style="tiktok" />
40
+ <Captions src={audio1} style="tiktok" withAudio />
41
41
  </Clip>
42
42
 
43
43
  {/* Scene 2: science b-roll — image + voiceover via captions */}
@@ -48,7 +48,7 @@ export default (
48
48
  aspectRatio="9:16"
49
49
  zoom="out"
50
50
  />
51
- <Captions src={audio2} style="tiktok" />
51
+ <Captions src={audio2} style="tiktok" withAudio />
52
52
  </Clip>
53
53
  </Render>
54
54
  );
@@ -20,7 +20,7 @@ async function main() {
20
20
  <Clip duration={5}>
21
21
  <Image src="media/cyberpunk-street.png" />
22
22
  </Clip>
23
- <Captions src={speech} style="tiktok" />
23
+ <Captions src={speech} style="tiktok" withAudio />
24
24
  </Render>
25
25
  );
26
26
 
@@ -346,5 +346,9 @@ export async function renderCaptions(
346
346
  writeFileSync(assPath, assContent);
347
347
  ctx.tempFiles.push(assPath);
348
348
 
349
- return { assPath, srtPath, audioPath };
349
+ return {
350
+ assPath,
351
+ srtPath,
352
+ audioPath: props.withAudio ? audioPath : undefined,
353
+ };
350
354
  }
@@ -221,6 +221,10 @@ async function sliceSegments(
221
221
  *
222
222
  * Adds a small safety padding (50ms) to capture any trailing silence
223
223
  * that exists in the original audio beyond the segment boundary.
224
+ *
225
+ * Routes through the FFmpegBackend when available (local or cloud/Rendi),
226
+ * falling back to a direct local `ffmpeg` shell command only when no
227
+ * backend exists (top-level `await` outside render()).
224
228
  */
225
229
  const SLICE_PADDING_S = 0.05; // 50ms safety padding
226
230
 
@@ -234,18 +238,45 @@ async function sliceAudio(
234
238
  const suffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
235
239
  const outPath = `/tmp/varg-segment-${suffix}.mp3`;
236
240
 
237
- const inputPath = ctx?.backend
238
- ? await ctx.backend.resolvePath(file)
239
- : await file.toTempFile();
241
+ if (ctx?.backend) {
242
+ // Use the backend abstraction (works for both local ffmpeg and cloud/Rendi).
243
+ // -ss goes in input options (before -i for fast seek).
244
+ const result = await ctx.backend.run({
245
+ inputs: [{ path: file, options: ["-ss", String(start)] }],
246
+ outputArgs: [
247
+ "-t",
248
+ String(duration),
249
+ "-acodec",
250
+ "libmp3lame",
251
+ "-q:a",
252
+ "2",
253
+ ],
254
+ outputPath: outPath,
255
+ });
256
+
257
+ // Rendi returns a URL, local backend returns a file path.
258
+ if (result.output.type === "url") {
259
+ const response = await fetch(result.output.url);
260
+ return new Uint8Array(await response.arrayBuffer());
261
+ }
262
+ const sliced = await Bun.file(result.output.path).arrayBuffer();
263
+ try {
264
+ await Bun.file(result.output.path).delete?.();
265
+ } catch {
266
+ /* ignore cleanup errors */
267
+ }
268
+ return new Uint8Array(sliced);
269
+ }
240
270
 
241
- // -ss before -i for fast seek, then re-encode for sample-accurate cut
271
+ // Fallback: no backend (top-level `await` outside render()) — use local ffmpeg directly.
272
+ const inputPath = await file.toTempFile();
242
273
  await $`ffmpeg -y -ss ${start} -i ${inputPath} -t ${duration} -acodec libmp3lame -q:a 2 ${outPath}`.quiet();
243
274
 
244
275
  const sliced = await Bun.file(outPath).arrayBuffer();
245
276
  try {
246
277
  await Bun.file(outPath).delete?.();
247
278
  } catch {
248
- /* ignore */
279
+ /* ignore cleanup errors */
249
280
  }
250
281
  return new Uint8Array(sliced);
251
282
  }
@@ -240,6 +240,8 @@ export interface CaptionsProps extends BaseProps {
240
240
  color?: string;
241
241
  activeColor?: string;
242
242
  fontSize?: number;
243
+ /** When src is a Speech element, include its audio track in the video. Defaults to false. */
244
+ withAudio?: boolean;
243
245
  }
244
246
 
245
247
  export interface SplitProps extends BaseProps {