varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +48 -8
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -227
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -493
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -112
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -187
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -135
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -201
  217. package/index.ts +0 -38
  218. package/lib/README.md +0 -144
  219. package/lib/ai-sdk/fal.ts +0 -106
  220. package/lib/ai-sdk/replicate.ts +0 -107
  221. package/lib/elevenlabs.ts +0 -382
  222. package/lib/fal.ts +0 -478
  223. package/lib/ffmpeg.ts +0 -467
  224. package/lib/fireworks.ts +0 -235
  225. package/lib/groq.ts +0 -246
  226. package/lib/higgsfield.ts +0 -176
  227. package/lib/remotion/SKILL.md +0 -823
  228. package/lib/remotion/cli.ts +0 -115
  229. package/lib/remotion/functions.ts +0 -283
  230. package/lib/remotion/index.ts +0 -19
  231. package/lib/remotion/templates.ts +0 -73
  232. package/lib/replicate.ts +0 -304
  233. package/output.txt +0 -1
  234. package/test-import.ts +0 -7
  235. package/test-services.ts +0 -97
  236. package/utilities/s3.ts +0 -147
package/lib/ffmpeg.ts DELETED
@@ -1,467 +0,0 @@
1
- #!/usr/bin/env bun
2
-
3
- /**
4
- * ffmpeg wrapper for video editing operations
5
- * requires ffmpeg to be installed on the system
6
- */
7
-
8
- import { existsSync } from "node:fs";
9
- import ffmpeg from "fluent-ffmpeg";
10
-
11
- // types
12
- export interface ConcatVideosOptions {
13
- inputs: string[];
14
- output: string;
15
- transition?: boolean;
16
- }
17
-
18
- export interface AddAudioOptions {
19
- videoPath: string;
20
- audioPath: string;
21
- output: string;
22
- }
23
-
24
- export interface ResizeVideoOptions {
25
- input: string;
26
- output: string;
27
- width?: number;
28
- height?: number;
29
- aspectRatio?: string;
30
- }
31
-
32
- export interface TrimVideoOptions {
33
- input: string;
34
- output: string;
35
- start: number;
36
- duration?: number;
37
- }
38
-
39
- export interface ConvertFormatOptions {
40
- input: string;
41
- output: string;
42
- format?: string;
43
- }
44
-
45
- // core functions
46
- export async function concatVideos(
47
- options: ConcatVideosOptions,
48
- ): Promise<string> {
49
- const { inputs, output } = options;
50
-
51
- if (!inputs || inputs.length === 0) {
52
- throw new Error("inputs are required");
53
- }
54
- if (!output) {
55
- throw new Error("output is required");
56
- }
57
-
58
- // validate all inputs exist
59
- for (const input of inputs) {
60
- if (!existsSync(input)) {
61
- throw new Error(`input file not found: ${input}`);
62
- }
63
- }
64
-
65
- console.log(`[ffmpeg] concatenating ${inputs.length} videos...`);
66
-
67
- return new Promise((resolve, reject) => {
68
- const command = ffmpeg();
69
-
70
- // add all inputs
71
- for (const input of inputs) {
72
- command.input(input);
73
- }
74
-
75
- // use concat filter
76
- const filterComplex =
77
- inputs.map((_, i) => `[${i}:v][${i}:a]`).join("") +
78
- `concat=n=${inputs.length}:v=1:a=1[outv][outa]`;
79
-
80
- command
81
- .complexFilter(filterComplex)
82
- .outputOptions(["-map", "[outv]", "-map", "[outa]"])
83
- .output(output)
84
- .on("end", () => {
85
- console.log(`[ffmpeg] saved to ${output}`);
86
- resolve(output);
87
- })
88
- .on("error", (err) => {
89
- console.error(`[ffmpeg] error:`, err);
90
- reject(err);
91
- })
92
- .run();
93
- });
94
- }
95
-
96
- export async function addAudio(options: AddAudioOptions): Promise<string> {
97
- const { videoPath, audioPath, output } = options;
98
-
99
- if (!videoPath || !audioPath || !output) {
100
- throw new Error("videoPath, audioPath, and output are required");
101
- }
102
-
103
- console.log(`[ffmpeg] adding audio to video...`);
104
-
105
- return new Promise((resolve, reject) => {
106
- ffmpeg()
107
- .input(videoPath)
108
- .input(audioPath)
109
- .outputOptions([
110
- "-c:v",
111
- "copy",
112
- "-c:a",
113
- "aac",
114
- "-map",
115
- "0:v:0",
116
- "-map",
117
- "1:a:0",
118
- ])
119
- .output(output)
120
- .on("end", () => {
121
- console.log(`[ffmpeg] saved to ${output}`);
122
- resolve(output);
123
- })
124
- .on("error", (err) => {
125
- console.error(`[ffmpeg] error:`, err);
126
- reject(err);
127
- })
128
- .run();
129
- });
130
- }
131
-
132
- export async function resizeVideo(
133
- options: ResizeVideoOptions,
134
- ): Promise<string> {
135
- const { input, output, width, height, aspectRatio } = options;
136
-
137
- if (!input || !output) {
138
- throw new Error("input and output are required");
139
- }
140
-
141
- console.log(`[ffmpeg] resizing video...`);
142
-
143
- return new Promise((resolve, reject) => {
144
- const command = ffmpeg(input);
145
-
146
- if (width && height) {
147
- command.size(`${width}x${height}`);
148
- } else if (aspectRatio) {
149
- command.aspect(aspectRatio);
150
- }
151
-
152
- command
153
- .output(output)
154
- .on("end", () => {
155
- console.log(`[ffmpeg] saved to ${output}`);
156
- resolve(output);
157
- })
158
- .on("error", (err) => {
159
- console.error(`[ffmpeg] error:`, err);
160
- reject(err);
161
- })
162
- .run();
163
- });
164
- }
165
-
166
- export async function trimVideo(options: TrimVideoOptions): Promise<string> {
167
- const { input, output, start, duration } = options;
168
-
169
- if (!input || !output || start === undefined) {
170
- throw new Error("input, output, and start are required");
171
- }
172
-
173
- console.log(`[ffmpeg] trimming video...`);
174
-
175
- return new Promise((resolve, reject) => {
176
- const command = ffmpeg(input).setStartTime(start);
177
-
178
- if (duration) {
179
- command.setDuration(duration);
180
- }
181
-
182
- command
183
- .output(output)
184
- .on("end", () => {
185
- console.log(`[ffmpeg] saved to ${output}`);
186
- resolve(output);
187
- })
188
- .on("error", (err) => {
189
- console.error(`[ffmpeg] error:`, err);
190
- reject(err);
191
- })
192
- .run();
193
- });
194
- }
195
-
196
- export async function convertFormat(
197
- options: ConvertFormatOptions,
198
- ): Promise<string> {
199
- const { input, output, format } = options;
200
-
201
- if (!input || !output) {
202
- throw new Error("input and output are required");
203
- }
204
-
205
- console.log(`[ffmpeg] converting format...`);
206
-
207
- return new Promise((resolve, reject) => {
208
- const command = ffmpeg(input);
209
-
210
- if (format) {
211
- command.format(format);
212
- }
213
-
214
- command
215
- .output(output)
216
- .on("end", () => {
217
- console.log(`[ffmpeg] saved to ${output}`);
218
- resolve(output);
219
- })
220
- .on("error", (err) => {
221
- console.error(`[ffmpeg] error:`, err);
222
- reject(err);
223
- })
224
- .run();
225
- });
226
- }
227
-
228
- export async function extractAudio(
229
- input: string,
230
- output: string,
231
- ): Promise<string> {
232
- if (!input || !output) {
233
- throw new Error("input and output are required");
234
- }
235
-
236
- console.log(`[ffmpeg] extracting audio...`);
237
-
238
- return new Promise((resolve, reject) => {
239
- ffmpeg(input)
240
- .outputOptions(["-vn", "-acodec", "copy"])
241
- .output(output)
242
- .on("end", () => {
243
- console.log(`[ffmpeg] saved to ${output}`);
244
- resolve(output);
245
- })
246
- .on("error", (err) => {
247
- console.error(`[ffmpeg] error:`, err);
248
- reject(err);
249
- })
250
- .run();
251
- });
252
- }
253
-
254
- export interface ProbeResult {
255
- duration: number;
256
- width: number;
257
- height: number;
258
- fps: number;
259
- codec: string;
260
- format: string;
261
- }
262
-
263
- export async function probe(input: string): Promise<ProbeResult> {
264
- if (!input) {
265
- throw new Error("input is required");
266
- }
267
-
268
- if (!existsSync(input)) {
269
- throw new Error(`input file not found: ${input}`);
270
- }
271
-
272
- console.log(`[ffmpeg] probing ${input}...`);
273
-
274
- return new Promise((resolve, reject) => {
275
- ffmpeg.ffprobe(input, (err, metadata) => {
276
- if (err) {
277
- console.error(`[ffmpeg] error:`, err);
278
- reject(err);
279
- return;
280
- }
281
-
282
- const videoStream = metadata.streams.find(
283
- (s) => s.codec_type === "video",
284
- );
285
- if (!videoStream) {
286
- reject(new Error("no video stream found"));
287
- return;
288
- }
289
-
290
- const result: ProbeResult = {
291
- duration: metadata.format.duration || 0,
292
- width: videoStream.width || 0,
293
- height: videoStream.height || 0,
294
- fps: Number(videoStream.r_frame_rate || "0") || 0,
295
- codec: videoStream.codec_name || "",
296
- format: metadata.format.format_name || "",
297
- };
298
-
299
- console.log(
300
- `[ffmpeg] ${result.width}x${result.height} @ ${result.fps}fps, ${result.duration}s, codec: ${result.codec}`,
301
- );
302
- resolve(result);
303
- });
304
- });
305
- }
306
-
307
- // cli
308
- async function cli() {
309
- const args = process.argv.slice(2);
310
- const command = args[0];
311
-
312
- if (!command || command === "help") {
313
- console.log(`
314
- usage:
315
- bun run lib/ffmpeg.ts <command> [args]
316
-
317
- commands:
318
- probe <input> get video metadata
319
- concat <output> <input1> <input2> [input3...] concatenate videos
320
- add_audio <video> <audio> <output> add audio to video
321
- resize <input> <output> <width> <height> resize video
322
- trim <input> <output> <start> [duration] trim video
323
- convert <input> <output> [format] convert format
324
- extract_audio <input> <output> extract audio from video
325
- help show this help
326
-
327
- examples:
328
- bun run lib/ffmpeg.ts probe input.mp4
329
- bun run lib/ffmpeg.ts concat output.mp4 video1.mp4 video2.mp4
330
- bun run lib/ffmpeg.ts add_audio video.mp4 audio.mp3 output.mp4
331
- bun run lib/ffmpeg.ts resize input.mp4 output.mp4 1920 1080
332
- bun run lib/ffmpeg.ts trim input.mp4 output.mp4 10 30
333
- bun run lib/ffmpeg.ts convert input.mov output.mp4
334
- bun run lib/ffmpeg.ts extract_audio input.mp4 output.mp3
335
-
336
- requirements:
337
- ffmpeg must be installed on your system
338
- brew install ffmpeg (macos)
339
- apt-get install ffmpeg (linux)
340
- `);
341
- process.exit(0);
342
- }
343
-
344
- try {
345
- switch (command) {
346
- case "probe": {
347
- const input = args[1];
348
-
349
- if (!input) {
350
- throw new Error("input is required");
351
- }
352
-
353
- const result = await probe(input);
354
- console.log("\nmetadata:");
355
- console.log(` duration: ${result.duration}s`);
356
- console.log(` resolution: ${result.width}x${result.height}`);
357
- console.log(` fps: ${result.fps}`);
358
- console.log(` codec: ${result.codec}`);
359
- console.log(` format: ${result.format}`);
360
- break;
361
- }
362
-
363
- case "concat": {
364
- const output = args[1];
365
- const inputs = args.slice(2);
366
-
367
- if (!output || inputs.length === 0) {
368
- throw new Error("output and at least one input are required");
369
- }
370
-
371
- await concatVideos({ inputs, output });
372
- break;
373
- }
374
-
375
- case "add_audio": {
376
- const videoPath = args[1];
377
- const audioPath = args[2];
378
- const output = args[3];
379
-
380
- if (!videoPath || !audioPath || !output) {
381
- throw new Error("videoPath, audioPath, and output are required");
382
- }
383
-
384
- await addAudio({ videoPath, audioPath, output });
385
- break;
386
- }
387
-
388
- case "resize": {
389
- const input = args[1];
390
- const output = args[2];
391
- const widthArg = args[3];
392
- const heightArg = args[4];
393
-
394
- if (!input || !output || !widthArg || !heightArg) {
395
- throw new Error("input, output, width, and height are required");
396
- }
397
-
398
- const width = Number.parseInt(widthArg, 10);
399
- const height = Number.parseInt(heightArg, 10);
400
-
401
- if (Number.isNaN(width) || Number.isNaN(height)) {
402
- throw new Error("width and height must be valid numbers");
403
- }
404
-
405
- await resizeVideo({ input, output, width, height });
406
- break;
407
- }
408
-
409
- case "trim": {
410
- const input = args[1];
411
- const output = args[2];
412
- const startArg = args[3];
413
-
414
- if (!input || !output || !startArg) {
415
- throw new Error("input, output, and start are required");
416
- }
417
-
418
- const start = Number.parseFloat(startArg);
419
- if (Number.isNaN(start)) {
420
- throw new Error("start must be a valid number");
421
- }
422
-
423
- const duration = args[4] ? parseFloat(args[4]) : undefined;
424
-
425
- await trimVideo({ input, output, start, duration });
426
- break;
427
- }
428
-
429
- case "convert": {
430
- const input = args[1];
431
- const output = args[2];
432
- const format = args[3];
433
-
434
- if (!input || !output) {
435
- throw new Error("input and output are required");
436
- }
437
-
438
- await convertFormat({ input, output, format });
439
- break;
440
- }
441
-
442
- case "extract_audio": {
443
- const input = args[1];
444
- const output = args[2];
445
-
446
- if (!input || !output) {
447
- throw new Error("input and output are required");
448
- }
449
-
450
- await extractAudio(input, output);
451
- break;
452
- }
453
-
454
- default:
455
- console.error(`unknown command: ${command}`);
456
- console.log(`run 'bun run lib/ffmpeg.ts help' for usage`);
457
- process.exit(1);
458
- }
459
- } catch (error) {
460
- console.error(`[ffmpeg] error:`, error);
461
- process.exit(1);
462
- }
463
- }
464
-
465
- if (import.meta.main) {
466
- cli();
467
- }
package/lib/fireworks.ts DELETED
@@ -1,235 +0,0 @@
1
- #!/usr/bin/env bun
2
-
3
- /**
4
- * fireworks.ai api wrapper for audio transcription with word-level timestamps
5
- * supports whisper models with advanced features like diarization and vad
6
- */
7
-
8
- import { readFileSync, writeFileSync } from "node:fs";
9
- import { join } from "node:path";
10
-
11
- // types
12
- export interface FireworksWord {
13
- word: string;
14
- language: string;
15
- probability: number;
16
- hallucination_score: number;
17
- start: number;
18
- end: number;
19
- retry_count: number;
20
- }
21
-
22
- export interface FireworksResponse {
23
- task: string;
24
- language: string;
25
- text: string;
26
- request_id: string;
27
- words: FireworksWord[];
28
- duration: number;
29
- }
30
-
31
- export interface FireworksTranscribeOptions {
32
- audioPath: string; // local file path or url
33
- vadModel?: "whisperx-pyannet" | "silero";
34
- alignmentModel?: "tdnn_ffn" | "wav2vec2";
35
- responseFormat?: "json" | "verbose_json" | "text" | "srt" | "vtt";
36
- preprocessing?: "none" | "denoise";
37
- temperature?: string; // comma-separated values like "0,0.2,0.4,0.6,0.8,1"
38
- timestampGranularities?: "word" | "segment";
39
- diarize?: boolean;
40
- language?: string;
41
- outputPath?: string;
42
- }
43
-
44
- // srt conversion
45
- export function convertFireworksToSRT(words: FireworksWord[]): string {
46
- let srt = "";
47
- let index = 1;
48
-
49
- for (const word of words) {
50
- const startTime = formatTime(word.start);
51
- const endTime = formatTime(word.end);
52
-
53
- srt += `${index}\n`;
54
- srt += `${startTime} --> ${endTime}\n`;
55
- srt += `${word.word.trim()}\n\n`;
56
- index++;
57
- }
58
-
59
- return srt;
60
- }
61
-
62
- function formatTime(seconds: number): string {
63
- const hours = Math.floor(seconds / 3600);
64
- const minutes = Math.floor((seconds % 3600) / 60);
65
- const secs = Math.floor(seconds % 60);
66
- const millis = Math.floor((seconds % 1) * 1000);
67
-
68
- return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(2, "0")}:${String(secs).padStart(2, "0")},${String(millis).padStart(3, "0")}`;
69
- }
70
-
71
- // core function
72
- export async function transcribeWithFireworks(
73
- options: FireworksTranscribeOptions,
74
- ): Promise<FireworksResponse> {
75
- const {
76
- audioPath,
77
- vadModel = "whisperx-pyannet",
78
- alignmentModel = "tdnn_ffn",
79
- responseFormat = "verbose_json",
80
- preprocessing = "none",
81
- temperature = "0,0.2,0.4,0.6,0.8,1",
82
- timestampGranularities = "word",
83
- diarize = false,
84
- language,
85
- outputPath,
86
- } = options;
87
-
88
- if (!audioPath) {
89
- throw new Error("audioPath is required");
90
- }
91
-
92
- if (!process.env.FIREWORKS_API_KEY) {
93
- throw new Error("FIREWORKS_API_KEY environment variable is required");
94
- }
95
-
96
- console.log("[fireworks] transcribing audio...");
97
-
98
- try {
99
- // load audio file (local or remote)
100
- let audioBlob: Blob;
101
- let fileName = "audio.mp3";
102
-
103
- if (audioPath.startsWith("http://") || audioPath.startsWith("https://")) {
104
- // fetch remote file
105
- const audioResponse = await fetch(audioPath);
106
- audioBlob = await audioResponse.blob();
107
- fileName = audioPath.split("/").pop()?.split("?")[0] || "audio.mp3";
108
- } else {
109
- // read local file
110
- const buffer = readFileSync(audioPath);
111
- audioBlob = new Blob([buffer]);
112
- fileName = audioPath.split("/").pop() || "audio.mp3";
113
- }
114
-
115
- // prepare form data
116
- const formData = new FormData();
117
- formData.append("file", audioBlob, fileName);
118
- formData.append("vad_model", vadModel);
119
- formData.append("alignment_model", alignmentModel);
120
- formData.append("response_format", responseFormat);
121
- formData.append("preprocessing", preprocessing);
122
- formData.append("temperature", temperature);
123
- formData.append("timestamp_granularities", timestampGranularities);
124
- formData.append("diarize", diarize.toString());
125
-
126
- if (language) {
127
- formData.append("language", language);
128
- }
129
-
130
- // call fireworks api
131
- const response = await fetch(
132
- "https://audio-prod.us-virginia-1.direct.fireworks.ai/v1/audio/transcriptions",
133
- {
134
- method: "POST",
135
- headers: {
136
- Authorization: `Bearer ${process.env.FIREWORKS_API_KEY}`,
137
- },
138
- body: formData,
139
- },
140
- );
141
-
142
- if (!response.ok) {
143
- const errorText = await response.text();
144
- console.error("[fireworks] api error:", errorText);
145
- throw new Error(`fireworks api error: ${response.statusText}`);
146
- }
147
-
148
- const data = (await response.json()) as FireworksResponse;
149
-
150
- console.log(
151
- `[fireworks] transcription complete (${data.words?.length || 0} words)`,
152
- );
153
-
154
- // save to file if requested
155
- if (outputPath) {
156
- let content: string;
157
-
158
- if (outputPath.endsWith(".srt")) {
159
- content = convertFireworksToSRT(data.words || []);
160
- } else if (outputPath.endsWith(".json")) {
161
- content = JSON.stringify(data, null, 2);
162
- } else {
163
- content = data.text;
164
- }
165
-
166
- writeFileSync(outputPath, content);
167
- console.log(`[fireworks] saved to ${outputPath}`);
168
- }
169
-
170
- return data;
171
- } catch (error) {
172
- console.error("[fireworks] error:", error);
173
- throw error;
174
- }
175
- }
176
-
177
- // cli
178
- async function cli() {
179
- const args = process.argv.slice(2);
180
- const command = args[0];
181
-
182
- if (!command || command === "help") {
183
- console.log(`
184
- usage:
185
- bun run lib/fireworks.ts <audioPath> [outputPath]
186
-
187
- arguments:
188
- audioPath - local file path or url to audio file
189
- outputPath - optional output file (.srt, .json, or .txt)
190
-
191
- examples:
192
- bun run lib/fireworks.ts media/audio.mp3
193
- bun run lib/fireworks.ts media/audio.mp3 output.srt
194
- bun run lib/fireworks.ts https://example.com/audio.mp3 output.json
195
- bun run lib/fireworks.ts media/dora.ogg transcription.txt
196
-
197
- features:
198
- - word-level timestamps for precise subtitles
199
- - voice activity detection (vad) for better accuracy
200
- - speaker diarization support
201
- - advanced preprocessing options
202
- - multiple output formats (srt, json, text)
203
-
204
- environment:
205
- FIREWORKS_API_KEY - your fireworks.ai api key
206
- `);
207
- process.exit(0);
208
- }
209
-
210
- try {
211
- const audioPath = args[0];
212
- const outputPath = args[1];
213
-
214
- if (!audioPath) {
215
- throw new Error("audioPath is required");
216
- }
217
-
218
- const data = await transcribeWithFireworks({
219
- audioPath,
220
- outputPath: outputPath || join(process.cwd(), "output.srt"),
221
- });
222
-
223
- console.log(`\ntranscription:\n${data.text}\n`);
224
- console.log(`words: ${data.words?.length || 0}`);
225
- console.log(`language: ${data.language}`);
226
- console.log(`duration: ${data.duration}s`);
227
- } catch (error) {
228
- console.error("[fireworks] error:", error);
229
- process.exit(1);
230
- }
231
- }
232
-
233
- if (import.meta.main) {
234
- cli();
235
- }