varg.ai-sdk 0.1.1 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +43 -10
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +58 -68
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -169
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -437
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -105
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -145
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -125
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -136
  217. package/cli/commands/find.ts +0 -58
  218. package/cli/commands/help.ts +0 -70
  219. package/cli/commands/list.ts +0 -49
  220. package/cli/commands/run.ts +0 -237
  221. package/cli/commands/which.ts +0 -66
  222. package/cli/discover.ts +0 -66
  223. package/cli/index.ts +0 -33
  224. package/cli/runner.ts +0 -65
  225. package/cli/types.ts +0 -49
  226. package/cli/ui.ts +0 -185
  227. package/index.ts +0 -75
  228. package/lib/README.md +0 -144
  229. package/lib/ai-sdk/fal.ts +0 -106
  230. package/lib/ai-sdk/replicate.ts +0 -107
  231. package/lib/elevenlabs.ts +0 -382
  232. package/lib/fal.ts +0 -467
  233. package/lib/ffmpeg.ts +0 -467
  234. package/lib/fireworks.ts +0 -235
  235. package/lib/groq.ts +0 -246
  236. package/lib/higgsfield.ts +0 -176
  237. package/lib/remotion/SKILL.md +0 -823
  238. package/lib/remotion/cli.ts +0 -115
  239. package/lib/remotion/functions.ts +0 -283
  240. package/lib/remotion/index.ts +0 -19
  241. package/lib/remotion/templates.ts +0 -73
  242. package/lib/replicate.ts +0 -304
  243. package/output.txt +0 -1
  244. package/test-import.ts +0 -7
  245. package/test-services.ts +0 -97
  246. package/utilities/s3.ts +0 -147
@@ -0,0 +1,144 @@
1
+ # editly implementation plan
2
+
3
+ ## goal
4
+
5
+ match the original editly interface/types so users can use the same config format, but implement everything with pure ffmpeg (no headless-gl, no Fabric.js, no canvas dependencies).
6
+
7
+ ## process
8
+
9
+ 1. implement feature
10
+ 2. create test that generates output video
11
+ 3. tell user what to look for in the output (specific visual behavior to verify)
12
+ 4. wait for user confirmation before moving to next feature
13
+
14
+ ## instructions
15
+
16
+ implement features one by one. check with user and show examples before moving on to the next feature.
17
+
18
+ ---
19
+
20
+ ## layer types
21
+
22
+ | layer | status | notes |
23
+ |-------|--------|-------|
24
+ | video | ✅ done | originX/originY, cutFrom/cutTo, contain-blur |
25
+ | image | ✅ done | zoomDirection in/out/left/right, contain-blur |
26
+ | image-overlay | ✅ done | position presets, PositionObject, Ken Burns zoom/pan |
27
+ | title | ✅ done | fontPath, fontFamily, start/stop timing |
28
+ | subtitle | ✅ done | centered bottom, background box, start/stop timing |
29
+ | title-background | ✅ done | title with gradient/color background |
30
+ | news-title | ✅ done | colored bar with text, top/bottom, start/stop timing |
31
+ | slide-in-text | ✅ done | animated text sliding from left, start/stop timing |
32
+ | fill-color | ✅ done | |
33
+ | pause | ✅ done | aliased to fill-color |
34
+ | radial-gradient | ✅ done | |
35
+ | linear-gradient | ✅ done | |
36
+ | rainbow-colors | ✅ done | animated hue rotation |
37
+ | audio | ✅ done | works as clip layer with cutFrom/cutTo/mixVolume |
38
+ | detached-audio | ✅ done | clip-relative timing with start offset |
39
+ | canvas | 🚫 skip | requires dependencies |
40
+ | fabric | 🚫 skip | requires dependencies |
41
+ | gl | 🚫 skip | requires dependencies |
42
+
43
+ ## transitions
44
+
45
+ | feature | status | notes |
46
+ |---------|--------|-------|
47
+ | ffmpeg xfade | ✅ working | ~40 transitions available |
48
+ | gl-transitions | 🚫 skip | requires headless-gl |
49
+ | audio crossfade | ✅ done | afade in/out during transitions |
50
+
51
+ ## audio
52
+
53
+ | feature | status | notes |
54
+ |---------|--------|-------|
55
+ | audioFilePath | ✅ done | |
56
+ | audioTracks | ✅ done | cutFrom/cutTo/start/mixVolume |
57
+ | loopAudio | ✅ done | loops background audio to match video duration |
58
+ | keepSourceAudio | ✅ done | extracts audio from video clips, syncs with cutFrom |
59
+ | clipsAudioVolume | ✅ done | controls volume of source video audio |
60
+ | outputVolume | ✅ done | |
61
+ | audioNorm | ✅ done | dynaudnorm filter with gaussSize/maxGain |
62
+
63
+ ## config
64
+
65
+ | option | status | notes |
66
+ |--------|--------|-------|
67
+ | defaults.layer | ✅ done | applies common props to all layers |
68
+ | defaults.layerType | ✅ done | applies type-specific defaults |
69
+ | layer start/stop timing | ✅ done | enable expression for text layers |
70
+ | contain-blur resize | ✅ done | blurred background instead of black bars |
71
+ | custom fonts | ✅ done | fontPath and fontFamily support |
72
+
73
+ ---
74
+
75
+ ## implementation order
76
+
77
+ ### phase 1: complete existing layers
78
+ 1. [x] video: add originX/originY ✅
79
+ 2. [x] image: add zoomDirection left/right ✅
80
+ 3. [x] title: add fontPath/fontFamily support ✅
81
+
82
+ ### phase 2: missing layer types
83
+ 4. [x] image-overlay layer ✅
84
+ 5. [x] subtitle layer ✅
85
+ 6. [x] title-background layer ✅
86
+ 7. [x] rainbow-colors layer ✅
87
+ 8. [x] news-title layer ✅
88
+ 9. [x] slide-in-text layer ✅
89
+
90
+ ### phase 3: audio features
91
+ 10. [x] audio layer (as clip layer, not just audioTracks) ✅
92
+ 11. [x] detached-audio layer ✅
93
+ 12. [x] loopAudio ✅
94
+ 13. [x] keepSourceAudio ✅
95
+ 14. [x] clipsAudioVolume ✅
96
+ 15. [x] audioNorm ✅
97
+ 16. [x] audioTracks cutFrom/cutTo/start ✅
98
+
99
+ ### phase 4: advanced features
100
+ 17. [x] layer start/stop timing ✅
101
+ 18. [x] contain-blur resize mode ✅
102
+ 19. [x] defaults.layer / defaults.layerType ✅
103
+ 20. [x] audio crossfade during transitions ✅
104
+
105
+ ---
106
+
107
+ ## ffmpeg xfade transitions (available)
108
+
109
+ these work out of the box:
110
+ - fade, fadeblack, fadewhite, fadeslow
111
+ - wipeleft, wiperight, wipeup, wipedown
112
+ - slideleft, slideright, slideup, slidedown
113
+ - circlecrop, rectcrop
114
+ - distance, fadegrayscale
115
+ - hblur, pixelize, diagtl, diagtr, diagbl, diagbr
116
+ - hlslice, hrslice, vuslice, vdslice
117
+ - dissolve, radial, smoothleft, smoothright, smoothup, smoothdown
118
+ - circleopen, circleclose, vertopen, vertclose, horzopen, horzclose
119
+ - squeezev, squeezeh, zoomin, hlwind, hrwind, vuwind, vdwind
120
+ - coverleft, coverright, coverup, coverdown
121
+ - revealleft, revealright, revealup, revealdown
122
+
123
+ ---
124
+
125
+ ## key differences from original editly
126
+
127
+ ### continuous video overlays
128
+
129
+ when the same video file is used as an overlay (with `left`/`top`/`width`/`height` positioning) across multiple clips, our implementation automatically makes it continuous:
130
+
131
+ 1. `collectVideoOverlays()` groups overlay videos by path
132
+ 2. calculates `totalDuration` across all clips
133
+ 3. uses a single ffmpeg input stream
134
+ 4. overlays it on the final composited video
135
+
136
+ this means if you add `{ type: "video", path: "pip.mp4", left: 0.73, top: 0.73, width: 0.25, height: 0.25 }` to each clip, the video plays continuously across the timeline (not restarting per clip).
137
+
138
+ **limitation**: the overlay video must be long enough to cover the total timeline. if your timeline is 12s but the overlay video is 5s, it stops at 5s.
139
+
140
+ ---
141
+
142
+ ## progress log
143
+
144
+ <!-- append progress here as we implement -->
@@ -0,0 +1,328 @@
1
+ // Types from original editly (https://github.com/mifi/editly)
2
+ // Adapted for pure ffmpeg implementation (no fabric/canvas/gl dependencies)
3
+
4
/** Horizontal origin keyword accepted by `originX` fields. */
export type OriginX = "left" | "center" | "right";

/** Vertical origin keyword accepted by `originY` fields. */
export type OriginY = "top" | "center" | "bottom";

/**
 * A size or offset value: a bare number, a percentage string (`"50%"`),
 * or a pixel string (`"120px"`).
 */
export type SizeValue = number | `${number}%` | `${number}px`;

/**
 * How to fit an image or video to the screen. Can be one of:
 * - `'contain'` - All the video will be contained within the frame and letterboxed.
 * - `'contain-blur'` - Like contain, but with a blurred copy as the letterbox.
 * - `'cover'` - Video will be cropped to cover the whole screen (aspect ratio preserved).
 * - `'stretch'` - Video will be stretched to cover the whole screen (aspect ratio ignored).
 *
 * @default 'contain-blur'
 */
export type ResizeMode = "contain" | "contain-blur" | "cover" | "stretch";

/** Explicit position: an `x`/`y` pair with optional origin keywords. */
export interface PositionObject {
  x: SizeValue;
  y: SizeValue;
  originX?: OriginX;
  originY?: OriginY;
}

/**
 * Position accepted by the layers that support the `position` parameter:
 * one of the named presets or an explicit {@link PositionObject}.
 */
export type Position =
  | "top"
  | "top-left"
  | "top-right"
  | "center"
  | "center-left"
  | "center-right"
  | "bottom"
  | "bottom-left"
  | "bottom-right"
  | PositionObject;
40
+
41
/**
 * An arbitrary audio track, identified by its file `path`, with optional
 * `mixVolume`, `cutFrom`/`cutTo` and `start` controls.
 */
export interface AudioTrack {
  path: string;
  mixVolume?: number | string;
  cutFrom?: number;
  cutTo?: number;
  start?: number;
}

/**
 * Ken Burns (zoom/pan) parameters shared by the layers that mix this in.
 */
export interface KenBurns {
  /** Direction of the effect; `null` is also accepted. */
  zoomDirection?: "in" | "out" | "left" | "right" | null;
  zoomAmount?: number;
}
59
+
60
/** Every value a layer's `type` discriminator may take. */
export type LayerType =
  // media layers
  | "video"
  | "audio"
  | "detached-audio"
  | "image"
  | "image-overlay"
  // text layers
  | "title"
  | "subtitle"
  | "title-background"
  | "news-title"
  | "slide-in-text"
  // generated backgrounds
  | "fill-color"
  | "pause"
  | "radial-gradient"
  | "linear-gradient"
  | "rainbow-colors";

/**
 * Fields common to every layer: the `type` discriminator and the optional
 * `start`/`stop` timing.
 */
export interface BaseLayer {
  type: LayerType;
  start?: number;
  stop?: number;
}

/**
 * Fields common to the text-rendering layers: the text itself, its colour,
 * and an optional custom font given by file path or family name.
 */
export interface TextLayer extends BaseLayer {
  text: string;
  textColor?: string;
  fontPath?: string;
  fontFamily?: string;
}
89
+
90
/**
 * Video layer.
 *
 * If the parent `clip.duration` is specified, the video will be slowed/sped-up to match `clip.duration`.
 * If `cutFrom`/`cutTo` is set, the resulting segment (`cutTo`-`cutFrom`) will be slowed/sped-up to fit `clip.duration`.
 */
export interface VideoLayer extends BaseLayer {
  type: "video";
  path: string;
  resizeMode?: ResizeMode;
  cutFrom?: number;
  cutTo?: number;
  // Optional placement of the video within the frame.
  width?: SizeValue;
  height?: SizeValue;
  left?: SizeValue;
  top?: SizeValue;
  originX?: OriginX;
  originY?: OriginY;
  mixVolume?: number | string;
}

/**
 * Audio layer. Audio layers will be mixed together.
 */
export interface AudioLayer extends BaseLayer {
  type: "audio";
  path: string;
  cutFrom?: number;
  cutTo?: number;
  mixVolume?: number | string;
}

/**
 * Detached audio - like `audioTracks`, but the start time is relative to the clip's start.
 */
export interface DetachedAudioLayer extends BaseLayer, AudioTrack {
  type: "detached-audio";
}

/**
 * Full screen image, with optional Ken Burns parameters.
 */
export interface ImageLayer extends BaseLayer, KenBurns {
  type: "image";
  path: string;
  resizeMode?: ResizeMode;
  duration?: number;
}

/**
 * Image overlay with a custom position and size on the screen.
 */
export interface ImageOverlayLayer extends BaseLayer, KenBurns {
  type: "image-overlay";
  path: string;
  position?: Position;
  width?: SizeValue;
  height?: SizeValue;
}
147
+
148
/** Title text with an optional position and Ken Burns parameters. */
export interface TitleLayer extends TextLayer, KenBurns {
  type: "title";
  position?: Position;
}

/** Subtitle text with an optional background colour. */
export interface SubtitleLayer extends TextLayer {
  type: "subtitle";
  backgroundColor?: string;
}

/**
 * Title with background. The background is one of the layers allowed by
 * `BackgroundLayer`.
 */
export interface TitleBackgroundLayer extends TextLayer {
  type: "title-background";
  background?: BackgroundLayer;
}

/** News-style title: text with a background colour and optional position. */
export interface NewsTitleLayer extends TextLayer {
  type: "news-title";
  backgroundColor?: string;
  position?: Position;
}

/** Slide-in text with optional font size, character spacing, colour and position. */
export interface SlideInTextLayer extends TextLayer {
  type: "slide-in-text";
  fontSize?: number;
  charSpacing?: number;
  color?: string;
  position?: Position;
}
179
+
180
/** Layer filled with a single optional colour. */
export interface FillColorLayer extends BaseLayer {
  type: "fill-color";
  color?: string;
}

/** Pause layer; carries the same optional `color` field as `fill-color`. */
export interface PauseLayer extends BaseLayer {
  type: "pause";
  color?: string;
}

/** Radial gradient between an optional pair of colours. */
export interface RadialGradientLayer extends BaseLayer {
  type: "radial-gradient";
  colors?: [string, string];
}

/** Linear gradient between an optional pair of colours. */
export interface LinearGradientLayer extends BaseLayer {
  type: "linear-gradient";
  colors?: [string, string];
}

/** Rainbow colours layer; it has no options beyond the base layer fields. */
export interface RainbowColorsLayer extends BaseLayer {
  type: "rainbow-colors";
}
203
+
204
/**
 * Union of every concrete layer shape, discriminated by the `type` field.
 */
export type Layer =
  // media
  | VideoLayer
  | AudioLayer
  | DetachedAudioLayer
  | ImageLayer
  | ImageOverlayLayer
  // text
  | TitleLayer
  | SubtitleLayer
  | TitleBackgroundLayer
  | NewsTitleLayer
  | SlideInTextLayer
  // generated backgrounds
  | FillColorLayer
  | PauseLayer
  | RadialGradientLayer
  | LinearGradientLayer
  | RainbowColorsLayer;

/**
 * Special layers that can be used as the background of a 'title-background' layer.
 */
export type BackgroundLayer =
  | RadialGradientLayer
  | LinearGradientLayer
  | FillColorLayer;
228
+
229
/**
 * Curve types for audio fades.
 * @see https://trac.ffmpeg.org/wiki/AfadeCurves
 */
export type CurveType =
  | "tri"
  | "qsin"
  | "hsin"
  | "esin"
  | "log"
  | "ipar"
  | "qua"
  | "cub"
  | "squ"
  | "cbr"
  | "par"
  | "exp"
  | "iqsin"
  | "ihsin"
  | "dese"
  | "desi"
  | "losi"
  | "nofade";

/**
 * Transition settings: optional duration, transition name, and the audio
 * fade curves for the outgoing and incoming audio.
 */
export interface TransitionOptions {
  duration?: number;
  name?: string;
  audioOutCurve?: CurveType;
  audioInCurve?: CurveType;
}

/**
 * A clip: its layers, an optional duration, and an optional transition
 * (`null` is also accepted).
 */
export interface Clip {
  layers: Layer[];
  duration?: number;
  transition?: TransitionOptions | null;
}
265
+
266
/**
 * Defaults applied to layers regardless of type. `fontPath` is typed
 * explicitly; any other key is accepted as `unknown`.
 */
export interface DefaultLayerOptions {
  fontPath?: string;
  [key: string]: unknown;
}

/**
 * Per-layer-type defaults: for each `LayerType`, a partial of the matching
 * layer shape without its `type` discriminator.
 */
export type DefaultLayerTypeOptions = {
  [P in LayerType]?: Partial<Omit<Extract<Layer, { type: P }>, "type">>;
};

/** Defaults for clip duration, layers, layer types and transitions. */
export interface DefaultOptions {
  duration?: number;
  layer?: DefaultLayerOptions;
  layerType?: DefaultLayerTypeOptions;
  transition?: TransitionOptions | null;
}

/**
 * Audio normalization options.
 * @see https://ffmpeg.org/ffmpeg-filters.html#dynaudnorm
 */
export interface AudioNormalizationOptions {
  enable?: boolean;
  gaussSize?: number;
  maxGain?: number;
}

/**
 * Top-level render configuration. Only `outPath` and `clips` are required.
 */
export interface EditlyConfig {
  // --- output and timeline ---
  outPath: string;
  clips: Clip[];
  width?: number;
  height?: number;
  fps?: number;
  customOutputArgs?: string[];
  allowRemoteRequests?: boolean;
  fast?: boolean;
  defaults?: DefaultOptions;

  // --- audio ---
  audioTracks?: AudioTrack[];
  audioFilePath?: string;
  backgroundAudioVolume?: string | number;
  loopAudio?: boolean;
  keepSourceAudio?: boolean;
  clipsAudioVolume?: number | string;
  outputVolume?: number | string;
  audioNorm?: AudioNormalizationOptions;

  // --- diagnostics ---
  verbose?: boolean;
  enableFfmpegLog?: boolean;
}
313
+
314
// Internal types used by our implementation

/** Metadata describing a video; only `duration` is required. */
export interface VideoInfo {
  duration: number;
  width?: number;
  height?: number;
  fps?: number;
  framerateStr?: string;
  rotation?: number;
}

/**
 * A clip with a concrete duration and a transition in which every
 * `TransitionOptions` field is required.
 */
export interface ProcessedClip {
  layers: Layer[];
  duration: number;
  transition: Required<TransitionOptions>;
}
@@ -0,0 +1,255 @@
1
+ import {
2
+ type EmbeddingModelV3,
3
+ type ImageModelV3,
4
+ type LanguageModelV3,
5
+ NoSuchModelError,
6
+ type ProviderV3,
7
+ type SharedV3Warning,
8
+ type SpeechModelV3,
9
+ type SpeechModelV3CallOptions,
10
+ } from "@ai-sdk/provider";
11
+ import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
12
+ import type { MusicModelV3, MusicModelV3CallOptions } from "../music-model";
13
+
14
/**
 * Friendly voice names mapped to ElevenLabs voice IDs.
 * Keys are lowercase; `resolveVoiceId` lowercases its input before lookup.
 */
const VOICES: Record<string, string> = {
  rachel: "21m00Tcm4TlvDq8ikWAM",
  domi: "AZnzlk1XvdvUeBnXmlld",
  bella: "EXAVITQu4vr4xnSDxMaL",
  antoni: "ErXwobaYiN019PkySvjV",
  elli: "MF3mGyEYCl7XYWbV9V6O",
  josh: "TxGEqnHWrfWFTfGW9XjX",
  arnold: "VR6AewLTigWG4xSOukaG",
  adam: "pNInz6obpgDQGcFmaJgB",
  sam: "yoZ06aMxZJJ28mfd3POQ",
};

/**
 * Accepted text-to-speech model names mapped to the model ID sent to the API.
 * Full IDs map to themselves; the remaining keys are shorthand aliases.
 */
const TTS_MODELS: Record<string, string> = {
  eleven_multilingual_v2: "eleven_multilingual_v2",
  eleven_turbo_v2: "eleven_turbo_v2",
  eleven_monolingual_v1: "eleven_monolingual_v1",
  multilingual_v2: "eleven_multilingual_v2",
  turbo_v2: "eleven_turbo_v2",
  turbo: "eleven_turbo_v2",
};

/**
 * Looks up `key` among the table's own entries only.
 *
 * Plain indexing (`table[key]`) also sees inherited members, so a key such as
 * "constructor" or "__proto__" would yield a non-string value from
 * `Object.prototype` instead of `undefined`.
 */
function lookupAlias(
  table: Record<string, string>,
  key: string,
): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, key)
    ? table[key]
    : undefined;
}

/**
 * Resolves a friendly voice name (case-insensitive) to its voice ID.
 *
 * @param voice - A key of `VOICES` in any letter case, or a raw voice ID.
 * @returns The mapped voice ID, or `voice` unchanged when it is not a known name.
 */
function resolveVoiceId(voice: string): string {
  return lookupAlias(VOICES, voice.toLowerCase()) ?? voice;
}

/**
 * Resolves a model alias to the model ID sent to the API.
 *
 * @param modelId - A key of `TTS_MODELS` (case-sensitive) or any other model ID.
 * @returns The mapped model ID, or `modelId` unchanged when it is not a known alias.
 */
function resolveModelId(modelId: string): string {
  return lookupAlias(TTS_MODELS, modelId) ?? modelId;
}
43
+
44
/**
 * Music model that generates audio through the ElevenLabs client's
 * `music.compose` call and buffers the streamed result in memory.
 */
class ElevenLabsMusicModel implements MusicModelV3 {
  readonly specificationVersion = "v3" as const;
  readonly provider = "elevenlabs";
  readonly modelId: string;

  private client: ElevenLabsClient;

  /**
   * @param modelId - Model ID forwarded to `music.compose`.
   * @param client - ElevenLabs client used for the request.
   */
  constructor(modelId: string, client: ElevenLabsClient) {
    this.modelId = modelId;
    this.client = client;
  }

  /**
   * Composes music for `options.prompt`.
   *
   * `options.duration` is multiplied by 1000 to form `musicLengthMs` and
   * rounded to a whole number, because a fractional duration can otherwise
   * produce a non-integer millisecond count. A falsy duration (including 0)
   * leaves the length unset. Entries of `providerOptions.elevenlabs` are
   * spread last, so they override the fields set here.
   *
   * @returns The concatenated audio bytes, an (always empty) warnings list,
   *   and response metadata with the current timestamp and model ID.
   */
  async doGenerate(options: MusicModelV3CallOptions) {
    const { prompt, duration, providerOptions } = options;
    const warnings: SharedV3Warning[] = [];

    const elevenLabsOptions = providerOptions?.elevenlabs ?? {};
    const audio = await this.client.music.compose({
      prompt,
      musicLengthMs: duration ? Math.round(duration * 1000) : undefined,
      modelId: this.modelId,
      ...elevenLabsOptions,
    } as Parameters<typeof this.client.music.compose>[0]);

    // Drain the stream, keeping each chunk so the total size is known
    // before allocating the result buffer.
    const reader = audio.getReader();
    const chunks: Uint8Array[] = [];

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
    }

    const totalLength = chunks.reduce((acc, c) => acc + c.length, 0);
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      result.set(chunk, offset);
      offset += chunk.length;
    }

    return {
      audio: result,
      warnings,
      response: {
        timestamp: new Date(),
        modelId: this.modelId,
        headers: undefined,
      },
    };
  }
}
96
+
97
/**
 * Speech model that synthesizes text through the ElevenLabs client's
 * `textToSpeech.convert` call and buffers the streamed audio in memory.
 */
class ElevenLabsSpeechModel implements SpeechModelV3 {
  readonly specificationVersion = "v3" as const;
  readonly provider = "elevenlabs";
  readonly modelId: string;

  private client: ElevenLabsClient;

  constructor(modelId: string, client: ElevenLabsClient) {
    this.modelId = modelId;
    this.client = client;
  }

  /**
   * Converts `options.text` to speech.
   *
   * The voice defaults to "rachel" and is resolved through `resolveVoiceId`;
   * the model ID is resolved through `resolveModelId`. A provided `speed`
   * is not applied and only produces an "unsupported" warning. Entries of
   * `providerOptions.elevenlabs` are spread last and override the request
   * fields set here.
   *
   * @returns The concatenated audio bytes, collected warnings, and response
   *   metadata with the current timestamp and the unresolved model ID.
   */
  async doGenerate(options: SpeechModelV3CallOptions) {
    const warnings: SharedV3Warning[] = [];
    if (options.speed !== undefined) {
      warnings.push({
        type: "unsupported",
        feature: "speed",
        details: "Speed control requires voice settings adjustment",
      });
    }

    const request = {
      text: options.text,
      modelId: resolveModelId(this.modelId),
      outputFormat: "mp3_44100_128",
      ...(options.providerOptions?.elevenlabs ?? {}),
    } as Parameters<typeof this.client.textToSpeech.convert>[1];

    const stream = await this.client.textToSpeech.convert(
      resolveVoiceId(options.voice ?? "rachel"),
      request,
    );

    // Read the stream to completion, tracking the byte count as we go.
    const reader = stream.getReader();
    const parts: Uint8Array[] = [];
    let byteCount = 0;
    while (true) {
      const step = await reader.read();
      if (step.done) break;
      parts.push(step.value);
      byteCount += step.value.length;
    }

    // Copy every part into one contiguous buffer.
    const bytes = new Uint8Array(byteCount);
    let cursor = 0;
    for (const part of parts) {
      bytes.set(part, cursor);
      cursor += part.length;
    }

    return {
      audio: bytes,
      warnings,
      response: {
        timestamp: new Date(),
        modelId: this.modelId,
        headers: undefined,
      },
    };
  }
}
160
+
161
/** Settings accepted by `createElevenLabs`. */
export interface ElevenLabsProviderSettings {
  /** API key; when omitted, `process.env.ELEVENLABS_API_KEY` is used. */
  apiKey?: string;
}

/**
 * Provider exposing ElevenLabs speech and music models on top of the
 * standard `ProviderV3` surface.
 */
export interface ElevenLabsProvider extends ProviderV3 {
  /** Returns a speech model; `modelId` is optional. */
  speechModel(modelId?: string): SpeechModelV3;
  /** Returns a music model; `modelId` is optional. */
  musicModel(modelId?: string): MusicModelV3;
}
169
+
170
/**
 * Builds an ElevenLabs provider around a single shared client.
 *
 * @param settings - Optional settings; `apiKey` falls back to
 *   `process.env.ELEVENLABS_API_KEY`.
 * @returns A provider whose `speechModel` defaults to "eleven_turbo_v2" and
 *   whose `musicModel` defaults to "music_v1". Language, embedding and image
 *   models are not offered and throw `NoSuchModelError`.
 * @throws Error when no API key is available.
 */
export function createElevenLabs(
  settings: ElevenLabsProviderSettings = {},
): ElevenLabsProvider {
  const apiKey = settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
  if (!apiKey) {
    throw new Error("ELEVENLABS_API_KEY not set");
  }
  const client = new ElevenLabsClient({ apiKey });

  // Shared rejection for the model kinds this provider does not offer.
  const rejectModel = (
    modelType: "languageModel" | "embeddingModel" | "imageModel",
    modelId: string,
  ): never => {
    throw new NoSuchModelError({ modelId, modelType });
  };

  const provider: ElevenLabsProvider = {
    specificationVersion: "v3",
    speechModel: (modelId = "eleven_turbo_v2") =>
      new ElevenLabsSpeechModel(modelId, client),
    musicModel: (modelId = "music_v1") =>
      new ElevenLabsMusicModel(modelId, client),
    languageModel: (modelId: string): LanguageModelV3 =>
      rejectModel("languageModel", modelId),
    embeddingModel: (modelId: string): EmbeddingModelV3 =>
      rejectModel("embeddingModel", modelId),
    imageModel: (modelId: string): ImageModelV3 =>
      rejectModel("imageModel", modelId),
  };
  return provider;
}
198
+
199
// The default provider is created on first use rather than at module load.
// Calling `createElevenLabs()` eagerly made importing this module throw
// whenever ELEVENLABS_API_KEY was unset, even for callers that only wanted
// `createElevenLabs({ apiKey })`, `generateMusic({ apiKey })` or `VOICES`.
let cachedDefaultProvider: ElevenLabsProvider | undefined;

/** Returns the shared default provider, creating it on the first call. */
function getDefaultProvider(): ElevenLabsProvider {
  if (cachedDefaultProvider === undefined) {
    cachedDefaultProvider = createElevenLabs();
  }
  return cachedDefaultProvider;
}

/**
 * Default provider configured from `process.env.ELEVENLABS_API_KEY`.
 * Every method delegates to a lazily created `createElevenLabs()` instance,
 * so a missing key is reported on first use instead of at import time.
 */
export const elevenlabs_provider: ElevenLabsProvider = {
  specificationVersion: "v3",
  speechModel: (modelId?: string) => getDefaultProvider().speechModel(modelId),
  musicModel: (modelId?: string) => getDefaultProvider().musicModel(modelId),
  languageModel: (modelId: string) =>
    getDefaultProvider().languageModel(modelId),
  embeddingModel: (modelId: string) =>
    getDefaultProvider().embeddingModel(modelId),
  imageModel: (modelId: string) => getDefaultProvider().imageModel(modelId),
};
export { elevenlabs_provider as elevenlabs, VOICES };
201
+
202
/** Input accepted by `generateMusic`. */
export interface GenerateMusicOptions {
  /** Text prompt forwarded to the music endpoint. */
  prompt: string;
  /** Optional length in seconds; converted to milliseconds for the request. */
  durationSeconds?: number;
  /** API key; when omitted, `process.env.ELEVENLABS_API_KEY` is used. */
  apiKey?: string;
}

/** Result returned by `generateMusic`. */
export interface GenerateMusicResult {
  audio: {
    /** The generated audio bytes. */
    uint8Array: Uint8Array;
    /** MIME type reported for the audio bytes. */
    mimeType: string;
  };
}
214
+
215
/**
 * Generates music with the "music_v1" model using a one-off client.
 *
 * `durationSeconds` is converted to milliseconds and rounded to a whole
 * number, because a fractional duration can otherwise produce a non-integer
 * millisecond count. A falsy duration (including 0) leaves the length unset.
 *
 * @param options - Prompt, optional duration in seconds, and optional API key
 *   (falls back to `process.env.ELEVENLABS_API_KEY`).
 * @returns The buffered audio bytes, reported with MIME type "audio/mpeg".
 * @throws Error when no API key is available.
 */
export async function generateMusic(
  options: GenerateMusicOptions,
): Promise<GenerateMusicResult> {
  const { prompt, durationSeconds, apiKey } = options;
  const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
  if (!key) {
    throw new Error("ELEVENLABS_API_KEY not set");
  }

  const client = new ElevenLabsClient({ apiKey: key });

  const audio = await client.music.compose({
    prompt,
    musicLengthMs: durationSeconds
      ? Math.round(durationSeconds * 1000)
      : undefined,
    modelId: "music_v1",
  });

  // Drain the stream, keeping each chunk so the total size is known
  // before allocating the result buffer.
  const reader = audio.getReader();
  const chunks: Uint8Array[] = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    chunks.push(value);
  }

  const totalLength = chunks.reduce((acc, c) => acc + c.length, 0);
  const result = new Uint8Array(totalLength);
  let offset = 0;
  for (const chunk of chunks) {
    result.set(chunk, offset);
    offset += chunk.length;
  }

  return {
    audio: {
      uint8Array: result,
      mimeType: "audio/mpeg",
    },
  };
}