vargai 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -0
- package/.env.example +27 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +125 -0
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +78 -0
- package/SKILLS.md +173 -0
- package/STRUCTURE.md +92 -0
- package/biome.json +34 -0
- package/bun.lock +1254 -0
- package/commitlint.config.js +22 -0
- package/docs/plan.md +66 -0
- package/docs/todo.md +14 -0
- package/docs/varg-sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +69 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +772 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +9 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +112 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/src/definitions/actions/transcribe.ts +200 -0
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.json +45 -0
package/src/ai-sdk/providers/fal-provider.ts
@@ -0,0 +1,512 @@
+import {
+  type EmbeddingModelV3,
+  type ImageModelV3,
+  type ImageModelV3CallOptions,
+  type ImageModelV3File,
+  type LanguageModelV3,
+  NoSuchModelError,
+  type ProviderV3,
+  type SharedV3Warning,
+  type TranscriptionModelV3,
+  type TranscriptionModelV3CallOptions,
+} from "@ai-sdk/provider";
+import { fal } from "@fal-ai/client";
+import type { VideoModelV3, VideoModelV3CallOptions } from "../video-model";
+
+const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
+  "kling-v2.5": {
+    t2v: "fal-ai/kling-video/v2.5-turbo/pro/text-to-video",
+    i2v: "fal-ai/kling-video/v2.5-turbo/pro/image-to-video",
+  },
+  "kling-v2.1": {
+    t2v: "fal-ai/kling-video/v2.1/pro/text-to-video",
+    i2v: "fal-ai/kling-video/v2.1/pro/image-to-video",
+  },
+  "kling-v2": {
+    t2v: "fal-ai/kling-video/v2/master/text-to-video",
+    i2v: "fal-ai/kling-video/v2/master/image-to-video",
+  },
+  "wan-2.5": {
+    t2v: "fal-ai/wan-25/text-to-video",
+    i2v: "fal-ai/wan-25/image-to-video",
+  },
+  "wan-2.5-preview": {
+    t2v: "fal-ai/wan-25-preview/text-to-video",
+    i2v: "fal-ai/wan-25-preview/image-to-video",
+  },
+  minimax: {
+    t2v: "fal-ai/minimax-video/text-to-video",
+    i2v: "fal-ai/minimax-video/image-to-video",
+  },
+};
+
+// lipsync models - video + audio input
+const LIPSYNC_MODELS: Record<string, string> = {
+  "sync-v2": "fal-ai/sync-lipsync",
+  "sync-v2-pro": "fal-ai/sync-lipsync/v2",
+  lipsync: "fal-ai/sync-lipsync",
+};
+
+const IMAGE_MODELS: Record<string, string> = {
+  "flux-pro": "fal-ai/flux-pro/v1.1",
+  "flux-dev": "fal-ai/flux/dev",
+  "flux-schnell": "fal-ai/flux/schnell",
+  "recraft-v3": "fal-ai/recraft/v3/text-to-image",
+  "nano-banana-pro": "fal-ai/nano-banana-pro",
+  "nano-banana-pro/edit": "fal-ai/nano-banana-pro/edit",
+  "seedream-v4.5/edit": "fal-ai/bytedance/seedream/v4.5/edit",
+};
+
+// Models that use image_size instead of aspect_ratio
+const IMAGE_SIZE_MODELS = new Set(["seedream-v4.5/edit"]);
+
+// Map aspect ratio strings to image_size enum values
+const ASPECT_RATIO_TO_IMAGE_SIZE: Record<string, string> = {
+  "1:1": "square",
+  "4:3": "landscape_4_3",
+  "3:4": "portrait_4_3",
+  "16:9": "landscape_16_9",
+  "9:16": "portrait_16_9",
+};
+
+const TRANSCRIPTION_MODELS: Record<string, string> = {
+  whisper: "fal-ai/whisper",
+  "whisper-large-v3": "fal-ai/whisper",
+};
+
+function getMediaType(file: ImageModelV3File): string | undefined {
+  if (file.type === "file") return file.mediaType;
+  const ext = file.url.split(".").pop()?.toLowerCase();
+  const mimeTypes: Record<string, string> = {
+    png: "image/png",
+    jpg: "image/jpeg",
+    jpeg: "image/jpeg",
+    mp3: "audio/mpeg",
+    wav: "audio/wav",
+    mp4: "video/mp4",
+  };
+  return mimeTypes[ext ?? ""];
+}
+
+function detectImageType(bytes: Uint8Array): string | undefined {
+  // Check magic bytes for common image formats
+  if (
+    bytes[0] === 0x89 &&
+    bytes[1] === 0x50 &&
+    bytes[2] === 0x4e &&
+    bytes[3] === 0x47
+  ) {
+    return "image/png";
+  }
+  if (bytes[0] === 0xff && bytes[1] === 0xd8 && bytes[2] === 0xff) {
+    return "image/jpeg";
+  }
+  if (bytes[0] === 0x47 && bytes[1] === 0x49 && bytes[2] === 0x46) {
+    return "image/gif";
+  }
+  if (
+    bytes[0] === 0x52 &&
+    bytes[1] === 0x49 &&
+    bytes[2] === 0x46 &&
+    bytes[3] === 0x46
+  ) {
+    return "image/webp";
+  }
+  return undefined;
+}
+
+async function fileToUrl(file: ImageModelV3File): Promise<string> {
+  if (file.type === "url") return file.url;
+  const data = file.data;
+  const bytes =
+    typeof data === "string"
+      ? Uint8Array.from(atob(data), (c) => c.charCodeAt(0))
+      : data;
+  // Use mediaType from file if available, otherwise detect from bytes or default to png
+  const mediaType = file.mediaType ?? detectImageType(bytes) ?? "image/png";
+  return fal.storage.upload(new Blob([bytes], { type: mediaType }));
+}
+
+async function uploadBuffer(buffer: ArrayBuffer): Promise<string> {
+  return fal.storage.upload(new Blob([buffer]));
+}
+
+class FalVideoModel implements VideoModelV3 {
+  readonly specificationVersion = "v3" as const;
+  readonly provider = "fal";
+  readonly modelId: string;
+  readonly maxVideosPerCall = 1;
+
+  constructor(modelId: string) {
+    this.modelId = modelId;
+  }
+
+  async doGenerate(options: VideoModelV3CallOptions) {
+    const {
+      prompt,
+      duration,
+      aspectRatio,
+      files,
+      providerOptions,
+      abortSignal,
+    } = options;
+    const warnings: SharedV3Warning[] = [];
+
+    const _hasVideoInput = files?.some((f) =>
+      getMediaType(f)?.startsWith("video/"),
+    );
+    const hasImageInput = files?.some((f) =>
+      getMediaType(f)?.startsWith("image/"),
+    );
+    const _hasAudioInput = files?.some((f) =>
+      getMediaType(f)?.startsWith("audio/"),
+    );
+
+    const isLipsync = LIPSYNC_MODELS[this.modelId] !== undefined;
+    const endpoint = isLipsync
+      ? this.resolveLipsyncEndpoint()
+      : this.resolveEndpoint(hasImageInput ?? false);
+
+    const input: Record<string, unknown> = {
+      ...(providerOptions?.fal ?? {}),
+    };
+
+    if (isLipsync) {
+      const videoFile = files?.find((f) =>
+        getMediaType(f)?.startsWith("video/"),
+      );
+      const audioFile = files?.find((f) =>
+        getMediaType(f)?.startsWith("audio/"),
+      );
+
+      if (videoFile) {
+        input.video_url = await fileToUrl(videoFile);
+      }
+      if (audioFile) {
+        input.audio_url = await fileToUrl(audioFile);
+      }
+    } else {
+      input.prompt = prompt;
+      input.duration = duration ?? 5;
+
+      if (hasImageInput && files) {
+        const imageFile = files.find((f) =>
+          getMediaType(f)?.startsWith("image/"),
+        );
+        if (imageFile) {
+          input.image_url = await fileToUrl(imageFile);
+        }
+      } else {
+        input.aspect_ratio = aspectRatio ?? "16:9";
+      }
+
+      const audioFile = files?.find((f) =>
+        getMediaType(f)?.startsWith("audio/"),
+      );
+      if (audioFile) {
+        input.audio_url = await fileToUrl(audioFile);
+      }
+    }
+
+    if (options.seed !== undefined) {
+      warnings.push({
+        type: "unsupported",
+        feature: "seed",
+        details: "Seed is not supported by this model",
+      });
+    }
+
+    if (options.resolution !== undefined) {
+      warnings.push({
+        type: "unsupported",
+        feature: "resolution",
+        details: "Use aspectRatio instead",
+      });
+    }
+
+    if (options.fps !== undefined) {
+      warnings.push({
+        type: "unsupported",
+        feature: "fps",
+        details: "FPS is not configurable for this model",
+      });
+    }
+
+    const result = await fal.subscribe(endpoint, {
+      input,
+      logs: true,
+    });
+
+    const data = result.data as { video?: { url?: string } };
+    const videoUrl = data?.video?.url;
+
+    if (!videoUrl) {
+      throw new Error("No video URL in fal response");
+    }
+
+    const videoResponse = await fetch(videoUrl, { signal: abortSignal });
+    const videoBuffer = await videoResponse.arrayBuffer();
+
+    return {
+      videos: [new Uint8Array(videoBuffer)],
+      warnings,
+      response: {
+        timestamp: new Date(),
+        modelId: this.modelId,
+        headers: undefined,
+      },
+    };
+  }
+
+  private resolveEndpoint(hasImage: boolean): string {
+    if (this.modelId.startsWith("raw:")) {
+      return this.modelId.slice(4);
+    }
+
+    const mapping = VIDEO_MODELS[this.modelId];
+    if (mapping) {
+      return hasImage ? mapping.i2v : mapping.t2v;
+    }
+
+    return this.modelId;
+  }
+
+  private resolveLipsyncEndpoint(): string {
+    if (this.modelId.startsWith("raw:")) {
+      return this.modelId.slice(4);
+    }
+
+    return LIPSYNC_MODELS[this.modelId] ?? this.modelId;
+  }
+}
+
+class FalImageModel implements ImageModelV3 {
+  readonly specificationVersion = "v3" as const;
+  readonly provider = "fal";
+  readonly modelId: string;
+  readonly maxImagesPerCall = 4;
+
+  constructor(modelId: string) {
+    this.modelId = modelId;
+  }
+
+  async doGenerate(options: ImageModelV3CallOptions) {
+    const {
+      prompt,
+      n,
+      size,
+      aspectRatio,
+      seed,
+      files,
+      providerOptions,
+      abortSignal,
+    } = options;
+    const warnings: SharedV3Warning[] = [];
+
+    const input: Record<string, unknown> = {
+      prompt,
+      num_images: n ?? 1,
+      ...(providerOptions?.fal ?? {}),
+    };
+
+    const usesImageSize = IMAGE_SIZE_MODELS.has(this.modelId);
+
+    if (size) {
+      // size format is "{width}x{height}"
+      const [width, height] = size.split("x").map(Number);
+      if (usesImageSize) {
+        input.image_size = { width, height };
+      } else {
+        input.image_size = size;
+      }
+    }
+
+    if (aspectRatio) {
+      if (usesImageSize) {
+        // Convert aspect ratio to image_size enum for models that require it
+        // Only set if size wasn't already provided
+        if (!input.image_size) {
+          const imageSizeEnum = ASPECT_RATIO_TO_IMAGE_SIZE[aspectRatio];
+          if (imageSizeEnum) {
+            input.image_size = imageSizeEnum;
+          } else {
+            warnings.push({
+              type: "unsupported",
+              feature: "aspectRatio",
+              details: `Aspect ratio "${aspectRatio}" not supported, use one of: ${Object.keys(ASPECT_RATIO_TO_IMAGE_SIZE).join(", ")}`,
+            });
+          }
+        }
+      } else {
+        input.aspect_ratio = aspectRatio;
+      }
+    }
+
+    if (seed !== undefined) {
+      input.seed = seed;
+    }
+
+    const hasFiles = files && files.length > 0;
+    if (hasFiles) {
+      input.image_urls = await Promise.all(files.map((f) => fileToUrl(f)));
+    }
+
+    const hasImageUrls =
+      hasFiles ||
+      !!(providerOptions?.fal as Record<string, unknown>)?.image_urls;
+    if (hasImageUrls) {
+      if (!files) {
+        throw new Error("No files provided");
+      }
+    }
+
+    const finalEndpoint = this.resolveEndpoint();
+
+    // Debug: log the input being sent
+    if (IMAGE_SIZE_MODELS.has(this.modelId)) {
+      console.log(
+        "[fal-provider] seedream input:",
+        JSON.stringify(input, null, 2),
+      );
+    }
+
+    const result = await fal.subscribe(finalEndpoint, {
+      input,
+      logs: true,
+    });
+
+    const data = result.data as { images?: Array<{ url?: string }> };
+    const images = data?.images ?? [];
+
+    if (images.length === 0) {
+      throw new Error("No images in fal response");
+    }
+
+    const imageBuffers = await Promise.all(
+      images.map(async (img) => {
+        const response = await fetch(img.url!, { signal: abortSignal });
+        return new Uint8Array(await response.arrayBuffer());
+      }),
+    );
+
+    return {
+      images: imageBuffers,
+      warnings,
+      response: {
+        timestamp: new Date(),
+        modelId: this.modelId,
+        headers: undefined,
+      },
+    };
+  }
+
+  private resolveEndpoint(): string {
+    if (this.modelId.startsWith("raw:")) {
+      return this.modelId.slice(4);
+    }
+
+    return IMAGE_MODELS[this.modelId] ?? this.modelId;
+  }
+}
+
+class FalTranscriptionModel implements TranscriptionModelV3 {
+  readonly specificationVersion = "v3" as const;
+  readonly provider = "fal";
+  readonly modelId: string;
+
+  constructor(modelId: string) {
+    this.modelId = modelId;
+  }
+
+  async doGenerate(options: TranscriptionModelV3CallOptions) {
+    const { audio, providerOptions } = options;
+    const warnings: SharedV3Warning[] = [];
+
+    const endpoint = TRANSCRIPTION_MODELS[this.modelId] ?? this.modelId;
+
+    const audioBytes =
+      typeof audio === "string"
+        ? Uint8Array.from(atob(audio), (c) => c.charCodeAt(0))
+        : audio;
+
+    const audioUrl = await uploadBuffer(audioBytes.buffer as ArrayBuffer);
+
+    const input: Record<string, unknown> = {
+      audio_url: audioUrl,
+      task: "transcribe",
+      ...(providerOptions?.fal ?? {}),
+    };
+
+    const result = await fal.subscribe(endpoint, {
+      input,
+      logs: true,
+    });
+
+    const data = result.data as {
+      text?: string;
+      chunks?: Array<{ timestamp: [number, number]; text: string }>;
+      language?: string;
+    };
+
+    return {
+      text: data?.text ?? "",
+      segments: (data?.chunks ?? []).map((chunk) => ({
+        text: chunk.text,
+        startSecond: chunk.timestamp[0],
+        endSecond: chunk.timestamp[1],
+      })),
+      language: data?.language,
+      durationInSeconds: undefined,
+      warnings,
+      response: {
+        timestamp: new Date(),
+        modelId: this.modelId,
+        headers: undefined,
+      },
+    };
+  }
+}
+
+export interface FalProviderSettings {
+  apiKey?: string;
+}
+
+export interface FalProvider extends ProviderV3 {
+  videoModel(modelId: string): VideoModelV3;
+  imageModel(modelId: string): ImageModelV3;
+  transcriptionModel(modelId: string): TranscriptionModelV3;
+}
+
+export function createFal(settings: FalProviderSettings = {}): FalProvider {
+  if (settings.apiKey) {
+    fal.config({ credentials: settings.apiKey });
+  }
+
+  return {
+    specificationVersion: "v3",
+    videoModel(modelId: string): FalVideoModel {
+      return new FalVideoModel(modelId);
+    },
+    imageModel(modelId: string): FalImageModel {
+      return new FalImageModel(modelId);
+    },
+    transcriptionModel(modelId: string): FalTranscriptionModel {
+      return new FalTranscriptionModel(modelId);
+    },
+    languageModel(modelId: string): LanguageModelV3 {
+      throw new NoSuchModelError({
+        modelId,
+        modelType: "languageModel",
+      });
+    },
+    embeddingModel(modelId: string): EmbeddingModelV3 {
+      throw new NoSuchModelError({
+        modelId,
+        modelType: "embeddingModel",
+      });
+    },
+  };
+}
+
+export const fal_provider = createFal();
+export { fal_provider as fal };