@ai-sdk/fal 2.0.9 → 2.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1 -1
- package/dist/index.mjs.map +1 -1
- package/docs/10-fal.mdx +320 -0
- package/package.json +8 -3
- package/src/fal-api-types.ts +189 -0
- package/src/fal-config.ts +9 -0
- package/src/fal-error.test.ts +34 -0
- package/src/fal-error.ts +16 -0
- package/src/fal-image-model.test.ts +930 -0
- package/src/fal-image-model.ts +367 -0
- package/src/fal-image-options.ts +129 -0
- package/src/fal-image-settings.ts +71 -0
- package/src/fal-provider.test.ts +57 -0
- package/src/fal-provider.ts +183 -0
- package/src/fal-speech-model.test.ts +128 -0
- package/src/fal-speech-model.ts +156 -0
- package/src/fal-speech-settings.ts +10 -0
- package/src/fal-transcription-model.test.ts +181 -0
- package/src/fal-transcription-model.ts +270 -0
- package/src/fal-transcription-options.ts +1 -0
- package/src/index.ts +4 -0
- package/src/transcript-test.mp3 +0 -0
- package/src/version.ts +6 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AISDKError,
|
|
3
|
+
TranscriptionModelV3,
|
|
4
|
+
SharedV3Warning,
|
|
5
|
+
} from '@ai-sdk/provider';
|
|
6
|
+
import {
|
|
7
|
+
combineHeaders,
|
|
8
|
+
convertUint8ArrayToBase64,
|
|
9
|
+
createJsonErrorResponseHandler,
|
|
10
|
+
createJsonResponseHandler,
|
|
11
|
+
delay,
|
|
12
|
+
getFromApi,
|
|
13
|
+
parseProviderOptions,
|
|
14
|
+
postJsonToApi,
|
|
15
|
+
} from '@ai-sdk/provider-utils';
|
|
16
|
+
import { z } from 'zod/v4';
|
|
17
|
+
import { FalConfig } from './fal-config';
|
|
18
|
+
import { falErrorDataSchema, falFailedResponseHandler } from './fal-error';
|
|
19
|
+
import { FalTranscriptionModelId } from './fal-transcription-options';
|
|
20
|
+
import { FalTranscriptionAPITypes } from './fal-api-types';
|
|
21
|
+
|
|
22
|
+
// https://fal.ai/models/fal-ai/whisper/api?platform=http
|
|
23
|
+
const falProviderOptionsSchema = z.object({
|
|
24
|
+
/**
|
|
25
|
+
* Language of the audio file. If set to null, the language will be automatically detected. Defaults to null.
|
|
26
|
+
*
|
|
27
|
+
* If translate is selected as the task, the audio will be translated to English, regardless of the language selected.
|
|
28
|
+
*/
|
|
29
|
+
language: z
|
|
30
|
+
.union([z.enum(['en']), z.string()])
|
|
31
|
+
.nullish()
|
|
32
|
+
.default('en'),
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Whether to diarize the audio file. Defaults to true.
|
|
36
|
+
*/
|
|
37
|
+
diarize: z.boolean().nullish().default(true),
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Level of the chunks to return. Either segment or word. Default value: "segment"
|
|
41
|
+
*/
|
|
42
|
+
chunkLevel: z.enum(['segment', 'word']).nullish().default('segment'),
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Version of the model to use. All of the models are the Whisper large variant. Default value: "3"
|
|
46
|
+
*/
|
|
47
|
+
version: z.enum(['3']).nullish().default('3'),
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Default value: 64
|
|
51
|
+
*/
|
|
52
|
+
batchSize: z.number().nullish().default(64),
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Number of speakers in the audio file. Defaults to null. If not provided, the number of speakers will be automatically detected.
|
|
56
|
+
*/
|
|
57
|
+
numSpeakers: z.number().nullable().nullish(),
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
/**
 * Options accepted under `providerOptions.fal` for transcription calls.
 * Derived from the zod schema so the exported type always matches what
 * `parseProviderOptions` validates at runtime.
 */
export type FalTranscriptionCallOptions = z.infer<
  typeof falProviderOptionsSchema
>;
|
|
63
|
+
|
|
64
|
+
/**
 * Configuration for {@link FalTranscriptionModel}, extending the shared
 * provider config with test-only hooks.
 */
interface FalTranscriptionModelConfig extends FalConfig {
  _internal?: {
    // Injectable clock so tests can pin the `response.timestamp` value.
    currentDate?: () => Date;
  };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Transcription model backed by fal.ai's queue API.
 *
 * `doGenerate` submits the audio as a data URL to the queue endpoint, then
 * polls the request-status endpoint until the transcription completes, times
 * out after 60 seconds, or fails with a real error.
 */
export class FalTranscriptionModel implements TranscriptionModelV3 {
  readonly specificationVersion = 'v3';

  get provider(): string {
    return this.config.provider;
  }

  constructor(
    readonly modelId: FalTranscriptionModelId,
    private readonly config: FalTranscriptionModelConfig,
  ) {}

  /**
   * Builds the JSON request body (minus `audio_url`, added by the caller)
   * from the parsed `providerOptions.fal` options.
   */
  private async getArgs({
    providerOptions,
  }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
    const warnings: SharedV3Warning[] = [];

    // Parse provider options
    const falOptions = await parseProviderOptions({
      provider: 'fal',
      providerOptions,
      schema: falProviderOptionsSchema,
    });

    // Create form data with base fields.
    // NOTE(review): `chunk_level` starts as 'word' here, but the schema
    // defaults `chunkLevel` to 'segment', so whenever falOptions parses
    // successfully the branch below overwrites this to 'segment' unless the
    // caller asked for 'word' explicitly — confirm this is intended.
    const body: Omit<FalTranscriptionAPITypes, 'audio_url'> = {
      task: 'transcribe',
      diarize: true,
      chunk_level: 'word',
    };

    // Add provider-specific options
    if (falOptions) {
      // `as never` force-fits the parsed language string into the API type's
      // narrower language field; the cast bypasses checking entirely.
      body.language = falOptions.language as never;
      body.version = falOptions.version ?? undefined;
      body.batch_size = falOptions.batchSize ?? undefined;
      body.num_speakers = falOptions.numSpeakers ?? undefined;

      if (typeof falOptions.diarize === 'boolean') {
        body.diarize = falOptions.diarize;
      }

      if (falOptions.chunkLevel) {
        body.chunk_level = falOptions.chunkLevel;
      }
    }

    return {
      body,
      warnings,
    };
  }

  /**
   * Submits the audio to the fal queue, polls until the result is ready
   * (1s interval, 60s timeout), and maps the response to the AI SDK
   * transcription result shape.
   */
  async doGenerate(
    options: Parameters<TranscriptionModelV3['doGenerate']>[0],
  ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const { body, warnings } = await this.getArgs(options);

    const base64Audio =
      typeof options.audio === 'string'
        ? options.audio
        : convertUint8ArrayToBase64(options.audio);

    // The audio is inlined as a base64 data URL rather than uploaded separately.
    const audioUrl = `data:${options.mediaType};base64,${base64Audio}`;

    // Enqueue the transcription job; the response only carries a request id.
    const { value: queueResponse } = await postJsonToApi({
      url: this.config.url({
        path: `https://queue.fal.run/fal-ai/${this.modelId}`,
        modelId: this.modelId,
      }),
      headers: combineHeaders(this.config.headers(), options.headers),
      body: {
        ...body,
        audio_url: audioUrl,
      },
      failedResponseHandler: falFailedResponseHandler,
      successfulResponseHandler:
        createJsonResponseHandler(falJobResponseSchema),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    // Poll for completion with timeout
    const startTime = Date.now();
    const timeoutMs = 60000; // 60 seconds timeout
    const pollIntervalMs = 1000; // 1 second interval

    let response;
    let responseHeaders;
    let rawResponse;

    while (true) {
      try {
        const {
          value: statusResponse,
          responseHeaders: statusHeaders,
          rawValue: statusRawResponse,
        } = await getFromApi({
          url: this.config.url({
            path: `https://queue.fal.run/fal-ai/${this.modelId}/requests/${queueResponse.request_id}`,
            modelId: this.modelId,
          }),
          headers: combineHeaders(this.config.headers(), options.headers),
          // Custom failure handler: fal reports "still in progress" through an
          // error-status response, so that case is turned into a sentinel
          // Error whose exact message the catch block below matches on.
          // Do not change the message string — it is control flow.
          failedResponseHandler: async ({
            requestBodyValues,
            response,
            url,
          }) => {
            const clone = response.clone();
            const body = (await clone.json()) as { detail: string };

            if (body.detail === 'Request is still in progress') {
              // This is not an error, just a status update that the request is still processing
              // Continue polling by returning a special error that signals to continue
              return {
                value: new Error('Request is still in progress'),
                rawValue: body,
                responseHeaders: {},
              };
            }

            // Any other failure is a genuine API error.
            return createJsonErrorResponseHandler({
              errorSchema: falErrorDataSchema,
              errorToMessage: data => data.error.message,
            })({ requestBodyValues, response, url });
          },
          successfulResponseHandler: createJsonResponseHandler(
            falTranscriptionResponseSchema,
          ),
          abortSignal: options.abortSignal,
          fetch: this.config.fetch,
        });

        // Success: capture the completed result and exit the polling loop.
        response = statusResponse;
        responseHeaders = statusHeaders;
        rawResponse = statusRawResponse;
        break;
      } catch (error) {
        // If the error message indicates the request is still in progress, ignore it and continue polling
        if (
          error instanceof Error &&
          error.message === 'Request is still in progress'
        ) {
          // Continue with the polling loop
        } else {
          // Re-throw any other errors
          throw error;
        }
      }

      // Check if we've exceeded the timeout
      if (Date.now() - startTime > timeoutMs) {
        throw new AISDKError({
          message: 'Transcription request timed out after 60 seconds',
          name: 'TranscriptionRequestTimedOut',
          cause: response,
        });
      }

      // Wait before polling again
      await delay(pollIntervalMs);
    }

    return {
      text: response.text,
      // Chunks may be absent; missing timestamp entries fall back to 0.
      segments:
        response.chunks?.map(chunk => ({
          text: chunk.text,
          startSecond: chunk.timestamp?.at(0) ?? 0,
          endSecond: chunk.timestamp?.at(1) ?? 0,
        })) ?? [],
      language: response.inferred_languages?.at(0) ?? undefined,
      // Duration is approximated by the end timestamp of the last chunk.
      durationInSeconds: response.chunks?.at(-1)?.timestamp?.at(1) ?? undefined,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse,
      },
    };
  }
}
|
|
254
|
+
|
|
255
|
+
const falJobResponseSchema = z.object({
|
|
256
|
+
request_id: z.string().nullish(),
|
|
257
|
+
});
|
|
258
|
+
|
|
259
|
+
const falTranscriptionResponseSchema = z.object({
|
|
260
|
+
text: z.string(),
|
|
261
|
+
chunks: z
|
|
262
|
+
.array(
|
|
263
|
+
z.object({
|
|
264
|
+
text: z.string(),
|
|
265
|
+
timestamp: z.array(z.number()).nullish(),
|
|
266
|
+
}),
|
|
267
|
+
)
|
|
268
|
+
.nullish(),
|
|
269
|
+
inferred_languages: z.array(z.string()).nullish(),
|
|
270
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Known fal transcription model ids. The `(string & {})` member keeps the
 * type open to arbitrary model id strings while preserving editor
 * autocomplete for the known literals.
 */
export type FalTranscriptionModelId = 'whisper' | 'wizper' | (string & {});
|
package/src/index.ts
ADDED
|
Binary file
|