@ai-sdk/prodia 2.0.0-beta.4 → 2.0.0-beta.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
+ import {
2
+ UnsupportedFunctionalityError,
3
+ type LanguageModelV4,
4
+ type LanguageModelV4CallOptions,
5
+ type LanguageModelV4Content,
6
+ type LanguageModelV4StreamPart,
7
+ type SharedV4Warning,
8
+ } from '@ai-sdk/provider';
9
+ import {
10
+ combineHeaders,
11
+ isCustomReasoning,
12
+ convertBase64ToUint8Array,
13
+ detectMediaType,
14
+ generateId,
15
+ getTopLevelMediaType,
16
+ isFullMediaType,
17
+ parseJSON,
18
+ parseProviderOptions,
19
+ postFormDataToApi,
20
+ resolve,
21
+ serializeModelOptions,
22
+ WORKFLOW_SERIALIZE,
23
+ WORKFLOW_DESERIALIZE,
24
+ zodSchema,
25
+ } from '@ai-sdk/provider-utils';
26
+ import {
27
+ buildProdiaProviderMetadata,
28
+ parseMultipart,
29
+ prodiaFailedResponseHandler,
30
+ prodiaJobResultSchema,
31
+ type ProdiaJobResult,
32
+ type ProdiaModelConfig,
33
+ } from './prodia-api';
34
+ import { prodiaLanguageModelOptionsSchema } from './prodia-language-model-options';
35
+ import type { ProdiaLanguageModelId } from './prodia-language-model-settings';
36
+
37
/**
 * Language-model adapter that runs a single Prodia job per call.
 *
 * The job prompt is built from the text of the last user message (prefixed by
 * the last system message, if any). The first image file part of that same
 * user message, when present, is uploaded as the job input. Text and image
 * parts of the multipart response are returned as content.
 */
export class ProdiaLanguageModel implements LanguageModelV4 {
  readonly specificationVersion = 'v4';
  readonly supportedUrls = {};

  /** Provider identifier taken from the model config. */
  get provider(): string {
    return this.config.provider;
  }

  /** Serializes the model id and config via `serializeModelOptions`. */
  static [WORKFLOW_SERIALIZE](model: ProdiaLanguageModel) {
    return serializeModelOptions({
      modelId: model.modelId,
      config: model.config,
    });
  }

  /** Rebuilds a model instance from a previously serialized id and config. */
  static [WORKFLOW_DESERIALIZE](options: {
    modelId: ProdiaLanguageModelId;
    config: ProdiaModelConfig;
  }) {
    return new ProdiaLanguageModel(options.modelId, options.config);
  }

  constructor(
    readonly modelId: ProdiaLanguageModelId,
    private readonly config: ProdiaModelConfig,
  ) {}

  /**
   * Submits one job to `${baseURL}/job?price=true` as multipart form-data and
   * maps the multipart response to language-model content.
   *
   * Token usage is never reported (all counters are `undefined`) and the
   * finish reason is always `stop`.
   *
   * @throws UnsupportedFunctionalityError when the selected image part is a
   * provider reference or a text file part.
   * @throws Error when an image referenced by URL cannot be downloaded.
   */
  async doGenerate(options: LanguageModelV4CallOptions) {
    const warnings = ProdiaLanguageModel.collectWarnings(options);

    const prodiaOptions = await parseProviderOptions({
      provider: 'prodia',
      providerOptions: options.providerOptions,
      schema: prodiaLanguageModelOptionsSchema,
    });

    const prompt = ProdiaLanguageModel.buildPrompt(options.prompt);
    const inputImage = await this.loadInputImage(options);

    const jobConfig: Record<string, unknown> = {
      prompt,
      include_messages: true,
    };
    if (prodiaOptions?.aspectRatio !== undefined) {
      jobConfig.aspect_ratio = prodiaOptions.aspectRatio;
    }

    const body = {
      type: this.modelId,
      config: jobConfig,
    };

    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const combinedHeaders = combineHeaders(
      this.config.headers ? await resolve(this.config.headers) : undefined,
      options.headers,
    );

    // Always use multipart form-data since img2img requires image input
    const formData = new FormData();
    formData.append(
      'job',
      new Blob([JSON.stringify(body)], { type: 'application/json' }),
      'job.json',
    );
    if (inputImage !== undefined) {
      formData.append(
        'input',
        new Blob([inputImage.bytes], { type: inputImage.mediaType }),
        'input' + ProdiaLanguageModel.fileExtensionFor(inputImage.mediaType),
      );
    }

    const { value: multipartResult, responseHeaders } = await postFormDataToApi(
      {
        url: `${this.config.baseURL}/job?price=true`,
        headers: {
          ...combinedHeaders,
          Accept: 'multipart/form-data',
        },
        formData,
        failedResponseHandler: prodiaFailedResponseHandler,
        successfulResponseHandler: createLanguageMultipartResponseHandler(),
        abortSignal: options.abortSignal,
        fetch: this.config.fetch,
      },
    );

    const { jobResult, textContent, fileContent } = multipartResult;

    // Text (if any) comes first, followed by every returned image.
    const content: Array<LanguageModelV4Content> = [];
    if (textContent !== undefined) {
      content.push({ type: 'text', text: textContent });
    }
    for (const file of fileContent) {
      content.push({
        type: 'file',
        mediaType: file.mediaType,
        data: { type: 'data', data: file.data },
      });
    }

    return {
      content,
      finishReason: { unified: 'stop' as const, raw: undefined },
      usage: {
        inputTokens: {
          total: undefined,
          noCache: undefined,
          cacheRead: undefined,
          cacheWrite: undefined,
        },
        outputTokens: {
          total: undefined,
          text: undefined,
          reasoning: undefined,
        },
      },
      warnings,
      providerMetadata: {
        prodia: buildProdiaProviderMetadata(jobResult),
      },
      response: {
        modelId: this.modelId,
        timestamp: currentDate,
        headers: responseHeaders,
      },
    };
  }

  /**
   * Stream facade over `doGenerate`.
   *
   * The job is awaited in full before the stream is created, so every part is
   * enqueued at once: stream-start, response-metadata, the content parts, and
   * finally the finish part.
   */
  async doStream(options: LanguageModelV4CallOptions) {
    const result = await this.doGenerate(options);

    const stream = new ReadableStream<LanguageModelV4StreamPart>({
      start(controller) {
        controller.enqueue({
          type: 'stream-start',
          warnings: result.warnings,
        });

        controller.enqueue({
          type: 'response-metadata',
          modelId: result.response?.modelId,
          timestamp: result.response?.timestamp,
        });

        for (const part of result.content) {
          if (part.type === 'text') {
            // Each text part becomes its own start/delta/end triple.
            const id = generateId();
            controller.enqueue({ type: 'text-start', id });
            controller.enqueue({
              type: 'text-delta',
              id,
              delta: part.text,
            });
            controller.enqueue({ type: 'text-end', id });
          } else if (part.type === 'file') {
            controller.enqueue({
              type: 'file',
              mediaType: part.mediaType,
              data: part.data,
            });
          }
        }

        controller.enqueue({
          type: 'finish',
          usage: result.usage,
          finishReason: result.finishReason,
          providerMetadata: result.providerMetadata,
        });

        controller.close();
      },
    });

    return {
      stream,
      response: {
        headers: result.response?.headers,
      },
    };
  }

  /** Builds one `unsupported` warning per call setting that is set but not sent to Prodia. */
  private static collectWarnings(
    options: LanguageModelV4CallOptions,
  ): Array<SharedV4Warning> {
    const warnings: Array<SharedV4Warning> = [];

    // Settings that only need an "is it set?" check, in reporting order.
    const ignoredSettings = [
      'temperature',
      'topP',
      'topK',
      'maxOutputTokens',
      'stopSequences',
      'presencePenalty',
      'frequencyPenalty',
    ] as const;
    for (const feature of ignoredSettings) {
      if (options[feature] !== undefined) {
        warnings.push({ type: 'unsupported', feature });
      }
    }

    if (options.tools !== undefined && options.tools.length > 0) {
      warnings.push({ type: 'unsupported', feature: 'tools' });
    }
    if (options.toolChoice !== undefined) {
      warnings.push({ type: 'unsupported', feature: 'toolChoice' });
    }
    if (
      options.responseFormat !== undefined &&
      options.responseFormat.type !== 'text'
    ) {
      warnings.push({ type: 'unsupported', feature: 'responseFormat' });
    }
    if (isCustomReasoning(options.reasoning)) {
      warnings.push({
        type: 'unsupported',
        feature: 'reasoning',
        details: 'This provider does not support reasoning configuration.',
      });
    }

    return warnings;
  }

  /**
   * Joins the text parts of the last user message with newlines and, when a
   * system message exists, prefixes the result with the last system message
   * followed by a newline.
   */
  private static buildPrompt(
    messages: LanguageModelV4CallOptions['prompt'],
  ): string {
    let systemMessage = '';
    for (const message of messages) {
      if (message.role === 'system') {
        systemMessage = message.content;
      }
    }

    let userText = '';
    for (let i = messages.length - 1; i >= 0; i--) {
      const message = messages[i];
      if (message.role === 'user') {
        for (const part of message.content) {
          if (part.type === 'text') {
            userText += (userText ? '\n' : '') + part.text;
          }
        }
        // Only the most recent user message contributes text.
        break;
      }
    }

    return systemMessage ? systemMessage + '\n' + userText : userText;
  }

  /**
   * Resolves the first image file part of the last user message to raw bytes
   * plus a media type. Returns `undefined` when that message has no image.
   *
   * The media type is the part's own type when it is a full media type,
   * otherwise the type detected from the bytes, otherwise `image/png`.
   */
  private async loadInputImage(
    options: LanguageModelV4CallOptions,
  ): Promise<{ bytes: Uint8Array; mediaType: string } | undefined> {
    for (let i = options.prompt.length - 1; i >= 0; i--) {
      const message = options.prompt[i];
      if (message.role !== 'user') {
        continue;
      }

      for (const part of message.content) {
        if (
          part.type !== 'file' ||
          getTopLevelMediaType(part.mediaType) !== 'image'
        ) {
          continue;
        }

        let bytes: Uint8Array | undefined;
        switch (part.data.type) {
          case 'reference': {
            throw new UnsupportedFunctionalityError({
              functionality: 'file parts with provider references',
            });
          }
          case 'text': {
            throw new UnsupportedFunctionalityError({
              functionality: 'text file parts',
            });
          }
          case 'data': {
            bytes =
              part.data.data instanceof Uint8Array
                ? part.data.data
                : convertBase64ToUint8Array(part.data.data);
            break;
          }
          case 'url': {
            const imageUrl = part.data.url.toString();
            const fetchFn = this.config.fetch ?? globalThis.fetch;
            // Honour the caller's abort signal for the download as well.
            const response = await fetchFn(imageUrl, {
              signal: options.abortSignal,
            });
            // Without this check an HTTP error page would be uploaded to
            // Prodia as if it were the input image.
            if (!response.ok) {
              throw new Error(
                `Failed to download input image from ${imageUrl}: ${response.status} ${response.statusText}`,
              );
            }
            bytes = new Uint8Array(await response.arrayBuffer());
            break;
          }
        }

        if (bytes === undefined) {
          return undefined;
        }

        let mediaType = 'image/png';
        if (isFullMediaType(part.mediaType)) {
          mediaType = part.mediaType;
        } else {
          const detected = detectMediaType({
            data: bytes,
            topLevelType: getTopLevelMediaType(part.mediaType),
          });
          if (detected !== undefined) {
            mediaType = detected;
          }
        }

        // Only the first image part is used.
        return { bytes, mediaType };
      }

      // Only the most recent user message is inspected.
      return undefined;
    }

    return undefined;
  }

  /** Maps the supported image media types to an upload filename extension ('' when unknown). */
  private static fileExtensionFor(mediaType: string): string {
    switch (mediaType) {
      case 'image/png':
        return '.png';
      case 'image/jpeg':
        return '.jpg';
      case 'image/webp':
        return '.webp';
      default:
        return '';
    }
  }
}
350
+
351
/**
 * Decoded contents of a Prodia multipart job response, as produced by
 * `createLanguageMultipartResponseHandler`.
 */
interface LanguageMultipartResult {
  /** Parsed and schema-validated JSON of the `job` part. */
  jobResult: ProdiaJobResult;

  /** Decoded body of the textual `output` part, or `undefined` when none was present. */
  textContent: string | undefined;

  /** Every `output` part whose content type starts with `image/`, in response order. */
  fileContent: Array<{
    mediaType: string;
    data: Uint8Array;
  }>;
}
356
+
357
/**
 * Creates the success-response handler for Prodia multipart job responses.
 *
 * The returned handler reads the whole response body, splits it with
 * `parseMultipart` using the boundary from the `content-type` header, and
 * collects:
 * - the `job` part, parsed as JSON and validated against `prodiaJobResultSchema`;
 * - the `output` part that is textual (content type `text/*` or a
 *   content-disposition containing `.txt`) — a later one replaces an earlier one;
 * - every `output` part whose content type starts with `image/`.
 *
 * The boundary parameter is matched case-insensitively and may be a quoted
 * string (`boundary="..."`); the surrounding quotes are not part of the
 * boundary passed to the parser.
 *
 * @returns An async handler resolving to the decoded parts and the response
 * headers as a plain object.
 * @throws Error when the content type carries no boundary or the response has
 * no `job` part.
 */
function createLanguageMultipartResponseHandler() {
  return async ({
    response,
  }: {
    response: Response;
  }): Promise<{
    value: LanguageMultipartResult;
    responseHeaders: Record<string, string>;
  }> => {
    const contentType = response.headers.get('content-type') ?? '';
    const responseHeaders: Record<string, string> = {};
    response.headers.forEach((value, key) => {
      responseHeaders[key] = value;
    });

    // Group 1 is a quoted boundary (quotes stripped), group 2 a bare token.
    const boundaryMatch = /boundary=(?:"([^"]+)"|([^\s;]+))/i.exec(
      contentType,
    );
    const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2];
    if (boundary === undefined) {
      throw new Error(
        `Prodia response missing multipart boundary in content-type: ${contentType}`,
      );
    }

    const arrayBuffer = await response.arrayBuffer();
    const bytes = new Uint8Array(arrayBuffer);

    const parts = parseMultipart(bytes, boundary);

    let jobResult: ProdiaJobResult | undefined;
    let textContent: string | undefined;
    const fileContent: Array<{ mediaType: string; data: Uint8Array }> = [];

    for (const part of parts) {
      const contentDisposition = part.headers['content-disposition'] ?? '';
      const partContentType = part.headers['content-type'] ?? '';

      if (contentDisposition.includes('name="job"')) {
        const jsonStr = new TextDecoder().decode(part.body);
        jobResult = await parseJSON({
          text: jsonStr,
          schema: zodSchema(prodiaJobResultSchema),
        });
      } else if (contentDisposition.includes('name="output"')) {
        if (
          partContentType.startsWith('text/') ||
          contentDisposition.includes('.txt')
        ) {
          textContent = new TextDecoder().decode(part.body);
        } else if (partContentType.startsWith('image/')) {
          fileContent.push({
            mediaType: partContentType,
            data: part.body,
          });
        }
        // Output parts of any other content type are ignored.
      }
    }

    if (!jobResult) {
      throw new Error('Prodia multipart response missing job part');
    }

    return {
      value: { jobResult, textContent, fileContent },
      responseHeaders,
    };
  };
}
@@ -1,16 +1,22 @@
1
1
  import {
2
- type ImageModelV4,
3
2
  NoSuchModelError,
3
+ type Experimental_VideoModelV4,
4
+ type ImageModelV4,
5
+ type LanguageModelV4,
4
6
  type ProviderV4,
5
7
  } from '@ai-sdk/provider';
6
- import type { FetchFunction } from '@ai-sdk/provider-utils';
7
8
  import {
8
9
  loadApiKey,
9
10
  withoutTrailingSlash,
10
11
  withUserAgentSuffix,
12
+ type FetchFunction,
11
13
  } from '@ai-sdk/provider-utils';
12
14
  import { ProdiaImageModel } from './prodia-image-model';
13
15
  import type { ProdiaImageModelId } from './prodia-image-settings';
16
+ import { ProdiaLanguageModel } from './prodia-language-model';
17
+ import type { ProdiaLanguageModelId } from './prodia-language-model-settings';
18
+ import { ProdiaVideoModel } from './prodia-video-model';
19
+ import type { ProdiaVideoModelId } from './prodia-video-model-settings';
14
20
  import { VERSION } from './version';
15
21
 
16
22
  export interface ProdiaProviderSettings {
@@ -37,6 +43,11 @@ export interface ProdiaProviderSettings {
37
43
  }
38
44
 
39
45
  export interface ProdiaProvider extends ProviderV4 {
46
+ /**
47
+ * Creates a language model for multimodal generation (img2img with text+image output).
48
+ */
49
+ languageModel(modelId: ProdiaLanguageModelId): LanguageModelV4;
50
+
40
51
  /**
41
52
  * Creates a model for image generation.
42
53
  */
@@ -47,6 +58,16 @@ export interface ProdiaProvider extends ProviderV4 {
47
58
  */
48
59
  imageModel(modelId: ProdiaImageModelId): ImageModelV4;
49
60
 
61
+ /**
62
+ * Creates a model for video generation.
63
+ */
64
+ video(modelId: ProdiaVideoModelId): Experimental_VideoModelV4;
65
+
66
+ /**
67
+ * Creates a model for video generation.
68
+ */
69
+ videoModel(modelId: ProdiaVideoModelId): Experimental_VideoModelV4;
70
+
50
71
  /**
51
72
  * @deprecated Use `embeddingModel` instead.
52
73
  */
@@ -80,25 +101,36 @@ export function createProdia(
80
101
  fetch: options.fetch,
81
102
  });
82
103
 
83
- const embeddingModel = (modelId: string) => {
84
- throw new NoSuchModelError({
85
- modelId,
86
- modelType: 'embeddingModel',
104
+ const createLanguageModel = (modelId: ProdiaLanguageModelId) =>
105
+ new ProdiaLanguageModel(modelId, {
106
+ provider: 'prodia.language',
107
+ baseURL: baseURL ?? defaultBaseURL,
108
+ headers: getHeaders,
109
+ fetch: options.fetch,
87
110
  });
88
- };
89
111
 
90
- const languageModel = (modelId: string) => {
112
+ const createVideoModel = (modelId: ProdiaVideoModelId) =>
113
+ new ProdiaVideoModel(modelId, {
114
+ provider: 'prodia.video',
115
+ baseURL: baseURL ?? defaultBaseURL,
116
+ headers: getHeaders,
117
+ fetch: options.fetch,
118
+ });
119
+
120
+ const embeddingModel = (modelId: string) => {
91
121
  throw new NoSuchModelError({
92
122
  modelId,
93
- modelType: 'languageModel',
123
+ modelType: 'embeddingModel',
94
124
  });
95
125
  };
96
126
 
97
127
  return {
98
128
  specificationVersion: 'v4',
129
+ languageModel: createLanguageModel,
99
130
  imageModel: createImageModel,
100
131
  image: createImageModel,
101
- languageModel,
132
+ videoModel: createVideoModel,
133
+ video: createVideoModel,
102
134
  embeddingModel,
103
135
  textEmbeddingModel: embeddingModel,
104
136
  };
@@ -0,0 +1,21 @@
1
+ import {
2
+ lazySchema,
3
+ zodSchema,
4
+ type InferSchema,
5
+ } from '@ai-sdk/provider-utils';
6
+ import { z } from 'zod/v4';
7
+
8
/**
 * Schema for the provider options accepted by Prodia video models.
 *
 * The schema is built inside the factory callback handed to `lazySchema`.
 */
export const prodiaVideoModelOptionsSchema = lazySchema(() => {
  const videoModelOptions = z.object({
    /**
     * Video resolution (e.g. "480p", "720p").
     */
    resolution: z.string().optional(),
  });

  return zodSchema(videoModelOptions);
});
18
+
19
/**
 * Provider options type inferred from `prodiaVideoModelOptionsSchema`.
 */
export type ProdiaVideoModelOptions = InferSchema<typeof prodiaVideoModelOptionsSchema>;
@@ -0,0 +1,7 @@
1
/**
 * Prodia job types for video generation.
 *
 * The listed literals are the job types named here; the trailing
 * `string & {}` member accepts any other string as well.
 */
export type ProdiaVideoModelId =
  | 'inference.wan2-2.lightning.txt2vid.v0'
  | 'inference.wan2-2.lightning.img2vid.v0'
  | (string & {});