@ai-sdk/prodia 2.0.0-beta.3 → 2.0.0-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,23 @@
1
1
  import type { ImageModelV4, SharedV4Warning } from '@ai-sdk/provider';
2
- import type { InferSchema, Resolvable } from '@ai-sdk/provider-utils';
2
+ import type { InferSchema } from '@ai-sdk/provider-utils';
3
3
  import {
4
4
  combineHeaders,
5
- createJsonErrorResponseHandler,
6
- type FetchFunction,
7
5
  lazySchema,
6
+ parseJSON,
8
7
  parseProviderOptions,
9
8
  postToApi,
10
9
  resolve,
11
10
  zodSchema,
12
11
  } from '@ai-sdk/provider-utils';
13
12
  import { z } from 'zod/v4';
13
+ import type { ProdiaModelConfig } from './prodia-api';
14
+ import {
15
+ buildProdiaProviderMetadata,
16
+ parseMultipart,
17
+ prodiaFailedResponseHandler,
18
+ prodiaJobResultSchema,
19
+ } from './prodia-api';
20
+ import type { ProdiaJobResult } from './prodia-api';
14
21
  import type { ProdiaImageModelId } from './prodia-image-settings';
15
22
 
16
23
  export class ProdiaImageModel implements ImageModelV4 {
@@ -23,7 +30,7 @@ export class ProdiaImageModel implements ImageModelV4 {
23
30
 
24
31
  constructor(
25
32
  readonly modelId: ProdiaImageModelId,
26
- private readonly config: ProdiaImageModelConfig,
33
+ private readonly config: ProdiaModelConfig,
27
34
  ) {}
28
35
 
29
36
  private async getArgs({
@@ -137,29 +144,7 @@ export class ProdiaImageModel implements ImageModelV4 {
137
144
  warnings,
138
145
  providerMetadata: {
139
146
  prodia: {
140
- images: [
141
- {
142
- jobId: jobResult.id,
143
- ...(jobResult.config?.seed != null && {
144
- seed: jobResult.config.seed,
145
- }),
146
- ...(jobResult.metrics?.elapsed != null && {
147
- elapsed: jobResult.metrics.elapsed,
148
- }),
149
- ...(jobResult.metrics?.ips != null && {
150
- iterationsPerSecond: jobResult.metrics.ips,
151
- }),
152
- ...(jobResult.created_at != null && {
153
- createdAt: jobResult.created_at,
154
- }),
155
- ...(jobResult.updated_at != null && {
156
- updatedAt: jobResult.updated_at,
157
- }),
158
- ...(jobResult.price?.dollars != null && {
159
- dollars: jobResult.price.dollars,
160
- }),
161
- },
162
- ],
147
+ images: [buildProdiaProviderMetadata(jobResult)],
163
148
  },
164
149
  },
165
150
  response: {
@@ -226,48 +211,6 @@ export type ProdiaImageModelOptions = InferSchema<
226
211
  typeof prodiaImageModelOptionsSchema
227
212
  >;
228
213
 
229
- interface ProdiaImageModelConfig {
230
- provider: string;
231
- baseURL: string;
232
- headers?: Resolvable<Record<string, string | undefined>>;
233
- fetch?: FetchFunction;
234
- _internal?: {
235
- currentDate?: () => Date;
236
- };
237
- }
238
-
239
- const prodiaJobResultSchema = z.object({
240
- id: z.string(),
241
- created_at: z.string().optional(),
242
- updated_at: z.string().optional(),
243
- expires_at: z.string().optional(),
244
- state: z
245
- .object({
246
- current: z.string(),
247
- })
248
- .optional(),
249
- config: z
250
- .object({
251
- seed: z.number().optional(),
252
- })
253
- .passthrough()
254
- .optional(),
255
- metrics: z
256
- .object({
257
- elapsed: z.number().optional(),
258
- ips: z.number().optional(),
259
- })
260
- .optional(),
261
- price: z
262
- .object({
263
- product: z.string(),
264
- dollars: z.number(),
265
- })
266
- .nullish(),
267
- });
268
-
269
- type ProdiaJobResult = z.infer<typeof prodiaJobResultSchema>;
270
-
271
214
  interface MultipartResult {
272
215
  jobResult: ProdiaJobResult;
273
216
  imageBytes: Uint8Array;
@@ -310,7 +253,10 @@ function createMultipartResponseHandler() {
310
253
 
311
254
  if (contentDisposition.includes('name="job"')) {
312
255
  const jsonStr = new TextDecoder().decode(part.body);
313
- jobResult = prodiaJobResultSchema.parse(JSON.parse(jsonStr));
256
+ jobResult = await parseJSON({
257
+ text: jsonStr,
258
+ schema: zodSchema(prodiaJobResultSchema),
259
+ });
314
260
  } else if (contentDisposition.includes('name="output"')) {
315
261
  imageBytes = part.body;
316
262
  } else if (partContentType.startsWith('image/')) {
@@ -331,129 +277,3 @@ function createMultipartResponseHandler() {
331
277
  };
332
278
  };
333
279
  }
334
-
335
- interface MultipartPart {
336
- headers: Record<string, string>;
337
- body: Uint8Array;
338
- }
339
-
340
- function parseMultipart(data: Uint8Array, boundary: string): MultipartPart[] {
341
- const parts: MultipartPart[] = [];
342
- const boundaryBytes = new TextEncoder().encode(`--${boundary}`);
343
- const endBoundaryBytes = new TextEncoder().encode(`--${boundary}--`);
344
-
345
- const positions: number[] = [];
346
- for (let i = 0; i <= data.length - boundaryBytes.length; i++) {
347
- let match = true;
348
- for (let j = 0; j < boundaryBytes.length; j++) {
349
- if (data[i + j] !== boundaryBytes[j]) {
350
- match = false;
351
- break;
352
- }
353
- }
354
- if (match) {
355
- positions.push(i);
356
- }
357
- }
358
-
359
- for (let i = 0; i < positions.length - 1; i++) {
360
- const start = positions[i] + boundaryBytes.length;
361
- const end = positions[i + 1];
362
-
363
- let isEndBoundary = true;
364
- for (let j = 0; j < endBoundaryBytes.length && isEndBoundary; j++) {
365
- if (data[positions[i] + j] !== endBoundaryBytes[j]) {
366
- isEndBoundary = false;
367
- }
368
- }
369
- if (
370
- isEndBoundary &&
371
- positions[i] + endBoundaryBytes.length <= data.length
372
- ) {
373
- continue;
374
- }
375
-
376
- let partStart = start;
377
- if (data[partStart] === 0x0d && data[partStart + 1] === 0x0a) {
378
- partStart += 2;
379
- } else if (data[partStart] === 0x0a) {
380
- partStart += 1;
381
- }
382
-
383
- let partEnd = end;
384
- if (data[partEnd - 2] === 0x0d && data[partEnd - 1] === 0x0a) {
385
- partEnd -= 2;
386
- } else if (data[partEnd - 1] === 0x0a) {
387
- partEnd -= 1;
388
- }
389
-
390
- const partData = data.slice(partStart, partEnd);
391
-
392
- let headerEnd = -1;
393
- for (let j = 0; j < partData.length - 3; j++) {
394
- if (
395
- partData[j] === 0x0d &&
396
- partData[j + 1] === 0x0a &&
397
- partData[j + 2] === 0x0d &&
398
- partData[j + 3] === 0x0a
399
- ) {
400
- headerEnd = j;
401
- break;
402
- }
403
- if (partData[j] === 0x0a && partData[j + 1] === 0x0a) {
404
- headerEnd = j;
405
- break;
406
- }
407
- }
408
-
409
- if (headerEnd === -1) {
410
- continue;
411
- }
412
-
413
- const headerBytes = partData.slice(0, headerEnd);
414
- const headerStr = new TextDecoder().decode(headerBytes);
415
- const headers: Record<string, string> = {};
416
- for (const line of headerStr.split(/\r?\n/)) {
417
- const colonIdx = line.indexOf(':');
418
- if (colonIdx > 0) {
419
- const key = line.slice(0, colonIdx).trim().toLowerCase();
420
- const value = line.slice(colonIdx + 1).trim();
421
- headers[key] = value;
422
- }
423
- }
424
-
425
- let bodyStart = headerEnd + 2;
426
- if (partData[headerEnd] === 0x0d) {
427
- bodyStart = headerEnd + 4;
428
- }
429
- const body = partData.slice(bodyStart);
430
-
431
- parts.push({ headers, body });
432
- }
433
-
434
- return parts;
435
- }
436
-
437
- const prodiaErrorSchema = z.object({
438
- message: z.string().optional(),
439
- detail: z.unknown().optional(),
440
- error: z.string().optional(),
441
- });
442
-
443
- const prodiaFailedResponseHandler = createJsonErrorResponseHandler({
444
- errorSchema: prodiaErrorSchema,
445
- errorToMessage: error => {
446
- const parsed = prodiaErrorSchema.safeParse(error);
447
- if (!parsed.success) return 'Unknown Prodia error';
448
- const { message, detail, error: errorField } = parsed.data;
449
- if (typeof detail === 'string') return detail;
450
- if (detail != null) {
451
- try {
452
- return JSON.stringify(detail);
453
- } catch {
454
- // ignore
455
- }
456
- }
457
- return errorField ?? message ?? 'Unknown Prodia error';
458
- },
459
- });
@@ -0,0 +1,6 @@
1
/**
 * Identifier of a Prodia job type that can be driven through the language
 * model interface (multimodal img2img).
 *
 * One job type is spelled out as a literal; the `(string & {})` member keeps
 * the type open so any other string is accepted as well.
 */
export type ProdiaLanguageModelId = 'inference.nano-banana.img2img.v2' | (string & {});
@@ -0,0 +1,395 @@
1
+ import type {
2
+ LanguageModelV4,
3
+ LanguageModelV4CallOptions,
4
+ LanguageModelV4Content,
5
+ LanguageModelV4StreamPart,
6
+ SharedV4Warning,
7
+ } from '@ai-sdk/provider';
8
+ import type { InferSchema } from '@ai-sdk/provider-utils';
9
+ import {
10
+ combineHeaders,
11
+ convertBase64ToUint8Array,
12
+ generateId,
13
+ lazySchema,
14
+ parseJSON,
15
+ parseProviderOptions,
16
+ postFormDataToApi,
17
+ resolve,
18
+ zodSchema,
19
+ } from '@ai-sdk/provider-utils';
20
+ import { z } from 'zod/v4';
21
+ import type { ProdiaModelConfig } from './prodia-api';
22
+ import {
23
+ buildProdiaProviderMetadata,
24
+ parseMultipart,
25
+ prodiaFailedResponseHandler,
26
+ prodiaJobResultSchema,
27
+ } from './prodia-api';
28
+ import type { ProdiaJobResult } from './prodia-api';
29
+ import type { ProdiaLanguageModelId } from './prodia-language-model-settings';
30
+
31
/**
 * Language-model adapter that runs a Prodia job per call.
 *
 * A call is turned into one multipart request to `${baseURL}/job?price=true`:
 * a JSON `job` part carrying the prompt text (and optional aspect ratio) and,
 * when the last user message contains an image file part, an `input` part with
 * the image bytes. The multipart response is mapped to text and file content.
 */
export class ProdiaLanguageModel implements LanguageModelV4 {
  readonly specificationVersion = 'v4';
  readonly supportedUrls = {};

  get provider(): string {
    return this.config.provider;
  }

  constructor(
    readonly modelId: ProdiaLanguageModelId,
    private readonly config: ProdiaModelConfig,
  ) {}

  /**
   * Returns the last message with role `user`, or `undefined` when the prompt
   * contains none.
   */
  private static findLastUserMessage(
    messages: LanguageModelV4CallOptions['prompt'],
  ) {
    for (let i = messages.length - 1; i >= 0; i--) {
      const message = messages[i];
      if (message.role === 'user') {
        return message;
      }
    }
    return undefined;
  }

  /**
   * Builds the prompt string sent to Prodia: the text parts of the last user
   * message, prefixed by the system message when there is one. If the prompt
   * has several system messages, the last one wins.
   */
  private static buildPromptText(
    messages: LanguageModelV4CallOptions['prompt'],
  ): string {
    let systemMessage = '';
    for (const message of messages) {
      if (message.role === 'system') {
        systemMessage = message.content;
      }
    }

    let prompt = '';
    const lastUserMessage = ProdiaLanguageModel.findLastUserMessage(messages);
    if (lastUserMessage !== undefined) {
      for (const part of lastUserMessage.content) {
        if (part.type === 'text') {
          prompt += (prompt ? '\n' : '') + part.text;
        }
      }
    }

    return systemMessage ? systemMessage + '\n' + prompt : prompt;
  }

  /**
   * Downloads an image referenced by URL using the configured fetch
   * implementation (falling back to the global one).
   *
   * @throws Error when the server answers with a non-2xx status, so that an
   * error page is never forwarded to Prodia as if it were image data.
   */
  private async downloadImage(
    url: URL,
    abortSignal: AbortSignal | undefined,
  ): Promise<Uint8Array> {
    const fetchFn = this.config.fetch ?? globalThis.fetch;
    const target = url.toString();
    // Forward the caller's abort signal so cancelling the call also cancels
    // the download.
    const response = await fetchFn(target, { signal: abortSignal });
    if (!response.ok) {
      throw new Error(
        `Failed to download input image from ${target}: ${response.status} ${response.statusText}`,
      );
    }
    return new Uint8Array(await response.arrayBuffer());
  }

  /**
   * Picks the first image file part of the last user message and resolves it
   * to raw bytes. `bytes` stays `undefined` when there is no such part or its
   * data is of a kind this method does not handle; `mediaType` defaults to
   * `image/png`.
   */
  private async extractInputImage(
    messages: LanguageModelV4CallOptions['prompt'],
    abortSignal: AbortSignal | undefined,
  ): Promise<{ bytes: Uint8Array | undefined; mediaType: string }> {
    let bytes: Uint8Array | undefined;
    let mediaType = 'image/png';

    const lastUserMessage = ProdiaLanguageModel.findLastUserMessage(messages);
    if (lastUserMessage !== undefined) {
      for (const part of lastUserMessage.content) {
        if (part.type === 'file' && part.mediaType.startsWith('image/')) {
          if (part.data instanceof Uint8Array) {
            bytes = part.data;
          } else if (typeof part.data === 'string') {
            // base64 encoded
            bytes = convertBase64ToUint8Array(part.data);
          } else if (part.data instanceof URL) {
            bytes = await this.downloadImage(part.data, abortSignal);
          }
          mediaType = part.mediaType;
          break;
        }
      }
    }

    return { bytes, mediaType };
  }

  /**
   * Runs one Prodia job and returns its output as language-model content.
   *
   * Sampling settings, tools, tool choice and non-text response formats are
   * not sent to Prodia; each one that the caller sets is reported in
   * `warnings` instead.
   *
   * @throws Error when an input image given as a URL cannot be downloaded.
   */
  async doGenerate(options: LanguageModelV4CallOptions) {
    const warnings: Array<SharedV4Warning> = [];

    // Settings that are reported as unsupported as soon as they are defined.
    const ignoredSettings = [
      'temperature',
      'topP',
      'topK',
      'maxOutputTokens',
      'stopSequences',
      'presencePenalty',
      'frequencyPenalty',
    ] as const;
    for (const feature of ignoredSettings) {
      if (options[feature] !== undefined) {
        warnings.push({ type: 'unsupported', feature });
      }
    }
    if (options.tools !== undefined && options.tools.length > 0) {
      warnings.push({ type: 'unsupported', feature: 'tools' });
    }
    if (options.toolChoice !== undefined) {
      warnings.push({ type: 'unsupported', feature: 'toolChoice' });
    }
    if (
      options.responseFormat !== undefined &&
      options.responseFormat.type !== 'text'
    ) {
      warnings.push({ type: 'unsupported', feature: 'responseFormat' });
    }

    const prodiaOptions = await parseProviderOptions({
      provider: 'prodia',
      providerOptions: options.providerOptions,
      schema: prodiaLanguageModelOptionsSchema,
    });

    const prompt = ProdiaLanguageModel.buildPromptText(options.prompt);
    const { bytes: imageBytes, mediaType: imageMediaType } =
      await this.extractInputImage(options.prompt, options.abortSignal);

    const jobConfig: Record<string, unknown> = {
      prompt,
      include_messages: true,
    };
    if (prodiaOptions?.aspectRatio !== undefined) {
      jobConfig.aspect_ratio = prodiaOptions.aspectRatio;
    }

    const body = {
      type: this.modelId,
      config: jobConfig,
    };

    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const combinedHeaders = combineHeaders(
      await resolve(this.config.headers),
      options.headers,
    );

    // Always use multipart form-data since img2img requires image input
    const formData = new FormData();
    formData.append(
      'job',
      new Blob([JSON.stringify(body)], { type: 'application/json' }),
      'job.json',
    );
    if (imageBytes) {
      // File extension for the uploaded part; unknown image types get none.
      const extensionByMediaType: Record<string, string> = {
        'image/png': '.png',
        'image/jpeg': '.jpg',
        'image/webp': '.webp',
      };
      const ext = extensionByMediaType[imageMediaType] ?? '';
      formData.append(
        'input',
        new Blob([imageBytes], { type: imageMediaType }),
        'input' + ext,
      );
    }

    const { value: multipartResult, responseHeaders } = await postFormDataToApi(
      {
        url: `${this.config.baseURL}/job?price=true`,
        headers: {
          ...combinedHeaders,
          Accept: 'multipart/form-data',
        },
        formData,
        failedResponseHandler: prodiaFailedResponseHandler,
        successfulResponseHandler: createLanguageMultipartResponseHandler(),
        abortSignal: options.abortSignal,
        fetch: this.config.fetch,
      },
    );

    const { jobResult, textContent, fileContent } = multipartResult;

    // Text (if any) comes first, followed by every returned file.
    const content: Array<LanguageModelV4Content> = [];
    if (textContent !== undefined) {
      content.push({ type: 'text', text: textContent });
    }
    for (const file of fileContent) {
      content.push({
        type: 'file',
        mediaType: file.mediaType,
        data: file.data,
      });
    }

    return {
      content,
      finishReason: { unified: 'stop' as const, raw: undefined },
      // No token counts are read from the response, so usage is all undefined.
      usage: {
        inputTokens: {
          total: undefined,
          noCache: undefined,
          cacheRead: undefined,
          cacheWrite: undefined,
        },
        outputTokens: {
          total: undefined,
          text: undefined,
          reasoning: undefined,
        },
      },
      warnings,
      providerMetadata: {
        prodia: buildProdiaProviderMetadata(jobResult),
      },
      response: {
        modelId: this.modelId,
        timestamp: currentDate,
        headers: responseHeaders,
      },
    };
  }

  /**
   * Streaming variant: awaits the complete `doGenerate` result and then
   * replays it as a single burst of stream parts (stream start, response
   * metadata, one text or file part per content item, finish).
   */
  async doStream(options: LanguageModelV4CallOptions) {
    const result = await this.doGenerate(options);

    const stream = new ReadableStream<LanguageModelV4StreamPart>({
      start(controller) {
        controller.enqueue({
          type: 'stream-start',
          warnings: result.warnings,
        });

        controller.enqueue({
          type: 'response-metadata',
          modelId: result.response?.modelId,
          timestamp: result.response?.timestamp,
        });

        for (const part of result.content) {
          if (part.type === 'text') {
            // Each text item is emitted as its own start/delta/end triple.
            const id = generateId();
            controller.enqueue({ type: 'text-start', id });
            controller.enqueue({
              type: 'text-delta',
              id,
              delta: part.text,
            });
            controller.enqueue({ type: 'text-end', id });
          } else if (part.type === 'file') {
            controller.enqueue({
              type: 'file',
              mediaType: part.mediaType,
              data: part.data,
            });
          }
        }

        controller.enqueue({
          type: 'finish',
          usage: result.usage,
          finishReason: result.finishReason,
          providerMetadata: result.providerMetadata,
        });

        controller.close();
      },
    });

    return {
      stream,
      response: {
        headers: result.response?.headers,
      },
    };
  }
}
293
+
294
/**
 * Lazily constructed schema for the Prodia-specific provider options of the
 * language model. Every field is optional.
 */
export const prodiaLanguageModelOptionsSchema = lazySchema(() => {
  const aspectRatio = z
    .enum([
      '1:1',
      '2:3',
      '3:2',
      '4:5',
      '5:4',
      '4:7',
      '7:4',
      '9:16',
      '16:9',
      '9:21',
      '21:9',
    ])
    .optional();

  const optionsShape = z.object({
    /**
     * Aspect ratio for the output image.
     */
    aspectRatio,
  });

  return zodSchema(optionsShape);
});
318
+
319
/**
 * Provider options type inferred from `prodiaLanguageModelOptionsSchema`.
 */
export type ProdiaLanguageModelOptions = InferSchema<typeof prodiaLanguageModelOptionsSchema>;
322
+
323
/**
 * Decoded contents of a Prodia multipart response as used by the language
 * model.
 */
interface LanguageMultipartResult {
  /** Parsed JSON of the `job` part. */
  jobResult: ProdiaJobResult;
  /** Decoded text output, or `undefined` when the response had none. */
  textContent: string | undefined;
  /** Image outputs, each with its media type and raw bytes. */
  fileContent: { mediaType: string; data: Uint8Array }[];
}
328
+
329
/**
 * Creates the success handler for Prodia multipart responses used by the
 * language model.
 *
 * The returned handler reads the whole response body, splits it with
 * `parseMultipart`, and sorts the parts by their `content-disposition` name:
 * - `job`: parsed as JSON and validated against `prodiaJobResultSchema`;
 * - `output` with a `text/` content type (or `.txt` in the disposition):
 *   decoded as text (a later text part replaces an earlier one);
 * - `output` with an `image/` content type: collected as a file.
 * Any other part is ignored.
 *
 * @returns An async handler resolving to the decoded result plus the response
 * headers as a plain object.
 * @throws Error (from the handler) when the content type carries no multipart
 * boundary or the response contains no `job` part.
 */
function createLanguageMultipartResponseHandler() {
  return async ({
    response,
  }: {
    response: Response;
  }): Promise<{
    value: LanguageMultipartResult;
    responseHeaders: Record<string, string>;
  }> => {
    const contentType = response.headers.get('content-type') ?? '';
    const responseHeaders: Record<string, string> = {};
    response.headers.forEach((value, key) => {
      responseHeaders[key] = value;
    });

    // The boundary parameter may be sent quoted (`boundary="abc"`) or bare
    // (`boundary=abc`), and parameter names are case-insensitive. Capture the
    // value without the surrounding quotes so it matches the body delimiters.
    const boundaryMatch = contentType.match(
      /boundary=(?:"([^"]+)"|([^\s;]+))/i,
    );
    const boundary = boundaryMatch?.[1] ?? boundaryMatch?.[2];
    if (boundary === undefined) {
      throw new Error(
        `Prodia response missing multipart boundary in content-type: ${contentType}`,
      );
    }

    const arrayBuffer = await response.arrayBuffer();
    const bytes = new Uint8Array(arrayBuffer);

    const parts = parseMultipart(bytes, boundary);

    let jobResult: ProdiaJobResult | undefined;
    let textContent: string | undefined;
    const fileContent: Array<{ mediaType: string; data: Uint8Array }> = [];

    for (const part of parts) {
      const contentDisposition = part.headers['content-disposition'] ?? '';
      const partContentType = part.headers['content-type'] ?? '';

      if (contentDisposition.includes('name="job"')) {
        const jsonStr = new TextDecoder().decode(part.body);
        jobResult = await parseJSON({
          text: jsonStr,
          schema: zodSchema(prodiaJobResultSchema),
        });
      } else if (contentDisposition.includes('name="output"')) {
        if (
          partContentType.startsWith('text/') ||
          contentDisposition.includes('.txt')
        ) {
          textContent = new TextDecoder().decode(part.body);
        } else if (partContentType.startsWith('image/')) {
          fileContent.push({
            mediaType: partContentType,
            data: part.body,
          });
        }
      }
    }

    if (!jobResult) {
      throw new Error('Prodia multipart response missing job part');
    }

    return {
      value: { jobResult, textContent, fileContent },
      responseHeaders,
    };
  };
}