@animalabs/membrane 0.5.26 → 0.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,581 @@
1
+ /**
2
+ * OpenAI Responses API provider adapter
3
+ *
4
+ * Adapter for OpenAI's `/v1/responses` endpoint, required for image generation
5
+ * models like `gpt-image-1`. The Responses API differs from Chat Completions:
6
+ *
7
+ * - Uses `input` array instead of `messages`
8
+ * - Content types: `input_text` / `input_image` (not `text` / `image_url`)
9
+ * - Image generation is a tool: `{"type": "image_generation"}`
10
+ * - Generated images come back as `image_generation_call` output items
11
+ * - Streaming uses different event types
12
+ *
13
+ * This adapter converts membrane's ProviderRequest into the Responses API format,
14
+ * sends the request, and converts the response back into membrane ContentBlocks.
15
+ */
16
+
17
+ import type {
18
+ ProviderAdapter,
19
+ ProviderRequest,
20
+ ProviderRequestOptions,
21
+ ProviderResponse,
22
+ StreamCallbacks,
23
+ ContentBlock,
24
+ } from '../types/index.js';
25
+ import {
26
+ MembraneError,
27
+ rateLimitError,
28
+ contextLengthError,
29
+ authError,
30
+ serverError,
31
+ abortError,
32
+ networkError,
33
+ } from '../types/index.js';
34
+
35
+ // ============================================================================
36
+ // Responses API Types
37
+ // ============================================================================
38
+
39
/** Plain-text part of a Responses API input message. */
interface ResponsesInputTextPart {
  type: 'input_text';
  text: string;
}

/** Image part of a Responses API input message. */
interface ResponsesInputImagePart {
  type: 'input_image';
  image_url: string; // data URI: "data:image/jpeg;base64,..."
  detail?: 'auto' | 'low' | 'high';
}

/** Any content part accepted in an input message. */
type ResponsesInputPart = ResponsesInputTextPart | ResponsesInputImagePart;

/** One entry in the Responses API `input` array. */
interface ResponsesInputMessage {
  role: 'user' | 'assistant' | 'system' | 'developer';
  content: ResponsesInputPart[] | string;
}

/**
 * Request body for `POST /v1/responses`.
 *
 * The index signature allows caller-supplied extra parameters (see
 * `buildRequest`, which merges `request.extra` into this object).
 */
interface ResponsesRequest {
  model: string;
  input: (ResponsesInputMessage | string)[];
  instructions?: string;
  tools?: { type: string; [key: string]: unknown }[];
  temperature?: number;
  top_p?: number;
  max_output_tokens?: number;
  stream?: boolean;
  [key: string]: unknown;
}

/** Text content produced by the model. */
interface ResponsesOutputText {
  type: 'output_text';
  text: string;
}

/** Output item produced by the `image_generation` tool. */
interface ResponsesImageGenerationCall {
  type: 'image_generation_call';
  id: string;
  result: string; // base64 image data
  status?: string;
}

/** Content inside an assistant output message. */
type ResponsesOutputContent = ResponsesOutputText | ResponsesImageGenerationCall;

/** Assistant message item in the response `output` array. */
interface ResponsesOutputMessage {
  type: 'message';
  id: string;
  role: 'assistant';
  content: ResponsesOutputContent[];
}

/** Any top-level item in the response `output` array. */
type ResponsesOutputItem = ResponsesOutputMessage | ResponsesImageGenerationCall;

/** Top-level response body from `POST /v1/responses`. */
interface ResponsesAPIResponse {
  id: string;
  object: string;
  model: string;
  output: ResponsesOutputItem[];
  usage?: {
    input_tokens: number;
    output_tokens: number;
    total_tokens: number;
    input_tokens_details?: {
      cached_tokens?: number;
    };
  };
  status?: string;
  // The API can report an error inside a 200 response; checked in complete().
  error?: { code: string; message: string };
}
108
+
109
+ // ============================================================================
110
+ // Adapter Configuration
111
+ // ============================================================================
112
+
113
/** Construction options for the OpenAI Responses adapter. */
export interface OpenAIResponsesAdapterConfig {
  /** API key (defaults to OPENAI_API_KEY env var) */
  apiKey?: string;

  /** Base URL (default: https://api.openai.com/v1) */
  baseURL?: string;

  /** Organization ID (optional) */
  organization?: string;

  /** Default max output tokens, used when the request does not specify one */
  defaultMaxTokens?: number;
}
126
+
127
+ // ============================================================================
128
+ // OpenAI Responses Adapter
129
+ // ============================================================================
130
+
131
+ export class OpenAIResponsesAdapter implements ProviderAdapter {
132
+ readonly name = 'openai-responses';
133
+ private apiKey: string;
134
+ private baseURL: string;
135
+ private organization?: string;
136
+ private defaultMaxTokens: number;
137
+
138
+ constructor(config: OpenAIResponsesAdapterConfig = {}) {
139
+ this.apiKey = config.apiKey ?? process.env.OPENAI_API_KEY ?? '';
140
+ this.baseURL = (config.baseURL ?? 'https://api.openai.com/v1').replace(/\/$/, '');
141
+ this.organization = config.organization;
142
+ this.defaultMaxTokens = config.defaultMaxTokens ?? 4096;
143
+
144
+ if (!this.apiKey) {
145
+ throw new Error('OpenAI API key not provided');
146
+ }
147
+ }
148
+
149
+ supportsModel(modelId: string): boolean {
150
+ return modelId.startsWith('gpt-image');
151
+ }
152
+
153
+ async complete(
154
+ request: ProviderRequest,
155
+ options?: ProviderRequestOptions
156
+ ): Promise<ProviderResponse> {
157
+ const responsesRequest = this.buildRequest(request);
158
+ options?.onRequest?.(responsesRequest);
159
+
160
+ try {
161
+ const response = await fetch(`${this.baseURL}/responses`, {
162
+ method: 'POST',
163
+ headers: this.getHeaders(),
164
+ body: JSON.stringify(responsesRequest),
165
+ signal: options?.signal,
166
+ });
167
+
168
+ if (!response.ok) {
169
+ const errorText = await response.text();
170
+ throw new Error(`OpenAI Responses API error: ${response.status} ${errorText}`);
171
+ }
172
+
173
+ const data = (await response.json()) as ResponsesAPIResponse;
174
+
175
+ if (data.error) {
176
+ throw new Error(`OpenAI Responses API error: ${data.error.code} ${data.error.message}`);
177
+ }
178
+
179
+ return this.parseResponse(data, request.model, responsesRequest);
180
+ } catch (error) {
181
+ throw this.handleError(error, responsesRequest);
182
+ }
183
+ }
184
+
185
+ async stream(
186
+ request: ProviderRequest,
187
+ callbacks: StreamCallbacks,
188
+ options?: ProviderRequestOptions
189
+ ): Promise<ProviderResponse> {
190
+ const responsesRequest = this.buildRequest(request);
191
+ responsesRequest.stream = true;
192
+ options?.onRequest?.(responsesRequest);
193
+
194
+ try {
195
+ const response = await fetch(`${this.baseURL}/responses`, {
196
+ method: 'POST',
197
+ headers: this.getHeaders(),
198
+ body: JSON.stringify(responsesRequest),
199
+ signal: options?.signal,
200
+ });
201
+
202
+ if (!response.ok) {
203
+ const errorText = await response.text();
204
+ throw new Error(`OpenAI Responses API error: ${response.status} ${errorText}`);
205
+ }
206
+
207
+ const reader = response.body?.getReader();
208
+ if (!reader) {
209
+ throw new Error('No response body');
210
+ }
211
+
212
+ const decoder = new TextDecoder();
213
+ let accumulated = '';
214
+ let images: { data: string; mimeType: string }[] = [];
215
+ let lastUsage: ResponsesAPIResponse['usage'] | undefined;
216
+ let buffer = '';
217
+
218
+ while (true) {
219
+ const { done, value } = await reader.read();
220
+ if (done) break;
221
+
222
+ buffer += decoder.decode(value, { stream: true });
223
+ const lines = buffer.split('\n');
224
+ buffer = lines.pop() ?? '';
225
+
226
+ for (const line of lines) {
227
+ if (!line.startsWith('data: ')) continue;
228
+ const data = line.slice(6).trim();
229
+ if (!data || data === '[DONE]') continue;
230
+
231
+ try {
232
+ const parsed = JSON.parse(data);
233
+
234
+ // Handle text deltas
235
+ if (parsed.type === 'response.output_text.delta') {
236
+ const delta = parsed.delta ?? '';
237
+ accumulated += delta;
238
+ callbacks.onChunk(delta);
239
+ }
240
+
241
+ // Handle completed text
242
+ if (parsed.type === 'response.output_text.done') {
243
+ // Text already accumulated via deltas
244
+ }
245
+
246
+ // Handle image generation results
247
+ if (parsed.type === 'response.image_generation_call.done') {
248
+ if (parsed.result) {
249
+ images.push({
250
+ data: parsed.result,
251
+ mimeType: 'image/png',
252
+ });
253
+ }
254
+ }
255
+
256
+ // Handle completed response (has usage)
257
+ if (parsed.type === 'response.completed' || parsed.type === 'response.done') {
258
+ const resp = parsed.response ?? parsed;
259
+ if (resp.usage) {
260
+ lastUsage = resp.usage;
261
+ }
262
+ // Extract any images from the completed response output
263
+ if (resp.output) {
264
+ for (const item of resp.output) {
265
+ if (item.type === 'image_generation_call' && item.result) {
266
+ // Only add if not already captured via streaming events
267
+ const alreadyCaptured = images.some(img => img.data === item.result);
268
+ if (!alreadyCaptured) {
269
+ images.push({
270
+ data: item.result,
271
+ mimeType: 'image/png',
272
+ });
273
+ }
274
+ }
275
+ }
276
+ }
277
+ }
278
+ } catch {
279
+ // Ignore parse errors in stream chunks
280
+ }
281
+ }
282
+ }
283
+
284
+ // Process remaining buffer
285
+ if (buffer.trim()) {
286
+ const remaining = buffer.trim();
287
+ const dataLine = remaining.startsWith('data: ') ? remaining.slice(6).trim() : remaining;
288
+ if (dataLine && dataLine !== '[DONE]') {
289
+ try {
290
+ const parsed = JSON.parse(dataLine);
291
+ if (parsed.type === 'response.completed' || parsed.type === 'response.done') {
292
+ const resp = parsed.response ?? parsed;
293
+ if (resp.usage) lastUsage = resp.usage;
294
+ }
295
+ } catch {
296
+ // Final buffer wasn't valid JSON
297
+ }
298
+ }
299
+ }
300
+
301
+ const cachedTokens = lastUsage?.input_tokens_details?.cached_tokens ?? 0;
302
+
303
+ return {
304
+ content: this.buildContentBlocks(accumulated, images),
305
+ stopReason: 'end_turn',
306
+ stopSequence: undefined,
307
+ usage: {
308
+ inputTokens: lastUsage?.input_tokens ?? 0,
309
+ outputTokens: lastUsage?.output_tokens ?? 0,
310
+ cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
311
+ },
312
+ model: request.model,
313
+ rawRequest: responsesRequest,
314
+ raw: { usage: lastUsage },
315
+ };
316
+ } catch (error) {
317
+ throw this.handleError(error, responsesRequest);
318
+ }
319
+ }
320
+
321
+ // --------------------------------------------------------------------------
322
+ // Request Building
323
+ // --------------------------------------------------------------------------
324
+
325
+ private getHeaders(): Record<string, string> {
326
+ const headers: Record<string, string> = {
327
+ Authorization: `Bearer ${this.apiKey}`,
328
+ 'Content-Type': 'application/json',
329
+ };
330
+
331
+ if (this.organization) {
332
+ headers['OpenAI-Organization'] = this.organization;
333
+ }
334
+
335
+ return headers;
336
+ }
337
+
338
+ private buildRequest(request: ProviderRequest): ResponsesRequest {
339
+ const input = this.convertMessages(request.messages as any[]);
340
+ const maxTokens = request.maxTokens || this.defaultMaxTokens;
341
+
342
+ const responsesRequest: ResponsesRequest = {
343
+ model: request.model,
344
+ input,
345
+ max_output_tokens: maxTokens,
346
+ };
347
+
348
+ // System prompt → instructions
349
+ if (request.system) {
350
+ const systemText =
351
+ typeof request.system === 'string'
352
+ ? request.system
353
+ : (request.system as any[])
354
+ .filter((b: any) => b.type === 'text')
355
+ .map((b: any) => b.text)
356
+ .join('\n');
357
+
358
+ if (systemText) {
359
+ responsesRequest.instructions = systemText;
360
+ }
361
+ }
362
+
363
+ if (request.temperature !== undefined) {
364
+ responsesRequest.temperature = request.temperature;
365
+ }
366
+
367
+ if (request.topP !== undefined) {
368
+ responsesRequest.top_p = request.topP;
369
+ }
370
+
371
+ // Auto-include image_generation tool for image models
372
+ if (request.model?.includes('image')) {
373
+ responsesRequest.tools = [{ type: 'image_generation' }];
374
+ }
375
+
376
+ // Apply extra params (filter out internal membrane fields)
377
+ if (request.extra) {
378
+ const { normalizedMessages, prompt, ...rest } = request.extra as Record<string, unknown>;
379
+ Object.assign(responsesRequest, rest);
380
+ }
381
+
382
+ return responsesRequest;
383
+ }
384
+
385
+ private convertMessages(messages: any[]): ResponsesInputMessage[] {
386
+ const result: ResponsesInputMessage[] = [];
387
+
388
+ for (const msg of messages) {
389
+ const role = this.mapRole(msg.role);
390
+
391
+ // Simple string content
392
+ if (typeof msg.content === 'string') {
393
+ result.push({ role, content: msg.content });
394
+ continue;
395
+ }
396
+
397
+ // Array content blocks (Anthropic-style)
398
+ if (Array.isArray(msg.content)) {
399
+ const parts: ResponsesInputPart[] = [];
400
+
401
+ for (const block of msg.content) {
402
+ if (block.type === 'text') {
403
+ if (block.text) {
404
+ parts.push({ type: 'input_text', text: block.text });
405
+ }
406
+ } else if (block.type === 'image') {
407
+ const source = block.source;
408
+ if (source?.type === 'base64' && source.data) {
409
+ const mimeType = source.media_type ?? source.mediaType ?? 'image/jpeg';
410
+ parts.push({
411
+ type: 'input_image',
412
+ image_url: `data:${mimeType};base64,${source.data}`,
413
+ });
414
+ }
415
+ }
416
+ // tool_use and tool_result are not supported in the Responses API input
417
+ // for image models — skip them silently
418
+ }
419
+
420
+ if (parts.length > 0) {
421
+ result.push({ role, content: parts });
422
+ }
423
+ continue;
424
+ }
425
+
426
+ // Null/empty content — skip
427
+ if (msg.content === null || msg.content === undefined) continue;
428
+
429
+ // Fallback
430
+ result.push({ role, content: String(msg.content) });
431
+ }
432
+
433
+ return result;
434
+ }
435
+
436
+ private mapRole(role: string): 'user' | 'assistant' | 'system' | 'developer' {
437
+ switch (role) {
438
+ case 'user':
439
+ return 'user';
440
+ case 'assistant':
441
+ return 'assistant';
442
+ case 'system':
443
+ return 'developer';
444
+ default:
445
+ return 'user';
446
+ }
447
+ }
448
+
449
+ // --------------------------------------------------------------------------
450
+ // Response Parsing
451
+ // --------------------------------------------------------------------------
452
+
453
+ private parseResponse(
454
+ response: ResponsesAPIResponse,
455
+ requestedModel: string,
456
+ rawRequest: unknown
457
+ ): ProviderResponse {
458
+ let text = '';
459
+ const images: { data: string; mimeType: string }[] = [];
460
+
461
+ for (const item of response.output) {
462
+ if (item.type === 'message') {
463
+ for (const content of item.content) {
464
+ if (content.type === 'output_text') {
465
+ text += content.text;
466
+ } else if (content.type === 'image_generation_call') {
467
+ images.push({
468
+ data: content.result,
469
+ mimeType: 'image/png',
470
+ });
471
+ }
472
+ }
473
+ } else if (item.type === 'image_generation_call') {
474
+ images.push({
475
+ data: item.result,
476
+ mimeType: 'image/png',
477
+ });
478
+ }
479
+ }
480
+
481
+ const cachedTokens = response.usage?.input_tokens_details?.cached_tokens ?? 0;
482
+
483
+ return {
484
+ content: this.buildContentBlocks(text, images),
485
+ stopReason: 'end_turn',
486
+ stopSequence: undefined,
487
+ usage: {
488
+ inputTokens: response.usage?.input_tokens ?? 0,
489
+ outputTokens: response.usage?.output_tokens ?? 0,
490
+ cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
491
+ },
492
+ model: response.model ?? requestedModel,
493
+ rawRequest,
494
+ raw: response,
495
+ };
496
+ }
497
+
498
+ private buildContentBlocks(
499
+ text: string,
500
+ images: { data: string; mimeType: string }[] = []
501
+ ): ContentBlock[] {
502
+ const content: ContentBlock[] = [];
503
+
504
+ if (text) {
505
+ content.push({ type: 'text', text });
506
+ }
507
+
508
+ for (const img of images) {
509
+ content.push({
510
+ type: 'generated_image',
511
+ data: img.data,
512
+ mimeType: img.mimeType,
513
+ } as ContentBlock);
514
+ }
515
+
516
+ return content;
517
+ }
518
+
519
+ // --------------------------------------------------------------------------
520
+ // Error Handling
521
+ // --------------------------------------------------------------------------
522
+
523
+ private handleError(error: unknown, rawRequest?: unknown): MembraneError {
524
+ if (error instanceof MembraneError) return error;
525
+
526
+ if (error instanceof Error) {
527
+ const message = error.message;
528
+
529
+ if (message.includes('429') || message.includes('rate_limit')) {
530
+ const retryMatch = message.match(/retry after (\d+)/i);
531
+ const retryAfter = retryMatch?.[1] ? parseInt(retryMatch[1], 10) * 1000 : undefined;
532
+ return rateLimitError(message, retryAfter, error, rawRequest);
533
+ }
534
+
535
+ if (
536
+ message.includes('401') ||
537
+ message.includes('invalid_api_key') ||
538
+ message.includes('Incorrect API key')
539
+ ) {
540
+ return authError(message, error, rawRequest);
541
+ }
542
+
543
+ if (
544
+ message.includes('context_length') ||
545
+ message.includes('maximum context') ||
546
+ message.includes('too long')
547
+ ) {
548
+ return contextLengthError(message, error, rawRequest);
549
+ }
550
+
551
+ if (
552
+ message.includes('500') ||
553
+ message.includes('502') ||
554
+ message.includes('503') ||
555
+ message.includes('server_error')
556
+ ) {
557
+ return serverError(message, undefined, error, rawRequest);
558
+ }
559
+
560
+ if (error.name === 'AbortError') {
561
+ return abortError(undefined, rawRequest);
562
+ }
563
+
564
+ if (
565
+ message.includes('network') ||
566
+ message.includes('fetch') ||
567
+ message.includes('ECONNREFUSED')
568
+ ) {
569
+ return networkError(message, error, rawRequest);
570
+ }
571
+ }
572
+
573
+ return new MembraneError({
574
+ type: 'unknown',
575
+ message: error instanceof Error ? error.message : String(error),
576
+ retryable: false,
577
+ rawError: error,
578
+ rawRequest,
579
+ });
580
+ }
581
+ }
@@ -48,6 +48,10 @@ export interface ImageContent {
48
48
  type: 'image';
49
49
  source: MediaSource;
50
50
  tokenEstimate?: number;
51
+ /** Original URL of the image (e.g., Discord CDN). Used by providers that
52
+ * can auto-fetch URLs from text (like Gemini 3.x) when inlineData is
53
+ * not viable (e.g., missing thought_signature on model-role images). */
54
+ sourceUrl?: string;
51
55
  }
52
56
 
53
57
  export interface DocumentContent {