@animalabs/membrane 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/membrane.d.ts.map +1 -1
- package/dist/membrane.js +25 -13
- package/dist/membrane.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +17 -15
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +16 -13
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +16 -13
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers/openrouter.d.ts.map +1 -1
- package/dist/providers/openrouter.js +16 -13
- package/dist/providers/openrouter.js.map +1 -1
- package/dist/types/errors.d.ts +10 -10
- package/dist/types/errors.d.ts.map +1 -1
- package/dist/types/errors.js +20 -10
- package/dist/types/errors.js.map +1 -1
- package/dist/types/provider.d.ts +2 -0
- package/dist/types/provider.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/membrane.ts +33 -17
- package/src/providers/anthropic.ts +30 -28
- package/src/providers/openai-compatible.ts +41 -37
- package/src/providers/openai.ts +44 -40
- package/src/providers/openrouter.ts +46 -42
- package/src/types/errors.ts +20 -10
- package/src/types/provider.ts +4 -1
package/src/membrane.ts
CHANGED
|
@@ -95,23 +95,24 @@ export class Membrane {
|
|
|
95
95
|
finalRequest = await this.config.hooks.beforeRequest(request, providerRequest) ?? providerRequest;
|
|
96
96
|
}
|
|
97
97
|
|
|
98
|
-
rawRequest = finalRequest;
|
|
99
|
-
|
|
100
|
-
// Call onRequest callback for logging
|
|
101
|
-
options.onRequest?.(rawRequest);
|
|
102
|
-
|
|
103
98
|
const providerResponse = await this.adapter.complete(finalRequest, {
|
|
104
99
|
signal: options.signal,
|
|
105
100
|
timeoutMs: options.timeoutMs,
|
|
106
101
|
});
|
|
107
102
|
|
|
103
|
+
// Use the actual raw request from provider (after any adapter transformations)
|
|
104
|
+
rawRequest = providerResponse.rawRequest;
|
|
105
|
+
|
|
106
|
+
// Call onRequest callback with actual request sent to API
|
|
107
|
+
options.onRequest?.(rawRequest);
|
|
108
|
+
|
|
108
109
|
const response = this.transformResponse(
|
|
109
110
|
providerResponse,
|
|
110
111
|
request,
|
|
111
112
|
prefillResult,
|
|
112
113
|
startTime,
|
|
113
114
|
attempts,
|
|
114
|
-
|
|
115
|
+
rawRequest
|
|
115
116
|
);
|
|
116
117
|
|
|
117
118
|
// Call afterResponse hook
|
|
@@ -239,17 +240,16 @@ export class Membrane {
|
|
|
239
240
|
|
|
240
241
|
// Initialize parser with prefill content so it knows about any open tags
|
|
241
242
|
// (e.g., <thinking> in the prefill means API response continues inside thinking)
|
|
243
|
+
// Track the initial prefill length so we can extract only NEW content for response
|
|
244
|
+
let initialPrefillLength = 0;
|
|
242
245
|
if (prefillResult.assistantPrefill) {
|
|
243
246
|
parser.push(prefillResult.assistantPrefill);
|
|
247
|
+
initialPrefillLength = prefillResult.assistantPrefill.length;
|
|
244
248
|
}
|
|
245
249
|
|
|
246
250
|
try {
|
|
247
251
|
// Tool execution loop
|
|
248
252
|
while (toolDepth <= maxToolDepth) {
|
|
249
|
-
rawRequest = providerRequest;
|
|
250
|
-
|
|
251
|
-
// Call onRequest callback for logging
|
|
252
|
-
onRequest?.(rawRequest);
|
|
253
253
|
|
|
254
254
|
// Track if we manually detected a stop sequence (API doesn't always stop)
|
|
255
255
|
let detectedStopSequence: string | null = null;
|
|
@@ -325,6 +325,12 @@ export class Membrane {
|
|
|
325
325
|
streamResult.stopSequence = detectedStopSequence;
|
|
326
326
|
}
|
|
327
327
|
|
|
328
|
+
// Use the actual raw request from provider (after adapter transformations)
|
|
329
|
+
rawRequest = streamResult.rawRequest;
|
|
330
|
+
|
|
331
|
+
// Call onRequest callback with actual request sent to API
|
|
332
|
+
onRequest?.(rawRequest);
|
|
333
|
+
|
|
328
334
|
rawResponse = streamResult.raw;
|
|
329
335
|
lastStopReason = this.mapStopReason(streamResult.stopReason);
|
|
330
336
|
|
|
@@ -455,9 +461,13 @@ export class Membrane {
|
|
|
455
461
|
break;
|
|
456
462
|
}
|
|
457
463
|
|
|
458
|
-
// Build final response
|
|
464
|
+
// Build final response - only use NEW content (after initial prefill) for content parsing
|
|
465
|
+
// The full accumulated text is still available in raw.response
|
|
466
|
+
const fullAccumulated = parser.getAccumulated();
|
|
467
|
+
const newContent = fullAccumulated.slice(initialPrefillLength);
|
|
468
|
+
|
|
459
469
|
return this.buildFinalResponse(
|
|
460
|
-
|
|
470
|
+
newContent,
|
|
461
471
|
contentBlocks,
|
|
462
472
|
lastStopReason,
|
|
463
473
|
totalUsage,
|
|
@@ -473,8 +483,12 @@ export class Membrane {
|
|
|
473
483
|
} catch (error) {
|
|
474
484
|
// Check if this is an abort error
|
|
475
485
|
if (this.isAbortError(error)) {
|
|
486
|
+
// Only use NEW content (after initial prefill) for partial content
|
|
487
|
+
const fullAccumulated = parser.getAccumulated();
|
|
488
|
+
const newContent = fullAccumulated.slice(initialPrefillLength);
|
|
489
|
+
|
|
476
490
|
return this.buildAbortedResponse(
|
|
477
|
-
|
|
491
|
+
newContent,
|
|
478
492
|
totalUsage,
|
|
479
493
|
executedToolCalls,
|
|
480
494
|
executedToolResults,
|
|
@@ -527,10 +541,6 @@ export class Membrane {
|
|
|
527
541
|
while (toolDepth <= maxToolDepth) {
|
|
528
542
|
// Build provider request with native tools
|
|
529
543
|
const providerRequest = this.buildNativeToolRequest(request, messages);
|
|
530
|
-
rawRequest = providerRequest;
|
|
531
|
-
|
|
532
|
-
// Call onRequest callback for logging
|
|
533
|
-
onRequest?.(rawRequest);
|
|
534
544
|
|
|
535
545
|
// Stream from provider
|
|
536
546
|
let textAccumulated = '';
|
|
@@ -557,6 +567,12 @@ export class Membrane {
|
|
|
557
567
|
{ signal }
|
|
558
568
|
);
|
|
559
569
|
|
|
570
|
+
// Use the actual raw request from provider (after adapter transformations)
|
|
571
|
+
rawRequest = streamResult.rawRequest;
|
|
572
|
+
|
|
573
|
+
// Call onRequest callback with actual request sent to API
|
|
574
|
+
onRequest?.(rawRequest);
|
|
575
|
+
|
|
560
576
|
rawResponse = streamResult.raw;
|
|
561
577
|
lastStopReason = this.mapStopReason(streamResult.stopReason);
|
|
562
578
|
|
package/src/providers/anthropic.ts
CHANGED
|
@@ -61,18 +61,16 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
61
61
|
options?: ProviderRequestOptions
|
|
62
62
|
): Promise<ProviderResponse> {
|
|
63
63
|
const anthropicRequest = this.buildRequest(request);
|
|
64
|
-
|
|
64
|
+
const fullRequest = { ...anthropicRequest, stream: false as const };
|
|
65
|
+
|
|
65
66
|
try {
|
|
66
|
-
const response = await this.client.messages.create({
|
|
67
|
-
...anthropicRequest,
|
|
68
|
-
stream: false,
|
|
69
|
-
}, {
|
|
67
|
+
const response = await this.client.messages.create(fullRequest, {
|
|
70
68
|
signal: options?.signal,
|
|
71
69
|
});
|
|
72
|
-
|
|
73
|
-
return this.parseResponse(response);
|
|
70
|
+
|
|
71
|
+
return this.parseResponse(response, fullRequest);
|
|
74
72
|
} catch (error) {
|
|
75
|
-
throw this.handleError(error);
|
|
73
|
+
throw this.handleError(error, fullRequest);
|
|
76
74
|
}
|
|
77
75
|
}
|
|
78
76
|
|
|
@@ -82,16 +80,18 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
82
80
|
options?: ProviderRequestOptions
|
|
83
81
|
): Promise<ProviderResponse> {
|
|
84
82
|
const anthropicRequest = this.buildRequest(request);
|
|
85
|
-
|
|
83
|
+
// Note: stream is implicitly true when using .stream()
|
|
84
|
+
const fullRequest = { ...anthropicRequest, stream: true };
|
|
85
|
+
|
|
86
86
|
try {
|
|
87
87
|
const stream = await this.client.messages.stream(anthropicRequest, {
|
|
88
88
|
signal: options?.signal,
|
|
89
89
|
});
|
|
90
|
-
|
|
90
|
+
|
|
91
91
|
let accumulated = '';
|
|
92
92
|
const contentBlocks: unknown[] = [];
|
|
93
93
|
let currentBlockIndex = -1;
|
|
94
|
-
|
|
94
|
+
|
|
95
95
|
for await (const event of stream) {
|
|
96
96
|
if (event.type === 'content_block_start') {
|
|
97
97
|
currentBlockIndex = event.index;
|
|
@@ -110,12 +110,12 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
110
110
|
callbacks.onContentBlock?.(currentBlockIndex, contentBlocks[currentBlockIndex]);
|
|
111
111
|
}
|
|
112
112
|
}
|
|
113
|
-
|
|
113
|
+
|
|
114
114
|
const finalMessage = await stream.finalMessage();
|
|
115
|
-
return this.parseResponse(finalMessage);
|
|
116
|
-
|
|
115
|
+
return this.parseResponse(finalMessage, fullRequest);
|
|
116
|
+
|
|
117
117
|
} catch (error) {
|
|
118
|
-
throw this.handleError(error);
|
|
118
|
+
throw this.handleError(error, fullRequest);
|
|
119
119
|
}
|
|
120
120
|
}
|
|
121
121
|
|
|
@@ -161,7 +161,7 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
161
161
|
return params;
|
|
162
162
|
}
|
|
163
163
|
|
|
164
|
-
private parseResponse(response: Anthropic.Message): ProviderResponse {
|
|
164
|
+
private parseResponse(response: Anthropic.Message, rawRequest: unknown): ProviderResponse {
|
|
165
165
|
return {
|
|
166
166
|
content: response.content,
|
|
167
167
|
stopReason: response.stop_reason ?? 'end_turn',
|
|
@@ -173,43 +173,45 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
173
173
|
cacheReadTokens: (response.usage as any).cache_read_input_tokens,
|
|
174
174
|
},
|
|
175
175
|
model: response.model,
|
|
176
|
+
rawRequest,
|
|
176
177
|
raw: response,
|
|
177
178
|
};
|
|
178
179
|
}
|
|
179
180
|
|
|
180
|
-
private handleError(error: unknown): MembraneError {
|
|
181
|
+
private handleError(error: unknown, rawRequest?: unknown): MembraneError {
|
|
181
182
|
if (error instanceof Anthropic.APIError) {
|
|
182
183
|
const status = error.status;
|
|
183
184
|
const message = error.message;
|
|
184
|
-
|
|
185
|
+
|
|
185
186
|
if (status === 429) {
|
|
186
187
|
// Try to parse retry-after
|
|
187
188
|
const retryAfter = this.parseRetryAfter(error);
|
|
188
|
-
return rateLimitError(message, retryAfter, error);
|
|
189
|
+
return rateLimitError(message, retryAfter, error, rawRequest);
|
|
189
190
|
}
|
|
190
|
-
|
|
191
|
+
|
|
191
192
|
if (status === 401) {
|
|
192
|
-
return authError(message, error);
|
|
193
|
+
return authError(message, error, rawRequest);
|
|
193
194
|
}
|
|
194
|
-
|
|
195
|
+
|
|
195
196
|
if (message.includes('context') || message.includes('too long')) {
|
|
196
|
-
return contextLengthError(message, error);
|
|
197
|
+
return contextLengthError(message, error, rawRequest);
|
|
197
198
|
}
|
|
198
|
-
|
|
199
|
+
|
|
199
200
|
if (status >= 500) {
|
|
200
|
-
return serverError(message, status, error);
|
|
201
|
+
return serverError(message, status, error, rawRequest);
|
|
201
202
|
}
|
|
202
203
|
}
|
|
203
|
-
|
|
204
|
+
|
|
204
205
|
if (error instanceof Error && error.name === 'AbortError') {
|
|
205
|
-
return abortError();
|
|
206
|
+
return abortError(undefined, rawRequest);
|
|
206
207
|
}
|
|
207
|
-
|
|
208
|
+
|
|
208
209
|
return new MembraneError({
|
|
209
210
|
type: 'unknown',
|
|
210
211
|
message: error instanceof Error ? error.message : String(error),
|
|
211
212
|
retryable: false,
|
|
212
213
|
rawError: error,
|
|
214
|
+
rawRequest,
|
|
213
215
|
});
|
|
214
216
|
}
|
|
215
217
|
|
package/src/providers/openai-compatible.ts
CHANGED
|
@@ -130,12 +130,12 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
130
130
|
options?: ProviderRequestOptions
|
|
131
131
|
): Promise<ProviderResponse> {
|
|
132
132
|
const openAIRequest = this.buildRequest(request);
|
|
133
|
-
|
|
133
|
+
|
|
134
134
|
try {
|
|
135
135
|
const response = await this.makeRequest(openAIRequest, options);
|
|
136
|
-
return this.parseResponse(response, request.model);
|
|
136
|
+
return this.parseResponse(response, request.model, openAIRequest);
|
|
137
137
|
} catch (error) {
|
|
138
|
-
throw this.handleError(error);
|
|
138
|
+
throw this.handleError(error, openAIRequest);
|
|
139
139
|
}
|
|
140
140
|
}
|
|
141
141
|
|
|
@@ -146,7 +146,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
146
146
|
): Promise<ProviderResponse> {
|
|
147
147
|
const openAIRequest = this.buildRequest(request);
|
|
148
148
|
openAIRequest.stream = true;
|
|
149
|
-
|
|
149
|
+
|
|
150
150
|
try {
|
|
151
151
|
const response = await fetch(`${this.baseURL}/chat/completions`, {
|
|
152
152
|
method: 'POST',
|
|
@@ -154,42 +154,42 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
154
154
|
body: JSON.stringify(openAIRequest),
|
|
155
155
|
signal: options?.signal,
|
|
156
156
|
});
|
|
157
|
-
|
|
157
|
+
|
|
158
158
|
if (!response.ok) {
|
|
159
159
|
const errorText = await response.text();
|
|
160
160
|
throw new Error(`API error: ${response.status} ${errorText}`);
|
|
161
161
|
}
|
|
162
|
-
|
|
162
|
+
|
|
163
163
|
const reader = response.body?.getReader();
|
|
164
164
|
if (!reader) {
|
|
165
165
|
throw new Error('No response body');
|
|
166
166
|
}
|
|
167
|
-
|
|
167
|
+
|
|
168
168
|
const decoder = new TextDecoder();
|
|
169
169
|
let accumulated = '';
|
|
170
170
|
let finishReason = 'stop';
|
|
171
171
|
let toolCalls: OpenAIToolCall[] = [];
|
|
172
|
-
|
|
172
|
+
|
|
173
173
|
while (true) {
|
|
174
174
|
const { done, value } = await reader.read();
|
|
175
175
|
if (done) break;
|
|
176
|
-
|
|
176
|
+
|
|
177
177
|
const chunk = decoder.decode(value, { stream: true });
|
|
178
178
|
const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
|
|
179
|
-
|
|
179
|
+
|
|
180
180
|
for (const line of lines) {
|
|
181
181
|
const data = line.slice(6);
|
|
182
182
|
if (data === '[DONE]') continue;
|
|
183
|
-
|
|
183
|
+
|
|
184
184
|
try {
|
|
185
185
|
const parsed = JSON.parse(data);
|
|
186
186
|
const delta = parsed.choices?.[0]?.delta;
|
|
187
|
-
|
|
187
|
+
|
|
188
188
|
if (delta?.content) {
|
|
189
189
|
accumulated += delta.content;
|
|
190
190
|
callbacks.onChunk(delta.content);
|
|
191
191
|
}
|
|
192
|
-
|
|
192
|
+
|
|
193
193
|
// Handle streaming tool calls
|
|
194
194
|
if (delta?.tool_calls) {
|
|
195
195
|
for (const tc of delta.tool_calls) {
|
|
@@ -208,7 +208,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
208
208
|
}
|
|
209
209
|
}
|
|
210
210
|
}
|
|
211
|
-
|
|
211
|
+
|
|
212
212
|
if (parsed.choices?.[0]?.finish_reason) {
|
|
213
213
|
finishReason = parsed.choices[0].finish_reason;
|
|
214
214
|
}
|
|
@@ -217,21 +217,21 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
217
217
|
}
|
|
218
218
|
}
|
|
219
219
|
}
|
|
220
|
-
|
|
220
|
+
|
|
221
221
|
// Build response with accumulated data
|
|
222
222
|
const message: OpenAIMessage = {
|
|
223
223
|
role: 'assistant',
|
|
224
224
|
content: accumulated || null,
|
|
225
225
|
};
|
|
226
|
-
|
|
226
|
+
|
|
227
227
|
if (toolCalls.length > 0) {
|
|
228
228
|
message.tool_calls = toolCalls;
|
|
229
229
|
}
|
|
230
|
-
|
|
231
|
-
return this.parseStreamedResponse(message, finishReason, request.model);
|
|
232
|
-
|
|
230
|
+
|
|
231
|
+
return this.parseStreamedResponse(message, finishReason, request.model, openAIRequest);
|
|
232
|
+
|
|
233
233
|
} catch (error) {
|
|
234
|
-
throw this.handleError(error);
|
|
234
|
+
throw this.handleError(error, openAIRequest);
|
|
235
235
|
}
|
|
236
236
|
}
|
|
237
237
|
|
|
@@ -371,10 +371,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
371
371
|
return response.json() as Promise<OpenAIResponse>;
|
|
372
372
|
}
|
|
373
373
|
|
|
374
|
-
private parseResponse(response: OpenAIResponse, requestedModel: string): ProviderResponse {
|
|
374
|
+
private parseResponse(response: OpenAIResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
|
|
375
375
|
const choice = response.choices[0];
|
|
376
376
|
const message = choice?.message;
|
|
377
|
-
|
|
377
|
+
|
|
378
378
|
return {
|
|
379
379
|
content: this.messageToContent(message),
|
|
380
380
|
stopReason: this.mapFinishReason(choice?.finish_reason),
|
|
@@ -384,6 +384,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
384
384
|
outputTokens: response.usage?.completion_tokens ?? 0,
|
|
385
385
|
},
|
|
386
386
|
model: response.model ?? requestedModel,
|
|
387
|
+
rawRequest,
|
|
387
388
|
raw: response,
|
|
388
389
|
};
|
|
389
390
|
}
|
|
@@ -391,7 +392,8 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
391
392
|
private parseStreamedResponse(
|
|
392
393
|
message: OpenAIMessage,
|
|
393
394
|
finishReason: string,
|
|
394
|
-
requestedModel: string
|
|
395
|
+
requestedModel: string,
|
|
396
|
+
rawRequest?: unknown
|
|
395
397
|
): ProviderResponse {
|
|
396
398
|
return {
|
|
397
399
|
content: this.messageToContent(message),
|
|
@@ -402,6 +404,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
402
404
|
outputTokens: 0,
|
|
403
405
|
},
|
|
404
406
|
model: requestedModel,
|
|
407
|
+
rawRequest,
|
|
405
408
|
raw: { message, finish_reason: finishReason },
|
|
406
409
|
};
|
|
407
410
|
}
|
|
@@ -444,40 +447,41 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
444
447
|
}
|
|
445
448
|
}
|
|
446
449
|
|
|
447
|
-
private handleError(error: unknown): MembraneError {
|
|
450
|
+
private handleError(error: unknown, rawRequest?: unknown): MembraneError {
|
|
448
451
|
if (error instanceof Error) {
|
|
449
452
|
const message = error.message;
|
|
450
|
-
|
|
453
|
+
|
|
451
454
|
if (message.includes('429') || message.includes('rate')) {
|
|
452
|
-
return rateLimitError(message, undefined, error);
|
|
455
|
+
return rateLimitError(message, undefined, error, rawRequest);
|
|
453
456
|
}
|
|
454
|
-
|
|
457
|
+
|
|
455
458
|
if (message.includes('401') || message.includes('auth') || message.includes('Unauthorized')) {
|
|
456
|
-
return authError(message, error);
|
|
459
|
+
return authError(message, error, rawRequest);
|
|
457
460
|
}
|
|
458
|
-
|
|
461
|
+
|
|
459
462
|
if (message.includes('context') || message.includes('too long') || message.includes('maximum context')) {
|
|
460
|
-
return contextLengthError(message, error);
|
|
463
|
+
return contextLengthError(message, error, rawRequest);
|
|
461
464
|
}
|
|
462
|
-
|
|
465
|
+
|
|
463
466
|
if (message.includes('500') || message.includes('502') || message.includes('503')) {
|
|
464
|
-
return serverError(message, undefined, error);
|
|
467
|
+
return serverError(message, undefined, error, rawRequest);
|
|
465
468
|
}
|
|
466
|
-
|
|
469
|
+
|
|
467
470
|
if (error.name === 'AbortError') {
|
|
468
|
-
return abortError();
|
|
471
|
+
return abortError(undefined, rawRequest);
|
|
469
472
|
}
|
|
470
|
-
|
|
473
|
+
|
|
471
474
|
if (message.includes('network') || message.includes('fetch') || message.includes('ECONNREFUSED')) {
|
|
472
|
-
return networkError(message, error);
|
|
475
|
+
return networkError(message, error, rawRequest);
|
|
473
476
|
}
|
|
474
477
|
}
|
|
475
|
-
|
|
478
|
+
|
|
476
479
|
return new MembraneError({
|
|
477
480
|
type: 'unknown',
|
|
478
481
|
message: error instanceof Error ? error.message : String(error),
|
|
479
482
|
retryable: false,
|
|
480
483
|
rawError: error,
|
|
484
|
+
rawRequest,
|
|
481
485
|
});
|
|
482
486
|
}
|
|
483
487
|
}
|
package/src/providers/openai.ts
CHANGED
|
@@ -203,12 +203,12 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
203
203
|
options?: ProviderRequestOptions
|
|
204
204
|
): Promise<ProviderResponse> {
|
|
205
205
|
const openAIRequest = this.buildRequest(request);
|
|
206
|
-
|
|
206
|
+
|
|
207
207
|
try {
|
|
208
208
|
const response = await this.makeRequest(openAIRequest, options);
|
|
209
|
-
return this.parseResponse(response, request.model);
|
|
209
|
+
return this.parseResponse(response, request.model, openAIRequest);
|
|
210
210
|
} catch (error) {
|
|
211
|
-
throw this.handleError(error);
|
|
211
|
+
throw this.handleError(error, openAIRequest);
|
|
212
212
|
}
|
|
213
213
|
}
|
|
214
214
|
|
|
@@ -221,7 +221,7 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
221
221
|
openAIRequest.stream = true;
|
|
222
222
|
// Request usage data in stream for cache metrics
|
|
223
223
|
openAIRequest.stream_options = { include_usage: true };
|
|
224
|
-
|
|
224
|
+
|
|
225
225
|
try {
|
|
226
226
|
const response = await fetch(`${this.baseURL}/chat/completions`, {
|
|
227
227
|
method: 'POST',
|
|
@@ -229,43 +229,43 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
229
229
|
body: JSON.stringify(openAIRequest),
|
|
230
230
|
signal: options?.signal,
|
|
231
231
|
});
|
|
232
|
-
|
|
232
|
+
|
|
233
233
|
if (!response.ok) {
|
|
234
234
|
const errorText = await response.text();
|
|
235
235
|
throw new Error(`OpenAI API error: ${response.status} ${errorText}`);
|
|
236
236
|
}
|
|
237
|
-
|
|
237
|
+
|
|
238
238
|
const reader = response.body?.getReader();
|
|
239
239
|
if (!reader) {
|
|
240
240
|
throw new Error('No response body');
|
|
241
241
|
}
|
|
242
|
-
|
|
242
|
+
|
|
243
243
|
const decoder = new TextDecoder();
|
|
244
244
|
let accumulated = '';
|
|
245
245
|
let finishReason = 'stop';
|
|
246
246
|
let toolCalls: OpenAIToolCall[] = [];
|
|
247
247
|
let streamUsage: OpenAIResponse['usage'] | undefined;
|
|
248
|
-
|
|
248
|
+
|
|
249
249
|
while (true) {
|
|
250
250
|
const { done, value } = await reader.read();
|
|
251
251
|
if (done) break;
|
|
252
|
-
|
|
252
|
+
|
|
253
253
|
const chunk = decoder.decode(value, { stream: true });
|
|
254
254
|
const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
|
|
255
|
-
|
|
255
|
+
|
|
256
256
|
for (const line of lines) {
|
|
257
257
|
const data = line.slice(6);
|
|
258
258
|
if (data === '[DONE]') continue;
|
|
259
|
-
|
|
259
|
+
|
|
260
260
|
try {
|
|
261
261
|
const parsed = JSON.parse(data);
|
|
262
262
|
const delta = parsed.choices?.[0]?.delta;
|
|
263
|
-
|
|
263
|
+
|
|
264
264
|
if (delta?.content) {
|
|
265
265
|
accumulated += delta.content;
|
|
266
266
|
callbacks.onChunk(delta.content);
|
|
267
267
|
}
|
|
268
|
-
|
|
268
|
+
|
|
269
269
|
// Handle streaming tool calls
|
|
270
270
|
if (delta?.tool_calls) {
|
|
271
271
|
for (const tc of delta.tool_calls) {
|
|
@@ -284,11 +284,11 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
284
284
|
}
|
|
285
285
|
}
|
|
286
286
|
}
|
|
287
|
-
|
|
287
|
+
|
|
288
288
|
if (parsed.choices?.[0]?.finish_reason) {
|
|
289
289
|
finishReason = parsed.choices[0].finish_reason;
|
|
290
290
|
}
|
|
291
|
-
|
|
291
|
+
|
|
292
292
|
// Capture usage data (comes in final chunk with stream_options.include_usage)
|
|
293
293
|
if (parsed.usage) {
|
|
294
294
|
streamUsage = parsed.usage;
|
|
@@ -298,21 +298,21 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
298
298
|
}
|
|
299
299
|
}
|
|
300
300
|
}
|
|
301
|
-
|
|
301
|
+
|
|
302
302
|
// Build response with accumulated data
|
|
303
303
|
const message: OpenAIMessage = {
|
|
304
304
|
role: 'assistant',
|
|
305
305
|
content: accumulated || null,
|
|
306
306
|
};
|
|
307
|
-
|
|
307
|
+
|
|
308
308
|
if (toolCalls.length > 0) {
|
|
309
309
|
message.tool_calls = toolCalls;
|
|
310
310
|
}
|
|
311
|
-
|
|
312
|
-
return this.parseStreamedResponse(message, finishReason, request.model, streamUsage);
|
|
313
|
-
|
|
311
|
+
|
|
312
|
+
return this.parseStreamedResponse(message, finishReason, request.model, streamUsage, openAIRequest);
|
|
313
|
+
|
|
314
314
|
} catch (error) {
|
|
315
|
-
throw this.handleError(error);
|
|
315
|
+
throw this.handleError(error, openAIRequest);
|
|
316
316
|
}
|
|
317
317
|
}
|
|
318
318
|
|
|
@@ -460,13 +460,13 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
460
460
|
return response.json() as Promise<OpenAIResponse>;
|
|
461
461
|
}
|
|
462
462
|
|
|
463
|
-
private parseResponse(response: OpenAIResponse, requestedModel: string): ProviderResponse {
|
|
463
|
+
private parseResponse(response: OpenAIResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
|
|
464
464
|
const choice = response.choices[0];
|
|
465
465
|
const message = choice?.message;
|
|
466
|
-
|
|
466
|
+
|
|
467
467
|
// Extract prompt caching details (OpenAI automatic caching for prompts ≥1024 tokens)
|
|
468
468
|
const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens ?? 0;
|
|
469
|
-
|
|
469
|
+
|
|
470
470
|
return {
|
|
471
471
|
content: this.messageToContent(message),
|
|
472
472
|
stopReason: this.mapFinishReason(choice?.finish_reason),
|
|
@@ -479,6 +479,7 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
479
479
|
cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
|
|
480
480
|
},
|
|
481
481
|
model: response.model ?? requestedModel,
|
|
482
|
+
rawRequest,
|
|
482
483
|
raw: response,
|
|
483
484
|
};
|
|
484
485
|
}
|
|
@@ -487,11 +488,12 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
487
488
|
message: OpenAIMessage,
|
|
488
489
|
finishReason: string,
|
|
489
490
|
requestedModel: string,
|
|
490
|
-
streamUsage?: OpenAIResponse['usage']
|
|
491
|
+
streamUsage?: OpenAIResponse['usage'],
|
|
492
|
+
rawRequest?: unknown
|
|
491
493
|
): ProviderResponse {
|
|
492
494
|
// Extract cached tokens from stream usage if available
|
|
493
495
|
const cachedTokens = streamUsage?.prompt_tokens_details?.cached_tokens ?? 0;
|
|
494
|
-
|
|
496
|
+
|
|
495
497
|
return {
|
|
496
498
|
content: this.messageToContent(message),
|
|
497
499
|
stopReason: this.mapFinishReason(finishReason),
|
|
@@ -502,6 +504,7 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
502
504
|
cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
|
|
503
505
|
},
|
|
504
506
|
model: requestedModel,
|
|
507
|
+
rawRequest,
|
|
505
508
|
raw: { message, finish_reason: finishReason, usage: streamUsage },
|
|
506
509
|
};
|
|
507
510
|
}
|
|
@@ -544,44 +547,45 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
544
547
|
}
|
|
545
548
|
}
|
|
546
549
|
|
|
547
|
-
private handleError(error: unknown): MembraneError {
|
|
550
|
+
private handleError(error: unknown, rawRequest?: unknown): MembraneError {
|
|
548
551
|
if (error instanceof Error) {
|
|
549
552
|
const message = error.message;
|
|
550
|
-
|
|
553
|
+
|
|
551
554
|
// OpenAI specific error patterns
|
|
552
555
|
if (message.includes('429') || message.includes('rate_limit')) {
|
|
553
556
|
// Try to extract retry-after
|
|
554
557
|
const retryMatch = message.match(/retry after (\d+)/i);
|
|
555
558
|
const retryAfter = retryMatch?.[1] ? parseInt(retryMatch[1], 10) * 1000 : undefined;
|
|
556
|
-
return rateLimitError(message, retryAfter, error);
|
|
559
|
+
return rateLimitError(message, retryAfter, error, rawRequest);
|
|
557
560
|
}
|
|
558
|
-
|
|
561
|
+
|
|
559
562
|
if (message.includes('401') || message.includes('invalid_api_key') || message.includes('Incorrect API key')) {
|
|
560
|
-
return authError(message, error);
|
|
563
|
+
return authError(message, error, rawRequest);
|
|
561
564
|
}
|
|
562
|
-
|
|
565
|
+
|
|
563
566
|
if (message.includes('context_length') || message.includes('maximum context') || message.includes('too long')) {
|
|
564
|
-
return contextLengthError(message, error);
|
|
567
|
+
return contextLengthError(message, error, rawRequest);
|
|
565
568
|
}
|
|
566
|
-
|
|
569
|
+
|
|
567
570
|
if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('server_error')) {
|
|
568
|
-
return serverError(message, undefined, error);
|
|
571
|
+
return serverError(message, undefined, error, rawRequest);
|
|
569
572
|
}
|
|
570
|
-
|
|
573
|
+
|
|
571
574
|
if (error.name === 'AbortError') {
|
|
572
|
-
return abortError();
|
|
575
|
+
return abortError(undefined, rawRequest);
|
|
573
576
|
}
|
|
574
|
-
|
|
577
|
+
|
|
575
578
|
if (message.includes('network') || message.includes('fetch') || message.includes('ECONNREFUSED')) {
|
|
576
|
-
return networkError(message, error);
|
|
579
|
+
return networkError(message, error, rawRequest);
|
|
577
580
|
}
|
|
578
581
|
}
|
|
579
|
-
|
|
582
|
+
|
|
580
583
|
return new MembraneError({
|
|
581
584
|
type: 'unknown',
|
|
582
585
|
message: error instanceof Error ? error.message : String(error),
|
|
583
586
|
retryable: false,
|
|
584
587
|
rawError: error,
|
|
588
|
+
rawRequest,
|
|
585
589
|
});
|
|
586
590
|
}
|
|
587
591
|
}
|