@animalabs/membrane 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/membrane.d.ts.map +1 -1
- package/dist/membrane.js +22 -10
- package/dist/membrane.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +21 -15
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +16 -13
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +16 -13
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers/openrouter.d.ts.map +1 -1
- package/dist/providers/openrouter.js +16 -13
- package/dist/providers/openrouter.js.map +1 -1
- package/dist/types/errors.d.ts +10 -10
- package/dist/types/errors.d.ts.map +1 -1
- package/dist/types/errors.js +20 -10
- package/dist/types/errors.js.map +1 -1
- package/dist/types/provider.d.ts +2 -0
- package/dist/types/provider.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/membrane.ts +29 -14
- package/src/providers/anthropic.ts +37 -30
- package/src/providers/openai-compatible.ts +41 -37
- package/src/providers/openai.ts +44 -40
- package/src/providers/openrouter.ts +46 -42
- package/src/types/errors.ts +20 -10
- package/src/types/provider.ts +4 -1
package/src/membrane.ts
CHANGED
@@ -95,23 +95,24 @@ export class Membrane {
       finalRequest = await this.config.hooks.beforeRequest(request, providerRequest) ?? providerRequest;
     }

-    rawRequest = finalRequest;
-
-    // Call onRequest callback for logging
-    options.onRequest?.(rawRequest);
-
     const providerResponse = await this.adapter.complete(finalRequest, {
       signal: options.signal,
       timeoutMs: options.timeoutMs,
     });

+    // Use the actual raw request from provider (after any adapter transformations)
+    rawRequest = providerResponse.rawRequest;
+
+    // Call onRequest callback with actual request sent to API
+    options.onRequest?.(rawRequest);
+
     const response = this.transformResponse(
       providerResponse,
       request,
       prefillResult,
       startTime,
       attempts,
-
+      rawRequest
     );

     // Call afterResponse hook
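
Net effect of this hunk: `rawRequest` is now captured from `providerResponse.rawRequest` after the adapter returns, so `onRequest` observers see the payload the adapter actually sent (stream flags, renamed fields, and other transformations included) instead of the pre-adapter `providerRequest`. One consequence is that the callback now fires after the provider call resolves rather than before it is made. A minimal consumer sketch, assuming the options shape shown above; `membrane` and `request` are illustrative names, not exports of the package:

// Hedged usage sketch of the new onRequest timing and payload.
const controller = new AbortController();
const response = await membrane.complete(request, {
  signal: controller.signal,
  timeoutMs: 30_000,
  onRequest: (raw) => {
    // `raw` is providerResponse.rawRequest: the provider-shaped body
    // actually sent to the API, after adapter transformations.
    console.debug('sent to provider:', JSON.stringify(raw));
  },
});
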
@@ -246,10 +247,6 @@ export class Membrane {
     try {
       // Tool execution loop
       while (toolDepth <= maxToolDepth) {
-        rawRequest = providerRequest;
-
-        // Call onRequest callback for logging
-        onRequest?.(rawRequest);

         // Track if we manually detected a stop sequence (API doesn't always stop)
         let detectedStopSequence: string | null = null;
@@ -325,6 +322,12 @@ export class Membrane {
           streamResult.stopSequence = detectedStopSequence;
         }

+        // Use the actual raw request from provider (after adapter transformations)
+        rawRequest = streamResult.rawRequest;
+
+        // Call onRequest callback with actual request sent to API
+        onRequest?.(rawRequest);
+
         rawResponse = streamResult.raw;
         lastStopReason = this.mapStopReason(streamResult.stopReason);

@@ -527,10 +530,6 @@ export class Membrane {
       while (toolDepth <= maxToolDepth) {
         // Build provider request with native tools
         const providerRequest = this.buildNativeToolRequest(request, messages);
-        rawRequest = providerRequest;
-
-        // Call onRequest callback for logging
-        onRequest?.(rawRequest);

         // Stream from provider
         let textAccumulated = '';
@@ -557,6 +556,12 @@ export class Membrane {
           { signal }
         );

+        // Use the actual raw request from provider (after adapter transformations)
+        rawRequest = streamResult.rawRequest;
+
+        // Call onRequest callback with actual request sent to API
+        onRequest?.(rawRequest);
+
        rawResponse = streamResult.raw;
        lastStopReason = this.mapStopReason(streamResult.stopReason);

@@ -740,6 +745,14 @@ export class Membrane {
       input_schema: tool.inputSchema,
     }));

+    // Build thinking config for native extended thinking
+    const thinking = request.config.thinking?.enabled
+      ? {
+          type: 'enabled' as const,
+          budget_tokens: request.config.thinking.budgetTokens ?? 5000,
+        }
+      : undefined;
+
     return {
       model: request.config.model,
       maxTokens: request.config.maxTokens,
@@ -747,6 +760,7 @@ export class Membrane {
       messages: providerMessages,
       system: request.system,
       tools,
+      thinking,
       extra: request.providerParams,
     };
   }
@@ -811,6 +825,7 @@ export class Membrane {
     const prefillResult = transformToPrefill(request, {
       assistantName: this.config.assistantParticipant ?? 'Claude',
       promptCaching: true, // Enable cache control by default
+      prefillThinking: request.config.thinking?.enabled ?? false,
       additionalStopSequences,
       maxParticipantsForStop,
     });
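
The three thinking hunks above wire extended thinking through membrane.ts end to end: `buildNativeToolRequest` translates `request.config.thinking` into a provider-level `thinking` block (with `budget_tokens` defaulting to 5000), and the prefill path gains a matching `prefillThinking` flag. A sketch of the caller-side config this implies; the `thinking` field names are taken from the diff, the surrounding values are illustrative:

// Illustrative request config; thinking.enabled and thinking.budgetTokens
// are the fields read by buildNativeToolRequest above.
const request = {
  config: {
    model: 'claude-sonnet-4-20250514', // hypothetical model id
    maxTokens: 1024,
    thinking: {
      enabled: true,      // emits { type: 'enabled', ... } in the provider request
      budgetTokens: 8000, // becomes budget_tokens; falls back to 5000 if omitted
    },
  },
  // ...messages, system, tools as elsewhere in membrane.ts
};
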
package/src/providers/anthropic.ts
CHANGED

@@ -61,18 +61,16 @@ export class AnthropicAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const anthropicRequest = this.buildRequest(request);
-
+    const fullRequest = { ...anthropicRequest, stream: false as const };
+
     try {
-      const response = await this.client.messages.create({
-        ...anthropicRequest,
-        stream: false,
-      }, {
+      const response = await this.client.messages.create(fullRequest, {
         signal: options?.signal,
       });
-
-      return this.parseResponse(response);
+
+      return this.parseResponse(response, fullRequest);
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, fullRequest);
     }
   }

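
A TypeScript detail in this hunk: because the request is now bound to a variable before the SDK call, a plain `stream: false` would widen to `boolean`; writing `stream: false as const` keeps the literal type, which lets the Anthropic SDK's overloads resolve `messages.create` to its non-streaming return type. A reduced sketch of the pattern, with client setup elided:

// `as const` pins stream to the literal false, so create() is inferred
// as the non-streaming overload and `response` is a plain Message.
const fullRequest = { ...anthropicRequest, stream: false as const };
const response = await client.messages.create(fullRequest, { signal });
// response.content is usable directly; no stream handling needed.
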
@@ -82,16 +80,18 @@ export class AnthropicAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const anthropicRequest = this.buildRequest(request);
-
+    // Note: stream is implicitly true when using .stream()
+    const fullRequest = { ...anthropicRequest, stream: true };
+
     try {
       const stream = await this.client.messages.stream(anthropicRequest, {
         signal: options?.signal,
       });
-
+
       let accumulated = '';
       const contentBlocks: unknown[] = [];
       let currentBlockIndex = -1;
-
+
       for await (const event of stream) {
         if (event.type === 'content_block_start') {
           currentBlockIndex = event.index;
@@ -110,12 +110,12 @@ export class AnthropicAdapter implements ProviderAdapter {
           callbacks.onContentBlock?.(currentBlockIndex, contentBlocks[currentBlockIndex]);
         }
       }
-
+
       const finalMessage = await stream.finalMessage();
-      return this.parseResponse(finalMessage);
-
+      return this.parseResponse(finalMessage, fullRequest);
+
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, fullRequest);
     }
   }

@@ -147,16 +147,21 @@ export class AnthropicAdapter implements ProviderAdapter {
     if (request.tools && request.tools.length > 0) {
       params.tools = request.tools as Anthropic.Tool[];
     }
-
+
+    // Handle extended thinking
+    if ((request as any).thinking) {
+      (params as any).thinking = (request as any).thinking;
+    }
+
     // Apply extra params
     if (request.extra) {
       Object.assign(params, request.extra);
     }
-
+
     return params;
   }

-  private parseResponse(response: Anthropic.Message): ProviderResponse {
+  private parseResponse(response: Anthropic.Message, rawRequest: unknown): ProviderResponse {
     return {
       content: response.content,
       stopReason: response.stop_reason ?? 'end_turn',
@@ -168,43 +173,45 @@ export class AnthropicAdapter implements ProviderAdapter {
         cacheReadTokens: (response.usage as any).cache_read_input_tokens,
       },
       model: response.model,
+      rawRequest,
       raw: response,
     };
   }

-  private handleError(error: unknown): MembraneError {
+  private handleError(error: unknown, rawRequest?: unknown): MembraneError {
     if (error instanceof Anthropic.APIError) {
       const status = error.status;
       const message = error.message;
-
+
       if (status === 429) {
         // Try to parse retry-after
         const retryAfter = this.parseRetryAfter(error);
-        return rateLimitError(message, retryAfter, error);
+        return rateLimitError(message, retryAfter, error, rawRequest);
       }
-
+
       if (status === 401) {
-        return authError(message, error);
+        return authError(message, error, rawRequest);
       }
-
+
       if (message.includes('context') || message.includes('too long')) {
-        return contextLengthError(message, error);
+        return contextLengthError(message, error, rawRequest);
       }
-
+
       if (status >= 500) {
-        return serverError(message, status, error);
+        return serverError(message, status, error, rawRequest);
       }
     }
-
+
     if (error instanceof Error && error.name === 'AbortError') {
-      return abortError();
+      return abortError(undefined, rawRequest);
     }
-
+
     return new MembraneError({
       type: 'unknown',
       message: error instanceof Error ? error.message : String(error),
       retryable: false,
       rawError: error,
+      rawRequest,
     });
   }

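
With `handleError` threading `rawRequest` into every factory call and into the fallback `MembraneError`, a failure can be correlated with the exact provider payload that caused it. A hedged consumer sketch; it assumes `MembraneError` exposes the `rawRequest` and `type` it is constructed with, which the `types/errors.ts` entry in the file list suggests but this diff does not show:

try {
  await membrane.complete(request);
} catch (err) {
  if (err instanceof MembraneError) {
    // Assumed property: the provider-shaped payload that triggered the
    // failure, making rate-limit and context-length reports reproducible.
    console.error(err.type, err.message);
    console.error('offending request:', JSON.stringify(err.rawRequest));
  }
  throw err;
}
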
package/src/providers/openai-compatible.ts
CHANGED

@@ -130,12 +130,12 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const openAIRequest = this.buildRequest(request);
-
+
     try {
       const response = await this.makeRequest(openAIRequest, options);
-      return this.parseResponse(response, request.model);
+      return this.parseResponse(response, request.model, openAIRequest);
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }

@@ -146,7 +146,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
   ): Promise<ProviderResponse> {
     const openAIRequest = this.buildRequest(request);
     openAIRequest.stream = true;
-
+
     try {
       const response = await fetch(`${this.baseURL}/chat/completions`, {
         method: 'POST',
@@ -154,42 +154,42 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
         body: JSON.stringify(openAIRequest),
         signal: options?.signal,
       });
-
+
       if (!response.ok) {
         const errorText = await response.text();
         throw new Error(`API error: ${response.status} ${errorText}`);
       }
-
+
       const reader = response.body?.getReader();
       if (!reader) {
         throw new Error('No response body');
       }
-
+
       const decoder = new TextDecoder();
       let accumulated = '';
       let finishReason = 'stop';
       let toolCalls: OpenAIToolCall[] = [];
-
+
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
-
+
         const chunk = decoder.decode(value, { stream: true });
         const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
-
+
         for (const line of lines) {
           const data = line.slice(6);
           if (data === '[DONE]') continue;
-
+
           try {
             const parsed = JSON.parse(data);
             const delta = parsed.choices?.[0]?.delta;
-
+
             if (delta?.content) {
               accumulated += delta.content;
               callbacks.onChunk(delta.content);
             }
-
+
             // Handle streaming tool calls
             if (delta?.tool_calls) {
               for (const tc of delta.tool_calls) {
@@ -208,7 +208,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
                 }
               }
             }
-
+
             if (parsed.choices?.[0]?.finish_reason) {
               finishReason = parsed.choices[0].finish_reason;
             }
@@ -217,21 +217,21 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
           }
         }
       }
-
+
       // Build response with accumulated data
       const message: OpenAIMessage = {
         role: 'assistant',
         content: accumulated || null,
       };
-
+
       if (toolCalls.length > 0) {
         message.tool_calls = toolCalls;
       }
-
-      return this.parseStreamedResponse(message, finishReason, request.model);
-
+
+      return this.parseStreamedResponse(message, finishReason, request.model, openAIRequest);
+
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }

@@ -371,10 +371,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
     return response.json() as Promise<OpenAIResponse>;
   }

-  private parseResponse(response: OpenAIResponse, requestedModel: string): ProviderResponse {
+  private parseResponse(response: OpenAIResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
     const choice = response.choices[0];
     const message = choice?.message;
-
+
     return {
       content: this.messageToContent(message),
       stopReason: this.mapFinishReason(choice?.finish_reason),
@@ -384,6 +384,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
         outputTokens: response.usage?.completion_tokens ?? 0,
       },
       model: response.model ?? requestedModel,
+      rawRequest,
       raw: response,
     };
   }
@@ -391,7 +392,8 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
   private parseStreamedResponse(
     message: OpenAIMessage,
     finishReason: string,
-    requestedModel: string
+    requestedModel: string,
+    rawRequest?: unknown
   ): ProviderResponse {
     return {
       content: this.messageToContent(message),
@@ -402,6 +404,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
         outputTokens: 0,
       },
       model: requestedModel,
+      rawRequest,
       raw: { message, finish_reason: finishReason },
     };
   }
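
These `rawRequest` additions line up with the two added lines in `package/dist/types/provider.d.ts` from the file list, which points to `ProviderResponse` gaining the field. The declaration itself is not in this diff, so the following is an inference from the adapter return values rather than the published type:

// Inferred shape; only rawRequest appears to be new in 0.1.4.
interface ProviderResponse {
  content: unknown;
  stopReason: string;
  usage: { inputTokens: number; outputTokens: number; cacheReadTokens?: number };
  model: string;
  rawRequest?: unknown; // provider-shaped payload actually sent to the API
  raw: unknown;         // provider-shaped response body
}
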
@@ -444,40 +447,41 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
     }
   }

-  private handleError(error: unknown): MembraneError {
+  private handleError(error: unknown, rawRequest?: unknown): MembraneError {
     if (error instanceof Error) {
       const message = error.message;
-
+
       if (message.includes('429') || message.includes('rate')) {
-        return rateLimitError(message, undefined, error);
+        return rateLimitError(message, undefined, error, rawRequest);
       }
-
+
       if (message.includes('401') || message.includes('auth') || message.includes('Unauthorized')) {
-        return authError(message, error);
+        return authError(message, error, rawRequest);
       }
-
+
       if (message.includes('context') || message.includes('too long') || message.includes('maximum context')) {
-        return contextLengthError(message, error);
+        return contextLengthError(message, error, rawRequest);
       }
-
+
       if (message.includes('500') || message.includes('502') || message.includes('503')) {
-        return serverError(message, undefined, error);
+        return serverError(message, undefined, error, rawRequest);
       }
-
+
       if (error.name === 'AbortError') {
-        return abortError();
+        return abortError(undefined, rawRequest);
       }
-
+
       if (message.includes('network') || message.includes('fetch') || message.includes('ECONNREFUSED')) {
-        return networkError(message, error);
+        return networkError(message, error, rawRequest);
       }
     }
-
+
     return new MembraneError({
       type: 'unknown',
       message: error instanceof Error ? error.message : String(error),
       retryable: false,
       rawError: error,
+      rawRequest,
     });
   }
 }
package/src/providers/openai.ts
CHANGED
@@ -203,12 +203,12 @@ export class OpenAIAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const openAIRequest = this.buildRequest(request);
-
+
     try {
       const response = await this.makeRequest(openAIRequest, options);
-      return this.parseResponse(response, request.model);
+      return this.parseResponse(response, request.model, openAIRequest);
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }

@@ -221,7 +221,7 @@ export class OpenAIAdapter implements ProviderAdapter {
     openAIRequest.stream = true;
     // Request usage data in stream for cache metrics
     openAIRequest.stream_options = { include_usage: true };
-
+
     try {
       const response = await fetch(`${this.baseURL}/chat/completions`, {
         method: 'POST',
@@ -229,43 +229,43 @@ export class OpenAIAdapter implements ProviderAdapter {
         body: JSON.stringify(openAIRequest),
         signal: options?.signal,
       });
-
+
       if (!response.ok) {
         const errorText = await response.text();
         throw new Error(`OpenAI API error: ${response.status} ${errorText}`);
       }
-
+
       const reader = response.body?.getReader();
       if (!reader) {
         throw new Error('No response body');
       }
-
+
       const decoder = new TextDecoder();
       let accumulated = '';
       let finishReason = 'stop';
       let toolCalls: OpenAIToolCall[] = [];
       let streamUsage: OpenAIResponse['usage'] | undefined;
-
+
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
-
+
         const chunk = decoder.decode(value, { stream: true });
         const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
-
+
         for (const line of lines) {
           const data = line.slice(6);
           if (data === '[DONE]') continue;
-
+
           try {
             const parsed = JSON.parse(data);
             const delta = parsed.choices?.[0]?.delta;
-
+
             if (delta?.content) {
               accumulated += delta.content;
               callbacks.onChunk(delta.content);
             }
-
+
             // Handle streaming tool calls
             if (delta?.tool_calls) {
               for (const tc of delta.tool_calls) {
@@ -284,11 +284,11 @@ export class OpenAIAdapter implements ProviderAdapter {
                 }
               }
             }
-
+
             if (parsed.choices?.[0]?.finish_reason) {
               finishReason = parsed.choices[0].finish_reason;
             }
-
+
             // Capture usage data (comes in final chunk with stream_options.include_usage)
             if (parsed.usage) {
               streamUsage = parsed.usage;
@@ -298,21 +298,21 @@ export class OpenAIAdapter implements ProviderAdapter {
           }
         }
       }
-
+
       // Build response with accumulated data
       const message: OpenAIMessage = {
         role: 'assistant',
         content: accumulated || null,
       };
-
+
       if (toolCalls.length > 0) {
         message.tool_calls = toolCalls;
       }
-
-      return this.parseStreamedResponse(message, finishReason, request.model, streamUsage);
-
+
+      return this.parseStreamedResponse(message, finishReason, request.model, streamUsage, openAIRequest);
+
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }

@@ -460,13 +460,13 @@ export class OpenAIAdapter implements ProviderAdapter {
     return response.json() as Promise<OpenAIResponse>;
   }

-  private parseResponse(response: OpenAIResponse, requestedModel: string): ProviderResponse {
+  private parseResponse(response: OpenAIResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
     const choice = response.choices[0];
     const message = choice?.message;
-
+
     // Extract prompt caching details (OpenAI automatic caching for prompts ≥1024 tokens)
     const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens ?? 0;
-
+
     return {
       content: this.messageToContent(message),
       stopReason: this.mapFinishReason(choice?.finish_reason),
@@ -479,6 +479,7 @@ export class OpenAIAdapter implements ProviderAdapter {
         cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
       },
       model: response.model ?? requestedModel,
+      rawRequest,
       raw: response,
     };
   }
@@ -487,11 +488,12 @@ export class OpenAIAdapter implements ProviderAdapter {
     message: OpenAIMessage,
     finishReason: string,
     requestedModel: string,
-    streamUsage?: OpenAIResponse['usage']
+    streamUsage?: OpenAIResponse['usage'],
+    rawRequest?: unknown
   ): ProviderResponse {
     // Extract cached tokens from stream usage if available
     const cachedTokens = streamUsage?.prompt_tokens_details?.cached_tokens ?? 0;
-
+
     return {
       content: this.messageToContent(message),
       stopReason: this.mapFinishReason(finishReason),
@@ -502,6 +504,7 @@ export class OpenAIAdapter implements ProviderAdapter {
         cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
       },
       model: requestedModel,
+      rawRequest,
       raw: { message, finish_reason: finishReason, usage: streamUsage },
     };
   }
@@ -544,44 +547,45 @@ export class OpenAIAdapter implements ProviderAdapter {
     }
   }

-  private handleError(error: unknown): MembraneError {
+  private handleError(error: unknown, rawRequest?: unknown): MembraneError {
     if (error instanceof Error) {
       const message = error.message;
-
+
       // OpenAI specific error patterns
       if (message.includes('429') || message.includes('rate_limit')) {
         // Try to extract retry-after
         const retryMatch = message.match(/retry after (\d+)/i);
         const retryAfter = retryMatch?.[1] ? parseInt(retryMatch[1], 10) * 1000 : undefined;
-        return rateLimitError(message, retryAfter, error);
+        return rateLimitError(message, retryAfter, error, rawRequest);
       }
-
+
       if (message.includes('401') || message.includes('invalid_api_key') || message.includes('Incorrect API key')) {
-        return authError(message, error);
+        return authError(message, error, rawRequest);
       }
-
+
       if (message.includes('context_length') || message.includes('maximum context') || message.includes('too long')) {
-        return contextLengthError(message, error);
+        return contextLengthError(message, error, rawRequest);
       }
-
+
       if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('server_error')) {
-        return serverError(message, undefined, error);
+        return serverError(message, undefined, error, rawRequest);
      }
-
+
       if (error.name === 'AbortError') {
-        return abortError();
+        return abortError(undefined, rawRequest);
       }
-
+
       if (message.includes('network') || message.includes('fetch') || message.includes('ECONNREFUSED')) {
-        return networkError(message, error);
+        return networkError(message, error, rawRequest);
       }
     }
-
+
     return new MembraneError({
       type: 'unknown',
       message: error instanceof Error ? error.message : String(error),
       retryable: false,
       rawError: error,
+      rawRequest,
     });
   }
 }
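
Finally, the repeated call-site updates across the adapters shown here (and presumably openrouter.ts, which the file list also marks as changed) pin down the new arities of the error helpers in `src/types/errors.ts`, whose own hunks are not shown. Reconstructed signatures, inferred from the calls above; parameter names are guesses, only the argument order is attested:

// Inferred from adapter call sites in this diff, not the published source.
declare function rateLimitError(message: string, retryAfterMs?: number, rawError?: unknown, rawRequest?: unknown): MembraneError;
declare function authError(message: string, rawError?: unknown, rawRequest?: unknown): MembraneError;
declare function contextLengthError(message: string, rawError?: unknown, rawRequest?: unknown): MembraneError;
declare function serverError(message: string, status?: number, rawError?: unknown, rawRequest?: unknown): MembraneError;
declare function abortError(message?: string, rawRequest?: unknown): MembraneError;
declare function networkError(message: string, rawError?: unknown, rawRequest?: unknown): MembraneError;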