@animalabs/membrane 0.1.3 → 0.1.4

This diff shows the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
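The change threaded through this release is request capture for debugging: each adapter now keeps the exact provider payload it sent and attaches it as `rawRequest` to successful responses (via `parseResponse`) and to every `MembraneError` (via `handleError` and the error factories). A minimal sketch of the shapes this implies; the `errors` module is not part of this diff, so any field or parameter name not visible in the hunks below is an assumption:

// Sketch inferred from the call sites in this diff, not the package's
// actual declarations. Names not shown in the diff are assumptions.
interface ProviderResponse {
  content: unknown;
  stopReason: string;
  usage: { inputTokens: number; outputTokens: number; cacheReadTokens?: number };
  model: string;
  rawRequest?: unknown; // new in 0.1.4: the provider-specific request payload
  raw: unknown;         // the provider-specific response payload
}

declare class MembraneError extends Error {
  constructor(opts: {
    type: string;
    message: string;
    retryable: boolean;
    rawError?: unknown;
    rawRequest?: unknown; // new in 0.1.4
  });
}

// The error factories gain a trailing rawRequest argument, e.g.:
declare function rateLimitError(
  message: string,
  retryAfterMs?: number,
  rawError?: unknown,
  rawRequest?: unknown
): MembraneError;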
@@ -61,18 +61,16 @@ export class AnthropicAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const anthropicRequest = this.buildRequest(request);
-
+    const fullRequest = { ...anthropicRequest, stream: false as const };
+
     try {
-      const response = await this.client.messages.create({
-        ...anthropicRequest,
-        stream: false,
-      }, {
+      const response = await this.client.messages.create(fullRequest, {
         signal: options?.signal,
       });
-
-      return this.parseResponse(response);
+
+      return this.parseResponse(response, fullRequest);
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, fullRequest);
     }
   }
 
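Building `fullRequest` up front serves two purposes: the same object is handed to `parseResponse` and `handleError` for capture, and `stream: false as const` keeps the literal type so the non-streaming overload of `messages.create` is still selected; a bare `false` in a fresh object literal widens to `boolean` and would no longer match an overload discriminated on `stream`. A self-contained illustration of the narrowing, with hypothetical overloads standing in for the SDK's:

// Hypothetical API whose overloads switch on the `stream` flag.
declare function create(req: { model: string; stream: false }): Promise<string>;
declare function create(req: { model: string; stream: true }): AsyncIterable<string>;

const base = { model: 'test-model' };

const widened = { ...base, stream: false };           // { model: string; stream: boolean }
const narrowed = { ...base, stream: false as const }; // { model: string; stream: false }

// create(widened);   // compile error: `stream: boolean` matches neither overload
void create(narrowed); // OK: resolves the Promise<string> overload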
@@ -82,16 +80,18 @@ export class AnthropicAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const anthropicRequest = this.buildRequest(request);
-
+    // Note: stream is implicitly true when using .stream()
+    const fullRequest = { ...anthropicRequest, stream: true };
+
     try {
       const stream = await this.client.messages.stream(anthropicRequest, {
         signal: options?.signal,
       });
-
+
       let accumulated = '';
       const contentBlocks: unknown[] = [];
       let currentBlockIndex = -1;
-
+
       for await (const event of stream) {
         if (event.type === 'content_block_start') {
           currentBlockIndex = event.index;
@@ -110,12 +110,12 @@ export class AnthropicAdapter implements ProviderAdapter {
           callbacks.onContentBlock?.(currentBlockIndex, contentBlocks[currentBlockIndex]);
         }
       }
-
+
       const finalMessage = await stream.finalMessage();
-      return this.parseResponse(finalMessage);
-
+      return this.parseResponse(finalMessage, fullRequest);
+
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, fullRequest);
     }
   }
 
@@ -161,7 +161,7 @@ export class AnthropicAdapter implements ProviderAdapter {
     return params;
   }
 
-  private parseResponse(response: Anthropic.Message): ProviderResponse {
+  private parseResponse(response: Anthropic.Message, rawRequest: unknown): ProviderResponse {
     return {
       content: response.content,
       stopReason: response.stop_reason ?? 'end_turn',
@@ -173,43 +173,45 @@ export class AnthropicAdapter implements ProviderAdapter {
         cacheReadTokens: (response.usage as any).cache_read_input_tokens,
       },
       model: response.model,
+      rawRequest,
       raw: response,
     };
   }
 
-  private handleError(error: unknown): MembraneError {
+  private handleError(error: unknown, rawRequest?: unknown): MembraneError {
     if (error instanceof Anthropic.APIError) {
       const status = error.status;
       const message = error.message;
-
+
       if (status === 429) {
         // Try to parse retry-after
         const retryAfter = this.parseRetryAfter(error);
-        return rateLimitError(message, retryAfter, error);
+        return rateLimitError(message, retryAfter, error, rawRequest);
       }
-
+
       if (status === 401) {
-        return authError(message, error);
+        return authError(message, error, rawRequest);
       }
-
+
       if (message.includes('context') || message.includes('too long')) {
-        return contextLengthError(message, error);
+        return contextLengthError(message, error, rawRequest);
       }
-
+
       if (status >= 500) {
-        return serverError(message, status, error);
+        return serverError(message, status, error, rawRequest);
       }
     }
-
+
     if (error instanceof Error && error.name === 'AbortError') {
-      return abortError();
+      return abortError(undefined, rawRequest);
     }
-
+
     return new MembraneError({
       type: 'unknown',
       message: error instanceof Error ? error.message : String(error),
       retryable: false,
       rawError: error,
+      rawRequest,
     });
   }
 
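The factory call sites above imply that each helper simply forwards into the `MembraneError` constructor. A hypothetical sketch of one factory after this change; the errors module is not shown in this diff, so the body and the `type` tag are assumptions:

// Hypothetical implementation consistent with the call sites in this diff.
function authError(message: string, rawError?: unknown, rawRequest?: unknown): MembraneError {
  return new MembraneError({
    type: 'auth',     // assumed error-type tag
    message,
    retryable: false, // auth failures won't succeed on retry
    rawError,
    rawRequest,       // new in 0.1.4: carried through for debugging
  });
}

The call `abortError(undefined, rawRequest)` suggests its first parameter is an optional message that defaults when omitted.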
@@ -130,12 +130,12 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const openAIRequest = this.buildRequest(request);
-
+
     try {
       const response = await this.makeRequest(openAIRequest, options);
-      return this.parseResponse(response, request.model);
+      return this.parseResponse(response, request.model, openAIRequest);
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }
 
@@ -146,7 +146,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
   ): Promise<ProviderResponse> {
     const openAIRequest = this.buildRequest(request);
     openAIRequest.stream = true;
-
+
     try {
       const response = await fetch(`${this.baseURL}/chat/completions`, {
         method: 'POST',
@@ -154,42 +154,42 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
         body: JSON.stringify(openAIRequest),
         signal: options?.signal,
       });
-
+
       if (!response.ok) {
         const errorText = await response.text();
         throw new Error(`API error: ${response.status} ${errorText}`);
       }
-
+
       const reader = response.body?.getReader();
       if (!reader) {
         throw new Error('No response body');
       }
-
+
       const decoder = new TextDecoder();
       let accumulated = '';
       let finishReason = 'stop';
       let toolCalls: OpenAIToolCall[] = [];
-
+
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
-
+
         const chunk = decoder.decode(value, { stream: true });
         const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
-
+
         for (const line of lines) {
           const data = line.slice(6);
           if (data === '[DONE]') continue;
-
+
           try {
             const parsed = JSON.parse(data);
            const delta = parsed.choices?.[0]?.delta;
-
+
            if (delta?.content) {
              accumulated += delta.content;
              callbacks.onChunk(delta.content);
            }
-
+
            // Handle streaming tool calls
            if (delta?.tool_calls) {
              for (const tc of delta.tool_calls) {
@@ -208,7 +208,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
                }
              }
            }
-
+
            if (parsed.choices?.[0]?.finish_reason) {
              finishReason = parsed.choices[0].finish_reason;
            }
@@ -217,21 +217,21 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
          }
        }
      }
-
+
      // Build response with accumulated data
      const message: OpenAIMessage = {
        role: 'assistant',
        content: accumulated || null,
      };
-
+
      if (toolCalls.length > 0) {
        message.tool_calls = toolCalls;
      }
-
-      return this.parseStreamedResponse(message, finishReason, request.model);
-
+
+      return this.parseStreamedResponse(message, finishReason, request.model, openAIRequest);
+
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }
 
@@ -371,10 +371,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
     return response.json() as Promise<OpenAIResponse>;
   }
 
-  private parseResponse(response: OpenAIResponse, requestedModel: string): ProviderResponse {
+  private parseResponse(response: OpenAIResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
     const choice = response.choices[0];
     const message = choice?.message;
-
+
     return {
       content: this.messageToContent(message),
       stopReason: this.mapFinishReason(choice?.finish_reason),
@@ -384,6 +384,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
         outputTokens: response.usage?.completion_tokens ?? 0,
       },
       model: response.model ?? requestedModel,
+      rawRequest,
       raw: response,
     };
   }
@@ -391,7 +392,8 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
   private parseStreamedResponse(
     message: OpenAIMessage,
     finishReason: string,
-    requestedModel: string
+    requestedModel: string,
+    rawRequest?: unknown
   ): ProviderResponse {
     return {
       content: this.messageToContent(message),
@@ -402,6 +404,7 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
         outputTokens: 0,
       },
       model: requestedModel,
+      rawRequest,
       raw: { message, finish_reason: finishReason },
     };
   }
@@ -444,40 +447,41 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
     }
   }
 
-  private handleError(error: unknown): MembraneError {
+  private handleError(error: unknown, rawRequest?: unknown): MembraneError {
     if (error instanceof Error) {
       const message = error.message;
-
+
       if (message.includes('429') || message.includes('rate')) {
-        return rateLimitError(message, undefined, error);
+        return rateLimitError(message, undefined, error, rawRequest);
       }
-
+
       if (message.includes('401') || message.includes('auth') || message.includes('Unauthorized')) {
-        return authError(message, error);
+        return authError(message, error, rawRequest);
       }
-
+
       if (message.includes('context') || message.includes('too long') || message.includes('maximum context')) {
-        return contextLengthError(message, error);
+        return contextLengthError(message, error, rawRequest);
       }
-
+
      if (message.includes('500') || message.includes('502') || message.includes('503')) {
-        return serverError(message, undefined, error);
+        return serverError(message, undefined, error, rawRequest);
      }
-
+
      if (error.name === 'AbortError') {
-        return abortError();
+        return abortError(undefined, rawRequest);
      }
-
+
      if (message.includes('network') || message.includes('fetch') || message.includes('ECONNREFUSED')) {
-        return networkError(message, error);
+        return networkError(message, error, rawRequest);
      }
    }
-
+
    return new MembraneError({
      type: 'unknown',
      message: error instanceof Error ? error.message : String(error),
      retryable: false,
      rawError: error,
+      rawRequest,
    });
  }
 }
@@ -203,12 +203,12 @@ export class OpenAIAdapter implements ProviderAdapter {
     options?: ProviderRequestOptions
   ): Promise<ProviderResponse> {
     const openAIRequest = this.buildRequest(request);
-
+
     try {
       const response = await this.makeRequest(openAIRequest, options);
-      return this.parseResponse(response, request.model);
+      return this.parseResponse(response, request.model, openAIRequest);
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }
 
@@ -221,7 +221,7 @@ export class OpenAIAdapter implements ProviderAdapter {
     openAIRequest.stream = true;
     // Request usage data in stream for cache metrics
     openAIRequest.stream_options = { include_usage: true };
-
+
     try {
       const response = await fetch(`${this.baseURL}/chat/completions`, {
         method: 'POST',
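For context on the `stream_options` line above: with `include_usage` set, OpenAI's streaming API sends one extra chunk just before `[DONE]` whose `choices` array is empty and which carries the `usage` object; that is the chunk the `if (parsed.usage)` branch below captures into `streamUsage`. An abridged sketch of its shape:

// Abridged sketch of the usage-bearing final stream chunk (other fields
// exist but are omitted here).
interface FinalUsageChunk {
  choices: unknown[]; // empty on the usage-only chunk
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    prompt_tokens_details?: { cached_tokens?: number };
  };
}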
@@ -229,43 +229,43 @@ export class OpenAIAdapter implements ProviderAdapter {
         body: JSON.stringify(openAIRequest),
         signal: options?.signal,
       });
-
+
       if (!response.ok) {
         const errorText = await response.text();
         throw new Error(`OpenAI API error: ${response.status} ${errorText}`);
       }
-
+
       const reader = response.body?.getReader();
       if (!reader) {
         throw new Error('No response body');
       }
-
+
       const decoder = new TextDecoder();
       let accumulated = '';
       let finishReason = 'stop';
       let toolCalls: OpenAIToolCall[] = [];
       let streamUsage: OpenAIResponse['usage'] | undefined;
-
+
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
-
+
         const chunk = decoder.decode(value, { stream: true });
         const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
-
+
         for (const line of lines) {
           const data = line.slice(6);
           if (data === '[DONE]') continue;
-
+
           try {
             const parsed = JSON.parse(data);
            const delta = parsed.choices?.[0]?.delta;
-
+
            if (delta?.content) {
              accumulated += delta.content;
              callbacks.onChunk(delta.content);
            }
-
+
            // Handle streaming tool calls
            if (delta?.tool_calls) {
              for (const tc of delta.tool_calls) {
@@ -284,11 +284,11 @@ export class OpenAIAdapter implements ProviderAdapter {
                }
              }
            }
-
+
            if (parsed.choices?.[0]?.finish_reason) {
              finishReason = parsed.choices[0].finish_reason;
            }
-
+
            // Capture usage data (comes in final chunk with stream_options.include_usage)
            if (parsed.usage) {
              streamUsage = parsed.usage;
@@ -298,21 +298,21 @@ export class OpenAIAdapter implements ProviderAdapter {
          }
        }
      }
-
+
      // Build response with accumulated data
      const message: OpenAIMessage = {
        role: 'assistant',
        content: accumulated || null,
      };
-
+
      if (toolCalls.length > 0) {
        message.tool_calls = toolCalls;
      }
-
-      return this.parseStreamedResponse(message, finishReason, request.model, streamUsage);
-
+
+      return this.parseStreamedResponse(message, finishReason, request.model, streamUsage, openAIRequest);
+
     } catch (error) {
-      throw this.handleError(error);
+      throw this.handleError(error, openAIRequest);
     }
   }
 
@@ -460,13 +460,13 @@ export class OpenAIAdapter implements ProviderAdapter {
     return response.json() as Promise<OpenAIResponse>;
   }
 
-  private parseResponse(response: OpenAIResponse, requestedModel: string): ProviderResponse {
+  private parseResponse(response: OpenAIResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
     const choice = response.choices[0];
     const message = choice?.message;
-
+
     // Extract prompt caching details (OpenAI automatic caching for prompts ≥1024 tokens)
     const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens ?? 0;
-
+
     return {
       content: this.messageToContent(message),
       stopReason: this.mapFinishReason(choice?.finish_reason),
@@ -479,6 +479,7 @@ export class OpenAIAdapter implements ProviderAdapter {
         cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
       },
       model: response.model ?? requestedModel,
+      rawRequest,
       raw: response,
     };
   }
@@ -487,11 +488,12 @@ export class OpenAIAdapter implements ProviderAdapter {
     message: OpenAIMessage,
     finishReason: string,
     requestedModel: string,
-    streamUsage?: OpenAIResponse['usage']
+    streamUsage?: OpenAIResponse['usage'],
+    rawRequest?: unknown
   ): ProviderResponse {
     // Extract cached tokens from stream usage if available
     const cachedTokens = streamUsage?.prompt_tokens_details?.cached_tokens ?? 0;
-
+
     return {
       content: this.messageToContent(message),
       stopReason: this.mapFinishReason(finishReason),
@@ -502,6 +504,7 @@ export class OpenAIAdapter implements ProviderAdapter {
         cacheReadTokens: cachedTokens > 0 ? cachedTokens : undefined,
       },
       model: requestedModel,
+      rawRequest,
       raw: { message, finish_reason: finishReason, usage: streamUsage },
     };
   }
@@ -544,44 +547,45 @@ export class OpenAIAdapter implements ProviderAdapter {
     }
   }
 
-  private handleError(error: unknown): MembraneError {
+  private handleError(error: unknown, rawRequest?: unknown): MembraneError {
     if (error instanceof Error) {
       const message = error.message;
-
+
       // OpenAI specific error patterns
       if (message.includes('429') || message.includes('rate_limit')) {
         // Try to extract retry-after
         const retryMatch = message.match(/retry after (\d+)/i);
         const retryAfter = retryMatch?.[1] ? parseInt(retryMatch[1], 10) * 1000 : undefined;
-        return rateLimitError(message, retryAfter, error);
+        return rateLimitError(message, retryAfter, error, rawRequest);
       }
-
+
       if (message.includes('401') || message.includes('invalid_api_key') || message.includes('Incorrect API key')) {
-        return authError(message, error);
+        return authError(message, error, rawRequest);
       }
-
+
       if (message.includes('context_length') || message.includes('maximum context') || message.includes('too long')) {
-        return contextLengthError(message, error);
+        return contextLengthError(message, error, rawRequest);
       }
-
+
      if (message.includes('500') || message.includes('502') || message.includes('503') || message.includes('server_error')) {
-        return serverError(message, undefined, error);
+        return serverError(message, undefined, error, rawRequest);
      }
-
+
      if (error.name === 'AbortError') {
-        return abortError();
+        return abortError(undefined, rawRequest);
      }
-
+
      if (message.includes('network') || message.includes('fetch') || message.includes('ECONNREFUSED')) {
-        return networkError(message, error);
+        return networkError(message, error, rawRequest);
      }
    }
-
+
    return new MembraneError({
      type: 'unknown',
      message: error instanceof Error ? error.message : String(error),
      retryable: false,
      rawError: error,
+      rawRequest,
    });
  }
 }
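Taken together, the net effect for consumers is that both responses and thrown errors now carry the exact provider payload that produced them. A hypothetical usage sketch; the `adapter` variable, the `complete` method name, and public `type`/`rawRequest` properties on `MembraneError` are assumptions based on the signatures above:

// Hypothetical consumer code; names assumed from the diff's signatures.
try {
  const res = await adapter.complete(request);
  console.log(res.rawRequest); // the provider payload that produced res.raw
} catch (err) {
  if (err instanceof MembraneError) {
    // Log the failing request alongside the error for reproduction.
    console.error(err.type, err.rawRequest);
  }
  throw err;
}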