@animalabs/membrane 0.5.54 → 0.5.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/formatters/native.d.ts.map +1 -1
  2. package/dist/formatters/native.js +11 -0
  3. package/dist/formatters/native.js.map +1 -1
  4. package/dist/formatters/normalize-tool-pairs.d.ts +4 -2
  5. package/dist/formatters/normalize-tool-pairs.d.ts.map +1 -1
  6. package/dist/formatters/normalize-tool-pairs.js +95 -22
  7. package/dist/formatters/normalize-tool-pairs.js.map +1 -1
  8. package/dist/formatters/types.d.ts +26 -0
  9. package/dist/formatters/types.d.ts.map +1 -1
  10. package/dist/membrane.d.ts +10 -0
  11. package/dist/membrane.d.ts.map +1 -1
  12. package/dist/membrane.js +118 -13
  13. package/dist/membrane.js.map +1 -1
  14. package/dist/providers/anthropic.d.ts.map +1 -1
  15. package/dist/providers/anthropic.js +83 -2
  16. package/dist/providers/anthropic.js.map +1 -1
  17. package/dist/providers/openai-compatible.d.ts.map +1 -1
  18. package/dist/providers/openai-compatible.js +3 -0
  19. package/dist/providers/openai-compatible.js.map +1 -1
  20. package/dist/providers/openai-completions.d.ts.map +1 -1
  21. package/dist/providers/openai-completions.js +57 -3
  22. package/dist/providers/openai-completions.js.map +1 -1
  23. package/dist/providers/openai.d.ts.map +1 -1
  24. package/dist/providers/openai.js +3 -0
  25. package/dist/providers/openai.js.map +1 -1
  26. package/dist/types/provider.d.ts +9 -0
  27. package/dist/types/provider.d.ts.map +1 -1
  28. package/dist/types/request.d.ts +10 -0
  29. package/dist/types/request.d.ts.map +1 -1
  30. package/package.json +1 -1
  31. package/src/formatters/native.ts +10 -0
  32. package/src/formatters/normalize-tool-pairs.ts +100 -25
  33. package/src/formatters/types.ts +28 -1
  34. package/src/membrane.ts +129 -13
  35. package/src/providers/anthropic.ts +87 -3
  36. package/src/providers/openai-compatible.ts +4 -0
  37. package/src/providers/openai-completions.ts +58 -2
  38. package/src/providers/openai.ts +4 -0
  39. package/src/types/provider.ts +10 -0
  40. package/src/types/request.ts +12 -1
@@ -122,12 +122,20 @@ export class AnthropicAdapter implements ProviderAdapter {
122
122
  let cacheReadTokens: number | undefined;
123
123
  let stopReason: string = 'end_turn';
124
124
  let stopSequence: string | undefined;
125
+ let stopDetails: unknown;
125
126
 
126
127
  // Content block tracking — finalized on content_block_stop
127
128
  const contentBlocks: Record<string, unknown>[] = [];
128
129
  let currentBlockIndex = -1;
129
130
  let currentBlockContent = '';
130
131
  let currentBlockInputJson = '';
132
+ // When wrapThinkingTags is set (XML formatter path), native thinking
133
+ // deltas are wrapped in <thinking>...</thinking> on the chunk stream so
134
+ // the tag-based parser tracks them as thinking instead of visible text.
135
+ // Tag opened lazily on the first delta — display:'omitted' models emit
136
+ // thinking blocks with no thinking_delta at all (signature only).
137
+ const wrapThinkingTags = options?.wrapThinkingTags === true;
138
+ let thinkingTagOpen = false;
131
139
 
132
140
  for await (const event of stream) {
133
141
  resetIdleTimer();
@@ -152,7 +160,21 @@ export class AnthropicAdapter implements ProviderAdapter {
152
160
  callbacks.onChunk(chunk);
153
161
  } else if (event.delta.type === 'thinking_delta') {
154
162
  currentBlockContent += event.delta.thinking;
163
+ if (wrapThinkingTags && !thinkingTagOpen) {
164
+ callbacks.onChunk('<thinking>');
165
+ thinkingTagOpen = true;
166
+ }
155
167
  callbacks.onChunk(event.delta.thinking);
168
+ } else if ((event.delta as { type: string }).type === 'signature_delta') {
169
+ // Accumulate the cryptographic signature that authenticates this
170
+ // thinking block. Without this, signatures never land on the
171
+ // streaming path and the next request — which carries the block
172
+ // back in history — fails Anthropic's signature validation.
173
+ const sig = (event.delta as { signature?: string }).signature;
174
+ const block = contentBlocks[currentBlockIndex];
175
+ if (block && block.type === 'thinking' && sig) {
176
+ block.signature = ((block.signature as string | undefined) ?? '') + sig;
177
+ }
156
178
  } else if ((event.delta as { type: string }).type === 'input_json_delta') {
157
179
  currentBlockInputJson += (event.delta as { partial_json: string }).partial_json;
158
180
  }
@@ -166,6 +188,10 @@ export class AnthropicAdapter implements ProviderAdapter {
166
188
  block.text = currentBlockContent;
167
189
  } else if (block.type === 'thinking') {
168
190
  block.thinking = currentBlockContent;
191
+ if (thinkingTagOpen) {
192
+ callbacks.onChunk('</thinking>\n');
193
+ thinkingTagOpen = false;
194
+ }
169
195
  } else if (block.type === 'tool_use' && currentBlockInputJson) {
170
196
  try { block.input = JSON.parse(currentBlockInputJson); } catch { /* partial JSON */ }
171
197
  }
@@ -176,9 +202,15 @@ export class AnthropicAdapter implements ProviderAdapter {
176
202
  // All content blocks are finalized by the time message_delta arrives.
177
203
  // Capture final metadata and exit — message_stop and the SSE connection
178
204
  // teardown after it add only variable latency with no useful data.
179
- const delta = event.delta as { stop_reason?: string; stop_sequence?: string };
205
+ const delta = event.delta as {
206
+ stop_reason?: string;
207
+ stop_sequence?: string;
208
+ stop_details?: unknown;
209
+ };
180
210
  stopReason = delta.stop_reason ?? 'end_turn';
181
211
  stopSequence = delta.stop_sequence ?? undefined;
212
+ // stop_details carries refusal metadata (e.g., category: 'reasoning_extraction')
213
+ stopDetails = delta.stop_details ?? undefined;
182
214
  const deltaUsage = event.usage as unknown as {
183
215
  output_tokens: number;
184
216
  cache_creation_input_tokens?: number | null;
@@ -219,6 +251,7 @@ export class AnthropicAdapter implements ProviderAdapter {
219
251
  content: contentBlocks,
220
252
  stop_reason: stopReason,
221
253
  stop_sequence: stopSequence ?? null,
254
+ stop_details: stopDetails ?? null,
222
255
  model,
223
256
  usage: {
224
257
  input_tokens: inputTokens,
@@ -249,7 +282,11 @@ export class AnthropicAdapter implements ProviderAdapter {
249
282
 
250
283
  private buildRequest(request: ProviderRequest): Anthropic.MessageCreateParams {
251
284
  // Strip provider-specific fields (e.g., sourceUrl for Gemini) from image blocks
252
- // before sending to Anthropic, which rejects extra inputs
285
+ // before sending to Anthropic, which rejects extra inputs.
286
+ // Also normalize nested tool_result content blocks: Membrane uses camelCase
287
+ // `mediaType`, Anthropic expects snake_case `media_type`. Without this,
288
+ // an image returned by a tool reaches the API as `{source: {mediaType: ...}}`
289
+ // and is silently rejected (the model sees the text label only).
253
290
  const sanitizedMessages = (request.messages as any[]).map((msg: any) => {
254
291
  if (!Array.isArray(msg.content)) return msg;
255
292
  return {
@@ -259,6 +296,12 @@ export class AnthropicAdapter implements ProviderAdapter {
259
296
  const { sourceUrl, ...rest } = block;
260
297
  return rest;
261
298
  }
299
+ if (block.type === 'tool_result' && Array.isArray(block.content)) {
300
+ return {
301
+ ...block,
302
+ content: toAnthropicToolResultContent(block.content as ContentBlock[]),
303
+ };
304
+ }
262
305
  return block;
263
306
  }),
264
307
  };
@@ -396,6 +439,41 @@ export class AnthropicAdapter implements ProviderAdapter {
396
439
  // Content Conversion Utilities
397
440
  // ============================================================================
398
441
 
442
+ /**
443
+ * Convert Membrane tool-result content blocks to Anthropic's tool_result.content
444
+ * mixed array (text + image). This is what carries an image returned by a tool
445
+ * (e.g. an MCP fetch_attachment result) all the way to the model. Other block
446
+ * types are not valid inside tool_result.content per the Anthropic API and are
447
+ * dropped. Images whose source is neither 'base64' nor 'url' are skipped as well, so the returned array may be empty.
448
+ */
449
+ function toAnthropicToolResultContent(
450
+ blocks: ContentBlock[],
451
+ ): Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> {
452
+ const out: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> = [];
453
+ for (const block of blocks) {
454
+ if (block.type === 'text') {
455
+ out.push({ type: 'text', text: block.text }); // only `type` and `text` are copied; any other field on the block is not forwarded
456
+ } else if (block.type === 'image') {
457
+ if (block.source.type === 'base64') {
458
+ out.push({
459
+ type: 'image',
460
+ source: {
461
+ type: 'base64',
462
+ media_type: block.source.mediaType as 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp', // camelCase mediaType -> snake_case media_type; the cast is compile-time only, the value is not validated here
463
+ data: block.source.data,
464
+ },
465
+ });
466
+ } else if (block.source.type === 'url') { // url sources are forwarded with just the url
467
+ out.push({
468
+ type: 'image',
469
+ source: { type: 'url', url: block.source.url },
470
+ });
471
+ } // any other source type: nothing is pushed
472
+ }
473
+ }
474
+ return out; // output order follows input order
475
+ }
476
+
399
477
  /**
400
478
  * Convert normalized content blocks to Anthropic format
401
479
  * Preserves cache_control for prompt caching
@@ -425,6 +503,11 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
425
503
  data: block.source.data,
426
504
  },
427
505
  });
506
+ } else if (block.source.type === 'url') {
507
+ result.push({
508
+ type: 'image',
509
+ source: { type: 'url', url: block.source.url },
510
+ });
428
511
  }
429
512
  break;
430
513
 
@@ -454,7 +537,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
454
537
  tool_use_id: block.toolUseId,
455
538
  content: typeof block.content === 'string'
456
539
  ? block.content
457
- : JSON.stringify(block.content),
540
+ : toAnthropicToolResultContent(block.content),
458
541
  is_error: block.isError,
459
542
  });
460
543
  break;
@@ -463,6 +546,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
463
546
  result.push({
464
547
  type: 'thinking',
465
548
  thinking: block.thinking,
549
+ ...(block.signature ? { signature: block.signature } : {}),
466
550
  } as any);
467
551
  break;
468
552
  }
@@ -301,6 +301,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
301
301
  params.frequency_penalty = request.frequencyPenalty;
302
302
  }
303
303
 
304
+ if (request.repetitionPenalty !== undefined) {
305
+ params.repetition_penalty = request.repetitionPenalty;
306
+ }
307
+
304
308
  // OpenAI-compatible APIs may limit stop sequences (OpenAI: 4) — truncate to be safe
305
309
  if (request.stopSequences && request.stopSequences.length > 0) {
306
310
  params.stop = request.stopSequences.slice(0, 4);
@@ -41,6 +41,7 @@ interface CompletionsRequest {
41
41
  top_p?: number;
42
42
  presence_penalty?: number;
43
43
  frequency_penalty?: number;
44
+ repetition_penalty?: number;
44
45
  stop?: string[];
45
46
  stream?: boolean;
46
47
  }
@@ -194,6 +195,19 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
194
195
  let accumulated = '';
195
196
  let finishReason = 'stop';
196
197
 
198
+ // Post-facto truncation of the adapter's own eotToken.
199
+ // The adapter serializes the prompt with this.eotToken and sends it as an
200
+ // API stop string, but some backends leak the stop string into streamed
201
+ // output. Since the bot-level formatter may use a different (or empty)
202
+ // turn-end token, downstream post-facto checks can't be relied on to
203
+ // catch it — the layer that introduced the token must truncate it.
204
+ // emittedLen tracks how much of `accumulated` has been emitted; a tail of
205
+ // eot.length-1 chars is held back in case the token is split across chunks.
206
+ const eot = this.eotToken;
207
+ let emittedLen = 0;
208
+ let eotFound = false;
209
+
210
+ streamLoop:
197
211
  while (true) {
198
212
  const { done, value } = await reader.read();
199
213
  if (done) break;
@@ -210,7 +224,28 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
210
224
 
211
225
  if (text) {
212
226
  accumulated += text;
213
- callbacks.onChunk(text);
227
+ if (eot) {
228
+ const idx = accumulated.indexOf(eot);
229
+ if (idx !== -1) {
230
+ // Truncate at the token, flush the un-emitted prefix, stop
231
+ accumulated = accumulated.slice(0, idx);
232
+ if (accumulated.length > emittedLen) {
233
+ callbacks.onChunk(accumulated.slice(emittedLen));
234
+ }
235
+ emittedLen = accumulated.length;
236
+ eotFound = true;
237
+ finishReason = 'stop';
238
+ break streamLoop;
239
+ }
240
+ // Emit all but a held-back tail that could be a partial token
241
+ const safeLen = Math.max(emittedLen, accumulated.length - (eot.length - 1));
242
+ if (safeLen > emittedLen) {
243
+ callbacks.onChunk(accumulated.slice(emittedLen, safeLen));
244
+ emittedLen = safeLen;
245
+ }
246
+ } else {
247
+ callbacks.onChunk(text);
248
+ }
214
249
  }
215
250
 
216
251
  if (parsed.choices?.[0]?.finish_reason) {
@@ -222,6 +257,14 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
222
257
  }
223
258
  }
224
259
 
260
+ // Flush any held-back tail if the token never completed
261
+ if (eot && !eotFound && accumulated.length > emittedLen) {
262
+ callbacks.onChunk(accumulated.slice(emittedLen));
263
+ }
264
+ if (eotFound) {
265
+ try { await reader.cancel(); } catch { /* stream already closed */ }
266
+ }
267
+
225
268
  return this.buildStreamedResponse(accumulated, finishReason, request.model, completionsRequest);
226
269
 
227
270
  } catch (error) {
@@ -383,6 +426,10 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
383
426
  params.frequency_penalty = request.frequencyPenalty;
384
427
  }
385
428
 
429
+ if (request.repetitionPenalty !== undefined) {
430
+ params.repetition_penalty = request.repetitionPenalty;
431
+ }
432
+
386
433
  if (stopSequences.length > 0) {
387
434
  params.stop = stopSequences;
388
435
  }
@@ -419,7 +466,16 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
419
466
 
420
467
  private parseResponse(response: CompletionsResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
421
468
  const choice = response.choices[0];
422
- const text = choice?.text ?? '';
469
+ let text = choice?.text ?? '';
470
+
471
+ // Post-facto truncation of the adapter's own eotToken — some backends
472
+ // leak the stop string into the output (see stream() for details)
473
+ if (this.eotToken) {
474
+ const idx = text.indexOf(this.eotToken);
475
+ if (idx !== -1) {
476
+ text = text.slice(0, idx);
477
+ }
478
+ }
423
479
 
424
480
  return {
425
481
  content: this.textToContent(text),
@@ -399,6 +399,10 @@ export class OpenAIAdapter implements ProviderAdapter {
399
399
  params.frequency_penalty = request.frequencyPenalty;
400
400
  }
401
401
 
402
+ if (request.repetitionPenalty !== undefined) {
403
+ params.repetition_penalty = request.repetitionPenalty;
404
+ }
405
+
402
406
  // Reasoning models (o1, o3, o4) don't support stop sequences
403
407
  // OpenAI limits stop sequences to 4 — truncate to fit
404
408
  if (request.stopSequences && request.stopSequences.length > 0 && !noStopSupport(model)) {
@@ -215,6 +215,9 @@ export interface ProviderRequest {
215
215
  /** Frequency penalty */
216
216
  frequencyPenalty?: number;
217
217
 
218
+ /** Repetition penalty (multiplicative, vLLM/HuggingFace style) */
219
+ repetitionPenalty?: number;
220
+
218
221
  /** Stop sequences */
219
222
  stopSequences?: string[];
220
223
 
@@ -232,6 +235,13 @@ export interface ProviderRequestOptions {
232
235
  idleTimeoutMs?: number;
233
236
  /** Called with the raw API request body right before fetch */
234
237
  onRequest?: (rawRequest: unknown) => void;
238
+ /**
239
+ * Wrap native thinking deltas in <thinking>...</thinking> tags on the
240
+ * onChunk stream. Used by the XML formatter path so its tag-based parser
241
+ * tracks thinking blocks; without this, native thinking content streams
242
+ * indistinguishably from visible text.
243
+ */
244
+ wrapThinkingTags?: boolean;
235
245
  }
236
246
 
237
247
  export interface ProviderResponse {
@@ -30,11 +30,22 @@ export interface GenerationConfig {
30
30
 
31
31
  /** Frequency penalty (provider-specific) */
32
32
  frequencyPenalty?: number;
33
-
33
+
34
+ /** Repetition penalty — multiplicative (vLLM/HuggingFace style, typically 1.0-1.2) */
35
+ repetitionPenalty?: number;
36
+
34
37
  /** Enable thinking/reasoning mode */
35
38
  thinking?: {
36
39
  enabled: boolean;
37
40
  budgetTokens?: number;
41
+ /** Thinking type for the API: 'enabled' (default, explicit budget) or 'adaptive' (model-managed) */
42
+ type?: 'enabled' | 'adaptive';
43
+ /**
44
+ * Controls how thinking content is returned: 'summarized' (readable summary)
45
+ * or 'omitted' (empty thinking field, signature only). Models like Fable 5 /
46
+ * Opus 4.7+ default to 'omitted' — set 'summarized' to receive thinking text.
47
+ */
48
+ display?: 'summarized' | 'omitted';
38
49
  };
39
50
 
40
51
  /** Image generation config (Gemini) */