@animalabs/membrane 0.5.54 → 0.5.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/formatters/native.d.ts.map +1 -1
- package/dist/formatters/native.js +11 -0
- package/dist/formatters/native.js.map +1 -1
- package/dist/formatters/normalize-tool-pairs.d.ts +4 -2
- package/dist/formatters/normalize-tool-pairs.d.ts.map +1 -1
- package/dist/formatters/normalize-tool-pairs.js +95 -22
- package/dist/formatters/normalize-tool-pairs.js.map +1 -1
- package/dist/formatters/types.d.ts +26 -0
- package/dist/formatters/types.d.ts.map +1 -1
- package/dist/membrane.d.ts +10 -0
- package/dist/membrane.d.ts.map +1 -1
- package/dist/membrane.js +118 -13
- package/dist/membrane.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +83 -2
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +3 -0
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +57 -3
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +3 -0
- package/dist/providers/openai.js.map +1 -1
- package/dist/types/provider.d.ts +9 -0
- package/dist/types/provider.d.ts.map +1 -1
- package/dist/types/request.d.ts +10 -0
- package/dist/types/request.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/formatters/native.ts +10 -0
- package/src/formatters/normalize-tool-pairs.ts +100 -25
- package/src/formatters/types.ts +28 -1
- package/src/membrane.ts +129 -13
- package/src/providers/anthropic.ts +87 -3
- package/src/providers/openai-compatible.ts +4 -0
- package/src/providers/openai-completions.ts +58 -2
- package/src/providers/openai.ts +4 -0
- package/src/types/provider.ts +10 -0
- package/src/types/request.ts +12 -1
package/src/providers/anthropic.ts
CHANGED
|
@@ -122,12 +122,20 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
122
122
|
let cacheReadTokens: number | undefined;
|
|
123
123
|
let stopReason: string = 'end_turn';
|
|
124
124
|
let stopSequence: string | undefined;
|
|
125
|
+
let stopDetails: unknown;
|
|
125
126
|
|
|
126
127
|
// Content block tracking — finalized on content_block_stop
|
|
127
128
|
const contentBlocks: Record<string, unknown>[] = [];
|
|
128
129
|
let currentBlockIndex = -1;
|
|
129
130
|
let currentBlockContent = '';
|
|
130
131
|
let currentBlockInputJson = '';
|
|
132
|
+
// When wrapThinkingTags is set (XML formatter path), native thinking
|
|
133
|
+
// deltas are wrapped in <thinking>...</thinking> on the chunk stream so
|
|
134
|
+
// the tag-based parser tracks them as thinking instead of visible text.
|
|
135
|
+
// Tag opened lazily on the first delta — display:'omitted' models emit
|
|
136
|
+
// thinking blocks with no thinking_delta at all (signature only).
|
|
137
|
+
const wrapThinkingTags = options?.wrapThinkingTags === true;
|
|
138
|
+
let thinkingTagOpen = false;
|
|
131
139
|
|
|
132
140
|
for await (const event of stream) {
|
|
133
141
|
resetIdleTimer();
|
|
@@ -152,7 +160,21 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
152
160
|
callbacks.onChunk(chunk);
|
|
153
161
|
} else if (event.delta.type === 'thinking_delta') {
|
|
154
162
|
currentBlockContent += event.delta.thinking;
|
|
163
|
+
if (wrapThinkingTags && !thinkingTagOpen) {
|
|
164
|
+
callbacks.onChunk('<thinking>');
|
|
165
|
+
thinkingTagOpen = true;
|
|
166
|
+
}
|
|
155
167
|
callbacks.onChunk(event.delta.thinking);
|
|
168
|
+
} else if ((event.delta as { type: string }).type === 'signature_delta') {
|
|
169
|
+
// Accumulate the cryptographic signature that authenticates this
|
|
170
|
+
// thinking block. Without this, signatures never land on the
|
|
171
|
+
// streaming path and the next request — which carries the block
|
|
172
|
+
// back in history — fails Anthropic's signature validation.
|
|
173
|
+
const sig = (event.delta as { signature?: string }).signature;
|
|
174
|
+
const block = contentBlocks[currentBlockIndex];
|
|
175
|
+
if (block && block.type === 'thinking' && sig) {
|
|
176
|
+
block.signature = ((block.signature as string | undefined) ?? '') + sig;
|
|
177
|
+
}
|
|
156
178
|
} else if ((event.delta as { type: string }).type === 'input_json_delta') {
|
|
157
179
|
currentBlockInputJson += (event.delta as { partial_json: string }).partial_json;
|
|
158
180
|
}
|
|
@@ -166,6 +188,10 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
166
188
|
block.text = currentBlockContent;
|
|
167
189
|
} else if (block.type === 'thinking') {
|
|
168
190
|
block.thinking = currentBlockContent;
|
|
191
|
+
if (thinkingTagOpen) {
|
|
192
|
+
callbacks.onChunk('</thinking>\n');
|
|
193
|
+
thinkingTagOpen = false;
|
|
194
|
+
}
|
|
169
195
|
} else if (block.type === 'tool_use' && currentBlockInputJson) {
|
|
170
196
|
try { block.input = JSON.parse(currentBlockInputJson); } catch { /* partial JSON */ }
|
|
171
197
|
}
|
|
@@ -176,9 +202,15 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
176
202
|
// All content blocks are finalized by the time message_delta arrives.
|
|
177
203
|
// Capture final metadata and exit — message_stop and the SSE connection
|
|
178
204
|
// teardown after it add only variable latency with no useful data.
|
|
179
|
-
const delta = event.delta as {
|
|
205
|
+
const delta = event.delta as {
|
|
206
|
+
stop_reason?: string;
|
|
207
|
+
stop_sequence?: string;
|
|
208
|
+
stop_details?: unknown;
|
|
209
|
+
};
|
|
180
210
|
stopReason = delta.stop_reason ?? 'end_turn';
|
|
181
211
|
stopSequence = delta.stop_sequence ?? undefined;
|
|
212
|
+
// stop_details carries refusal metadata (e.g., category: 'reasoning_extraction')
|
|
213
|
+
stopDetails = delta.stop_details ?? undefined;
|
|
182
214
|
const deltaUsage = event.usage as unknown as {
|
|
183
215
|
output_tokens: number;
|
|
184
216
|
cache_creation_input_tokens?: number | null;
|
|
@@ -219,6 +251,7 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
219
251
|
content: contentBlocks,
|
|
220
252
|
stop_reason: stopReason,
|
|
221
253
|
stop_sequence: stopSequence ?? null,
|
|
254
|
+
stop_details: stopDetails ?? null,
|
|
222
255
|
model,
|
|
223
256
|
usage: {
|
|
224
257
|
input_tokens: inputTokens,
|
|
@@ -249,7 +282,11 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
249
282
|
|
|
250
283
|
private buildRequest(request: ProviderRequest): Anthropic.MessageCreateParams {
|
|
251
284
|
// Strip provider-specific fields (e.g., sourceUrl for Gemini) from image blocks
|
|
252
|
-
// before sending to Anthropic, which rejects extra inputs
|
|
285
|
+
// before sending to Anthropic, which rejects extra inputs.
|
|
286
|
+
// Also normalize nested tool_result content blocks: Membrane uses camelCase
|
|
287
|
+
// `mediaType`, Anthropic expects snake_case `media_type`. Without this,
|
|
288
|
+
// an image returned by a tool reaches the API as `{source: {mediaType: ...}}`
|
|
289
|
+
// and is silently rejected (the model sees the text label only).
|
|
253
290
|
const sanitizedMessages = (request.messages as any[]).map((msg: any) => {
|
|
254
291
|
if (!Array.isArray(msg.content)) return msg;
|
|
255
292
|
return {
|
|
@@ -259,6 +296,12 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
259
296
|
const { sourceUrl, ...rest } = block;
|
|
260
297
|
return rest;
|
|
261
298
|
}
|
|
299
|
+
if (block.type === 'tool_result' && Array.isArray(block.content)) {
|
|
300
|
+
return {
|
|
301
|
+
...block,
|
|
302
|
+
content: toAnthropicToolResultContent(block.content as ContentBlock[]),
|
|
303
|
+
};
|
|
304
|
+
}
|
|
262
305
|
return block;
|
|
263
306
|
}),
|
|
264
307
|
};
|
|
@@ -396,6 +439,41 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
396
439
|
// Content Conversion Utilities
|
|
397
440
|
// ============================================================================
|
|
398
441
|
|
|
442
|
+
/**
|
|
443
|
+
* Convert Membrane tool-result content blocks to Anthropic's tool_result.content
|
|
444
|
+
* mixed array (text + image). This is what carries an image returned by a tool
|
|
445
|
+
* (e.g. an MCP fetch_attachment result) all the way to the model. Other block
|
|
446
|
+
* types are not valid inside tool_result.content per the Anthropic API and are
|
|
447
|
+
* dropped.
|
|
448
|
+
*/
|
|
449
|
+
function toAnthropicToolResultContent(
|
|
450
|
+
blocks: ContentBlock[],
|
|
451
|
+
): Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> {
|
|
452
|
+
const out: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> = [];
|
|
453
|
+
for (const block of blocks) {
|
|
454
|
+
if (block.type === 'text') {
|
|
455
|
+
out.push({ type: 'text', text: block.text });
|
|
456
|
+
} else if (block.type === 'image') {
|
|
457
|
+
if (block.source.type === 'base64') {
|
|
458
|
+
out.push({
|
|
459
|
+
type: 'image',
|
|
460
|
+
source: {
|
|
461
|
+
type: 'base64',
|
|
462
|
+
media_type: block.source.mediaType as 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp',
|
|
463
|
+
data: block.source.data,
|
|
464
|
+
},
|
|
465
|
+
});
|
|
466
|
+
} else if (block.source.type === 'url') {
|
|
467
|
+
out.push({
|
|
468
|
+
type: 'image',
|
|
469
|
+
source: { type: 'url', url: block.source.url },
|
|
470
|
+
});
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
return out;
|
|
475
|
+
}
|
|
476
|
+
|
|
399
477
|
/**
|
|
400
478
|
* Convert normalized content blocks to Anthropic format
|
|
401
479
|
* Preserves cache_control for prompt caching
|
|
@@ -425,6 +503,11 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
|
|
|
425
503
|
data: block.source.data,
|
|
426
504
|
},
|
|
427
505
|
});
|
|
506
|
+
} else if (block.source.type === 'url') {
|
|
507
|
+
result.push({
|
|
508
|
+
type: 'image',
|
|
509
|
+
source: { type: 'url', url: block.source.url },
|
|
510
|
+
});
|
|
428
511
|
}
|
|
429
512
|
break;
|
|
430
513
|
|
|
@@ -454,7 +537,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
|
|
|
454
537
|
tool_use_id: block.toolUseId,
|
|
455
538
|
content: typeof block.content === 'string'
|
|
456
539
|
? block.content
|
|
457
|
-
:
|
|
540
|
+
: toAnthropicToolResultContent(block.content),
|
|
458
541
|
is_error: block.isError,
|
|
459
542
|
});
|
|
460
543
|
break;
|
|
@@ -463,6 +546,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
|
|
|
463
546
|
result.push({
|
|
464
547
|
type: 'thinking',
|
|
465
548
|
thinking: block.thinking,
|
|
549
|
+
...(block.signature ? { signature: block.signature } : {}),
|
|
466
550
|
} as any);
|
|
467
551
|
break;
|
|
468
552
|
}
|
package/src/providers/openai-compatible.ts
CHANGED
|
@@ -301,6 +301,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
301
301
|
params.frequency_penalty = request.frequencyPenalty;
|
|
302
302
|
}
|
|
303
303
|
|
|
304
|
+
if (request.repetitionPenalty !== undefined) {
|
|
305
|
+
params.repetition_penalty = request.repetitionPenalty;
|
|
306
|
+
}
|
|
307
|
+
|
|
304
308
|
// OpenAI-compatible APIs may limit stop sequences (OpenAI: 4) — truncate to be safe
|
|
305
309
|
if (request.stopSequences && request.stopSequences.length > 0) {
|
|
306
310
|
params.stop = request.stopSequences.slice(0, 4);
|
package/src/providers/openai-completions.ts
CHANGED
|
@@ -41,6 +41,7 @@ interface CompletionsRequest {
|
|
|
41
41
|
top_p?: number;
|
|
42
42
|
presence_penalty?: number;
|
|
43
43
|
frequency_penalty?: number;
|
|
44
|
+
repetition_penalty?: number;
|
|
44
45
|
stop?: string[];
|
|
45
46
|
stream?: boolean;
|
|
46
47
|
}
|
|
@@ -194,6 +195,19 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
194
195
|
let accumulated = '';
|
|
195
196
|
let finishReason = 'stop';
|
|
196
197
|
|
|
198
|
+
// Post-facto truncation of the adapter's own eotToken.
|
|
199
|
+
// The adapter serializes the prompt with this.eotToken and sends it as an
|
|
200
|
+
// API stop string, but some backends leak the stop string into streamed
|
|
201
|
+
// output. Since the bot-level formatter may use a different (or empty)
|
|
202
|
+
// turn-end token, downstream post-facto checks can't be relied on to
|
|
203
|
+
// catch it — the layer that introduced the token must truncate it.
|
|
204
|
+
// emittedLen tracks how much of `accumulated` has been emitted; a tail of
|
|
205
|
+
// eot.length-1 chars is held back in case the token is split across chunks.
|
|
206
|
+
const eot = this.eotToken;
|
|
207
|
+
let emittedLen = 0;
|
|
208
|
+
let eotFound = false;
|
|
209
|
+
|
|
210
|
+
streamLoop:
|
|
197
211
|
while (true) {
|
|
198
212
|
const { done, value } = await reader.read();
|
|
199
213
|
if (done) break;
|
|
@@ -210,7 +224,28 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
210
224
|
|
|
211
225
|
if (text) {
|
|
212
226
|
accumulated += text;
|
|
213
|
-
|
|
227
|
+
if (eot) {
|
|
228
|
+
const idx = accumulated.indexOf(eot);
|
|
229
|
+
if (idx !== -1) {
|
|
230
|
+
// Truncate at the token, flush the un-emitted prefix, stop
|
|
231
|
+
accumulated = accumulated.slice(0, idx);
|
|
232
|
+
if (accumulated.length > emittedLen) {
|
|
233
|
+
callbacks.onChunk(accumulated.slice(emittedLen));
|
|
234
|
+
}
|
|
235
|
+
emittedLen = accumulated.length;
|
|
236
|
+
eotFound = true;
|
|
237
|
+
finishReason = 'stop';
|
|
238
|
+
break streamLoop;
|
|
239
|
+
}
|
|
240
|
+
// Emit all but a held-back tail that could be a partial token
|
|
241
|
+
const safeLen = Math.max(emittedLen, accumulated.length - (eot.length - 1));
|
|
242
|
+
if (safeLen > emittedLen) {
|
|
243
|
+
callbacks.onChunk(accumulated.slice(emittedLen, safeLen));
|
|
244
|
+
emittedLen = safeLen;
|
|
245
|
+
}
|
|
246
|
+
} else {
|
|
247
|
+
callbacks.onChunk(text);
|
|
248
|
+
}
|
|
214
249
|
}
|
|
215
250
|
|
|
216
251
|
if (parsed.choices?.[0]?.finish_reason) {
|
|
@@ -222,6 +257,14 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
222
257
|
}
|
|
223
258
|
}
|
|
224
259
|
|
|
260
|
+
// Flush any held-back tail if the token never completed
|
|
261
|
+
if (eot && !eotFound && accumulated.length > emittedLen) {
|
|
262
|
+
callbacks.onChunk(accumulated.slice(emittedLen));
|
|
263
|
+
}
|
|
264
|
+
if (eotFound) {
|
|
265
|
+
try { await reader.cancel(); } catch { /* stream already closed */ }
|
|
266
|
+
}
|
|
267
|
+
|
|
225
268
|
return this.buildStreamedResponse(accumulated, finishReason, request.model, completionsRequest);
|
|
226
269
|
|
|
227
270
|
} catch (error) {
|
|
@@ -383,6 +426,10 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
383
426
|
params.frequency_penalty = request.frequencyPenalty;
|
|
384
427
|
}
|
|
385
428
|
|
|
429
|
+
if (request.repetitionPenalty !== undefined) {
|
|
430
|
+
params.repetition_penalty = request.repetitionPenalty;
|
|
431
|
+
}
|
|
432
|
+
|
|
386
433
|
if (stopSequences.length > 0) {
|
|
387
434
|
params.stop = stopSequences;
|
|
388
435
|
}
|
|
@@ -419,7 +466,16 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
419
466
|
|
|
420
467
|
private parseResponse(response: CompletionsResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
|
|
421
468
|
const choice = response.choices[0];
|
|
422
|
-
|
|
469
|
+
let text = choice?.text ?? '';
|
|
470
|
+
|
|
471
|
+
// Post-facto truncation of the adapter's own eotToken — some backends
|
|
472
|
+
// leak the stop string into the output (see stream() for details)
|
|
473
|
+
if (this.eotToken) {
|
|
474
|
+
const idx = text.indexOf(this.eotToken);
|
|
475
|
+
if (idx !== -1) {
|
|
476
|
+
text = text.slice(0, idx);
|
|
477
|
+
}
|
|
478
|
+
}
|
|
423
479
|
|
|
424
480
|
return {
|
|
425
481
|
content: this.textToContent(text),
|
package/src/providers/openai.ts
CHANGED
|
@@ -399,6 +399,10 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
399
399
|
params.frequency_penalty = request.frequencyPenalty;
|
|
400
400
|
}
|
|
401
401
|
|
|
402
|
+
if (request.repetitionPenalty !== undefined) {
|
|
403
|
+
params.repetition_penalty = request.repetitionPenalty;
|
|
404
|
+
}
|
|
405
|
+
|
|
402
406
|
// Reasoning models (o1, o3, o4) don't support stop sequences
|
|
403
407
|
// OpenAI limits stop sequences to 4 — truncate to fit
|
|
404
408
|
if (request.stopSequences && request.stopSequences.length > 0 && !noStopSupport(model)) {
|
package/src/types/provider.ts
CHANGED
|
@@ -215,6 +215,9 @@ export interface ProviderRequest {
|
|
|
215
215
|
/** Frequency penalty */
|
|
216
216
|
frequencyPenalty?: number;
|
|
217
217
|
|
|
218
|
+
/** Repetition penalty (multiplicative, vLLM/HuggingFace style) */
|
|
219
|
+
repetitionPenalty?: number;
|
|
220
|
+
|
|
218
221
|
/** Stop sequences */
|
|
219
222
|
stopSequences?: string[];
|
|
220
223
|
|
|
@@ -232,6 +235,13 @@ export interface ProviderRequestOptions {
|
|
|
232
235
|
idleTimeoutMs?: number;
|
|
233
236
|
/** Called with the raw API request body right before fetch */
|
|
234
237
|
onRequest?: (rawRequest: unknown) => void;
|
|
238
|
+
/**
|
|
239
|
+
* Wrap native thinking deltas in <thinking>...</thinking> tags on the
|
|
240
|
+
* onChunk stream. Used by the XML formatter path so its tag-based parser
|
|
241
|
+
* tracks thinking blocks; without this, native thinking content streams
|
|
242
|
+
* indistinguishably from visible text.
|
|
243
|
+
*/
|
|
244
|
+
wrapThinkingTags?: boolean;
|
|
235
245
|
}
|
|
236
246
|
|
|
237
247
|
export interface ProviderResponse {
|
package/src/types/request.ts
CHANGED
|
@@ -30,11 +30,22 @@ export interface GenerationConfig {
|
|
|
30
30
|
|
|
31
31
|
/** Frequency penalty (provider-specific) */
|
|
32
32
|
frequencyPenalty?: number;
|
|
33
|
-
|
|
33
|
+
|
|
34
|
+
/** Repetition penalty — multiplicative (vLLM/HuggingFace style, typically 1.0-1.2) */
|
|
35
|
+
repetitionPenalty?: number;
|
|
36
|
+
|
|
34
37
|
/** Enable thinking/reasoning mode */
|
|
35
38
|
thinking?: {
|
|
36
39
|
enabled: boolean;
|
|
37
40
|
budgetTokens?: number;
|
|
41
|
+
/** Thinking type for the API: 'enabled' (default, explicit budget) or 'adaptive' (model-managed) */
|
|
42
|
+
type?: 'enabled' | 'adaptive';
|
|
43
|
+
/**
|
|
44
|
+
* Controls how thinking content is returned: 'summarized' (readable summary)
|
|
45
|
+
* or 'omitted' (empty thinking field, signature only). Models like Fable 5 /
|
|
46
|
+
* Opus 4.7+ default to 'omitted' — set 'summarized' to receive thinking text.
|
|
47
|
+
*/
|
|
48
|
+
display?: 'summarized' | 'omitted';
|
|
38
49
|
};
|
|
39
50
|
|
|
40
51
|
/** Image generation config (Gemini) */
|