@animalabs/membrane 0.5.55 → 0.5.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/formatters/native.d.ts.map +1 -1
- package/dist/formatters/native.js +11 -0
- package/dist/formatters/native.js.map +1 -1
- package/dist/membrane.d.ts +28 -0
- package/dist/membrane.d.ts.map +1 -1
- package/dist/membrane.js +169 -17
- package/dist/membrane.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +94 -3
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/bedrock.d.ts.map +1 -1
- package/dist/providers/bedrock.js +14 -4
- package/dist/providers/bedrock.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +3 -0
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +57 -3
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +3 -0
- package/dist/providers/openai.js.map +1 -1
- package/dist/types/content.d.ts +6 -0
- package/dist/types/content.d.ts.map +1 -1
- package/dist/types/content.js.map +1 -1
- package/dist/types/provider.d.ts +9 -0
- package/dist/types/provider.d.ts.map +1 -1
- package/dist/types/request.d.ts +10 -0
- package/dist/types/request.d.ts.map +1 -1
- package/dist/types/tools.d.ts +9 -0
- package/dist/types/tools.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/formatters/native.ts +10 -0
- package/src/membrane.ts +191 -19
- package/src/providers/anthropic.ts +100 -5
- package/src/providers/bedrock.ts +13 -4
- package/src/providers/openai-compatible.ts +4 -0
- package/src/providers/openai-completions.ts +58 -2
- package/src/providers/openai.ts +4 -0
- package/src/types/content.ts +6 -0
- package/src/types/provider.ts +10 -0
- package/src/types/request.ts +12 -1
- package/src/types/tools.ts +14 -4
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../../src/types/tools.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,CAAC,EAAE,aAAa,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;CACH;AAMD,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,EAAE,MAAM,GAAG,sBAAsB,EAAE,CAAC;IAC3C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,MAAM,sBAAsB,GAC9B;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE;QAAE,IAAI,EAAE,QAAQ,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAMnF,MAAM,WAAW,WAAW;IAC1B,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAEhB,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IAEjB,2CAA2C;IAC3C,KAAK,EAAE,MAAM,CAAC;IAEd,oDAAoD;IACpD,eAAe,EAAE,UAAU,EAAE,CAAC;IAE9B,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../../src/types/tools.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,CAAC,EAAE,aAAa,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;CACH;AAMD,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,EAAE,MAAM,GAAG,sBAAsB,EAAE,CAAC;IAC3C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,MAAM,sBAAsB,GAC9B;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE;QAAE,IAAI,EAAE,QAAQ,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAMnF,MAAM,WAAW,WAAW;IAC1B,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAEhB,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IAEjB,2CAA2C;IAC3C,KAAK,EAAE,MAAM,CAAC;IAEd,oDAAoD;IACpD,eAAe,EAAE,UAAU,EAAE,CAAC;IAE9B,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAC;IAEpB;;;;;;;OAOG;IACH,YAAY,CAAC,EAAE,OAAO,cAAc,EAAE,YAAY,EAAE,CAAC;CACtD;AAMD,MAAM,WAAW,eAAe;IAC9B,wBAAwB;IACxB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAElB,uCAAuC;IACvC,UAAU,EAAE,MAAM,CAAC;IAEnB,sCAAsC;IACtC,SAAS,EAAE,MAAM,CAAC;IAElB,4CAA4C;IAC5C,SAAS,EAAE,MAAM,CAAC;CACnB"}
|
package/package.json
CHANGED
package/src/formatters/native.ts
CHANGED
|
@@ -385,10 +385,20 @@ export class NativeFormatter implements PrefillFormatter {
|
|
|
385
385
|
is_error: block.isError,
|
|
386
386
|
});
|
|
387
387
|
} else if (block.type === 'thinking') {
|
|
388
|
+
// Round-trip thinking blocks verbatim, including the signature — the
|
|
389
|
+
// API validates it and (on display:'omitted' models) decrypts it to
|
|
390
|
+
// reconstruct the original reasoning. Signature-only blocks (empty
|
|
391
|
+
// thinking field) are valid and must be passed back unchanged.
|
|
388
392
|
result.push({
|
|
389
393
|
type: 'thinking',
|
|
390
394
|
thinking: block.thinking,
|
|
395
|
+
...((block as { signature?: string }).signature
|
|
396
|
+
? { signature: (block as { signature?: string }).signature }
|
|
397
|
+
: {}),
|
|
391
398
|
});
|
|
399
|
+
} else if (block.type === 'redacted_thinking') {
|
|
400
|
+
// Pass through verbatim (carries encrypted data field)
|
|
401
|
+
result.push({ ...(block as unknown as Record<string, unknown>) });
|
|
392
402
|
} else if (block.type === 'document' || block.type === 'audio') {
|
|
393
403
|
hasUnsupportedMedia = true;
|
|
394
404
|
}
|
package/src/membrane.ts
CHANGED
|
@@ -292,6 +292,12 @@ export class Membrane {
|
|
|
292
292
|
// These can't be handled by the text-based XML parser, so we capture and append them
|
|
293
293
|
const extraContentBlocks: ContentBlock[] = [];
|
|
294
294
|
|
|
295
|
+
// Native thinking blocks from the provider (with signatures). The parser
|
|
296
|
+
// derives signature-less thinking blocks from <thinking> text (via
|
|
297
|
+
// wrapThinkingTags); signatures from these are merged into those after
|
|
298
|
+
// parsing, and signature-only blocks are prepended.
|
|
299
|
+
const providerThinkingBlocks: ContentBlock[] = [];
|
|
300
|
+
|
|
295
301
|
// Transform initial request using the formatter
|
|
296
302
|
let { providerRequest, prefillResult } = this.transformRequest(request, formatter);
|
|
297
303
|
|
|
@@ -385,6 +391,10 @@ export class Membrane {
|
|
|
385
391
|
{
|
|
386
392
|
signal,
|
|
387
393
|
normalizedRequest: request,
|
|
394
|
+
// The tag-based parser tracks thinking via <thinking> tags — ask the
|
|
395
|
+
// provider to wrap native thinking deltas so they don't stream as
|
|
396
|
+
// visible text (see ProviderRequestOptions.wrapThinkingTags)
|
|
397
|
+
wrapThinkingTags: true,
|
|
388
398
|
onRequest: (req) => {
|
|
389
399
|
rawRequest = req;
|
|
390
400
|
onRequest?.(req);
|
|
@@ -412,6 +422,10 @@ export class Membrane {
|
|
|
412
422
|
} as ContentBlock);
|
|
413
423
|
}
|
|
414
424
|
}
|
|
425
|
+
// Native thinking blocks carry the signature (encrypted full
|
|
426
|
+
// reasoning) — captured so consumers can persist and round-trip
|
|
427
|
+
// them for reasoning continuity.
|
|
428
|
+
this.captureProviderThinkingBlocks(streamResult.content, providerThinkingBlocks);
|
|
415
429
|
}
|
|
416
430
|
|
|
417
431
|
rawResponse = streamResult.raw;
|
|
@@ -700,6 +714,9 @@ export class Membrane {
|
|
|
700
714
|
response.content.push(...extraContentBlocks);
|
|
701
715
|
}
|
|
702
716
|
|
|
717
|
+
// Merge provider thinking signatures into parser-derived thinking blocks
|
|
718
|
+
this.mergeProviderThinkingBlocks(response.content, providerThinkingBlocks);
|
|
719
|
+
|
|
703
720
|
return response;
|
|
704
721
|
} catch (error) {
|
|
705
722
|
// Check if this is an abort error
|
|
@@ -1005,6 +1022,19 @@ export class Membrane {
|
|
|
1005
1022
|
content: block.content,
|
|
1006
1023
|
is_error: block.isError,
|
|
1007
1024
|
});
|
|
1025
|
+
} else if (block.type === 'thinking') {
|
|
1026
|
+
// Round-trip thinking blocks verbatim including the signature — the
|
|
1027
|
+
// API validates it and (on display:'omitted' models) decrypts it to
|
|
1028
|
+
// reconstruct prior reasoning. Empty thinking + signature is valid.
|
|
1029
|
+
content.push({
|
|
1030
|
+
type: 'thinking',
|
|
1031
|
+
thinking: (block as { thinking?: string }).thinking ?? '',
|
|
1032
|
+
...((block as { signature?: string }).signature
|
|
1033
|
+
? { signature: (block as { signature?: string }).signature }
|
|
1034
|
+
: {}),
|
|
1035
|
+
});
|
|
1036
|
+
} else if (block.type === 'redacted_thinking') {
|
|
1037
|
+
content.push({ ...(block as unknown as Record<string, unknown>) });
|
|
1008
1038
|
} else if (block.type === 'image') {
|
|
1009
1039
|
if (block.source.type === 'base64') {
|
|
1010
1040
|
const imageBlock: Record<string, unknown> = {
|
|
@@ -1081,13 +1111,8 @@ export class Membrane {
|
|
|
1081
1111
|
);
|
|
1082
1112
|
}
|
|
1083
1113
|
|
|
1084
|
-
// Build thinking config for native extended thinking
|
|
1085
|
-
const thinking = request.config
|
|
1086
|
-
? {
|
|
1087
|
-
type: 'enabled' as const,
|
|
1088
|
-
budget_tokens: request.config.thinking.budgetTokens ?? 5000,
|
|
1089
|
-
}
|
|
1090
|
-
: undefined;
|
|
1114
|
+
// Build thinking config for native extended thinking (budget clamped to max_tokens)
|
|
1115
|
+
const thinking = this.buildThinkingParam(request.config);
|
|
1091
1116
|
|
|
1092
1117
|
// Anthropic requires temperature=1 when extended thinking is enabled
|
|
1093
1118
|
const temperature = thinking ? 1 : request.config.temperature;
|
|
@@ -1125,9 +1150,12 @@ export class Membrane {
|
|
|
1125
1150
|
} else if (item.type === 'thinking') {
|
|
1126
1151
|
blocks.push({
|
|
1127
1152
|
type: 'thinking',
|
|
1128
|
-
thinking: item.thinking,
|
|
1129
|
-
signature: item.signature,
|
|
1153
|
+
thinking: item.thinking ?? '',
|
|
1154
|
+
...(item.signature ? { signature: item.signature } : {}),
|
|
1130
1155
|
});
|
|
1156
|
+
} else if (item.type === 'redacted_thinking') {
|
|
1157
|
+
// Pass through verbatim — carries the encrypted `data` payload
|
|
1158
|
+
blocks.push({ ...item } as ContentBlock);
|
|
1131
1159
|
} else if (item.type === 'generated_image') {
|
|
1132
1160
|
blocks.push({
|
|
1133
1161
|
type: 'generated_image',
|
|
@@ -1138,14 +1166,75 @@ export class Membrane {
|
|
|
1138
1166
|
}
|
|
1139
1167
|
return blocks;
|
|
1140
1168
|
}
|
|
1141
|
-
|
|
1169
|
+
|
|
1142
1170
|
if (typeof content === 'string') {
|
|
1143
1171
|
return [{ type: 'text', text: content }];
|
|
1144
1172
|
}
|
|
1145
|
-
|
|
1173
|
+
|
|
1146
1174
|
return [];
|
|
1147
1175
|
}
|
|
1148
1176
|
|
|
1177
|
+
/**
 * Collect the native reasoning blocks found in a provider response.
 *
 * Walks `providerContent` and appends to `sink`:
 *  - every `thinking` entry, normalized to `{ type, thinking, signature? }`
 *    (a missing `thinking` field becomes `''`; `signature` is only copied
 *    when it is truthy, so signature-only blocks are kept as well);
 *  - every `redacted_thinking` entry, as a shallow copy of the original.
 *
 * Anything else is ignored, and a non-array `providerContent` is a no-op.
 *
 * @param providerContent Raw content value from the provider response.
 * @param sink Array that receives the captured blocks (mutated in place).
 */
private captureProviderThinkingBlocks(
  providerContent: unknown,
  sink: ContentBlock[]
): void {
  if (!Array.isArray(providerContent)) {
    return;
  }

  for (const entry of providerContent) {
    switch (entry?.type) {
      case 'thinking': {
        const signature = (entry as any).signature;
        sink.push({
          type: 'thinking',
          thinking: (entry as any).thinking ?? '',
          ...(signature ? { signature } : {}),
        } as ContentBlock);
        break;
      }
      case 'redacted_thinking':
        // Shallow copy keeps whatever fields the provider attached.
        sink.push({ ...(entry as any) } as ContentBlock);
        break;
      default:
        break;
    }
  }
}
|
|
1200
|
+
|
|
1201
|
+
/**
 * Merge provider thinking signatures into parser-derived thinking blocks and
 * prepend every provider block that has no parser-derived counterpart.
 *
 * Pairing rules:
 *  - Provider `thinking` blocks that carry text are paired, in order, with
 *    the `thinking` blocks already present in `content`; a truthy provider
 *    signature is copied onto the paired block.
 *  - Provider `thinking` blocks with an empty `thinking` field (signature
 *    only) are never paired: they are assumed to have produced no text for
 *    the parser to see, so consuming a parsed slot for them would shift every
 *    later signature onto the wrong block.
 *  - Text-bearing provider blocks left over once the parsed blocks run out
 *    are treated as unmatched.
 *  - `redacted_thinking` blocks are always prepended verbatim.
 *
 * Unmatched thinking blocks are prepended first (in provider order), followed
 * by the redacted blocks.
 *
 * Mutates `content` in place (both the paired blocks and the array itself).
 *
 * @param content Parser-derived response content to update.
 * @param providerThinkingBlocks Native blocks captured from the provider.
 */
private mergeProviderThinkingBlocks(
  content: ContentBlock[],
  providerThinkingBlocks: ContentBlock[]
): void {
  if (providerThinkingBlocks.length === 0) return;

  const parsedThinking = content.filter(
    (b) => b.type === 'thinking'
  ) as Array<{ type: 'thinking'; thinking: string; signature?: string }>;

  const unmatched: ContentBlock[] = [];
  const redacted: ContentBlock[] = [];
  let nextParsed = 0;

  for (const block of providerThinkingBlocks) {
    if (block.type === 'redacted_thinking') {
      redacted.push(block);
      continue;
    }
    if (block.type !== 'thinking') continue;

    const { thinking, signature } = block as { thinking?: string; signature?: string };

    // Only text-bearing blocks may claim a parsed slot; signature-only blocks
    // fall through to `unmatched` so the pairing of later blocks stays aligned.
    const target = thinking ? parsedThinking[nextParsed] : undefined;
    if (target === undefined) {
      unmatched.push(block);
      continue;
    }

    nextParsed++;
    if (signature) {
      target.signature = signature;
    }
  }

  if (unmatched.length > 0 || redacted.length > 0) {
    content.unshift(...unmatched, ...redacted);
  }
}
|
|
1237
|
+
|
|
1149
1238
|
// ==========================================================================
|
|
1150
1239
|
// Internal Methods
|
|
1151
1240
|
// ==========================================================================
|
|
@@ -1172,8 +1261,10 @@ export class Membrane {
|
|
|
1172
1261
|
* Used by transformRequest, buildContinuationRequest, and buildContinuationRequestWithImages.
|
|
1173
1262
|
*/
|
|
1174
1263
|
private getBaseProviderParams(config: NormalizedRequest['config']) {
|
|
1264
|
+
// Build thinking config for native extended thinking
|
|
1265
|
+
const thinking = this.buildThinkingParam(config);
|
|
1175
1266
|
// Anthropic requires temperature=1 when extended thinking is enabled
|
|
1176
|
-
const temperature =
|
|
1267
|
+
const temperature = thinking ? 1 : config.temperature;
|
|
1177
1268
|
return {
|
|
1178
1269
|
model: config.model,
|
|
1179
1270
|
maxTokens: config.maxTokens,
|
|
@@ -1182,9 +1273,41 @@ export class Membrane {
|
|
|
1182
1273
|
topK: config.topK,
|
|
1183
1274
|
presencePenalty: config.presencePenalty,
|
|
1184
1275
|
frequencyPenalty: config.frequencyPenalty,
|
|
1276
|
+
repetitionPenalty: config.repetitionPenalty,
|
|
1277
|
+
thinking,
|
|
1185
1278
|
};
|
|
1186
1279
|
}
|
|
1187
1280
|
|
|
1281
|
+
/**
 * Build the provider thinking parameter from config.
 *
 * Returns `undefined` when thinking is not enabled. For type 'adaptive' only
 * the optional `display` setting is forwarded.
 *
 * For type 'enabled' (the default when `thinking.type` is unset) the API
 * requires max_tokens > budget_tokens and a minimum budget of 1024, so the
 * requested budget is normalized before it is sent:
 *  - a missing or non-finite budget falls back to the default (5000);
 *  - a budget below the minimum is raised to 1024 rather than silently
 *    disabling thinking;
 *  - when `maxTokens` is a number, the budget is capped at `maxTokens - 1024`
 *    so output tokens remain available.
 * If no valid budget fits (max_tokens too small), thinking is omitted
 * entirely rather than sending a request the API will reject.
 *
 * @param config Normalized request config (reads `thinking` and `maxTokens`).
 * @returns The provider thinking parameter, or `undefined` to omit it.
 */
private buildThinkingParam(config: NormalizedRequest['config']):
  | { type: 'adaptive'; display?: 'summarized' | 'omitted' }
  | { type: 'enabled'; budget_tokens: number; display?: 'summarized' | 'omitted' }
  | undefined {
  if (!config.thinking?.enabled) return undefined;

  const display = config.thinking.display;
  if ((config.thinking.type ?? 'enabled') === 'adaptive') {
    return { type: 'adaptive', ...(display ? { display } : {}) };
  }

  const minBudget = 1024;
  const defaultBudget = 5000;
  const outputReserve = 1024;

  const configured = config.thinking.budgetTokens;
  // Guard against NaN/Infinity, then lift sub-minimum budgets up to the floor.
  const requested =
    typeof configured === 'number' && Number.isFinite(configured)
      ? Math.max(configured, minBudget)
      : defaultBudget;

  const maxTokens = typeof config.maxTokens === 'number' ? config.maxTokens : undefined;
  const budget =
    maxTokens !== undefined ? Math.min(requested, maxTokens - outputReserve) : requested;

  if (budget < minBudget) {
    // Can't fit a valid thinking budget under max_tokens — skip thinking
    return undefined;
  }
  return { type: 'enabled', budget_tokens: budget, ...(display ? { display } : {}) };
}
|
|
1310
|
+
|
|
1188
1311
|
/**
|
|
1189
1312
|
* Transform a normalized request into provider format using the formatter
|
|
1190
1313
|
*/
|
|
@@ -1232,6 +1355,15 @@ export class Membrane {
|
|
|
1232
1355
|
},
|
|
1233
1356
|
};
|
|
1234
1357
|
|
|
1358
|
+
// The API rejects extended thinking combined with an assistant prefill.
|
|
1359
|
+
// Prefill-style builds (XML formatter) use the thinking config for the
|
|
1360
|
+
// literal `<thinking>` text prefix instead of the API feature — drop the
|
|
1361
|
+
// API param when the built request actually ends in an assistant prefill.
|
|
1362
|
+
// Chat-style builds (no prefill) keep it.
|
|
1363
|
+
if (buildResult.assistantPrefill && providerRequest.thinking) {
|
|
1364
|
+
delete providerRequest.thinking;
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1235
1367
|
return { providerRequest, prefillResult: buildResult };
|
|
1236
1368
|
}
|
|
1237
1369
|
|
|
@@ -1243,6 +1375,8 @@ export class Membrane {
|
|
|
1243
1375
|
timeoutMs?: number;
|
|
1244
1376
|
idleTimeoutMs?: number;
|
|
1245
1377
|
onRequest?: (rawRequest: unknown) => void;
|
|
1378
|
+
/** See ProviderRequestOptions.wrapThinkingTags */
|
|
1379
|
+
wrapThinkingTags?: boolean;
|
|
1246
1380
|
/**
|
|
1247
1381
|
* The original NormalizedRequest, threaded through so the
|
|
1248
1382
|
* `beforeRequest` hook can see both shapes (normalized + provider).
|
|
@@ -1292,6 +1426,9 @@ export class Membrane {
|
|
|
1292
1426
|
|
|
1293
1427
|
return {
|
|
1294
1428
|
...this.getBaseProviderParams(originalRequest.config),
|
|
1429
|
+
// Continuations always end in an assistant prefill — the API rejects
|
|
1430
|
+
// extended thinking combined with prefill, so never send the param here
|
|
1431
|
+
thinking: undefined,
|
|
1295
1432
|
messages,
|
|
1296
1433
|
system: prefillResult.systemContent
|
|
1297
1434
|
? (Array.isArray(prefillResult.systemContent) && prefillResult.systemContent.length > 0
|
|
@@ -1362,6 +1499,9 @@ export class Membrane {
|
|
|
1362
1499
|
|
|
1363
1500
|
return {
|
|
1364
1501
|
...this.getBaseProviderParams(originalRequest.config),
|
|
1502
|
+
// Continuations always end in an assistant prefill — the API rejects
|
|
1503
|
+
// extended thinking combined with prefill, so never send the param here
|
|
1504
|
+
thinking: undefined,
|
|
1365
1505
|
messages,
|
|
1366
1506
|
system: prefillResult.systemContent
|
|
1367
1507
|
? (Array.isArray(prefillResult.systemContent) && prefillResult.systemContent.length > 0
|
|
@@ -1410,9 +1550,12 @@ export class Membrane {
|
|
|
1410
1550
|
} else if (block.type === 'thinking') {
|
|
1411
1551
|
content.push({
|
|
1412
1552
|
type: 'thinking',
|
|
1413
|
-
thinking: block.thinking,
|
|
1414
|
-
signature: block.signature,
|
|
1553
|
+
thinking: block.thinking ?? '',
|
|
1554
|
+
...(block.signature ? { signature: block.signature } : {}),
|
|
1415
1555
|
});
|
|
1556
|
+
} else if (block.type === 'redacted_thinking') {
|
|
1557
|
+
// Pass through verbatim — carries the encrypted `data` payload
|
|
1558
|
+
content.push({ ...(block as any) } as ContentBlock);
|
|
1416
1559
|
} else if (block.type === 'generated_image') {
|
|
1417
1560
|
content.push({
|
|
1418
1561
|
type: 'generated_image',
|
|
@@ -1595,6 +1738,11 @@ export class Membrane {
|
|
|
1595
1738
|
return 'stop_sequence';
|
|
1596
1739
|
case 'tool_use':
|
|
1597
1740
|
return 'tool_use';
|
|
1741
|
+
case 'refusal':
|
|
1742
|
+
// Safety refusal (e.g., Fable 5 reasoning_extraction). Must survive
|
|
1743
|
+
// mapping — downstream consumers react to refusals (chapterx adds a
|
|
1744
|
+
// Discord reaction). Defaulting this to end_turn silently hid them.
|
|
1745
|
+
return 'refusal';
|
|
1598
1746
|
default:
|
|
1599
1747
|
return 'end_turn';
|
|
1600
1748
|
}
|
|
@@ -1769,6 +1917,11 @@ export class Membrane {
|
|
|
1769
1917
|
let rawRequest: unknown;
|
|
1770
1918
|
let rawResponse: unknown;
|
|
1771
1919
|
|
|
1920
|
+
// Native thinking blocks from the provider (with signatures) — merged
|
|
1921
|
+
// into the parser-derived content before the final response is emitted.
|
|
1922
|
+
// See streamWithXmlTools for the matching non-yielding logic.
|
|
1923
|
+
const providerThinkingBlocks: ContentBlock[] = [];
|
|
1924
|
+
|
|
1772
1925
|
// Track executed tool calls and results
|
|
1773
1926
|
const executedToolCalls: ToolCall[] = [];
|
|
1774
1927
|
const executedToolResults: ToolResult[] = [];
|
|
@@ -1876,6 +2029,10 @@ export class Membrane {
|
|
|
1876
2029
|
timeoutMs: options.timeoutMs,
|
|
1877
2030
|
idleTimeoutMs: options.idleTimeoutMs,
|
|
1878
2031
|
normalizedRequest: request,
|
|
2032
|
+
// The tag-based parser tracks thinking via <thinking> tags — ask
|
|
2033
|
+
// the provider to wrap native thinking deltas so they don't
|
|
2034
|
+
// stream as visible text (same as streamWithXmlTools).
|
|
2035
|
+
wrapThinkingTags: true,
|
|
1879
2036
|
onRequest: (req: unknown) => { rawRequest = req; },
|
|
1880
2037
|
}
|
|
1881
2038
|
);
|
|
@@ -1888,6 +2045,11 @@ export class Membrane {
|
|
|
1888
2045
|
streamResult.stopSequence = detectedStopSequence;
|
|
1889
2046
|
}
|
|
1890
2047
|
|
|
2048
|
+
// Capture native thinking blocks (with signatures) from the provider
|
|
2049
|
+
// response — the text parser can't see signatures, so they're merged
|
|
2050
|
+
// into the final response content after parsing.
|
|
2051
|
+
this.captureProviderThinkingBlocks(streamResult.content, providerThinkingBlocks);
|
|
2052
|
+
|
|
1891
2053
|
rawResponse = streamResult.raw;
|
|
1892
2054
|
lastStopReason = this.mapStopReason(streamResult.stopReason);
|
|
1893
2055
|
lastStopSequence = streamResult.stopSequence ?? undefined;
|
|
@@ -2171,6 +2333,9 @@ export class Membrane {
|
|
|
2171
2333
|
lastStopSequence
|
|
2172
2334
|
);
|
|
2173
2335
|
|
|
2336
|
+
// Merge provider thinking signatures into parser-derived thinking blocks
|
|
2337
|
+
this.mergeProviderThinkingBlocks(response.content, providerThinkingBlocks);
|
|
2338
|
+
|
|
2174
2339
|
stream.emit({ type: 'complete', response });
|
|
2175
2340
|
} catch (error) {
|
|
2176
2341
|
if (this.isAbortError(error)) {
|
|
@@ -2377,6 +2542,10 @@ export class Membrane {
|
|
|
2377
2542
|
depth: toolDepth,
|
|
2378
2543
|
previousResults: executedToolResults,
|
|
2379
2544
|
accumulated: allTextAccumulated,
|
|
2545
|
+
// Full normalized blocks for this round, in provider order —
|
|
2546
|
+
// lets consumers persist the assistant turn verbatim (signed
|
|
2547
|
+
// thinking must precede tool_use in the same turn).
|
|
2548
|
+
roundContent: responseBlocks,
|
|
2380
2549
|
};
|
|
2381
2550
|
|
|
2382
2551
|
// Yield control for tool execution
|
|
@@ -2483,13 +2652,16 @@ export class Membrane {
|
|
|
2483
2652
|
}
|
|
2484
2653
|
|
|
2485
2654
|
// Native tool names must match ^[a-zA-Z0-9_-]{1,128}$.
// Tool names use `--` namespacing, which is already API-valid; the only
// character that needs escaping is a literal colon. We deliberately do NOT
// escape underscores — they are valid, and escaping them garbles every
// underscore-containing tool name in the request the model actually sees.
//
// Encoding (lossless for every input string):
//   ':'   -> '_-c'
//   '_-'  -> '_-u'   (only so that the escape prefix itself can round-trip)
// Every `_-` in an encoded name is therefore an escape prefix followed by
// `c` or `u`, which makes decoding unambiguous. Names made of letters,
// digits, underscores (including `__`) and `--` pass through unchanged.

/**
 * Encode a tool name so it only contains API-valid characters.
 *
 * @param name Original tool name (may contain `:`).
 * @returns The encoded name; identical to `name` when it contains neither
 *          `:` nor the `_-` escape prefix.
 */
function sanitizeToolName(name: string): string {
  return name.replace(/_-|:/g, (match) => (match === ':' ? '_-c' : '_-u'));
}

/**
 * Reverse {@link sanitizeToolName}: `unsanitizeToolName(sanitizeToolName(x)) === x`
 * for every string `x`.
 *
 * @param name Encoded tool name as produced by `sanitizeToolName`.
 * @returns The original tool name.
 */
function unsanitizeToolName(name: string): string {
  return name.replace(/_-([cu])/g, (_match, code: string) => (code === 'c' ? ':' : '_-'));
}
|
|
@@ -122,12 +122,20 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
122
122
|
let cacheReadTokens: number | undefined;
|
|
123
123
|
let stopReason: string = 'end_turn';
|
|
124
124
|
let stopSequence: string | undefined;
|
|
125
|
+
let stopDetails: unknown;
|
|
125
126
|
|
|
126
127
|
// Content block tracking — finalized on content_block_stop
|
|
127
128
|
const contentBlocks: Record<string, unknown>[] = [];
|
|
128
129
|
let currentBlockIndex = -1;
|
|
129
130
|
let currentBlockContent = '';
|
|
130
131
|
let currentBlockInputJson = '';
|
|
132
|
+
// When wrapThinkingTags is set (XML formatter path), native thinking
|
|
133
|
+
// deltas are wrapped in <thinking>...</thinking> on the chunk stream so
|
|
134
|
+
// the tag-based parser tracks them as thinking instead of visible text.
|
|
135
|
+
// Tag opened lazily on the first delta — display:'omitted' models emit
|
|
136
|
+
// thinking blocks with no thinking_delta at all (signature only).
|
|
137
|
+
const wrapThinkingTags = options?.wrapThinkingTags === true;
|
|
138
|
+
let thinkingTagOpen = false;
|
|
131
139
|
|
|
132
140
|
for await (const event of stream) {
|
|
133
141
|
resetIdleTimer();
|
|
@@ -152,7 +160,21 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
152
160
|
callbacks.onChunk(chunk);
|
|
153
161
|
} else if (event.delta.type === 'thinking_delta') {
|
|
154
162
|
currentBlockContent += event.delta.thinking;
|
|
163
|
+
if (wrapThinkingTags && !thinkingTagOpen) {
|
|
164
|
+
callbacks.onChunk('<thinking>');
|
|
165
|
+
thinkingTagOpen = true;
|
|
166
|
+
}
|
|
155
167
|
callbacks.onChunk(event.delta.thinking);
|
|
168
|
+
} else if ((event.delta as { type: string }).type === 'signature_delta') {
|
|
169
|
+
// Accumulate the cryptographic signature that authenticates this
|
|
170
|
+
// thinking block. Without this, signatures never land on the
|
|
171
|
+
// streaming path and the next request — which carries the block
|
|
172
|
+
// back in history — fails Anthropic's signature validation.
|
|
173
|
+
const sig = (event.delta as { signature?: string }).signature;
|
|
174
|
+
const block = contentBlocks[currentBlockIndex];
|
|
175
|
+
if (block && block.type === 'thinking' && sig) {
|
|
176
|
+
block.signature = ((block.signature as string | undefined) ?? '') + sig;
|
|
177
|
+
}
|
|
156
178
|
} else if ((event.delta as { type: string }).type === 'input_json_delta') {
|
|
157
179
|
currentBlockInputJson += (event.delta as { partial_json: string }).partial_json;
|
|
158
180
|
}
|
|
@@ -166,6 +188,10 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
166
188
|
block.text = currentBlockContent;
|
|
167
189
|
} else if (block.type === 'thinking') {
|
|
168
190
|
block.thinking = currentBlockContent;
|
|
191
|
+
if (thinkingTagOpen) {
|
|
192
|
+
callbacks.onChunk('</thinking>\n');
|
|
193
|
+
thinkingTagOpen = false;
|
|
194
|
+
}
|
|
169
195
|
} else if (block.type === 'tool_use' && currentBlockInputJson) {
|
|
170
196
|
try { block.input = JSON.parse(currentBlockInputJson); } catch { /* partial JSON */ }
|
|
171
197
|
}
|
|
@@ -176,9 +202,15 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
176
202
|
// All content blocks are finalized by the time message_delta arrives.
|
|
177
203
|
// Capture final metadata and exit — message_stop and the SSE connection
|
|
178
204
|
// teardown after it add only variable latency with no useful data.
|
|
179
|
-
const delta = event.delta as {
|
|
205
|
+
const delta = event.delta as {
|
|
206
|
+
stop_reason?: string;
|
|
207
|
+
stop_sequence?: string;
|
|
208
|
+
stop_details?: unknown;
|
|
209
|
+
};
|
|
180
210
|
stopReason = delta.stop_reason ?? 'end_turn';
|
|
181
211
|
stopSequence = delta.stop_sequence ?? undefined;
|
|
212
|
+
// stop_details carries refusal metadata (e.g., category: 'reasoning_extraction')
|
|
213
|
+
stopDetails = delta.stop_details ?? undefined;
|
|
182
214
|
const deltaUsage = event.usage as unknown as {
|
|
183
215
|
output_tokens: number;
|
|
184
216
|
cache_creation_input_tokens?: number | null;
|
|
@@ -219,6 +251,7 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
219
251
|
content: contentBlocks,
|
|
220
252
|
stop_reason: stopReason,
|
|
221
253
|
stop_sequence: stopSequence ?? null,
|
|
254
|
+
stop_details: stopDetails ?? null,
|
|
222
255
|
model,
|
|
223
256
|
usage: {
|
|
224
257
|
input_tokens: inputTokens,
|
|
@@ -249,7 +282,11 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
249
282
|
|
|
250
283
|
private buildRequest(request: ProviderRequest): Anthropic.MessageCreateParams {
|
|
251
284
|
// Strip provider-specific fields (e.g., sourceUrl for Gemini) from image blocks
|
|
252
|
-
// before sending to Anthropic, which rejects extra inputs
|
|
285
|
+
// before sending to Anthropic, which rejects extra inputs.
|
|
286
|
+
// Also normalize nested tool_result content blocks: Membrane uses camelCase
|
|
287
|
+
// `mediaType`, Anthropic expects snake_case `media_type`. Without this,
|
|
288
|
+
// an image returned by a tool reaches the API as `{source: {mediaType: ...}}`
|
|
289
|
+
// and is silently rejected (the model sees the text label only).
|
|
253
290
|
const sanitizedMessages = (request.messages as any[]).map((msg: any) => {
|
|
254
291
|
if (!Array.isArray(msg.content)) return msg;
|
|
255
292
|
return {
|
|
@@ -259,6 +296,12 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
259
296
|
const { sourceUrl, ...rest } = block;
|
|
260
297
|
return rest;
|
|
261
298
|
}
|
|
299
|
+
if (block.type === 'tool_result' && Array.isArray(block.content)) {
|
|
300
|
+
return {
|
|
301
|
+
...block,
|
|
302
|
+
content: toAnthropicToolResultContent(block.content as ContentBlock[]),
|
|
303
|
+
};
|
|
304
|
+
}
|
|
262
305
|
return block;
|
|
263
306
|
}),
|
|
264
307
|
};
|
|
@@ -396,6 +439,41 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
396
439
|
// Content Conversion Utilities
|
|
397
440
|
// ============================================================================
|
|
398
441
|
|
|
442
|
+
/**
 * Translate Membrane tool-result content blocks into the text/image array
 * used for Anthropic's `tool_result.content`.
 *
 * - `text` blocks are forwarded with their text.
 * - `image` blocks are forwarded for `base64` sources (camelCase `mediaType`
 *   becomes snake_case `media_type`) and for `url` sources.
 * - Every other block type, and any image whose source is neither `base64`
 *   nor `url`, is dropped.
 *
 * @param blocks Tool-result content blocks in Membrane's normalized shape.
 * @returns The converted blocks, in input order.
 */
function toAnthropicToolResultContent(
  blocks: ContentBlock[],
): Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> {
  return blocks.flatMap(
    (block): Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> => {
      if (block.type === 'text') {
        return [{ type: 'text', text: block.text }];
      }
      if (block.type !== 'image') {
        return [];
      }
      if (block.source.type === 'base64') {
        return [
          {
            type: 'image',
            source: {
              type: 'base64',
              media_type: block.source.mediaType as 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp',
              data: block.source.data,
            },
          },
        ];
      }
      if (block.source.type === 'url') {
        return [{ type: 'image', source: { type: 'url', url: block.source.url } }];
      }
      return [];
    },
  );
}
|
|
476
|
+
|
|
399
477
|
/**
|
|
400
478
|
* Convert normalized content blocks to Anthropic format
|
|
401
479
|
* Preserves cache_control for prompt caching
|
|
@@ -425,6 +503,11 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
|
|
|
425
503
|
data: block.source.data,
|
|
426
504
|
},
|
|
427
505
|
});
|
|
506
|
+
} else if (block.source.type === 'url') {
|
|
507
|
+
result.push({
|
|
508
|
+
type: 'image',
|
|
509
|
+
source: { type: 'url', url: block.source.url },
|
|
510
|
+
});
|
|
428
511
|
}
|
|
429
512
|
break;
|
|
430
513
|
|
|
@@ -454,7 +537,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
|
|
|
454
537
|
tool_use_id: block.toolUseId,
|
|
455
538
|
content: typeof block.content === 'string'
|
|
456
539
|
? block.content
|
|
457
|
-
:
|
|
540
|
+
: toAnthropicToolResultContent(block.content),
|
|
458
541
|
is_error: block.isError,
|
|
459
542
|
});
|
|
460
543
|
break;
|
|
@@ -463,11 +546,21 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
|
|
|
463
546
|
result.push({
|
|
464
547
|
type: 'thinking',
|
|
465
548
|
thinking: block.thinking,
|
|
549
|
+
...(block.signature ? { signature: block.signature } : {}),
|
|
550
|
+
} as any);
|
|
551
|
+
break;
|
|
552
|
+
|
|
553
|
+
case 'redacted_thinking':
|
|
554
|
+
// Round-trip verbatim — `data` is the encrypted reasoning payload;
|
|
555
|
+
// the API rejects/ignores the block without it.
|
|
556
|
+
result.push({
|
|
557
|
+
type: 'redacted_thinking',
|
|
558
|
+
data: (block as any).data,
|
|
466
559
|
} as any);
|
|
467
560
|
break;
|
|
468
561
|
}
|
|
469
562
|
}
|
|
470
|
-
|
|
563
|
+
|
|
471
564
|
return result;
|
|
472
565
|
}
|
|
473
566
|
|
|
@@ -503,7 +596,9 @@ export function fromAnthropicContent(blocks: Anthropic.ContentBlock[]): ContentB
|
|
|
503
596
|
default:
|
|
504
597
|
// Handle redacted_thinking or unknown types
|
|
505
598
|
if ((block as any).type === 'redacted_thinking') {
|
|
506
|
-
|
|
599
|
+
// Preserve the encrypted `data` payload — without it the block
|
|
600
|
+
// cannot be round-tripped and prior reasoning is lost.
|
|
601
|
+
result.push({ type: 'redacted_thinking', data: (block as any).data } as any);
|
|
507
602
|
}
|
|
508
603
|
break;
|
|
509
604
|
}
|
package/src/providers/bedrock.ts
CHANGED
|
@@ -681,7 +681,11 @@ export class BedrockAdapter implements ProviderAdapter {
|
|
|
681
681
|
role: 'assistant',
|
|
682
682
|
content: contentBlocks.map(b => {
|
|
683
683
|
if (b.type === 'thinking') {
|
|
684
|
-
return { type: 'thinking' as const, thinking: b.thinking, signature: b.signature };
|
|
684
|
+
return { type: 'thinking' as const, thinking: b.thinking ?? '', signature: b.signature };
|
|
685
|
+
}
|
|
686
|
+
if (b.type === 'redacted_thinking') {
|
|
687
|
+
// Pass through verbatim — carries the encrypted `data` payload
|
|
688
|
+
return { ...b } as unknown as { type: 'text'; text?: string };
|
|
685
689
|
}
|
|
686
690
|
return { type: b.type as 'text', text: b.text };
|
|
687
691
|
}),
|
|
@@ -709,12 +713,17 @@ export class BedrockAdapter implements ProviderAdapter {
|
|
|
709
713
|
name: block.name,
|
|
710
714
|
input: block.input as Record<string, unknown>,
|
|
711
715
|
});
|
|
712
|
-
} else if (block.type === 'thinking'
|
|
716
|
+
} else if (block.type === 'thinking') {
|
|
717
|
+
// Signature-only thinking blocks (display:'omitted') have an empty
|
|
718
|
+
// thinking field but must still be preserved for round-tripping.
|
|
713
719
|
content.push({
|
|
714
720
|
type: 'thinking',
|
|
715
|
-
thinking: block.thinking,
|
|
716
|
-
signature: block.signature,
|
|
721
|
+
thinking: block.thinking ?? '',
|
|
722
|
+
...(block.signature ? { signature: block.signature } : {}),
|
|
717
723
|
});
|
|
724
|
+
} else if ((block as any).type === 'redacted_thinking') {
|
|
725
|
+
// Pass through verbatim — carries the encrypted `data` payload
|
|
726
|
+
content.push({ ...(block as any) } as ContentBlock);
|
|
718
727
|
}
|
|
719
728
|
}
|
|
720
729
|
|
|
@@ -301,6 +301,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
301
301
|
params.frequency_penalty = request.frequencyPenalty;
|
|
302
302
|
}
|
|
303
303
|
|
|
304
|
+
if (request.repetitionPenalty !== undefined) {
|
|
305
|
+
params.repetition_penalty = request.repetitionPenalty;
|
|
306
|
+
}
|
|
307
|
+
|
|
304
308
|
// OpenAI-compatible APIs may limit stop sequences (OpenAI: 4) — truncate to be safe
|
|
305
309
|
if (request.stopSequences && request.stopSequences.length > 0) {
|
|
306
310
|
params.stop = request.stopSequences.slice(0, 4);
|