@animalabs/membrane 0.5.55 → 0.5.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/dist/formatters/native.d.ts.map +1 -1
  2. package/dist/formatters/native.js +11 -0
  3. package/dist/formatters/native.js.map +1 -1
  4. package/dist/membrane.d.ts +28 -0
  5. package/dist/membrane.d.ts.map +1 -1
  6. package/dist/membrane.js +169 -17
  7. package/dist/membrane.js.map +1 -1
  8. package/dist/providers/anthropic.d.ts.map +1 -1
  9. package/dist/providers/anthropic.js +94 -3
  10. package/dist/providers/anthropic.js.map +1 -1
  11. package/dist/providers/bedrock.d.ts.map +1 -1
  12. package/dist/providers/bedrock.js +14 -4
  13. package/dist/providers/bedrock.js.map +1 -1
  14. package/dist/providers/openai-compatible.d.ts.map +1 -1
  15. package/dist/providers/openai-compatible.js +3 -0
  16. package/dist/providers/openai-compatible.js.map +1 -1
  17. package/dist/providers/openai-completions.d.ts.map +1 -1
  18. package/dist/providers/openai-completions.js +57 -3
  19. package/dist/providers/openai-completions.js.map +1 -1
  20. package/dist/providers/openai.d.ts.map +1 -1
  21. package/dist/providers/openai.js +3 -0
  22. package/dist/providers/openai.js.map +1 -1
  23. package/dist/types/content.d.ts +6 -0
  24. package/dist/types/content.d.ts.map +1 -1
  25. package/dist/types/content.js.map +1 -1
  26. package/dist/types/provider.d.ts +9 -0
  27. package/dist/types/provider.d.ts.map +1 -1
  28. package/dist/types/request.d.ts +10 -0
  29. package/dist/types/request.d.ts.map +1 -1
  30. package/dist/types/tools.d.ts +9 -0
  31. package/dist/types/tools.d.ts.map +1 -1
  32. package/package.json +1 -1
  33. package/src/formatters/native.ts +10 -0
  34. package/src/membrane.ts +191 -19
  35. package/src/providers/anthropic.ts +100 -5
  36. package/src/providers/bedrock.ts +13 -4
  37. package/src/providers/openai-compatible.ts +4 -0
  38. package/src/providers/openai-completions.ts +58 -2
  39. package/src/providers/openai.ts +4 -0
  40. package/src/types/content.ts +6 -0
  41. package/src/types/provider.ts +10 -0
  42. package/src/types/request.ts +12 -1
  43. package/src/types/tools.ts +14 -4
@@ -1 +1 @@
1
- {"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../../src/types/tools.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,CAAC,EAAE,aAAa,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;CACH;AAMD,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,EAAE,MAAM,GAAG,sBAAsB,EAAE,CAAC;IAC3C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,MAAM,sBAAsB,GAC9B;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE;QAAE,IAAI,EAAE,QAAQ,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAMnF,MAAM,WAAW,WAAW;IAC1B,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAEhB,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IAEjB,2CAA2C;IAC3C,KAAK,EAAE,MAAM,CAAC;IAEd,oDAAoD;IACpD,eAAe,EAAE,UAAU,EAAE,CAAC;IAE9B,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAC;CACrB;AAMD,MAAM,WAAW,eAAe;IAC9B,wBAAwB;IACxB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAElB,uCAAuC;IACvC,UAAU,EAAE,MAAM,CAAC;IAEnB,sCAAsC;IACtC,SAAS,EAAE,MAAM,CAAC;IAElB,4CAA4C;IAC5C,SAAS,EAAE,MAAM,CAAC;CACnB"}
1
+ {"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../../src/types/tools.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,CAAC,EAAE,aAAa,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE;QACX,IAAI,EAAE,QAAQ,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAC3C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;CACH;AAMD,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,EAAE,MAAM,GAAG,sBAAsB,EAAE,CAAC;IAC3C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,MAAM,sBAAsB,GAC9B;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE;QAAE,IAAI,EAAE,QAAQ,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAMnF,MAAM,WAAW,WAAW;IAC1B,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAEhB,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,CAAC;IAEjB,2CAA2C;IAC3C,KAAK,EAAE,MAAM,CAAC;IAEd,oDAAoD;IACpD,eAAe,EAAE,UAAU,EAAE,CAAC;IAE9B,gCAAgC;IAChC,WAAW,EAAE,MAAM,CAAC;IAEpB;;;;;;;OAOG;IACH,YAAY,CAAC,EAAE,OAAO,cAAc,EAAE,YAAY,EAAE,CAAC;CACtD;AAMD,MAAM,WAAW,eAAe;IAC9B,wBAAwB;IACxB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAElB,uCAAuC;IACvC,UAAU,EAAE,MAAM,CAAC;IAEnB,sCAAsC;IACtC,SAAS,EAAE,MAAM,CAAC;IAElB,4CAA4C;IAC5C,SAAS,EAAE,MAAM,CAAC;CACnB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@animalabs/membrane",
3
- "version": "0.5.55",
3
+ "version": "0.5.64",
4
4
  "description": "LLM middleware - a selective boundary that transforms what passes through",
5
5
  "repository": {
6
6
  "type": "git",
@@ -385,10 +385,20 @@ export class NativeFormatter implements PrefillFormatter {
385
385
  is_error: block.isError,
386
386
  });
387
387
  } else if (block.type === 'thinking') {
388
+ // Round-trip thinking blocks verbatim, including the signature — the
389
+ // API validates it and (on display:'omitted' models) decrypts it to
390
+ // reconstruct the original reasoning. Signature-only blocks (empty
391
+ // thinking field) are valid and must be passed back unchanged.
388
392
  result.push({
389
393
  type: 'thinking',
390
394
  thinking: block.thinking,
395
+ ...((block as { signature?: string }).signature
396
+ ? { signature: (block as { signature?: string }).signature }
397
+ : {}),
391
398
  });
399
+ } else if (block.type === 'redacted_thinking') {
400
+ // Pass through verbatim (carries encrypted data field)
401
+ result.push({ ...(block as unknown as Record<string, unknown>) });
392
402
  } else if (block.type === 'document' || block.type === 'audio') {
393
403
  hasUnsupportedMedia = true;
394
404
  }
package/src/membrane.ts CHANGED
@@ -292,6 +292,12 @@ export class Membrane {
292
292
  // These can't be handled by the text-based XML parser, so we capture and append them
293
293
  const extraContentBlocks: ContentBlock[] = [];
294
294
 
295
+ // Native thinking blocks from the provider (with signatures). The parser
296
+ // derives signature-less thinking blocks from <thinking> text (via
297
+ // wrapThinkingTags); signatures from these are merged into those after
298
+ // parsing, and signature-only blocks are prepended.
299
+ const providerThinkingBlocks: ContentBlock[] = [];
300
+
295
301
  // Transform initial request using the formatter
296
302
  let { providerRequest, prefillResult } = this.transformRequest(request, formatter);
297
303
 
@@ -385,6 +391,10 @@ export class Membrane {
385
391
  {
386
392
  signal,
387
393
  normalizedRequest: request,
394
+ // The tag-based parser tracks thinking via <thinking> tags — ask the
395
+ // provider to wrap native thinking deltas so they don't stream as
396
+ // visible text (see ProviderRequestOptions.wrapThinkingTags)
397
+ wrapThinkingTags: true,
388
398
  onRequest: (req) => {
389
399
  rawRequest = req;
390
400
  onRequest?.(req);
@@ -412,6 +422,10 @@ export class Membrane {
412
422
  } as ContentBlock);
413
423
  }
414
424
  }
425
+ // Native thinking blocks carry the signature (encrypted full
426
+ // reasoning) — captured so consumers can persist and round-trip
427
+ // them for reasoning continuity.
428
+ this.captureProviderThinkingBlocks(streamResult.content, providerThinkingBlocks);
415
429
  }
416
430
 
417
431
  rawResponse = streamResult.raw;
@@ -700,6 +714,9 @@ export class Membrane {
700
714
  response.content.push(...extraContentBlocks);
701
715
  }
702
716
 
717
+ // Merge provider thinking signatures into parser-derived thinking blocks
718
+ this.mergeProviderThinkingBlocks(response.content, providerThinkingBlocks);
719
+
703
720
  return response;
704
721
  } catch (error) {
705
722
  // Check if this is an abort error
@@ -1005,6 +1022,19 @@ export class Membrane {
1005
1022
  content: block.content,
1006
1023
  is_error: block.isError,
1007
1024
  });
1025
+ } else if (block.type === 'thinking') {
1026
+ // Round-trip thinking blocks verbatim including the signature — the
1027
+ // API validates it and (on display:'omitted' models) decrypts it to
1028
+ // reconstruct prior reasoning. Empty thinking + signature is valid.
1029
+ content.push({
1030
+ type: 'thinking',
1031
+ thinking: (block as { thinking?: string }).thinking ?? '',
1032
+ ...((block as { signature?: string }).signature
1033
+ ? { signature: (block as { signature?: string }).signature }
1034
+ : {}),
1035
+ });
1036
+ } else if (block.type === 'redacted_thinking') {
1037
+ content.push({ ...(block as unknown as Record<string, unknown>) });
1008
1038
  } else if (block.type === 'image') {
1009
1039
  if (block.source.type === 'base64') {
1010
1040
  const imageBlock: Record<string, unknown> = {
@@ -1081,13 +1111,8 @@ export class Membrane {
1081
1111
  );
1082
1112
  }
1083
1113
 
1084
- // Build thinking config for native extended thinking
1085
- const thinking = request.config.thinking?.enabled
1086
- ? {
1087
- type: 'enabled' as const,
1088
- budget_tokens: request.config.thinking.budgetTokens ?? 5000,
1089
- }
1090
- : undefined;
1114
+ // Build thinking config for native extended thinking (budget clamped to max_tokens)
1115
+ const thinking = this.buildThinkingParam(request.config);
1091
1116
 
1092
1117
  // Anthropic requires temperature=1 when extended thinking is enabled
1093
1118
  const temperature = thinking ? 1 : request.config.temperature;
@@ -1125,9 +1150,12 @@ export class Membrane {
1125
1150
  } else if (item.type === 'thinking') {
1126
1151
  blocks.push({
1127
1152
  type: 'thinking',
1128
- thinking: item.thinking,
1129
- signature: item.signature,
1153
+ thinking: item.thinking ?? '',
1154
+ ...(item.signature ? { signature: item.signature } : {}),
1130
1155
  });
1156
+ } else if (item.type === 'redacted_thinking') {
1157
+ // Pass through verbatim — carries the encrypted `data` payload
1158
+ blocks.push({ ...item } as ContentBlock);
1131
1159
  } else if (item.type === 'generated_image') {
1132
1160
  blocks.push({
1133
1161
  type: 'generated_image',
@@ -1138,14 +1166,75 @@ export class Membrane {
1138
1166
  }
1139
1167
  return blocks;
1140
1168
  }
1141
-
1169
+
1142
1170
  if (typeof content === 'string') {
1143
1171
  return [{ type: 'text', text: content }];
1144
1172
  }
1145
-
1173
+
1146
1174
  return [];
1147
1175
  }
1148
1176
 
1177
/**
 * Collect native `thinking` and `redacted_thinking` blocks from a provider
 * response into `sink`.
 *
 * - `thinking` entries are normalized to `{ type, thinking, signature? }`;
 *   a missing `thinking` field becomes the empty string and `signature` is
 *   only attached when it is truthy (so signature-only blocks are kept).
 * - `redacted_thinking` entries are shallow-copied unchanged.
 * - Every other entry is ignored, and a non-array `providerContent` is a no-op.
 *
 * @param providerContent Raw content from the provider response (untyped).
 * @param sink Array that captured blocks are appended to, in input order.
 */
private captureProviderThinkingBlocks(
  providerContent: unknown,
  sink: ContentBlock[]
): void {
  if (!Array.isArray(providerContent)) {
    return;
  }

  for (const entry of providerContent) {
    switch (entry?.type) {
      case 'thinking': {
        const signature = entry.signature;
        sink.push({
          type: 'thinking',
          thinking: entry.thinking ?? '',
          ...(signature ? { signature } : {}),
        } as ContentBlock);
        break;
      }
      case 'redacted_thinking':
        // Copied as-is so any extra fields on the block survive.
        sink.push({ ...entry } as ContentBlock);
        break;
      default:
        break;
    }
  }
}
1200
+
1201
/**
 * Merge provider thinking signatures into parser-derived thinking blocks and
 * prepend the provider blocks that have no parser-derived counterpart.
 *
 * Pairing walks both lists in order, but only provider `thinking` blocks that
 * actually carry thinking text are paired with a parsed block. A
 * signature-only provider block (empty `thinking`) is never paired: pairing
 * it by index would write its signature onto an unrelated parsed block and
 * shift every later signature by one. Such blocks, plus any text-bearing
 * provider blocks left over once the parsed blocks run out, are prepended to
 * `content`, followed by all `redacted_thinking` blocks.
 *
 * Mutates `content` in place (both the matched blocks and the array itself).
 *
 * @param content Parser-derived response content to update.
 * @param providerThinkingBlocks Blocks previously captured from the provider.
 */
private mergeProviderThinkingBlocks(
  content: ContentBlock[],
  providerThinkingBlocks: ContentBlock[]
): void {
  if (providerThinkingBlocks.length === 0) return;

  const parsedThinking = content.filter(
    (b) => b.type === 'thinking'
  ) as Array<{ type: 'thinking'; thinking: string; signature?: string }>;

  const unmatched: ContentBlock[] = [];
  const redacted: ContentBlock[] = [];
  let nextParsed = 0;

  for (const block of providerThinkingBlocks) {
    if (block.type === 'redacted_thinking') {
      redacted.push(block);
      continue;
    }
    if (block.type !== 'thinking') continue;

    const { thinking, signature } = block as { thinking?: string; signature?: string };

    // Only blocks with text can correspond to a parsed block; signature-only
    // blocks always fall through to `unmatched`.
    const target = thinking ? parsedThinking[nextParsed] : undefined;
    if (!target) {
      unmatched.push(block);
      continue;
    }

    nextParsed++;
    if (signature) {
      target.signature = signature;
    }
  }

  if (unmatched.length > 0 || redacted.length > 0) {
    content.unshift(...unmatched, ...redacted);
  }
}
1237
+
1149
1238
  // ==========================================================================
1150
1239
  // Internal Methods
1151
1240
  // ==========================================================================
@@ -1172,8 +1261,10 @@ export class Membrane {
1172
1261
  * Used by transformRequest, buildContinuationRequest, and buildContinuationRequestWithImages.
1173
1262
  */
1174
1263
  private getBaseProviderParams(config: NormalizedRequest['config']) {
1264
+ // Build thinking config for native extended thinking
1265
+ const thinking = this.buildThinkingParam(config);
1175
1266
  // Anthropic requires temperature=1 when extended thinking is enabled
1176
- const temperature = config.thinking?.enabled ? 1 : config.temperature;
1267
+ const temperature = thinking ? 1 : config.temperature;
1177
1268
  return {
1178
1269
  model: config.model,
1179
1270
  maxTokens: config.maxTokens,
@@ -1182,9 +1273,41 @@ export class Membrane {
1182
1273
  topK: config.topK,
1183
1274
  presencePenalty: config.presencePenalty,
1184
1275
  frequencyPenalty: config.frequencyPenalty,
1276
+ repetitionPenalty: config.repetitionPenalty,
1277
+ thinking,
1185
1278
  };
1186
1279
  }
1187
1280
 
1281
/**
 * Derive the provider `thinking` parameter from the request config.
 *
 * Returns `undefined` when thinking is not enabled. For type `'adaptive'`
 * only the optional `display` setting is forwarded. Otherwise (type
 * `'enabled'`, the default) the budget is `budgetTokens` (default 5000),
 * capped at `maxTokens - 1024` when `maxTokens` is a number. If the
 * resulting budget is below 1024 the parameter is omitted entirely rather
 * than sending a request the API would reject.
 *
 * @param config Normalized request config.
 * @returns The thinking parameter to send, or `undefined` to send none.
 */
private buildThinkingParam(config: NormalizedRequest['config']):
  | { type: 'adaptive'; display?: 'summarized' | 'omitted' }
  | { type: 'enabled'; budget_tokens: number; display?: 'summarized' | 'omitted' }
  | undefined {
  const thinkingConfig = config.thinking;
  if (!thinkingConfig?.enabled) {
    return undefined;
  }

  // `display` is forwarded only when it is set.
  const displayPart = thinkingConfig.display ? { display: thinkingConfig.display } : {};

  const mode = thinkingConfig.type ?? 'enabled';
  if (mode === 'adaptive') {
    return { type: 'adaptive', ...displayPart };
  }

  let budget = thinkingConfig.budgetTokens ?? 5000;
  if (typeof config.maxTokens === 'number') {
    // Keep at least 1024 tokens of max_tokens outside the thinking budget.
    budget = Math.min(budget, config.maxTokens - 1024);
  }

  if (budget < 1024) {
    // No valid budget fits under max_tokens — skip thinking.
    return undefined;
  }

  return { type: 'enabled', budget_tokens: budget, ...displayPart };
}
1310
+
1188
1311
  /**
1189
1312
  * Transform a normalized request into provider format using the formatter
1190
1313
  */
@@ -1232,6 +1355,15 @@ export class Membrane {
1232
1355
  },
1233
1356
  };
1234
1357
 
1358
+ // The API rejects extended thinking combined with an assistant prefill.
1359
+ // Prefill-style builds (XML formatter) use the thinking config for the
1360
+ // literal `<thinking>` text prefix instead of the API feature — drop the
1361
+ // API param when the built request actually ends in an assistant prefill.
1362
+ // Chat-style builds (no prefill) keep it.
1363
+ if (buildResult.assistantPrefill && providerRequest.thinking) {
1364
+ delete providerRequest.thinking;
1365
+ }
1366
+
1235
1367
  return { providerRequest, prefillResult: buildResult };
1236
1368
  }
1237
1369
 
@@ -1243,6 +1375,8 @@ export class Membrane {
1243
1375
  timeoutMs?: number;
1244
1376
  idleTimeoutMs?: number;
1245
1377
  onRequest?: (rawRequest: unknown) => void;
1378
+ /** See ProviderRequestOptions.wrapThinkingTags */
1379
+ wrapThinkingTags?: boolean;
1246
1380
  /**
1247
1381
  * The original NormalizedRequest, threaded through so the
1248
1382
  * `beforeRequest` hook can see both shapes (normalized + provider).
@@ -1292,6 +1426,9 @@ export class Membrane {
1292
1426
 
1293
1427
  return {
1294
1428
  ...this.getBaseProviderParams(originalRequest.config),
1429
+ // Continuations always end in an assistant prefill — the API rejects
1430
+ // extended thinking combined with prefill, so never send the param here
1431
+ thinking: undefined,
1295
1432
  messages,
1296
1433
  system: prefillResult.systemContent
1297
1434
  ? (Array.isArray(prefillResult.systemContent) && prefillResult.systemContent.length > 0
@@ -1362,6 +1499,9 @@ export class Membrane {
1362
1499
 
1363
1500
  return {
1364
1501
  ...this.getBaseProviderParams(originalRequest.config),
1502
+ // Continuations always end in an assistant prefill — the API rejects
1503
+ // extended thinking combined with prefill, so never send the param here
1504
+ thinking: undefined,
1365
1505
  messages,
1366
1506
  system: prefillResult.systemContent
1367
1507
  ? (Array.isArray(prefillResult.systemContent) && prefillResult.systemContent.length > 0
@@ -1410,9 +1550,12 @@ export class Membrane {
1410
1550
  } else if (block.type === 'thinking') {
1411
1551
  content.push({
1412
1552
  type: 'thinking',
1413
- thinking: block.thinking,
1414
- signature: block.signature,
1553
+ thinking: block.thinking ?? '',
1554
+ ...(block.signature ? { signature: block.signature } : {}),
1415
1555
  });
1556
+ } else if (block.type === 'redacted_thinking') {
1557
+ // Pass through verbatim — carries the encrypted `data` payload
1558
+ content.push({ ...(block as any) } as ContentBlock);
1416
1559
  } else if (block.type === 'generated_image') {
1417
1560
  content.push({
1418
1561
  type: 'generated_image',
@@ -1595,6 +1738,11 @@ export class Membrane {
1595
1738
  return 'stop_sequence';
1596
1739
  case 'tool_use':
1597
1740
  return 'tool_use';
1741
+ case 'refusal':
1742
+ // Safety refusal (e.g., Fable 5 reasoning_extraction). Must survive
1743
+ // mapping — downstream consumers react to refusals (chapterx adds a
1744
+ // Discord reaction). Defaulting this to end_turn silently hid them.
1745
+ return 'refusal';
1598
1746
  default:
1599
1747
  return 'end_turn';
1600
1748
  }
@@ -1769,6 +1917,11 @@ export class Membrane {
1769
1917
  let rawRequest: unknown;
1770
1918
  let rawResponse: unknown;
1771
1919
 
1920
+ // Native thinking blocks from the provider (with signatures) — merged
1921
+ // into the parser-derived content before the final response is emitted.
1922
+ // See streamWithXmlTools for the matching non-yielding logic.
1923
+ const providerThinkingBlocks: ContentBlock[] = [];
1924
+
1772
1925
  // Track executed tool calls and results
1773
1926
  const executedToolCalls: ToolCall[] = [];
1774
1927
  const executedToolResults: ToolResult[] = [];
@@ -1876,6 +2029,10 @@ export class Membrane {
1876
2029
  timeoutMs: options.timeoutMs,
1877
2030
  idleTimeoutMs: options.idleTimeoutMs,
1878
2031
  normalizedRequest: request,
2032
+ // The tag-based parser tracks thinking via <thinking> tags — ask
2033
+ // the provider to wrap native thinking deltas so they don't
2034
+ // stream as visible text (same as streamWithXmlTools).
2035
+ wrapThinkingTags: true,
1879
2036
  onRequest: (req: unknown) => { rawRequest = req; },
1880
2037
  }
1881
2038
  );
@@ -1888,6 +2045,11 @@ export class Membrane {
1888
2045
  streamResult.stopSequence = detectedStopSequence;
1889
2046
  }
1890
2047
 
2048
+ // Capture native thinking blocks (with signatures) from the provider
2049
+ // response — the text parser can't see signatures, so they're merged
2050
+ // into the final response content after parsing.
2051
+ this.captureProviderThinkingBlocks(streamResult.content, providerThinkingBlocks);
2052
+
1891
2053
  rawResponse = streamResult.raw;
1892
2054
  lastStopReason = this.mapStopReason(streamResult.stopReason);
1893
2055
  lastStopSequence = streamResult.stopSequence ?? undefined;
@@ -2171,6 +2333,9 @@ export class Membrane {
2171
2333
  lastStopSequence
2172
2334
  );
2173
2335
 
2336
+ // Merge provider thinking signatures into parser-derived thinking blocks
2337
+ this.mergeProviderThinkingBlocks(response.content, providerThinkingBlocks);
2338
+
2174
2339
  stream.emit({ type: 'complete', response });
2175
2340
  } catch (error) {
2176
2341
  if (this.isAbortError(error)) {
@@ -2377,6 +2542,10 @@ export class Membrane {
2377
2542
  depth: toolDepth,
2378
2543
  previousResults: executedToolResults,
2379
2544
  accumulated: allTextAccumulated,
2545
+ // Full normalized blocks for this round, in provider order —
2546
+ // lets consumers persist the assistant turn verbatim (signed
2547
+ // thinking must precede tool_use in the same turn).
2548
+ roundContent: responseBlocks,
2380
2549
  };
2381
2550
 
2382
2551
  // Yield control for tool execution
@@ -2483,13 +2652,16 @@ export class Membrane {
2483
2652
  }
2484
2653
 
2485
2654
  // Native tool names must match ^[a-zA-Z0-9_-]{1,128}$.
2486
- // The framework uses module:tool namespacing, so we round-trip colons
2487
- // through an escape encoding for the API wire format.
2488
- // Lossless: escape underscores first (_u), then encode colons (_c).
2655
+ // Tool names use `--` namespacing, which is already API-valid; the only
2656
+ // character that ever needs escaping is a literal colon, encoded losslessly as
2657
+ // `__` and back. We deliberately do NOT escape underscores they are valid,
2658
+ // and escaping them (the previous `_u`/`_c` scheme) garbled every
2659
+ // underscore-containing tool name in the request the model actually sees
2660
+ // (`send_message` → `send_umessage`), polluting its reasoning for no benefit.
2489
2661
  function sanitizeToolName(name: string): string {
2490
- return name.replace(/_/g, '_u').replace(/:/g, '_c');
2662
+ return name.replace(/:/g, '__');
2491
2663
  }
2492
2664
 
2493
2665
  function unsanitizeToolName(name: string): string {
2494
- return name.replace(/_c/g, ':').replace(/_u/g, '_');
2666
+ return name.replace(/__/g, ':');
2495
2667
  }
@@ -122,12 +122,20 @@ export class AnthropicAdapter implements ProviderAdapter {
122
122
  let cacheReadTokens: number | undefined;
123
123
  let stopReason: string = 'end_turn';
124
124
  let stopSequence: string | undefined;
125
+ let stopDetails: unknown;
125
126
 
126
127
  // Content block tracking — finalized on content_block_stop
127
128
  const contentBlocks: Record<string, unknown>[] = [];
128
129
  let currentBlockIndex = -1;
129
130
  let currentBlockContent = '';
130
131
  let currentBlockInputJson = '';
132
+ // When wrapThinkingTags is set (XML formatter path), native thinking
133
+ // deltas are wrapped in <thinking>...</thinking> on the chunk stream so
134
+ // the tag-based parser tracks them as thinking instead of visible text.
135
+ // Tag opened lazily on the first delta — display:'omitted' models emit
136
+ // thinking blocks with no thinking_delta at all (signature only).
137
+ const wrapThinkingTags = options?.wrapThinkingTags === true;
138
+ let thinkingTagOpen = false;
131
139
 
132
140
  for await (const event of stream) {
133
141
  resetIdleTimer();
@@ -152,7 +160,21 @@ export class AnthropicAdapter implements ProviderAdapter {
152
160
  callbacks.onChunk(chunk);
153
161
  } else if (event.delta.type === 'thinking_delta') {
154
162
  currentBlockContent += event.delta.thinking;
163
+ if (wrapThinkingTags && !thinkingTagOpen) {
164
+ callbacks.onChunk('<thinking>');
165
+ thinkingTagOpen = true;
166
+ }
155
167
  callbacks.onChunk(event.delta.thinking);
168
+ } else if ((event.delta as { type: string }).type === 'signature_delta') {
169
+ // Accumulate the cryptographic signature that authenticates this
170
+ // thinking block. Without this, signatures never land on the
171
+ // streaming path and the next request — which carries the block
172
+ // back in history — fails Anthropic's signature validation.
173
+ const sig = (event.delta as { signature?: string }).signature;
174
+ const block = contentBlocks[currentBlockIndex];
175
+ if (block && block.type === 'thinking' && sig) {
176
+ block.signature = ((block.signature as string | undefined) ?? '') + sig;
177
+ }
156
178
  } else if ((event.delta as { type: string }).type === 'input_json_delta') {
157
179
  currentBlockInputJson += (event.delta as { partial_json: string }).partial_json;
158
180
  }
@@ -166,6 +188,10 @@ export class AnthropicAdapter implements ProviderAdapter {
166
188
  block.text = currentBlockContent;
167
189
  } else if (block.type === 'thinking') {
168
190
  block.thinking = currentBlockContent;
191
+ if (thinkingTagOpen) {
192
+ callbacks.onChunk('</thinking>\n');
193
+ thinkingTagOpen = false;
194
+ }
169
195
  } else if (block.type === 'tool_use' && currentBlockInputJson) {
170
196
  try { block.input = JSON.parse(currentBlockInputJson); } catch { /* partial JSON */ }
171
197
  }
@@ -176,9 +202,15 @@ export class AnthropicAdapter implements ProviderAdapter {
176
202
  // All content blocks are finalized by the time message_delta arrives.
177
203
  // Capture final metadata and exit — message_stop and the SSE connection
178
204
  // teardown after it add only variable latency with no useful data.
179
- const delta = event.delta as { stop_reason?: string; stop_sequence?: string };
205
+ const delta = event.delta as {
206
+ stop_reason?: string;
207
+ stop_sequence?: string;
208
+ stop_details?: unknown;
209
+ };
180
210
  stopReason = delta.stop_reason ?? 'end_turn';
181
211
  stopSequence = delta.stop_sequence ?? undefined;
212
+ // stop_details carries refusal metadata (e.g., category: 'reasoning_extraction')
213
+ stopDetails = delta.stop_details ?? undefined;
182
214
  const deltaUsage = event.usage as unknown as {
183
215
  output_tokens: number;
184
216
  cache_creation_input_tokens?: number | null;
@@ -219,6 +251,7 @@ export class AnthropicAdapter implements ProviderAdapter {
219
251
  content: contentBlocks,
220
252
  stop_reason: stopReason,
221
253
  stop_sequence: stopSequence ?? null,
254
+ stop_details: stopDetails ?? null,
222
255
  model,
223
256
  usage: {
224
257
  input_tokens: inputTokens,
@@ -249,7 +282,11 @@ export class AnthropicAdapter implements ProviderAdapter {
249
282
 
250
283
  private buildRequest(request: ProviderRequest): Anthropic.MessageCreateParams {
251
284
  // Strip provider-specific fields (e.g., sourceUrl for Gemini) from image blocks
252
- // before sending to Anthropic, which rejects extra inputs
285
+ // before sending to Anthropic, which rejects extra inputs.
286
+ // Also normalize nested tool_result content blocks: Membrane uses camelCase
287
+ // `mediaType`, Anthropic expects snake_case `media_type`. Without this,
288
+ // an image returned by a tool reaches the API as `{source: {mediaType: ...}}`
289
+ // and is silently rejected (the model sees the text label only).
253
290
  const sanitizedMessages = (request.messages as any[]).map((msg: any) => {
254
291
  if (!Array.isArray(msg.content)) return msg;
255
292
  return {
@@ -259,6 +296,12 @@ export class AnthropicAdapter implements ProviderAdapter {
259
296
  const { sourceUrl, ...rest } = block;
260
297
  return rest;
261
298
  }
299
+ if (block.type === 'tool_result' && Array.isArray(block.content)) {
300
+ return {
301
+ ...block,
302
+ content: toAnthropicToolResultContent(block.content as ContentBlock[]),
303
+ };
304
+ }
262
305
  return block;
263
306
  }),
264
307
  };
@@ -396,6 +439,41 @@ export class AnthropicAdapter implements ProviderAdapter {
396
439
  // Content Conversion Utilities
397
440
  // ============================================================================
398
441
 
442
/**
 * Build the mixed text/image array used as Anthropic `tool_result.content`
 * from Membrane content blocks.
 *
 * - `text` blocks become Anthropic text blocks.
 * - `image` blocks with a `base64` source are emitted with the camelCase
 *   `mediaType` renamed to snake_case `media_type`.
 * - `image` blocks with a `url` source are emitted as URL images.
 * - Every other block type, and any other image source type, is dropped.
 *
 * @param blocks Membrane content blocks returned by a tool.
 * @returns Converted blocks in the same relative order.
 */
function toAnthropicToolResultContent(
  blocks: ContentBlock[],
): Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> {
  const converted: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> = [];

  for (const item of blocks) {
    switch (item.type) {
      case 'text':
        converted.push({ type: 'text', text: item.text });
        break;

      case 'image': {
        const src = item.source;
        if (src.type === 'base64') {
          converted.push({
            type: 'image',
            source: {
              type: 'base64',
              media_type: src.mediaType as 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp',
              data: src.data,
            },
          });
        } else if (src.type === 'url') {
          converted.push({
            type: 'image',
            source: { type: 'url', url: src.url },
          });
        }
        break;
      }

      default:
        // Not representable inside tool_result.content — dropped.
        break;
    }
  }

  return converted;
}
476
+
399
477
  /**
400
478
  * Convert normalized content blocks to Anthropic format
401
479
  * Preserves cache_control for prompt caching
@@ -425,6 +503,11 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
425
503
  data: block.source.data,
426
504
  },
427
505
  });
506
+ } else if (block.source.type === 'url') {
507
+ result.push({
508
+ type: 'image',
509
+ source: { type: 'url', url: block.source.url },
510
+ });
428
511
  }
429
512
  break;
430
513
 
@@ -454,7 +537,7 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
454
537
  tool_use_id: block.toolUseId,
455
538
  content: typeof block.content === 'string'
456
539
  ? block.content
457
- : JSON.stringify(block.content),
540
+ : toAnthropicToolResultContent(block.content),
458
541
  is_error: block.isError,
459
542
  });
460
543
  break;
@@ -463,11 +546,21 @@ export function toAnthropicContent(blocks: ContentBlock[]): Anthropic.ContentBlo
463
546
  result.push({
464
547
  type: 'thinking',
465
548
  thinking: block.thinking,
549
+ ...(block.signature ? { signature: block.signature } : {}),
550
+ } as any);
551
+ break;
552
+
553
+ case 'redacted_thinking':
554
+ // Round-trip verbatim — `data` is the encrypted reasoning payload;
555
+ // the API rejects/ignores the block without it.
556
+ result.push({
557
+ type: 'redacted_thinking',
558
+ data: (block as any).data,
466
559
  } as any);
467
560
  break;
468
561
  }
469
562
  }
470
-
563
+
471
564
  return result;
472
565
  }
473
566
 
@@ -503,7 +596,9 @@ export function fromAnthropicContent(blocks: Anthropic.ContentBlock[]): ContentB
503
596
  default:
504
597
  // Handle redacted_thinking or unknown types
505
598
  if ((block as any).type === 'redacted_thinking') {
506
- result.push({ type: 'redacted_thinking' });
599
+ // Preserve the encrypted `data` payload — without it the block
600
+ // cannot be round-tripped and prior reasoning is lost.
601
+ result.push({ type: 'redacted_thinking', data: (block as any).data } as any);
507
602
  }
508
603
  break;
509
604
  }
@@ -681,7 +681,11 @@ export class BedrockAdapter implements ProviderAdapter {
681
681
  role: 'assistant',
682
682
  content: contentBlocks.map(b => {
683
683
  if (b.type === 'thinking') {
684
- return { type: 'thinking' as const, thinking: b.thinking, signature: b.signature };
684
+ return { type: 'thinking' as const, thinking: b.thinking ?? '', signature: b.signature };
685
+ }
686
+ if (b.type === 'redacted_thinking') {
687
+ // Pass through verbatim — carries the encrypted `data` payload
688
+ return { ...b } as unknown as { type: 'text'; text?: string };
685
689
  }
686
690
  return { type: b.type as 'text', text: b.text };
687
691
  }),
@@ -709,12 +713,17 @@ export class BedrockAdapter implements ProviderAdapter {
709
713
  name: block.name,
710
714
  input: block.input as Record<string, unknown>,
711
715
  });
712
- } else if (block.type === 'thinking' && block.thinking) {
716
+ } else if (block.type === 'thinking') {
717
+ // Signature-only thinking blocks (display:'omitted') have an empty
718
+ // thinking field but must still be preserved for round-tripping.
713
719
  content.push({
714
720
  type: 'thinking',
715
- thinking: block.thinking,
716
- signature: block.signature,
721
+ thinking: block.thinking ?? '',
722
+ ...(block.signature ? { signature: block.signature } : {}),
717
723
  });
724
+ } else if ((block as any).type === 'redacted_thinking') {
725
+ // Pass through verbatim — carries the encrypted `data` payload
726
+ content.push({ ...(block as any) } as ContentBlock);
718
727
  }
719
728
  }
720
729
 
@@ -301,6 +301,10 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
301
301
  params.frequency_penalty = request.frequencyPenalty;
302
302
  }
303
303
 
304
+ if (request.repetitionPenalty !== undefined) {
305
+ params.repetition_penalty = request.repetitionPenalty;
306
+ }
307
+
304
308
  // OpenAI-compatible APIs may limit stop sequences (OpenAI: 4) — truncate to be safe
305
309
  if (request.stopSequences && request.stopSequences.length > 0) {
306
310
  params.stop = request.stopSequences.slice(0, 4);