@animalabs/membrane 0.5.55 → 0.5.64

This diff shows the differences between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. package/dist/formatters/native.d.ts.map +1 -1
  2. package/dist/formatters/native.js +11 -0
  3. package/dist/formatters/native.js.map +1 -1
  4. package/dist/membrane.d.ts +28 -0
  5. package/dist/membrane.d.ts.map +1 -1
  6. package/dist/membrane.js +169 -17
  7. package/dist/membrane.js.map +1 -1
  8. package/dist/providers/anthropic.d.ts.map +1 -1
  9. package/dist/providers/anthropic.js +94 -3
  10. package/dist/providers/anthropic.js.map +1 -1
  11. package/dist/providers/bedrock.d.ts.map +1 -1
  12. package/dist/providers/bedrock.js +14 -4
  13. package/dist/providers/bedrock.js.map +1 -1
  14. package/dist/providers/openai-compatible.d.ts.map +1 -1
  15. package/dist/providers/openai-compatible.js +3 -0
  16. package/dist/providers/openai-compatible.js.map +1 -1
  17. package/dist/providers/openai-completions.d.ts.map +1 -1
  18. package/dist/providers/openai-completions.js +57 -3
  19. package/dist/providers/openai-completions.js.map +1 -1
  20. package/dist/providers/openai.d.ts.map +1 -1
  21. package/dist/providers/openai.js +3 -0
  22. package/dist/providers/openai.js.map +1 -1
  23. package/dist/types/content.d.ts +6 -0
  24. package/dist/types/content.d.ts.map +1 -1
  25. package/dist/types/content.js.map +1 -1
  26. package/dist/types/provider.d.ts +9 -0
  27. package/dist/types/provider.d.ts.map +1 -1
  28. package/dist/types/request.d.ts +10 -0
  29. package/dist/types/request.d.ts.map +1 -1
  30. package/dist/types/tools.d.ts +9 -0
  31. package/dist/types/tools.d.ts.map +1 -1
  32. package/package.json +1 -1
  33. package/src/formatters/native.ts +10 -0
  34. package/src/membrane.ts +191 -19
  35. package/src/providers/anthropic.ts +100 -5
  36. package/src/providers/bedrock.ts +13 -4
  37. package/src/providers/openai-compatible.ts +4 -0
  38. package/src/providers/openai-completions.ts +58 -2
  39. package/src/providers/openai.ts +4 -0
  40. package/src/types/content.ts +6 -0
  41. package/src/types/provider.ts +10 -0
  42. package/src/types/request.ts +12 -1
  43. package/src/types/tools.ts +14 -4
@@ -41,6 +41,7 @@ interface CompletionsRequest {
41
41
  top_p?: number;
42
42
  presence_penalty?: number;
43
43
  frequency_penalty?: number;
44
+ repetition_penalty?: number;
44
45
  stop?: string[];
45
46
  stream?: boolean;
46
47
  }
@@ -194,6 +195,19 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
194
195
  let accumulated = '';
195
196
  let finishReason = 'stop';
196
197
 
198
+ // Post-facto truncation of the adapter's own eotToken.
199
+ // The adapter serializes the prompt with this.eotToken and sends it as an
200
+ // API stop string, but some backends leak the stop string into streamed
201
+ // output. Since the bot-level formatter may use a different (or empty)
202
+ // turn-end token, downstream post-facto checks can't be relied on to
203
+ // catch it — the layer that introduced the token must truncate it.
204
+ // emittedLen tracks how much of `accumulated` has been emitted; a tail of
205
+ // eot.length-1 chars is held back in case the token is split across chunks.
206
+ const eot = this.eotToken;
207
+ let emittedLen = 0;
208
+ let eotFound = false;
209
+
210
+ streamLoop:
197
211
  while (true) {
198
212
  const { done, value } = await reader.read();
199
213
  if (done) break;
@@ -210,7 +224,28 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
210
224
 
211
225
  if (text) {
212
226
  accumulated += text;
213
- callbacks.onChunk(text);
227
+ if (eot) {
228
+ const idx = accumulated.indexOf(eot);
229
+ if (idx !== -1) {
230
+ // Truncate at the token, flush the un-emitted prefix, stop
231
+ accumulated = accumulated.slice(0, idx);
232
+ if (accumulated.length > emittedLen) {
233
+ callbacks.onChunk(accumulated.slice(emittedLen));
234
+ }
235
+ emittedLen = accumulated.length;
236
+ eotFound = true;
237
+ finishReason = 'stop';
238
+ break streamLoop;
239
+ }
240
+ // Emit all but a held-back tail that could be a partial token
241
+ const safeLen = Math.max(emittedLen, accumulated.length - (eot.length - 1));
242
+ if (safeLen > emittedLen) {
243
+ callbacks.onChunk(accumulated.slice(emittedLen, safeLen));
244
+ emittedLen = safeLen;
245
+ }
246
+ } else {
247
+ callbacks.onChunk(text);
248
+ }
214
249
  }
215
250
 
216
251
  if (parsed.choices?.[0]?.finish_reason) {
@@ -222,6 +257,14 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
222
257
  }
223
258
  }
224
259
 
260
+ // Flush any held-back tail if the token never completed
261
+ if (eot && !eotFound && accumulated.length > emittedLen) {
262
+ callbacks.onChunk(accumulated.slice(emittedLen));
263
+ }
264
+ if (eotFound) {
265
+ try { await reader.cancel(); } catch { /* stream already closed */ }
266
+ }
267
+
225
268
  return this.buildStreamedResponse(accumulated, finishReason, request.model, completionsRequest);
226
269
 
227
270
  } catch (error) {
@@ -383,6 +426,10 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
383
426
  params.frequency_penalty = request.frequencyPenalty;
384
427
  }
385
428
 
429
+ if (request.repetitionPenalty !== undefined) {
430
+ params.repetition_penalty = request.repetitionPenalty;
431
+ }
432
+
386
433
  if (stopSequences.length > 0) {
387
434
  params.stop = stopSequences;
388
435
  }
@@ -419,7 +466,16 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
419
466
 
420
467
  private parseResponse(response: CompletionsResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
421
468
  const choice = response.choices[0];
422
- const text = choice?.text ?? '';
469
+ let text = choice?.text ?? '';
470
+
471
+ // Post-facto truncation of the adapter's own eotToken — some backends
472
+ // leak the stop string into the output (see stream() for details)
473
+ if (this.eotToken) {
474
+ const idx = text.indexOf(this.eotToken);
475
+ if (idx !== -1) {
476
+ text = text.slice(0, idx);
477
+ }
478
+ }
423
479
 
424
480
  return {
425
481
  content: this.textToContent(text),
@@ -399,6 +399,10 @@ export class OpenAIAdapter implements ProviderAdapter {
399
399
  params.frequency_penalty = request.frequencyPenalty;
400
400
  }
401
401
 
402
+ if (request.repetitionPenalty !== undefined) {
403
+ params.repetition_penalty = request.repetitionPenalty;
404
+ }
405
+
402
406
  // Reasoning models (o1, o3, o4) don't support stop sequences
403
407
  // OpenAI limits stop sequences to 4 — truncate to fit
404
408
  if (request.stopSequences && request.stopSequences.length > 0 && !noStopSupport(model)) {
@@ -113,6 +113,12 @@ export interface ThinkingContent {
113
113
 
114
114
  export interface RedactedThinkingContent {
115
115
  type: 'redacted_thinking';
116
+ /**
117
+ * Encrypted reasoning payload from the provider. Opaque — must be
118
+ * round-tripped verbatim in assistant turns or the block is worthless
119
+ * (the API decrypts it to reconstruct prior reasoning).
120
+ */
121
+ data: string;
116
122
  }
117
123
 
118
124
  // ============================================================================
@@ -215,6 +215,9 @@ export interface ProviderRequest {
215
215
  /** Frequency penalty */
216
216
  frequencyPenalty?: number;
217
217
 
218
+ /** Repetition penalty (multiplicative, vLLM/HuggingFace style) */
219
+ repetitionPenalty?: number;
220
+
218
221
  /** Stop sequences */
219
222
  stopSequences?: string[];
220
223
 
@@ -232,6 +235,13 @@ export interface ProviderRequestOptions {
232
235
  idleTimeoutMs?: number;
233
236
  /** Called with the raw API request body right before fetch */
234
237
  onRequest?: (rawRequest: unknown) => void;
238
+ /**
239
+ * Wrap native thinking deltas in <thinking>...</thinking> tags on the
240
+ * onChunk stream. Used by the XML formatter path so its tag-based parser
241
+ * tracks thinking blocks; without this, native thinking content streams
242
+ * indistinguishably from visible text.
243
+ */
244
+ wrapThinkingTags?: boolean;
235
245
  }
236
246
 
237
247
  export interface ProviderResponse {
@@ -30,11 +30,22 @@ export interface GenerationConfig {
30
30
 
31
31
  /** Frequency penalty (provider-specific) */
32
32
  frequencyPenalty?: number;
33
-
33
+
34
+ /** Repetition penalty — multiplicative (vLLM/HuggingFace style, typically 1.0-1.2) */
35
+ repetitionPenalty?: number;
36
+
34
37
  /** Enable thinking/reasoning mode */
35
38
  thinking?: {
36
39
  enabled: boolean;
37
40
  budgetTokens?: number;
41
+ /** Thinking type for the API: 'enabled' (default, explicit budget) or 'adaptive' (model-managed) */
42
+ type?: 'enabled' | 'adaptive';
43
+ /**
44
+ * Controls how thinking content is returned: 'summarized' (readable summary)
45
+ * or 'omitted' (empty thinking field, signature only). Models like Fable 5 /
46
+ * Opus 4.7+ default to 'omitted' — set 'summarized' to receive thinking text.
47
+ */
48
+ display?: 'summarized' | 'omitted';
38
49
  };
39
50
 
40
51
  /** Image generation config (Gemini) */
@@ -59,18 +59,28 @@ export type ToolResultContentBlock =
59
59
  export interface ToolContext {
60
60
  /** The raw text that contained the tool calls */
61
61
  rawText: string;
62
-
62
+
63
63
  /** Text before the tool calls (already streamed to user) */
64
64
  preamble: string;
65
-
65
+
66
66
  /** Current depth in tool execution loop */
67
67
  depth: number;
68
-
68
+
69
69
  /** Previous tool results in this execution chain */
70
70
  previousResults: ToolResult[];
71
-
71
+
72
72
  /** Accumulated output so far */
73
73
  accumulated: string;
74
+
75
+ /**
76
+ * Normalized content blocks of the current assistant round, in provider
77
+ * order (thinking / redacted_thinking / text / tool_use). Present on the
78
+ * native-tools yielding path. Consumers persisting the assistant turn
79
+ * should use these verbatim instead of rebuilding from `preamble` +
80
+ * `calls` — signed thinking blocks must precede their tool_use in the
81
+ * same turn or the next request fails API validation.
82
+ */
83
+ roundContent?: import('./content.js').ContentBlock[];
74
84
  }
75
85
 
76
86
  // ============================================================================