@animalabs/membrane 0.5.55 → 0.5.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/formatters/native.d.ts.map +1 -1
- package/dist/formatters/native.js +11 -0
- package/dist/formatters/native.js.map +1 -1
- package/dist/membrane.d.ts +28 -0
- package/dist/membrane.d.ts.map +1 -1
- package/dist/membrane.js +169 -17
- package/dist/membrane.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +94 -3
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/bedrock.d.ts.map +1 -1
- package/dist/providers/bedrock.js +14 -4
- package/dist/providers/bedrock.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +3 -0
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +57 -3
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +3 -0
- package/dist/providers/openai.js.map +1 -1
- package/dist/types/content.d.ts +6 -0
- package/dist/types/content.d.ts.map +1 -1
- package/dist/types/content.js.map +1 -1
- package/dist/types/provider.d.ts +9 -0
- package/dist/types/provider.d.ts.map +1 -1
- package/dist/types/request.d.ts +10 -0
- package/dist/types/request.d.ts.map +1 -1
- package/dist/types/tools.d.ts +9 -0
- package/dist/types/tools.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/formatters/native.ts +10 -0
- package/src/membrane.ts +191 -19
- package/src/providers/anthropic.ts +100 -5
- package/src/providers/bedrock.ts +13 -4
- package/src/providers/openai-compatible.ts +4 -0
- package/src/providers/openai-completions.ts +58 -2
- package/src/providers/openai.ts +4 -0
- package/src/types/content.ts +6 -0
- package/src/types/provider.ts +10 -0
- package/src/types/request.ts +12 -1
- package/src/types/tools.ts +14 -4
|
@@ -41,6 +41,7 @@ interface CompletionsRequest {
|
|
|
41
41
|
top_p?: number;
|
|
42
42
|
presence_penalty?: number;
|
|
43
43
|
frequency_penalty?: number;
|
|
44
|
+
repetition_penalty?: number;
|
|
44
45
|
stop?: string[];
|
|
45
46
|
stream?: boolean;
|
|
46
47
|
}
|
|
@@ -194,6 +195,19 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
194
195
|
let accumulated = '';
|
|
195
196
|
let finishReason = 'stop';
|
|
196
197
|
|
|
198
|
+
// Post-facto truncation of the adapter's own eotToken.
|
|
199
|
+
// The adapter serializes the prompt with this.eotToken and sends it as an
|
|
200
|
+
// API stop string, but some backends leak the stop string into streamed
|
|
201
|
+
// output. Since the bot-level formatter may use a different (or empty)
|
|
202
|
+
// turn-end token, downstream post-facto checks can't be relied on to
|
|
203
|
+
// catch it — the layer that introduced the token must truncate it.
|
|
204
|
+
// emittedLen tracks how much of `accumulated` has been emitted; a tail of
|
|
205
|
+
// eot.length-1 chars is held back in case the token is split across chunks.
|
|
206
|
+
const eot = this.eotToken;
|
|
207
|
+
let emittedLen = 0;
|
|
208
|
+
let eotFound = false;
|
|
209
|
+
|
|
210
|
+
streamLoop:
|
|
197
211
|
while (true) {
|
|
198
212
|
const { done, value } = await reader.read();
|
|
199
213
|
if (done) break;
|
|
@@ -210,7 +224,28 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
210
224
|
|
|
211
225
|
if (text) {
|
|
212
226
|
accumulated += text;
|
|
213
|
-
|
|
227
|
+
if (eot) {
|
|
228
|
+
const idx = accumulated.indexOf(eot);
|
|
229
|
+
if (idx !== -1) {
|
|
230
|
+
// Truncate at the token, flush the un-emitted prefix, stop
|
|
231
|
+
accumulated = accumulated.slice(0, idx);
|
|
232
|
+
if (accumulated.length > emittedLen) {
|
|
233
|
+
callbacks.onChunk(accumulated.slice(emittedLen));
|
|
234
|
+
}
|
|
235
|
+
emittedLen = accumulated.length;
|
|
236
|
+
eotFound = true;
|
|
237
|
+
finishReason = 'stop';
|
|
238
|
+
break streamLoop;
|
|
239
|
+
}
|
|
240
|
+
// Emit all but a held-back tail that could be a partial token
|
|
241
|
+
const safeLen = Math.max(emittedLen, accumulated.length - (eot.length - 1));
|
|
242
|
+
if (safeLen > emittedLen) {
|
|
243
|
+
callbacks.onChunk(accumulated.slice(emittedLen, safeLen));
|
|
244
|
+
emittedLen = safeLen;
|
|
245
|
+
}
|
|
246
|
+
} else {
|
|
247
|
+
callbacks.onChunk(text);
|
|
248
|
+
}
|
|
214
249
|
}
|
|
215
250
|
|
|
216
251
|
if (parsed.choices?.[0]?.finish_reason) {
|
|
@@ -222,6 +257,14 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
222
257
|
}
|
|
223
258
|
}
|
|
224
259
|
|
|
260
|
+
// Flush any held-back tail if the token never completed
|
|
261
|
+
if (eot && !eotFound && accumulated.length > emittedLen) {
|
|
262
|
+
callbacks.onChunk(accumulated.slice(emittedLen));
|
|
263
|
+
}
|
|
264
|
+
if (eotFound) {
|
|
265
|
+
try { await reader.cancel(); } catch { /* stream already closed */ }
|
|
266
|
+
}
|
|
267
|
+
|
|
225
268
|
return this.buildStreamedResponse(accumulated, finishReason, request.model, completionsRequest);
|
|
226
269
|
|
|
227
270
|
} catch (error) {
|
|
@@ -383,6 +426,10 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
383
426
|
params.frequency_penalty = request.frequencyPenalty;
|
|
384
427
|
}
|
|
385
428
|
|
|
429
|
+
if (request.repetitionPenalty !== undefined) {
|
|
430
|
+
params.repetition_penalty = request.repetitionPenalty;
|
|
431
|
+
}
|
|
432
|
+
|
|
386
433
|
if (stopSequences.length > 0) {
|
|
387
434
|
params.stop = stopSequences;
|
|
388
435
|
}
|
|
@@ -419,7 +466,16 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
419
466
|
|
|
420
467
|
private parseResponse(response: CompletionsResponse, requestedModel: string, rawRequest: unknown): ProviderResponse {
|
|
421
468
|
const choice = response.choices[0];
|
|
422
|
-
|
|
469
|
+
let text = choice?.text ?? '';
|
|
470
|
+
|
|
471
|
+
// Post-facto truncation of the adapter's own eotToken — some backends
|
|
472
|
+
// leak the stop string into the output (see stream() for details)
|
|
473
|
+
if (this.eotToken) {
|
|
474
|
+
const idx = text.indexOf(this.eotToken);
|
|
475
|
+
if (idx !== -1) {
|
|
476
|
+
text = text.slice(0, idx);
|
|
477
|
+
}
|
|
478
|
+
}
|
|
423
479
|
|
|
424
480
|
return {
|
|
425
481
|
content: this.textToContent(text),
|
package/src/providers/openai.ts
CHANGED
|
@@ -399,6 +399,10 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
399
399
|
params.frequency_penalty = request.frequencyPenalty;
|
|
400
400
|
}
|
|
401
401
|
|
|
402
|
+
if (request.repetitionPenalty !== undefined) {
|
|
403
|
+
params.repetition_penalty = request.repetitionPenalty;
|
|
404
|
+
}
|
|
405
|
+
|
|
402
406
|
// Reasoning models (o1, o3, o4) don't support stop sequences
|
|
403
407
|
// OpenAI limits stop sequences to 4 — truncate to fit
|
|
404
408
|
if (request.stopSequences && request.stopSequences.length > 0 && !noStopSupport(model)) {
|
package/src/types/content.ts
CHANGED
|
@@ -113,6 +113,12 @@ export interface ThinkingContent {
|
|
|
113
113
|
|
|
114
114
|
export interface RedactedThinkingContent {
|
|
115
115
|
type: 'redacted_thinking';
|
|
116
|
+
/**
|
|
117
|
+
* Encrypted reasoning payload from the provider. Opaque — must be
|
|
118
|
+
* round-tripped verbatim in assistant turns or the block is worthless
|
|
119
|
+
* (the API decrypts it to reconstruct prior reasoning).
|
|
120
|
+
*/
|
|
121
|
+
data: string;
|
|
116
122
|
}
|
|
117
123
|
|
|
118
124
|
// ============================================================================
|
package/src/types/provider.ts
CHANGED
|
@@ -215,6 +215,9 @@ export interface ProviderRequest {
|
|
|
215
215
|
/** Frequency penalty */
|
|
216
216
|
frequencyPenalty?: number;
|
|
217
217
|
|
|
218
|
+
/** Repetition penalty (multiplicative, vLLM/HuggingFace style) */
|
|
219
|
+
repetitionPenalty?: number;
|
|
220
|
+
|
|
218
221
|
/** Stop sequences */
|
|
219
222
|
stopSequences?: string[];
|
|
220
223
|
|
|
@@ -232,6 +235,13 @@ export interface ProviderRequestOptions {
|
|
|
232
235
|
idleTimeoutMs?: number;
|
|
233
236
|
/** Called with the raw API request body right before fetch */
|
|
234
237
|
onRequest?: (rawRequest: unknown) => void;
|
|
238
|
+
/**
|
|
239
|
+
* Wrap native thinking deltas in <thinking>...</thinking> tags on the
|
|
240
|
+
* onChunk stream. Used by the XML formatter path so its tag-based parser
|
|
241
|
+
* tracks thinking blocks; without this, native thinking content streams
|
|
242
|
+
* indistinguishably from visible text.
|
|
243
|
+
*/
|
|
244
|
+
wrapThinkingTags?: boolean;
|
|
235
245
|
}
|
|
236
246
|
|
|
237
247
|
export interface ProviderResponse {
|
package/src/types/request.ts
CHANGED
|
@@ -30,11 +30,22 @@ export interface GenerationConfig {
|
|
|
30
30
|
|
|
31
31
|
/** Frequency penalty (provider-specific) */
|
|
32
32
|
frequencyPenalty?: number;
|
|
33
|
-
|
|
33
|
+
|
|
34
|
+
/** Repetition penalty — multiplicative (vLLM/HuggingFace style, typically 1.0-1.2) */
|
|
35
|
+
repetitionPenalty?: number;
|
|
36
|
+
|
|
34
37
|
/** Enable thinking/reasoning mode */
|
|
35
38
|
thinking?: {
|
|
36
39
|
enabled: boolean;
|
|
37
40
|
budgetTokens?: number;
|
|
41
|
+
/** Thinking type for the API: 'enabled' (default, explicit budget) or 'adaptive' (model-managed) */
|
|
42
|
+
type?: 'enabled' | 'adaptive';
|
|
43
|
+
/**
|
|
44
|
+
* Controls how thinking content is returned: 'summarized' (readable summary)
|
|
45
|
+
* or 'omitted' (empty thinking field, signature only). Models like Fable 5 /
|
|
46
|
+
* Opus 4.7+ default to 'omitted' — set 'summarized' to receive thinking text.
|
|
47
|
+
*/
|
|
48
|
+
display?: 'summarized' | 'omitted';
|
|
38
49
|
};
|
|
39
50
|
|
|
40
51
|
/** Image generation config (Gemini) */
|
package/src/types/tools.ts
CHANGED
|
@@ -59,18 +59,28 @@ export type ToolResultContentBlock =
|
|
|
59
59
|
export interface ToolContext {
|
|
60
60
|
/** The raw text that contained the tool calls */
|
|
61
61
|
rawText: string;
|
|
62
|
-
|
|
62
|
+
|
|
63
63
|
/** Text before the tool calls (already streamed to user) */
|
|
64
64
|
preamble: string;
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
/** Current depth in tool execution loop */
|
|
67
67
|
depth: number;
|
|
68
|
-
|
|
68
|
+
|
|
69
69
|
/** Previous tool results in this execution chain */
|
|
70
70
|
previousResults: ToolResult[];
|
|
71
|
-
|
|
71
|
+
|
|
72
72
|
/** Accumulated output so far */
|
|
73
73
|
accumulated: string;
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Normalized content blocks of the current assistant round, in provider
|
|
77
|
+
* order (thinking / redacted_thinking / text / tool_use). Present on the
|
|
78
|
+
* native-tools yielding path. Consumers persisting the assistant turn
|
|
79
|
+
* should use these verbatim instead of rebuilding from `preamble` +
|
|
80
|
+
* `calls` — signed thinking blocks must precede their tool_use in the
|
|
81
|
+
* same turn or the next request fails API validation.
|
|
82
|
+
*/
|
|
83
|
+
roundContent?: import('./content.js').ContentBlock[];
|
|
74
84
|
}
|
|
75
85
|
|
|
76
86
|
// ============================================================================
|