@animalabs/membrane 0.5.19 → 0.5.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/formatters/completions.d.ts.map +1 -1
- package/dist/formatters/completions.js +6 -1
- package/dist/formatters/completions.js.map +1 -1
- package/dist/formatters/native.d.ts.map +1 -1
- package/dist/formatters/native.js +13 -2
- package/dist/formatters/native.js.map +1 -1
- package/dist/membrane.d.ts.map +1 -1
- package/dist/membrane.js +26 -9
- package/dist/membrane.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +6 -0
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/bedrock.d.ts.map +1 -1
- package/dist/providers/bedrock.js +6 -0
- package/dist/providers/bedrock.js.map +1 -1
- package/dist/providers/gemini.d.ts.map +1 -1
- package/dist/providers/gemini.js +6 -0
- package/dist/providers/gemini.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +9 -0
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +9 -0
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +9 -0
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers/openrouter.d.ts.map +1 -1
- package/dist/providers/openrouter.js +9 -0
- package/dist/providers/openrouter.js.map +1 -1
- package/dist/types/provider.d.ts +8 -0
- package/dist/types/provider.d.ts.map +1 -1
- package/dist/types/request.d.ts +6 -0
- package/dist/types/request.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/formatters/completions.ts +7 -0
- package/src/formatters/native.ts +15 -1
- package/src/membrane.ts +29 -12
- package/src/providers/anthropic.ts +9 -1
- package/src/providers/bedrock.ts +10 -0
- package/src/providers/gemini.ts +9 -0
- package/src/providers/openai-compatible.ts +13 -1
- package/src/providers/openai-completions.ts +15 -0
- package/src/providers/openai.ts +13 -1
- package/src/providers/openrouter.ts +13 -1
- package/src/types/provider.ts +13 -1
- package/src/types/request.ts +8 -1
package/src/membrane.ts
CHANGED
|
@@ -233,7 +233,7 @@ export class Membrane {
|
|
|
233
233
|
// Initialize parser from formatter for format-specific tracking
|
|
234
234
|
const parser = formatter.createStreamParser();
|
|
235
235
|
let toolDepth = 0;
|
|
236
|
-
let totalUsage:
|
|
236
|
+
let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
|
|
237
237
|
const contentBlocks: ContentBlock[] = [];
|
|
238
238
|
let lastStopReason: StopReason = 'end_turn';
|
|
239
239
|
let rawRequest: unknown;
|
|
@@ -352,9 +352,15 @@ export class Membrane {
|
|
|
352
352
|
|
|
353
353
|
lastStopReason = this.mapStopReason(streamResult.stopReason);
|
|
354
354
|
|
|
355
|
-
// Accumulate usage
|
|
355
|
+
// Accumulate usage (including cache metrics)
|
|
356
356
|
totalUsage.inputTokens += streamResult.usage.inputTokens;
|
|
357
357
|
totalUsage.outputTokens += streamResult.usage.outputTokens;
|
|
358
|
+
if (streamResult.usage.cacheCreationTokens) {
|
|
359
|
+
totalUsage.cacheCreationTokens = (totalUsage.cacheCreationTokens ?? 0) + streamResult.usage.cacheCreationTokens;
|
|
360
|
+
}
|
|
361
|
+
if (streamResult.usage.cacheReadTokens) {
|
|
362
|
+
totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
|
|
363
|
+
}
|
|
358
364
|
onUsage?.(totalUsage);
|
|
359
365
|
|
|
360
366
|
// Flush the parser to complete any in-progress streaming block
|
|
@@ -649,7 +655,7 @@ export class Membrane {
|
|
|
649
655
|
} = options;
|
|
650
656
|
|
|
651
657
|
let toolDepth = 0;
|
|
652
|
-
let totalUsage:
|
|
658
|
+
let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
|
|
653
659
|
let lastStopReason: StopReason = 'end_turn';
|
|
654
660
|
let rawRequest: unknown;
|
|
655
661
|
let rawResponse: unknown;
|
|
@@ -709,9 +715,15 @@ export class Membrane {
|
|
|
709
715
|
|
|
710
716
|
lastStopReason = this.mapStopReason(streamResult.stopReason);
|
|
711
717
|
|
|
712
|
-
// Accumulate usage
|
|
718
|
+
// Accumulate usage (including cache metrics)
|
|
713
719
|
totalUsage.inputTokens += streamResult.usage.inputTokens;
|
|
714
720
|
totalUsage.outputTokens += streamResult.usage.outputTokens;
|
|
721
|
+
if (streamResult.usage.cacheCreationTokens) {
|
|
722
|
+
totalUsage.cacheCreationTokens = (totalUsage.cacheCreationTokens ?? 0) + streamResult.usage.cacheCreationTokens;
|
|
723
|
+
}
|
|
724
|
+
if (streamResult.usage.cacheReadTokens) {
|
|
725
|
+
totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
|
|
726
|
+
}
|
|
715
727
|
onUsage?.(totalUsage);
|
|
716
728
|
|
|
717
729
|
// Parse content blocks from response
|
|
@@ -822,9 +834,9 @@ export class Membrane {
|
|
|
822
834
|
},
|
|
823
835
|
cache: {
|
|
824
836
|
markersInRequest: 0,
|
|
825
|
-
tokensCreated: 0,
|
|
826
|
-
tokensRead: 0,
|
|
827
|
-
hitRatio:
|
|
837
|
+
tokensCreated: totalUsage.cacheCreationTokens ?? 0,
|
|
838
|
+
tokensRead: totalUsage.cacheReadTokens ?? 0,
|
|
839
|
+
hitRatio: this.calculateCacheHitRatio(totalUsage),
|
|
828
840
|
},
|
|
829
841
|
},
|
|
830
842
|
raw: {
|
|
@@ -1003,12 +1015,17 @@ export class Membrane {
|
|
|
1003
1015
|
cacheTtl: request.cacheTtl,
|
|
1004
1016
|
additionalStopSequences,
|
|
1005
1017
|
maxParticipantsForStop,
|
|
1018
|
+
contextPrefix: request.contextPrefix,
|
|
1006
1019
|
});
|
|
1007
1020
|
|
|
1008
1021
|
const providerRequest = {
|
|
1009
1022
|
model: request.config.model,
|
|
1010
1023
|
maxTokens: request.config.maxTokens,
|
|
1011
1024
|
temperature: request.config.temperature,
|
|
1025
|
+
topP: request.config.topP,
|
|
1026
|
+
topK: request.config.topK,
|
|
1027
|
+
presencePenalty: request.config.presencePenalty,
|
|
1028
|
+
frequencyPenalty: request.config.frequencyPenalty,
|
|
1012
1029
|
messages: buildResult.messages,
|
|
1013
1030
|
system: buildResult.systemContent,
|
|
1014
1031
|
stopSequences: buildResult.stopSequences,
|
|
@@ -1272,7 +1289,7 @@ export class Membrane {
|
|
|
1272
1289
|
accumulated: string,
|
|
1273
1290
|
contentBlocks: ContentBlock[],
|
|
1274
1291
|
stopReason: StopReason,
|
|
1275
|
-
usage:
|
|
1292
|
+
usage: DetailedUsage,
|
|
1276
1293
|
request: NormalizedRequest,
|
|
1277
1294
|
prefillResult: {
|
|
1278
1295
|
cacheMarkersApplied?: number;
|
|
@@ -1334,10 +1351,10 @@ export class Membrane {
|
|
|
1334
1351
|
provider: this.adapter.name,
|
|
1335
1352
|
},
|
|
1336
1353
|
cache: {
|
|
1337
|
-
markersInRequest: 0,
|
|
1338
|
-
tokensCreated: 0,
|
|
1339
|
-
tokensRead: 0,
|
|
1340
|
-
hitRatio:
|
|
1354
|
+
markersInRequest: prefillResult.cacheMarkersApplied ?? 0,
|
|
1355
|
+
tokensCreated: usage.cacheCreationTokens ?? 0,
|
|
1356
|
+
tokensRead: usage.cacheReadTokens ?? 0,
|
|
1357
|
+
hitRatio: this.calculateCacheHitRatio(usage),
|
|
1341
1358
|
},
|
|
1342
1359
|
},
|
|
1343
1360
|
raw: {
|
package/src/providers/anthropic.ts
CHANGED
|
@@ -141,7 +141,15 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
141
141
|
if (request.temperature !== undefined) {
|
|
142
142
|
params.temperature = request.temperature;
|
|
143
143
|
}
|
|
144
|
-
|
|
144
|
+
|
|
145
|
+
if (request.topP !== undefined) {
|
|
146
|
+
params.top_p = request.topP;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
if (request.topK !== undefined) {
|
|
150
|
+
params.top_k = request.topK;
|
|
151
|
+
}
|
|
152
|
+
|
|
145
153
|
if (request.stopSequences && request.stopSequences.length > 0) {
|
|
146
154
|
params.stop_sequences = request.stopSequences;
|
|
147
155
|
}
|
package/src/providers/bedrock.ts
CHANGED
|
@@ -58,6 +58,8 @@ interface BedrockMessageRequest {
|
|
|
58
58
|
}>;
|
|
59
59
|
system?: string | Array<{ type: 'text'; text: string; cache_control?: { type: 'ephemeral' } }>;
|
|
60
60
|
temperature?: number;
|
|
61
|
+
top_p?: number;
|
|
62
|
+
top_k?: number;
|
|
61
63
|
stop_sequences?: string[];
|
|
62
64
|
tools?: unknown[];
|
|
63
65
|
thinking?: { type: 'enabled'; budget_tokens: number };
|
|
@@ -343,6 +345,14 @@ export class BedrockAdapter implements ProviderAdapter {
|
|
|
343
345
|
params.temperature = request.temperature;
|
|
344
346
|
}
|
|
345
347
|
|
|
348
|
+
if (request.topP !== undefined) {
|
|
349
|
+
params.top_p = request.topP;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
if (request.topK !== undefined) {
|
|
353
|
+
params.top_k = request.topK;
|
|
354
|
+
}
|
|
355
|
+
|
|
346
356
|
if (request.stopSequences && request.stopSequences.length > 0) {
|
|
347
357
|
params.stop_sequences = request.stopSequences;
|
|
348
358
|
}
|
package/src/providers/gemini.ts
CHANGED
|
@@ -52,6 +52,7 @@ interface GeminiRequest {
|
|
|
52
52
|
maxOutputTokens?: number;
|
|
53
53
|
temperature?: number;
|
|
54
54
|
topP?: number;
|
|
55
|
+
topK?: number;
|
|
55
56
|
stopSequences?: string[];
|
|
56
57
|
};
|
|
57
58
|
tools?: { functionDeclarations: GeminiFunctionDeclaration[] }[];
|
|
@@ -336,6 +337,14 @@ export class GeminiAdapter implements ProviderAdapter {
|
|
|
336
337
|
geminiRequest.generationConfig.temperature = request.temperature;
|
|
337
338
|
}
|
|
338
339
|
|
|
340
|
+
if (request.topP !== undefined) {
|
|
341
|
+
geminiRequest.generationConfig.topP = request.topP;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
if (request.topK !== undefined) {
|
|
345
|
+
geminiRequest.generationConfig.topK = request.topK;
|
|
346
|
+
}
|
|
347
|
+
|
|
339
348
|
if (request.stopSequences && request.stopSequences.length > 0) {
|
|
340
349
|
// Gemini API limits stop sequences to 5
|
|
341
350
|
geminiRequest.generationConfig.stopSequences = request.stopSequences.slice(0, 5);
|
package/src/providers/openai-compatible.ts
CHANGED
|
@@ -278,7 +278,19 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
|
|
|
278
278
|
if (request.temperature !== undefined) {
|
|
279
279
|
params.temperature = request.temperature;
|
|
280
280
|
}
|
|
281
|
-
|
|
281
|
+
|
|
282
|
+
if (request.topP !== undefined) {
|
|
283
|
+
params.top_p = request.topP;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
if (request.presencePenalty !== undefined) {
|
|
287
|
+
params.presence_penalty = request.presencePenalty;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
if (request.frequencyPenalty !== undefined) {
|
|
291
|
+
params.frequency_penalty = request.frequencyPenalty;
|
|
292
|
+
}
|
|
293
|
+
|
|
282
294
|
if (request.stopSequences && request.stopSequences.length > 0) {
|
|
283
295
|
params.stop = request.stopSequences;
|
|
284
296
|
}
|
package/src/providers/openai-completions.ts
CHANGED
|
@@ -37,6 +37,9 @@ interface CompletionsRequest {
|
|
|
37
37
|
prompt: string;
|
|
38
38
|
max_tokens?: number;
|
|
39
39
|
temperature?: number;
|
|
40
|
+
top_p?: number;
|
|
41
|
+
presence_penalty?: number;
|
|
42
|
+
frequency_penalty?: number;
|
|
40
43
|
stop?: string[];
|
|
41
44
|
stream?: boolean;
|
|
42
45
|
}
|
|
@@ -344,6 +347,18 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
|
|
|
344
347
|
params.temperature = request.temperature;
|
|
345
348
|
}
|
|
346
349
|
|
|
350
|
+
if (request.topP !== undefined) {
|
|
351
|
+
params.top_p = request.topP;
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
if (request.presencePenalty !== undefined) {
|
|
355
|
+
params.presence_penalty = request.presencePenalty;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
if (request.frequencyPenalty !== undefined) {
|
|
359
|
+
params.frequency_penalty = request.frequencyPenalty;
|
|
360
|
+
}
|
|
361
|
+
|
|
347
362
|
// Generate stop sequences from participant names + EOT token + any extras
|
|
348
363
|
const stopSequences = [
|
|
349
364
|
...this.generateStopSequences(participants),
|
package/src/providers/openai.ts
CHANGED
|
@@ -376,7 +376,19 @@ export class OpenAIAdapter implements ProviderAdapter {
|
|
|
376
376
|
if (request.temperature !== undefined && !noTemperatureSupport(model)) {
|
|
377
377
|
params.temperature = request.temperature;
|
|
378
378
|
}
|
|
379
|
-
|
|
379
|
+
|
|
380
|
+
if (request.topP !== undefined && !noTemperatureSupport(model)) {
|
|
381
|
+
params.top_p = request.topP;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
if (request.presencePenalty !== undefined) {
|
|
385
|
+
params.presence_penalty = request.presencePenalty;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
if (request.frequencyPenalty !== undefined) {
|
|
389
|
+
params.frequency_penalty = request.frequencyPenalty;
|
|
390
|
+
}
|
|
391
|
+
|
|
380
392
|
// Reasoning models (o1, o3, o4) don't support stop sequences
|
|
381
393
|
if (request.stopSequences && request.stopSequences.length > 0 && !noStopSupport(model)) {
|
|
382
394
|
params.stop = request.stopSequences;
|
package/src/providers/openrouter.ts
CHANGED
|
@@ -298,7 +298,19 @@ export class OpenRouterAdapter implements ProviderAdapter {
|
|
|
298
298
|
if (request.temperature !== undefined) {
|
|
299
299
|
params.temperature = request.temperature;
|
|
300
300
|
}
|
|
301
|
-
|
|
301
|
+
|
|
302
|
+
if (request.topP !== undefined) {
|
|
303
|
+
params.top_p = request.topP;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
if (request.presencePenalty !== undefined) {
|
|
307
|
+
params.presence_penalty = request.presencePenalty;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
if (request.frequencyPenalty !== undefined) {
|
|
311
|
+
params.frequency_penalty = request.frequencyPenalty;
|
|
312
|
+
}
|
|
313
|
+
|
|
302
314
|
if (request.stopSequences && request.stopSequences.length > 0) {
|
|
303
315
|
params.stop = request.stopSequences;
|
|
304
316
|
}
|
package/src/types/provider.ts
CHANGED
|
@@ -202,7 +202,19 @@ export interface ProviderRequest {
|
|
|
202
202
|
|
|
203
203
|
/** Temperature */
|
|
204
204
|
temperature?: number;
|
|
205
|
-
|
|
205
|
+
|
|
206
|
+
/** Top P nucleus sampling */
|
|
207
|
+
topP?: number;
|
|
208
|
+
|
|
209
|
+
/** Top K sampling */
|
|
210
|
+
topK?: number;
|
|
211
|
+
|
|
212
|
+
/** Presence penalty */
|
|
213
|
+
presencePenalty?: number;
|
|
214
|
+
|
|
215
|
+
/** Frequency penalty */
|
|
216
|
+
frequencyPenalty?: number;
|
|
217
|
+
|
|
206
218
|
/** Stop sequences */
|
|
207
219
|
stopSequences?: string[];
|
|
208
220
|
|
package/src/types/request.ts
CHANGED
|
@@ -139,7 +139,14 @@ export interface NormalizedRequest {
|
|
|
139
139
|
* '1h' = 1 hour TTL (extended caching)
|
|
140
140
|
*/
|
|
141
141
|
cacheTtl?: '5m' | '1h';
|
|
142
|
-
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Context prefix for simulacrum seeding.
|
|
145
|
+
* Injected as first assistant message (before conversation history).
|
|
146
|
+
* Cached when promptCaching is enabled.
|
|
147
|
+
*/
|
|
148
|
+
contextPrefix?: string;
|
|
149
|
+
|
|
143
150
|
/** Provider-specific parameters (pass-through) */
|
|
144
151
|
providerParams?: Record<string, unknown>;
|
|
145
152
|
}
|