@animalabs/membrane 0.5.19 → 0.5.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/formatters/completions.d.ts.map +1 -1
  2. package/dist/formatters/completions.js +6 -1
  3. package/dist/formatters/completions.js.map +1 -1
  4. package/dist/formatters/native.d.ts.map +1 -1
  5. package/dist/formatters/native.js +13 -2
  6. package/dist/formatters/native.js.map +1 -1
  7. package/dist/membrane.d.ts.map +1 -1
  8. package/dist/membrane.js +26 -9
  9. package/dist/membrane.js.map +1 -1
  10. package/dist/providers/anthropic.d.ts.map +1 -1
  11. package/dist/providers/anthropic.js +6 -0
  12. package/dist/providers/anthropic.js.map +1 -1
  13. package/dist/providers/bedrock.d.ts.map +1 -1
  14. package/dist/providers/bedrock.js +6 -0
  15. package/dist/providers/bedrock.js.map +1 -1
  16. package/dist/providers/gemini.d.ts.map +1 -1
  17. package/dist/providers/gemini.js +6 -0
  18. package/dist/providers/gemini.js.map +1 -1
  19. package/dist/providers/openai-compatible.d.ts.map +1 -1
  20. package/dist/providers/openai-compatible.js +9 -0
  21. package/dist/providers/openai-compatible.js.map +1 -1
  22. package/dist/providers/openai-completions.d.ts.map +1 -1
  23. package/dist/providers/openai-completions.js +9 -0
  24. package/dist/providers/openai-completions.js.map +1 -1
  25. package/dist/providers/openai.d.ts.map +1 -1
  26. package/dist/providers/openai.js +9 -0
  27. package/dist/providers/openai.js.map +1 -1
  28. package/dist/providers/openrouter.d.ts.map +1 -1
  29. package/dist/providers/openrouter.js +9 -0
  30. package/dist/providers/openrouter.js.map +1 -1
  31. package/dist/types/provider.d.ts +8 -0
  32. package/dist/types/provider.d.ts.map +1 -1
  33. package/dist/types/request.d.ts +6 -0
  34. package/dist/types/request.d.ts.map +1 -1
  35. package/package.json +1 -1
  36. package/src/formatters/completions.ts +7 -0
  37. package/src/formatters/native.ts +15 -1
  38. package/src/membrane.ts +29 -12
  39. package/src/providers/anthropic.ts +9 -1
  40. package/src/providers/bedrock.ts +10 -0
  41. package/src/providers/gemini.ts +9 -0
  42. package/src/providers/openai-compatible.ts +13 -1
  43. package/src/providers/openai-completions.ts +15 -0
  44. package/src/providers/openai.ts +13 -1
  45. package/src/providers/openrouter.ts +13 -1
  46. package/src/types/provider.ts +13 -1
  47. package/src/types/request.ts +8 -1
package/src/membrane.ts CHANGED
@@ -233,7 +233,7 @@ export class Membrane {
     // Initialize parser from formatter for format-specific tracking
     const parser = formatter.createStreamParser();
     let toolDepth = 0;
-    let totalUsage: BasicUsage = { inputTokens: 0, outputTokens: 0 };
+    let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
     const contentBlocks: ContentBlock[] = [];
     let lastStopReason: StopReason = 'end_turn';
     let rawRequest: unknown;
@@ -352,9 +352,15 @@ export class Membrane {
 
     lastStopReason = this.mapStopReason(streamResult.stopReason);
 
-    // Accumulate usage
+    // Accumulate usage (including cache metrics)
     totalUsage.inputTokens += streamResult.usage.inputTokens;
     totalUsage.outputTokens += streamResult.usage.outputTokens;
+    if (streamResult.usage.cacheCreationTokens) {
+      totalUsage.cacheCreationTokens = (totalUsage.cacheCreationTokens ?? 0) + streamResult.usage.cacheCreationTokens;
+    }
+    if (streamResult.usage.cacheReadTokens) {
+      totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
+    }
     onUsage?.(totalUsage);
 
     // Flush the parser to complete any in-progress streaming block
@@ -649,7 +655,7 @@ export class Membrane {
     } = options;
 
     let toolDepth = 0;
-    let totalUsage: BasicUsage = { inputTokens: 0, outputTokens: 0 };
+    let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
     let lastStopReason: StopReason = 'end_turn';
     let rawRequest: unknown;
     let rawResponse: unknown;
@@ -709,9 +715,15 @@ export class Membrane {
 
     lastStopReason = this.mapStopReason(streamResult.stopReason);
 
-    // Accumulate usage
+    // Accumulate usage (including cache metrics)
     totalUsage.inputTokens += streamResult.usage.inputTokens;
     totalUsage.outputTokens += streamResult.usage.outputTokens;
+    if (streamResult.usage.cacheCreationTokens) {
+      totalUsage.cacheCreationTokens = (totalUsage.cacheCreationTokens ?? 0) + streamResult.usage.cacheCreationTokens;
+    }
+    if (streamResult.usage.cacheReadTokens) {
+      totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
+    }
     onUsage?.(totalUsage);
 
     // Parse content blocks from response
@@ -822,9 +834,9 @@ export class Membrane {
       },
       cache: {
         markersInRequest: 0,
-        tokensCreated: 0,
-        tokensRead: 0,
-        hitRatio: 0,
+        tokensCreated: totalUsage.cacheCreationTokens ?? 0,
+        tokensRead: totalUsage.cacheReadTokens ?? 0,
+        hitRatio: this.calculateCacheHitRatio(totalUsage),
       },
     },
     raw: {
@@ -1003,12 +1015,17 @@ export class Membrane {
       cacheTtl: request.cacheTtl,
      additionalStopSequences,
       maxParticipantsForStop,
+      contextPrefix: request.contextPrefix,
     });
 
     const providerRequest = {
       model: request.config.model,
       maxTokens: request.config.maxTokens,
       temperature: request.config.temperature,
+      topP: request.config.topP,
+      topK: request.config.topK,
+      presencePenalty: request.config.presencePenalty,
+      frequencyPenalty: request.config.frequencyPenalty,
       messages: buildResult.messages,
       system: buildResult.systemContent,
       stopSequences: buildResult.stopSequences,
@@ -1272,7 +1289,7 @@ export class Membrane {
     accumulated: string,
     contentBlocks: ContentBlock[],
     stopReason: StopReason,
-    usage: BasicUsage,
+    usage: DetailedUsage,
     request: NormalizedRequest,
     prefillResult: {
       cacheMarkersApplied?: number;
@@ -1334,10 +1351,10 @@ export class Membrane {
         provider: this.adapter.name,
       },
       cache: {
-        markersInRequest: 0,
-        tokensCreated: 0,
-        tokensRead: 0,
-        hitRatio: 0,
+        markersInRequest: prefillResult.cacheMarkersApplied ?? 0,
+        tokensCreated: usage.cacheCreationTokens ?? 0,
+        tokensRead: usage.cacheReadTokens ?? 0,
+        hitRatio: this.calculateCacheHitRatio(usage),
       },
     },
     raw: {
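
Both result paths now populate the cache metrics from accumulated usage and delegate the ratio to calculateCacheHitRatio, whose implementation is not included in these hunks. A minimal sketch of what such a helper could look like, assuming DetailedUsage extends the old BasicUsage shape with optional cache counters and the ratio is the cache-read share of all cacheable prompt tokens:

// Hypothetical sketch only -- not the code shipped in 0.5.21.
interface DetailedUsage {
  inputTokens: number;
  outputTokens: number;
  cacheCreationTokens?: number;
  cacheReadTokens?: number;
}

function calculateCacheHitRatio(usage: DetailedUsage): number {
  const read = usage.cacheReadTokens ?? 0;
  const created = usage.cacheCreationTokens ?? 0;
  const cacheable = read + created;
  // Fraction of cacheable prompt tokens served from cache;
  // 0 when no cache writes or reads occurred.
  return cacheable > 0 ? read / cacheable : 0;
}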
package/src/providers/anthropic.ts CHANGED
@@ -141,7 +141,15 @@ export class AnthropicAdapter implements ProviderAdapter {
     if (request.temperature !== undefined) {
       params.temperature = request.temperature;
     }
-
+
+    if (request.topP !== undefined) {
+      params.top_p = request.topP;
+    }
+
+    if (request.topK !== undefined) {
+      params.top_k = request.topK;
+    }
+
     if (request.stopSequences && request.stopSequences.length > 0) {
       params.stop_sequences = request.stopSequences;
     }
package/src/providers/bedrock.ts CHANGED
@@ -58,6 +58,8 @@ interface BedrockMessageRequest {
   }>;
   system?: string | Array<{ type: 'text'; text: string; cache_control?: { type: 'ephemeral' } }>;
   temperature?: number;
+  top_p?: number;
+  top_k?: number;
   stop_sequences?: string[];
   tools?: unknown[];
   thinking?: { type: 'enabled'; budget_tokens: number };
@@ -343,6 +345,14 @@ export class BedrockAdapter implements ProviderAdapter {
       params.temperature = request.temperature;
     }
 
+    if (request.topP !== undefined) {
+      params.top_p = request.topP;
+    }
+
+    if (request.topK !== undefined) {
+      params.top_k = request.topK;
+    }
+
     if (request.stopSequences && request.stopSequences.length > 0) {
       params.stop_sequences = request.stopSequences;
     }
package/src/providers/gemini.ts CHANGED
@@ -52,6 +52,7 @@ interface GeminiRequest {
     maxOutputTokens?: number;
     temperature?: number;
     topP?: number;
+    topK?: number;
     stopSequences?: string[];
   };
   tools?: { functionDeclarations: GeminiFunctionDeclaration[] }[];
@@ -336,6 +337,14 @@ export class GeminiAdapter implements ProviderAdapter {
       geminiRequest.generationConfig.temperature = request.temperature;
     }
 
+    if (request.topP !== undefined) {
+      geminiRequest.generationConfig.topP = request.topP;
+    }
+
+    if (request.topK !== undefined) {
+      geminiRequest.generationConfig.topK = request.topK;
+    }
+
     if (request.stopSequences && request.stopSequences.length > 0) {
       // Gemini API limits stop sequences to 5
       geminiRequest.generationConfig.stopSequences = request.stopSequences.slice(0, 5);
package/src/providers/openai-compatible.ts CHANGED
@@ -278,7 +278,19 @@ export class OpenAICompatibleAdapter implements ProviderAdapter {
     if (request.temperature !== undefined) {
       params.temperature = request.temperature;
     }
-
+
+    if (request.topP !== undefined) {
+      params.top_p = request.topP;
+    }
+
+    if (request.presencePenalty !== undefined) {
+      params.presence_penalty = request.presencePenalty;
+    }
+
+    if (request.frequencyPenalty !== undefined) {
+      params.frequency_penalty = request.frequencyPenalty;
+    }
+
     if (request.stopSequences && request.stopSequences.length > 0) {
       params.stop = request.stopSequences;
     }
package/src/providers/openai-completions.ts CHANGED
@@ -37,6 +37,9 @@ interface CompletionsRequest {
   prompt: string;
   max_tokens?: number;
   temperature?: number;
+  top_p?: number;
+  presence_penalty?: number;
+  frequency_penalty?: number;
   stop?: string[];
   stream?: boolean;
 }
@@ -344,6 +347,18 @@ export class OpenAICompletionsAdapter implements ProviderAdapter {
       params.temperature = request.temperature;
     }
 
+    if (request.topP !== undefined) {
+      params.top_p = request.topP;
+    }
+
+    if (request.presencePenalty !== undefined) {
+      params.presence_penalty = request.presencePenalty;
+    }
+
+    if (request.frequencyPenalty !== undefined) {
+      params.frequency_penalty = request.frequencyPenalty;
+    }
+
     // Generate stop sequences from participant names + EOT token + any extras
     const stopSequences = [
       ...this.generateStopSequences(participants),
package/src/providers/openai.ts CHANGED
@@ -376,7 +376,19 @@ export class OpenAIAdapter implements ProviderAdapter {
     if (request.temperature !== undefined && !noTemperatureSupport(model)) {
       params.temperature = request.temperature;
     }
-
+
+    if (request.topP !== undefined && !noTemperatureSupport(model)) {
+      params.top_p = request.topP;
+    }
+
+    if (request.presencePenalty !== undefined) {
+      params.presence_penalty = request.presencePenalty;
+    }
+
+    if (request.frequencyPenalty !== undefined) {
+      params.frequency_penalty = request.frequencyPenalty;
+    }
+
     // Reasoning models (o1, o3, o4) don't support stop sequences
     if (request.stopSequences && request.stopSequences.length > 0 && !noStopSupport(model)) {
       params.stop = request.stopSequences;
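
Note that top_p is gated behind the same noTemperatureSupport(model) check as temperature, while the penalties are not. The helpers themselves live elsewhere in openai.ts and are not part of this diff; a hypothetical sketch of their shape, assuming they key off the reasoning-model families named in the comment above:

// Hypothetical sketch of the gating helpers referenced above;
// the real implementations are not shown in this diff.
const isReasoningModel = (model: string): boolean =>
  /^o[134]/.test(model); // o1, o3, o4 families

// Reasoning models reject temperature/top_p ...
const noTemperatureSupport = (model: string): boolean => isReasoningModel(model);
// ... and stop sequences.
const noStopSupport = (model: string): boolean => isReasoningModel(model);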
package/src/providers/openrouter.ts CHANGED
@@ -298,7 +298,19 @@ export class OpenRouterAdapter implements ProviderAdapter {
     if (request.temperature !== undefined) {
       params.temperature = request.temperature;
     }
-
+
+    if (request.topP !== undefined) {
+      params.top_p = request.topP;
+    }
+
+    if (request.presencePenalty !== undefined) {
+      params.presence_penalty = request.presencePenalty;
+    }
+
+    if (request.frequencyPenalty !== undefined) {
+      params.frequency_penalty = request.frequencyPenalty;
+    }
+
     if (request.stopSequences && request.stopSequences.length > 0) {
       params.stop = request.stopSequences;
     }
package/src/types/provider.ts CHANGED
@@ -202,7 +202,19 @@ export interface ProviderRequest {
 
   /** Temperature */
   temperature?: number;
-
+
+  /** Top P nucleus sampling */
+  topP?: number;
+
+  /** Top K sampling */
+  topK?: number;
+
+  /** Presence penalty */
+  presencePenalty?: number;
+
+  /** Frequency penalty */
+  frequencyPenalty?: number;
+
   /** Stop sequences */
   stopSequences?: string[];
 
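Taken together with the adapter changes above, a caller can set the sampling knobs once and let each adapter map them to its wire format (top_p/top_k for Anthropic, Bedrock, and Gemini; top_p plus the penalties for the OpenAI-style providers). An illustrative fragment, with arbitrary values and the rest of the request elided:

// Illustrative only; assumes ProviderRequest is in scope (it is declared
// in src/types/provider.ts as shown above).
const sampling: Partial<ProviderRequest> = {
  temperature: 0.7,
  topP: 0.9,             // nucleus sampling
  topK: 40,              // not mapped by the OpenAI-style adapters
  presencePenalty: 0.1,  // not mapped by Anthropic/Bedrock/Gemini adapters
  frequencyPenalty: 0.2,
};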
package/src/types/request.ts CHANGED
@@ -139,7 +139,14 @@ export interface NormalizedRequest {
    * '1h' = 1 hour TTL (extended caching)
    */
   cacheTtl?: '5m' | '1h';
-
+
+  /**
+   * Context prefix for simulacrum seeding.
+   * Injected as first assistant message (before conversation history).
+   * Cached when promptCaching is enabled.
+   */
+  contextPrefix?: string;
+
   /** Provider-specific parameters (pass-through) */
   providerParams?: Record<string, unknown>;
 }
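
Per the new doc comment, contextPrefix is injected as the first assistant message ahead of the conversation history and cached when prompt caching is enabled. A hypothetical fragment showing the field alongside the existing cacheTtl option (the rest of the request is elided):

// Illustrative only; assumes NormalizedRequest is in scope (declared
// in src/types/request.ts as shown above).
const seeded: Partial<NormalizedRequest> = {
  contextPrefix: 'The assistant has been playing the ship computer for 40 turns.',
  cacheTtl: '1h', // extended TTL keeps the seeded prefix cached across turns
};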