@animalabs/membrane 0.5.42 → 0.5.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/formatters/completions.d.ts +7 -0
- package/dist/formatters/completions.d.ts.map +1 -1
- package/dist/formatters/completions.js +9 -0
- package/dist/formatters/completions.js.map +1 -1
- package/dist/membrane.d.ts +3 -0
- package/dist/membrane.d.ts.map +1 -1
- package/dist/membrane.js +69 -11
- package/dist/membrane.js.map +1 -1
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +9 -1
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/bedrock.js +2 -2
- package/dist/providers/bedrock.js.map +1 -1
- package/dist/registry/default-pricing.d.ts +3 -0
- package/dist/registry/default-pricing.d.ts.map +1 -0
- package/dist/registry/default-pricing.js +75 -0
- package/dist/registry/default-pricing.js.map +1 -0
- package/dist/transforms/prefill.d.ts.map +1 -1
- package/dist/transforms/prefill.js +45 -44
- package/dist/transforms/prefill.js.map +1 -1
- package/dist/types/yielding-stream.d.ts +2 -2
- package/dist/types/yielding-stream.d.ts.map +1 -1
- package/dist/utils/cost.d.ts +10 -0
- package/dist/utils/cost.d.ts.map +1 -0
- package/dist/utils/cost.js +19 -0
- package/dist/utils/cost.js.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +1 -0
- package/dist/utils/index.js.map +1 -1
- package/package.json +1 -1
- package/src/formatters/completions.ts +19 -0
- package/src/membrane.ts +77 -17
- package/src/providers/anthropic.ts +13 -1
- package/src/registry/default-pricing.ts +77 -0
- package/src/types/yielding-stream.ts +2 -2
- package/src/utils/cost.ts +29 -0
- package/src/utils/index.ts +3 -0
- package/dist/formatters/pseudo-prefill.d.ts +0 -71
- package/dist/formatters/pseudo-prefill.d.ts.map +0 -1
- package/dist/formatters/pseudo-prefill.js +0 -410
- package/dist/formatters/pseudo-prefill.js.map +0 -1
package/src/membrane.ts
CHANGED
|
@@ -53,6 +53,8 @@ import type {
|
|
|
53
53
|
import type { PrefillFormatter, StreamParser } from './formatters/types.js';
|
|
54
54
|
import { AnthropicXmlFormatter } from './formatters/anthropic-xml.js';
|
|
55
55
|
import { YieldingStreamImpl } from './yielding-stream.js';
|
|
56
|
+
import { calculateCost } from './utils/cost.js';
|
|
57
|
+
import { getDefaultPricing } from './registry/default-pricing.js';
|
|
56
58
|
|
|
57
59
|
// ============================================================================
|
|
58
60
|
// Membrane Class
|
|
@@ -273,6 +275,7 @@ export class Membrane {
|
|
|
273
275
|
const parser = formatter.createStreamParser();
|
|
274
276
|
let toolDepth = 0;
|
|
275
277
|
let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
|
|
278
|
+
const pricing = this.resolvePricing(request.config.model);
|
|
276
279
|
const contentBlocks: ContentBlock[] = [];
|
|
277
280
|
let lastStopReason: StopReason = 'end_turn';
|
|
278
281
|
let lastStopSequence: string | undefined;
|
|
@@ -425,6 +428,7 @@ export class Membrane {
|
|
|
425
428
|
if (streamResult.usage.cacheReadTokens) {
|
|
426
429
|
totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
|
|
427
430
|
}
|
|
431
|
+
if (pricing) totalUsage.estimatedCost = calculateCost(totalUsage, pricing);
|
|
428
432
|
onUsage?.(totalUsage);
|
|
429
433
|
|
|
430
434
|
// Flush the parser to complete any in-progress streaming block
|
|
@@ -737,6 +741,7 @@ export class Membrane {
|
|
|
737
741
|
|
|
738
742
|
let toolDepth = 0;
|
|
739
743
|
let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
|
|
744
|
+
const pricing = this.resolvePricing(request.config.model);
|
|
740
745
|
let lastStopReason: StopReason = 'end_turn';
|
|
741
746
|
let lastStopSequence: string | undefined;
|
|
742
747
|
let rawRequest: unknown;
|
|
@@ -807,6 +812,7 @@ export class Membrane {
|
|
|
807
812
|
if (streamResult.usage.cacheReadTokens) {
|
|
808
813
|
totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
|
|
809
814
|
}
|
|
815
|
+
if (pricing) totalUsage.estimatedCost = calculateCost(totalUsage, pricing);
|
|
810
816
|
onUsage?.(totalUsage);
|
|
811
817
|
|
|
812
818
|
// Parse content blocks from response
|
|
@@ -960,10 +966,13 @@ export class Membrane {
|
|
|
960
966
|
const assistantName = request.assistantParticipant
|
|
961
967
|
?? this.config.assistantParticipant ?? 'Claude';
|
|
962
968
|
|
|
969
|
+
const promptCaching = request.promptCaching ?? true;
|
|
970
|
+
const cacheControl = promptCaching ? { type: 'ephemeral' as const, ...(request.cacheTtl ? { ttl: request.cacheTtl } : {}) } : undefined;
|
|
971
|
+
|
|
963
972
|
for (const msg of messages) {
|
|
964
973
|
const isAssistant = msg.participant === assistantName;
|
|
965
974
|
const role = isAssistant ? 'assistant' : 'user';
|
|
966
|
-
|
|
975
|
+
|
|
967
976
|
// Convert content blocks
|
|
968
977
|
const content: any[] = [];
|
|
969
978
|
for (const block of msg.content) {
|
|
@@ -1005,19 +1014,42 @@ export class Membrane {
|
|
|
1005
1014
|
}
|
|
1006
1015
|
}
|
|
1007
1016
|
}
|
|
1008
|
-
|
|
1017
|
+
|
|
1018
|
+
// Apply cache_control to last block of messages with cacheBreakpoint
|
|
1019
|
+
if (msg.cacheBreakpoint && cacheControl && content.length > 0) {
|
|
1020
|
+
content[content.length - 1].cache_control = cacheControl;
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1009
1023
|
providerMessages.push({ role, content });
|
|
1010
1024
|
}
|
|
1011
1025
|
|
|
1012
1026
|
// Convert tools to provider format.
|
|
1013
1027
|
// Native tool names must match ^[a-zA-Z0-9_-]{1,128}$ — sanitize colons
|
|
1014
1028
|
// from the module:tool namespace convention. Reversed in parseProviderContent.
|
|
1015
|
-
const tools = request.tools?.map(tool =>
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1029
|
+
const tools = request.tools?.map((tool, idx) => {
|
|
1030
|
+
const t: Record<string, unknown> = {
|
|
1031
|
+
name: sanitizeToolName(tool.name),
|
|
1032
|
+
description: tool.description,
|
|
1033
|
+
input_schema: tool.inputSchema,
|
|
1034
|
+
};
|
|
1035
|
+
// Cache the tool list — mark the last tool with cache_control
|
|
1036
|
+
if (cacheControl && request.tools && idx === request.tools.length - 1) {
|
|
1037
|
+
t.cache_control = cacheControl;
|
|
1038
|
+
}
|
|
1039
|
+
return t;
|
|
1040
|
+
});
|
|
1041
|
+
|
|
1042
|
+
// Wrap system prompt with cache_control if prompt caching is enabled
|
|
1043
|
+
let system: unknown = request.system;
|
|
1044
|
+
if (cacheControl && typeof system === 'string' && system.length > 0) {
|
|
1045
|
+
system = [{ type: 'text', text: system, cache_control: cacheControl }];
|
|
1046
|
+
} else if (cacheControl && Array.isArray(system) && system.length > 0) {
|
|
1047
|
+
const blocks = system as Record<string, unknown>[];
|
|
1048
|
+
system = blocks.map((block, idx) =>
|
|
1049
|
+
idx === blocks.length - 1 ? { ...block, cache_control: cacheControl } : block
|
|
1050
|
+
);
|
|
1051
|
+
}
|
|
1052
|
+
|
|
1021
1053
|
// Build thinking config for native extended thinking
|
|
1022
1054
|
const thinking = request.config.thinking?.enabled
|
|
1023
1055
|
? {
|
|
@@ -1034,7 +1066,7 @@ export class Membrane {
|
|
|
1034
1066
|
maxTokens: request.config.maxTokens,
|
|
1035
1067
|
temperature,
|
|
1036
1068
|
messages: providerMessages,
|
|
1037
|
-
system
|
|
1069
|
+
system,
|
|
1038
1070
|
tools,
|
|
1039
1071
|
thinking,
|
|
1040
1072
|
extra: request.providerParams,
|
|
@@ -1377,6 +1409,7 @@ export class Membrane {
|
|
|
1377
1409
|
outputTokens: providerResponse.usage.outputTokens,
|
|
1378
1410
|
cacheCreationTokens: providerResponse.usage.cacheCreationTokens,
|
|
1379
1411
|
cacheReadTokens: providerResponse.usage.cacheReadTokens,
|
|
1412
|
+
estimatedCost: this.estimateCost(providerResponse.usage, request.config.model),
|
|
1380
1413
|
},
|
|
1381
1414
|
timing: {
|
|
1382
1415
|
totalDurationMs: durationMs,
|
|
@@ -1458,6 +1491,7 @@ export class Membrane {
|
|
|
1458
1491
|
},
|
|
1459
1492
|
usage: {
|
|
1460
1493
|
...usage,
|
|
1494
|
+
estimatedCost: usage.estimatedCost ?? this.estimateCost(usage, request.config.model),
|
|
1461
1495
|
},
|
|
1462
1496
|
timing: {
|
|
1463
1497
|
totalDurationMs: durationMs,
|
|
@@ -1497,13 +1531,23 @@ export class Membrane {
|
|
|
1497
1531
|
}
|
|
1498
1532
|
}
|
|
1499
1533
|
|
|
1500
|
-
private calculateCacheHitRatio(usage:
|
|
1534
|
+
/**
 * Ratio of cache-read tokens to `usage.inputTokens`.
 *
 * Missing counts are treated as 0, and a zero input count short-circuits to 0
 * so the division can never yield NaN or Infinity.
 *
 * NOTE(review): cost calculation bills cache reads separately from
 * `inputTokens`; if `inputTokens` therefore excludes cached tokens, this value
 * can exceed 1 — confirm what the provider adapters report.
 *
 * @param usage Usage counters; only `inputTokens` and `cacheReadTokens` are read.
 * @returns `cacheReadTokens / inputTokens`, or 0 when `inputTokens` is 0 or absent.
 */
private calculateCacheHitRatio(usage: Pick<DetailedUsage, 'inputTokens' | 'cacheReadTokens'>): number {
  const inputCount = usage.inputTokens ?? 0;
  if (inputCount === 0) {
    return 0;
  }

  const cachedCount = usage.cacheReadTokens ?? 0;
  return cachedCount / inputCount;
}
|
|
1506
1540
|
|
|
1541
|
+
/**
 * Find pricing for `model`.
 *
 * The configured registry (when there is one) is asked first; if it has no
 * pricing for the model, the built-in default table is consulted.
 *
 * @param model Model identifier taken from the request config.
 * @returns The pricing entry, or `undefined` when neither source knows the model.
 */
private resolvePricing(model: string): import('./types/provider.js').ModelPricing | undefined {
  const registryPricing = this.registry?.getPricing(model);
  return registryPricing ?? getDefaultPricing(model);
}
|
|
1544
|
+
|
|
1545
|
+
/**
 * One-shot helper: resolve pricing for `model` and price `usage` with it.
 *
 * Intended for call sites outside the streaming loops, which resolve pricing
 * once up front and call `calculateCost` directly.
 *
 * @param usage Token counters to price.
 * @param model Model identifier used to resolve pricing.
 * @returns The cost breakdown, or `undefined` when no pricing is known for the model.
 */
private estimateCost(usage: import('./utils/cost.js').CostableUsage, model: string): import('./types/response.js').CostBreakdown | undefined {
  const pricing = this.resolvePricing(model);
  if (!pricing) {
    return undefined;
  }
  return calculateCost(usage, pricing);
}
|
|
1550
|
+
|
|
1507
1551
|
private calculateRetryDelay(attempt: number): number {
|
|
1508
1552
|
const { retryDelayMs, backoffMultiplier, maxRetryDelayMs } = this.retryConfig;
|
|
1509
1553
|
const delay = retryDelayMs * Math.pow(backoffMultiplier, attempt - 1);
|
|
@@ -1639,7 +1683,8 @@ export class Membrane {
|
|
|
1639
1683
|
const formatter = this.formatter;
|
|
1640
1684
|
const parser = formatter.createStreamParser();
|
|
1641
1685
|
let toolDepth = 0;
|
|
1642
|
-
let totalUsage:
|
|
1686
|
+
let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
|
|
1687
|
+
const pricing = this.resolvePricing(request.config.model);
|
|
1643
1688
|
const contentBlocks: ContentBlock[] = [];
|
|
1644
1689
|
let lastStopReason: StopReason = 'end_turn';
|
|
1645
1690
|
let lastStopSequence: string | undefined;
|
|
@@ -1768,9 +1813,16 @@ export class Membrane {
|
|
|
1768
1813
|
lastStopReason = this.mapStopReason(streamResult.stopReason);
|
|
1769
1814
|
lastStopSequence = streamResult.stopSequence ?? undefined;
|
|
1770
1815
|
|
|
1771
|
-
// Accumulate usage
|
|
1816
|
+
// Accumulate usage (including cache metrics)
|
|
1772
1817
|
totalUsage.inputTokens += streamResult.usage.inputTokens;
|
|
1773
1818
|
totalUsage.outputTokens += streamResult.usage.outputTokens;
|
|
1819
|
+
if (streamResult.usage.cacheCreationTokens) {
|
|
1820
|
+
totalUsage.cacheCreationTokens = (totalUsage.cacheCreationTokens ?? 0) + streamResult.usage.cacheCreationTokens;
|
|
1821
|
+
}
|
|
1822
|
+
if (streamResult.usage.cacheReadTokens) {
|
|
1823
|
+
totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
|
|
1824
|
+
}
|
|
1825
|
+
if (pricing) totalUsage.estimatedCost = calculateCost(totalUsage, pricing);
|
|
1774
1826
|
if (emitUsage) {
|
|
1775
1827
|
stream.emit({ type: 'usage', usage: { ...totalUsage } });
|
|
1776
1828
|
}
|
|
@@ -2075,7 +2127,8 @@ export class Membrane {
|
|
|
2075
2127
|
} = options;
|
|
2076
2128
|
|
|
2077
2129
|
let toolDepth = 0;
|
|
2078
|
-
let totalUsage:
|
|
2130
|
+
let totalUsage: DetailedUsage = { inputTokens: 0, outputTokens: 0 };
|
|
2131
|
+
const pricing = this.resolvePricing(request.config.model);
|
|
2079
2132
|
let lastStopReason: StopReason = 'end_turn';
|
|
2080
2133
|
let lastStopSequence: string | undefined;
|
|
2081
2134
|
let rawRequest: unknown;
|
|
@@ -2141,9 +2194,16 @@ export class Membrane {
|
|
|
2141
2194
|
lastStopReason = this.mapStopReason(streamResult.stopReason);
|
|
2142
2195
|
lastStopSequence = streamResult.stopSequence ?? undefined;
|
|
2143
2196
|
|
|
2144
|
-
// Accumulate usage
|
|
2197
|
+
// Accumulate usage (including cache metrics)
|
|
2145
2198
|
totalUsage.inputTokens += streamResult.usage.inputTokens;
|
|
2146
2199
|
totalUsage.outputTokens += streamResult.usage.outputTokens;
|
|
2200
|
+
if (streamResult.usage.cacheCreationTokens) {
|
|
2201
|
+
totalUsage.cacheCreationTokens = (totalUsage.cacheCreationTokens ?? 0) + streamResult.usage.cacheCreationTokens;
|
|
2202
|
+
}
|
|
2203
|
+
if (streamResult.usage.cacheReadTokens) {
|
|
2204
|
+
totalUsage.cacheReadTokens = (totalUsage.cacheReadTokens ?? 0) + streamResult.usage.cacheReadTokens;
|
|
2205
|
+
}
|
|
2206
|
+
if (pricing) totalUsage.estimatedCost = calculateCost(totalUsage, pricing);
|
|
2147
2207
|
if (emitUsage) {
|
|
2148
2208
|
stream.emit({ type: 'usage', usage: { ...totalUsage } });
|
|
2149
2209
|
}
|
|
@@ -2252,9 +2312,9 @@ export class Membrane {
|
|
|
2252
2312
|
},
|
|
2253
2313
|
cache: {
|
|
2254
2314
|
markersInRequest: 0,
|
|
2255
|
-
tokensCreated: 0,
|
|
2256
|
-
tokensRead: 0,
|
|
2257
|
-
hitRatio:
|
|
2315
|
+
tokensCreated: totalUsage.cacheCreationTokens ?? 0,
|
|
2316
|
+
tokensRead: totalUsage.cacheReadTokens ?? 0,
|
|
2317
|
+
hitRatio: this.calculateCacheHitRatio(totalUsage),
|
|
2258
2318
|
},
|
|
2259
2319
|
},
|
|
2260
2320
|
raw: {
|
|
@@ -178,7 +178,19 @@ export class AnthropicAdapter implements ProviderAdapter {
|
|
|
178
178
|
const delta = event.delta as { stop_reason?: string; stop_sequence?: string };
|
|
179
179
|
stopReason = delta.stop_reason ?? 'end_turn';
|
|
180
180
|
stopSequence = delta.stop_sequence ?? undefined;
|
|
181
|
-
|
|
181
|
+
const deltaUsage = event.usage as unknown as {
|
|
182
|
+
output_tokens: number;
|
|
183
|
+
cache_creation_input_tokens?: number | null;
|
|
184
|
+
cache_read_input_tokens?: number | null;
|
|
185
|
+
};
|
|
186
|
+
outputTokens = deltaUsage.output_tokens ?? 0;
|
|
187
|
+
// message_delta carries cumulative cache metrics — use as authoritative
|
|
188
|
+
if (deltaUsage.cache_creation_input_tokens != null) {
|
|
189
|
+
cacheCreationTokens = deltaUsage.cache_creation_input_tokens;
|
|
190
|
+
}
|
|
191
|
+
if (deltaUsage.cache_read_input_tokens != null) {
|
|
192
|
+
cacheReadTokens = deltaUsage.cache_read_input_tokens;
|
|
193
|
+
}
|
|
182
194
|
break;
|
|
183
195
|
}
|
|
184
196
|
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { ModelPricing } from '../types/provider.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Built-in pricing table for known models.
 * Prices are in USD per million tokens.
 *
 * Used as fallback when no ModelRegistry is configured.
 * Registry pricing (if available) takes precedence.
 *
 * Entries are matched against the model id by prefix and the longest matching
 * prefix wins, so a more specific entry (e.g. `claude-opus-4-6`) overrides a
 * broader one (`claude-opus-4`) regardless of its position in the array.
 *
 * Only the four rates carried by ModelPricing are modelled; batch discounts
 * and long-context surcharges are not represented.
 *
 * NOTE(review): list prices change over time — re-verify every entry against
 * the provider's published price list when adding or updating models.
 */
const PRICING_TABLE: Array<{ prefix: string; pricing: ModelPricing }> = [
  // Anthropic — Claude 4.6
  {
    prefix: 'claude-opus-4-6',
    pricing: { inputPerMillion: 5, outputPerMillion: 25, cacheWritePerMillion: 6.25, cacheReadPerMillion: 0.50, currency: 'USD' },
  },
  {
    prefix: 'claude-sonnet-4-6',
    pricing: { inputPerMillion: 3, outputPerMillion: 15, cacheWritePerMillion: 3.75, cacheReadPerMillion: 0.30, currency: 'USD' },
  },
  // Anthropic — Claude 4.5
  // Opus 4.5 needs its own entry; otherwise it would inherit the (higher)
  // `claude-opus-4` rates through prefix matching.
  {
    prefix: 'claude-opus-4-5',
    pricing: { inputPerMillion: 5, outputPerMillion: 25, cacheWritePerMillion: 6.25, cacheReadPerMillion: 0.50, currency: 'USD' },
  },
  {
    prefix: 'claude-haiku-4-5',
    pricing: { inputPerMillion: 1, outputPerMillion: 5, cacheWritePerMillion: 1.25, cacheReadPerMillion: 0.10, currency: 'USD' },
  },
  // Anthropic — Claude 4 (also covers 4.1 and Sonnet 4.5 via prefix)
  {
    prefix: 'claude-opus-4',
    pricing: { inputPerMillion: 15, outputPerMillion: 75, cacheWritePerMillion: 18.75, cacheReadPerMillion: 1.50, currency: 'USD' },
  },
  {
    prefix: 'claude-sonnet-4',
    pricing: { inputPerMillion: 3, outputPerMillion: 15, cacheWritePerMillion: 3.75, cacheReadPerMillion: 0.30, currency: 'USD' },
  },
  // Anthropic — Claude 3.5
  {
    prefix: 'claude-3-5-sonnet',
    pricing: { inputPerMillion: 3, outputPerMillion: 15, cacheWritePerMillion: 3.75, cacheReadPerMillion: 0.30, currency: 'USD' },
  },
  {
    prefix: 'claude-3-5-haiku',
    pricing: { inputPerMillion: 0.80, outputPerMillion: 4, cacheWritePerMillion: 1.00, cacheReadPerMillion: 0.08, currency: 'USD' },
  },
  // OpenAI — GPT-4o
  {
    prefix: 'gpt-4o-2024',
    pricing: { inputPerMillion: 2.50, outputPerMillion: 10, cacheReadPerMillion: 1.25, currency: 'USD' },
  },
  {
    prefix: 'gpt-4o',
    pricing: { inputPerMillion: 2.50, outputPerMillion: 10, cacheReadPerMillion: 1.25, currency: 'USD' },
  },
  {
    prefix: 'gpt-4o-mini',
    pricing: { inputPerMillion: 0.15, outputPerMillion: 0.60, cacheReadPerMillion: 0.075, currency: 'USD' },
  },
  // Google — Gemini 2.5
  {
    prefix: 'gemini-2.5-pro',
    pricing: { inputPerMillion: 1.25, outputPerMillion: 10, currency: 'USD' },
  },
  {
    prefix: 'gemini-2.5-flash',
    pricing: { inputPerMillion: 0.30, outputPerMillion: 2.50, currency: 'USD' },
  },
  // Flash-Lite shares the `gemini-2.5-flash` prefix but is priced lower, so it
  // needs a longer, more specific entry.
  {
    prefix: 'gemini-2.5-flash-lite',
    pricing: { inputPerMillion: 0.10, outputPerMillion: 0.40, currency: 'USD' },
  },
];
|
|
66
|
+
|
|
67
|
+
/**
 * Look up built-in pricing for a model id using longest-prefix matching
 * against PRICING_TABLE.
 *
 * The id is first matched exactly as given. If nothing matches, the lookup is
 * retried on the text following each `/` or `.` in the id, left to right, so
 * that provider-qualified ids such as `us.anthropic.claude-sonnet-4-…`
 * (Bedrock), `openai/gpt-4o` (routers) or `models/gemini-2.5-pro` still
 * resolve to the underlying model's entry. Ids that match unqualified are
 * never affected by the fallback.
 *
 * NOTE(review): a reseller may charge rates that differ from the model
 * vendor's list price; the result is an estimate either way.
 *
 * @param modelId Model identifier, optionally provider-qualified.
 * @returns Pricing of the most specific matching entry, or `undefined` when
 *          the id is empty or no entry matches.
 */
export function getDefaultPricing(modelId: string): ModelPricing | undefined {
  if (!modelId) return undefined;

  // Candidate 0 is the id verbatim; the rest drop leading qualifiers.
  const candidates: string[] = [modelId];
  for (let i = 0; i < modelId.length - 1; i++) {
    const ch = modelId[i];
    if (ch === '/' || ch === '.') {
      candidates.push(modelId.slice(i + 1));
    }
  }

  for (const candidate of candidates) {
    let best: ModelPricing | undefined;
    let bestLen = 0;
    for (const entry of PRICING_TABLE) {
      if (entry.prefix.length > bestLen && candidate.startsWith(entry.prefix)) {
        best = entry.pricing;
        bestLen = entry.prefix.length;
      }
    }
    if (best) return best;
  }

  return undefined;
}
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import type { ContentBlock } from './content.js';
|
|
12
12
|
import type { ToolCall, ToolResult, ToolContext } from './tools.js';
|
|
13
|
-
import type {
|
|
13
|
+
import type { DetailedUsage, NormalizedResponse, StopReason } from './response.js';
|
|
14
14
|
import type { ChunkMeta, BlockEvent } from './streaming.js';
|
|
15
15
|
|
|
16
16
|
// ============================================================================
|
|
@@ -49,7 +49,7 @@ export interface ToolCallsEvent {
|
|
|
49
49
|
*/
|
|
50
50
|
export interface UsageEvent {
|
|
51
51
|
type: 'usage';
|
|
52
|
-
usage:
|
|
52
|
+
usage: DetailedUsage;
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
/**
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { CostBreakdown } from '../types/response.js';
|
|
2
|
+
import type { ModelPricing } from '../types/provider.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Minimal set of token counters needed to price a request.
 *
 * The two cache counters are optional and treated as 0 when absent.
 */
export interface CostableUsage {
  inputTokens: number;
  outputTokens: number;
  cacheCreationTokens?: number;
  cacheReadTokens?: number;
}

/**
 * Price a usage record with the given per-million-token rates.
 *
 * Each component is `tokens * ratePerMillion / 1_000_000`. The `cacheWrite`
 * and `cacheRead` components are only computed when the pricing defines the
 * corresponding rate; otherwise they are `undefined` and contribute nothing
 * to `total`.
 *
 * Token counts that are missing or not finite at runtime are counted as 0 so
 * that a partially populated usage record cannot turn `total` into NaN.
 *
 * @param usage   Token counters to price.
 * @param pricing Rates per million tokens plus the currency label.
 * @returns Per-component costs, their sum in `total`, and `pricing.currency`.
 */
export function calculateCost(usage: CostableUsage, pricing: ModelPricing): CostBreakdown {
  const TOKENS_PER_MILLION = 1_000_000;

  // Same evaluation order as `tokens * rate / 1_000_000`, with a guard for
  // counters that are absent or non-finite despite the declared type.
  const priceOf = (tokens: number | undefined, ratePerMillion: number): number => {
    const count = typeof tokens === 'number' && Number.isFinite(tokens) ? tokens : 0;
    return (count * ratePerMillion) / TOKENS_PER_MILLION;
  };

  const input = priceOf(usage.inputTokens, pricing.inputPerMillion);
  const output = priceOf(usage.outputTokens, pricing.outputPerMillion);
  const cacheWrite = pricing.cacheWritePerMillion != null
    ? priceOf(usage.cacheCreationTokens, pricing.cacheWritePerMillion)
    : undefined;
  const cacheRead = pricing.cacheReadPerMillion != null
    ? priceOf(usage.cacheReadTokens, pricing.cacheReadPerMillion)
    : undefined;

  return {
    input,
    output,
    cacheWrite,
    cacheRead,
    total: input + output + (cacheWrite ?? 0) + (cacheRead ?? 0),
    currency: pricing.currency,
  };
}
|
package/src/utils/index.ts
CHANGED
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pseudo-Prefill Formatter
|
|
3
|
-
*
|
|
4
|
-
* Recovers prefill-like behavior for models that don't support native
|
|
5
|
-
* assistant message prefill (e.g., Sonnet 4.6, Opus 4.6). Uses a CLI
|
|
6
|
-
* simulation framing trick:
|
|
7
|
-
*
|
|
8
|
-
* System: "The assistant is in CLI simulation mode..."
|
|
9
|
-
* User: "<cmd>cut -c 1-N < conversation.txt</cmd>"
|
|
10
|
-
* Assistant: <the full conversation log, N chars>
|
|
11
|
-
* User: "<cmd>cat conversation.txt</cmd>" (or cut -c N+1-)
|
|
12
|
-
* <model continues from where the cut output ended>
|
|
13
|
-
*
|
|
14
|
-
* Two continuation modes:
|
|
15
|
-
* - 'cat': model repeats full file then continues (reliable, caller strips log)
|
|
16
|
-
* - 'tail-cut': model outputs only new content (efficient, needs simulated stops)
|
|
17
|
-
*
|
|
18
|
-
* IMPORTANT: API-level stop sequences should NOT be used with pseudo-prefill.
|
|
19
|
-
* In 'cat' mode, the model repeats participant names from the log which would
|
|
20
|
-
* trigger stops prematurely. The caller should handle stop sequences post-facto
|
|
21
|
-
* after stripping the repeated log. The stop sequences returned in BuildResult
|
|
22
|
-
* are for the caller's post-facto detection, not for the API.
|
|
23
|
-
*
|
|
24
|
-
* Uses PassthroughParser and native API tools (same as NativeFormatter).
|
|
25
|
-
*/
|
|
26
|
-
import type { NormalizedMessage, ContentBlock, ToolCall, ToolResult } from '../types/index.js';
|
|
27
|
-
import type { PrefillFormatter, StreamParser, BuildOptions, BuildResult, FormatterConfig } from './types.js';
|
|
28
|
-
export interface PseudoPrefillFormatterConfig extends FormatterConfig {
|
|
29
|
-
/**
|
|
30
|
-
* Filename used in the CLI simulation commands.
|
|
31
|
-
* Default: 'conversation.txt'
|
|
32
|
-
*/
|
|
33
|
-
filename?: string;
|
|
34
|
-
/**
|
|
35
|
-
* Continuation mode:
|
|
36
|
-
* - 'cat': `cat filename` — model repeats full file then continues.
|
|
37
|
-
* More reliable but uses more output tokens. Caller must strip the
|
|
38
|
-
* repeated conversation log from the response.
|
|
39
|
-
* - 'tail-cut': `cut -c N+1- < filename` — model outputs only new content.
|
|
40
|
-
* More efficient but may be less reliable. Caller needs simulated stop
|
|
41
|
-
* sequences (only after \n\n, not at position 0).
|
|
42
|
-
* Default: 'cat'
|
|
43
|
-
*/
|
|
44
|
-
continuationMode?: 'cat' | 'tail-cut';
|
|
45
|
-
/**
|
|
46
|
-
* Maximum participants to include in stop sequences.
|
|
47
|
-
* Default: 10
|
|
48
|
-
*/
|
|
49
|
-
maxParticipantsForStop?: number;
|
|
50
|
-
/**
|
|
51
|
-
* Message delimiter between participant entries.
|
|
52
|
-
* Default: '' (none, just newlines)
|
|
53
|
-
*/
|
|
54
|
-
messageDelimiter?: string;
|
|
55
|
-
}
|
|
56
|
-
export declare class PseudoPrefillFormatter implements PrefillFormatter {
|
|
57
|
-
readonly name = "pseudo-prefill";
|
|
58
|
-
readonly usesPrefill = false;
|
|
59
|
-
private config;
|
|
60
|
-
constructor(config?: PseudoPrefillFormatterConfig);
|
|
61
|
-
buildMessages(messages: NormalizedMessage[], options: BuildOptions): BuildResult;
|
|
62
|
-
formatToolResults(results: ToolResult[]): string;
|
|
63
|
-
createStreamParser(): StreamParser;
|
|
64
|
-
parseToolCalls(_content: string): ToolCall[];
|
|
65
|
-
hasToolUse(_content: string): boolean;
|
|
66
|
-
parseContentBlocks(content: string): ContentBlock[];
|
|
67
|
-
private extractContent;
|
|
68
|
-
private buildStopSequences;
|
|
69
|
-
private convertToNativeTools;
|
|
70
|
-
}
|
|
71
|
-
//# sourceMappingURL=pseudo-prefill.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pseudo-prefill.d.ts","sourceRoot":"","sources":["../../src/formatters/pseudo-prefill.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,KAAK,EACV,iBAAiB,EACjB,YAAY,EAEZ,QAAQ,EACR,UAAU,EACX,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,gBAAgB,EAChB,YAAY,EACZ,YAAY,EACZ,WAAW,EACX,eAAe,EAMhB,MAAM,YAAY,CAAC;AAMpB,MAAM,WAAW,4BAA6B,SAAQ,eAAe;IACnE;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;;;;;OASG;IACH,gBAAgB,CAAC,EAAE,KAAK,GAAG,UAAU,CAAC;IAEtC;;;OAGG;IACH,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAEhC;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAqGD,qBAAa,sBAAuB,YAAW,gBAAgB;IAC7D,QAAQ,CAAC,IAAI,oBAAoB;IACjC,QAAQ,CAAC,WAAW,SAAS;IAE7B,OAAO,CAAC,MAAM,CAAyC;gBAE3C,MAAM,GAAE,4BAAiC;IAerD,aAAa,CAAC,QAAQ,EAAE,iBAAiB,EAAE,EAAE,OAAO,EAAE,YAAY,GAAG,WAAW;IA+NhF,iBAAiB,CAAC,OAAO,EAAE,UAAU,EAAE,GAAG,MAAM;IAahD,kBAAkB,IAAI,YAAY;IAIlC,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,EAAE;IAK5C,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAKrC,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,EAAE;IAWnD,OAAO,CAAC,cAAc;IAqCtB,OAAO,CAAC,kBAAkB;IA8B1B,OAAO,CAAC,oBAAoB;CAO7B"}
|