ai-token-estimator 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -3
- package/dist/index.cjs +300 -86
- package/dist/index.d.cts +151 -1
- package/dist/index.d.ts +151 -1
- package/dist/index.js +296 -86
- package/package.json +1 -1
package/README.md
CHANGED
@@ -410,11 +410,17 @@ interface EstimateInput {
   model: string; // Model ID (e.g., 'gpt-4o', 'claude-opus-4.5')
   rounding?: 'ceil' | 'round' | 'floor'; // Rounding strategy (default: 'ceil')
   tokenizer?: 'heuristic' | 'openai_exact' | 'auto'; // Token counting strategy (default: 'heuristic')
+
+  // Extended cost estimation (optional)
+  outputTokens?: number; // Output tokens for cost calculation
+  cachedInputTokens?: number; // Cached input tokens (OpenAI only, must be <= estimatedTokens)
+  mode?: 'standard' | 'batch'; // Pricing mode (default: 'standard')
 }
 ```
 
 Note:
 - Provider-backed modes (`anthropic_count_tokens`, `gemini_count_tokens`, `gemma_sentencepiece`) are only supported in `estimateAsync()`.
+- When `outputTokens`, `cachedInputTokens`, or `mode` is provided, the model must have the corresponding pricing available or an error is thrown.
 
 **Returns:**
 
@@ -423,10 +429,16 @@ interface EstimateOutput {
   model: string; // The model used
   characterCount: number; // Number of Unicode code points
   estimatedTokens: number; // Estimated token count (integer)
-  estimatedInputCost: number; // Estimated cost in USD
+  estimatedInputCost: number; // Estimated input cost in USD
   charsPerToken: number; // The ratio used for this model
   tokenizerMode?: 'heuristic' | 'openai_exact' | 'auto'; // Which strategy was used
   encodingUsed?: string; // OpenAI encoding when using exact tokenization
+
+  // Extended cost fields (when cost inputs are provided)
+  outputTokens?: number; // Echoed from input
+  estimatedOutputCost?: number; // Output token cost in USD
+  estimatedCachedInputCost?: number; // Cached input cost in USD
+  estimatedTotalCost: number; // Total cost (input + output + cached)
 }
 ```
 
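Taken together, the two interfaces round-trip the new cost inputs: a caller passes `outputTokens`/`cachedInputTokens`/`mode` alongside the text and reads the extended cost fields off the result. A minimal sketch against the documented shapes (exact dollar values depend on the bundled pricing tables):

```typescript
import { estimate } from 'ai-token-estimator';

// Sketch: estimate() with the new optional cost inputs from EstimateInput.
const result = estimate({
  text: 'Hello, world!',
  model: 'gpt-4o',
  outputTokens: 200,   // expected completion size
  mode: 'standard',    // default; 'batch' uses batch rates
});

// Extended cost fields from EstimateOutput; estimatedOutputCost is
// undefined when the output cost works out to zero.
console.log(result.estimatedInputCost, result.estimatedOutputCost, result.estimatedTotalCost);
```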
@@ -555,8 +567,12 @@ Returns the configuration for a specific model. Throws if the model is not found
 
 ```typescript
 interface ModelConfig {
-  charsPerToken: number;
-  inputCostPerMillion: number;
+  charsPerToken: number; // Characters per token ratio
+  inputCostPerMillion: number; // USD per 1M input tokens
+  outputCostPerMillion?: number; // USD per 1M output tokens (when available)
+  cachedInputCostPerMillion?: number; // USD per 1M cached input tokens (OpenAI)
+  batchInputCostPerMillion?: number; // USD per 1M batch input tokens (OpenAI)
+  batchOutputCostPerMillion?: number; // USD per 1M batch output tokens (OpenAI)
 }
 ```
 
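Since every `*CostPerMillion` field is USD per one million tokens, a per-request price can be derived directly from the config. A small sketch (rates taken from the `gpt-4o` entry in the dist diff below):

```typescript
import { getModelConfig } from 'ai-token-estimator';

// cost = tokens * ratePerMillion / 1e6
const config = getModelConfig('gpt-4o');
const inputCost = 1_000_000 * config.inputCostPerMillion / 1e6;         // 2.5
const outputCost = 500_000 * (config.outputCostPerMillion ?? 0) / 1e6;  // 5.0
console.log(inputCost + outputCost);                                    // 7.5 USD
```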
@@ -564,6 +580,84 @@ interface ModelConfig {
 
 Read-only object containing all model configurations. Frozen to prevent runtime mutation.
 
+### Cost Estimation API
+
+#### `estimateCost(options): CostEstimate`
+
+Calculate cost from explicit token counts. Provides detailed cost breakdown for input, output, cached, and batch pricing.
+
+```typescript
+import { estimateCost } from 'ai-token-estimator';
+
+const result = estimateCost({
+  model: 'gpt-4o',
+  inputTokens: 1_000_000,
+  outputTokens: 500_000,
+  cachedInputTokens: 200_000, // optional
+  mode: 'standard', // or 'batch'
+});
+
+console.log(result);
+// {
+//   model: 'gpt-4o',
+//   mode: 'standard',
+//   tokens: { input: 1000000, cachedInput: 200000, nonCachedInput: 800000, output: 500000 },
+//   costs: { input: 2.0, cachedInput: 0.25, output: 5.0, total: 7.25 },
+//   rates: { inputPerMillion: 2.5, outputPerMillion: 10.0, cachedInputPerMillion: 1.25, ... }
+// }
+```
+
+Throws if:
+- Model is unknown
+- Token counts are negative or non-integer
+- `cachedInputTokens > inputTokens`
+- Required pricing is missing (output/cached/batch)
+- `mode: 'batch'` with `cachedInputTokens > 0`
+
+#### `estimateCostFromText(options): CostEstimate`
+
+Sync version that counts input tokens from text. Uses heuristic/exact tokenization based on model.
+
+```typescript
+import { estimateCostFromText } from 'ai-token-estimator';
+
+const result = estimateCostFromText({
+  model: 'gpt-4o',
+  inputText: 'Hello, world!',
+  outputText: 'Hi there!', // optional: auto-count output tokens
+  outputTokens: 100, // or: explicit output count (takes precedence)
+  cachedInputTokens: 0,
+  mode: 'standard',
+});
+```
+
+#### `estimateCostFromTextAsync(options): Promise<CostEstimate>`
+
+Async version that supports provider-backed tokenizers for accurate counts.
+
+```typescript
+import { estimateCostFromTextAsync } from 'ai-token-estimator';
+
+const result = await estimateCostFromTextAsync({
+  model: 'claude-sonnet-4',
+  inputText: 'Hello, world!',
+  outputText: 'Hi there!',
+  tokenizer: 'anthropic_count_tokens',
+  anthropic: { apiKey: process.env.ANTHROPIC_API_KEY },
+});
+```
+
+#### `getTotalCost(model, inputTokens, outputTokens?): number`
+
+Quick helper to get total cost for a model.
+
+```typescript
+import { getTotalCost } from 'ai-token-estimator';
+
+const cost = getTotalCost('gpt-4o', 1_000_000, 500_000);
+// 7.5 (USD)
+```
+
 ### SentencePiece API
 
 #### `loadSentencePieceTokenizer(options: FileOptions): Promise<SentencePieceTokenizer>`
package/dist/index.cjs
CHANGED
@@ -49,10 +49,14 @@ __export(index_exports, {
   ensureSentencePieceModel: () => ensureSentencePieceModel,
   estimate: () => estimate,
   estimateAsync: () => estimateAsync,
+  estimateCost: () => estimateCost,
+  estimateCostFromText: () => estimateCostFromText,
+  estimateCostFromTextAsync: () => estimateCostFromTextAsync,
   getAvailableModels: () => getAvailableModels,
   getModelConfig: () => getModelConfig,
   getOpenAIEncoding: () => getOpenAIEncoding,
   getSentencePieceTokenizer: () => getSentencePieceTokenizer,
+  getTotalCost: () => getTotalCost,
   loadSentencePieceTokenizer: () => loadSentencePieceTokenizer,
   parseModelProto: () => parseModelProto
 });
@@ -159,7 +163,11 @@ var models = {
   },
   "gpt-4o": {
     charsPerToken: 4,
-    inputCostPerMillion: 2.5
+    inputCostPerMillion: 2.5,
+    outputCostPerMillion: 10,
+    cachedInputCostPerMillion: 1.25,
+    batchInputCostPerMillion: 1.25,
+    batchOutputCostPerMillion: 5
   },
   "gpt-4o-2024-05-13": {
     charsPerToken: 4,
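With these rates, batch mode halves the `gpt-4o` bill: 1M input + 500k output tokens costs 2.5 + 5.0 = 7.5 USD at standard rates versus 1.25 + 2.5 = 3.75 USD at batch rates. A quick check via the new public API:

```typescript
import { estimateCost } from 'ai-token-estimator';

const standard = estimateCost({ model: 'gpt-4o', inputTokens: 1_000_000, outputTokens: 500_000 });
const batch = estimateCost({ model: 'gpt-4o', inputTokens: 1_000_000, outputTokens: 500_000, mode: 'batch' });
console.log(standard.costs.total, batch.costs.total); // 7.5 3.75
```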
@@ -171,7 +179,11 @@ var models = {
   },
   "gpt-4o-mini": {
     charsPerToken: 4,
-    inputCostPerMillion: 0.15
+    inputCostPerMillion: 0.15,
+    outputCostPerMillion: 0.6,
+    cachedInputCostPerMillion: 0.075,
+    batchInputCostPerMillion: 0.075,
+    batchOutputCostPerMillion: 0.3
   },
   "gpt-4o-mini-audio-preview": {
     charsPerToken: 4,
@@ -401921,67 +401933,30 @@ function decode(tokens, options) {
   return api.decode(tokens);
 }
 
-// src/
-function
-
-  for (const _char of text) {
-    count++;
-  }
-  return count;
+// src/token-counter.ts
+function isNonOpenAIModel(model) {
+  return model.startsWith("claude-") || model.startsWith("gemini-");
 }
-function
-  const { text, model
-
-
-
-
-
-  const characterCount = countCodePoints(text);
-  const isNonOpenAIModel3 = model.startsWith("claude-") || model.startsWith("gemini-");
-  let estimatedTokens;
-  let tokenizerModeUsed = "heuristic";
-  let encodingUsed;
-  const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
-  if (shouldTryExact && !isNonOpenAIModel3) {
-    try {
-      estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
-      tokenizerModeUsed = "openai_exact";
-      encodingUsed = getOpenAIEncoding({ model });
-    } catch (error) {
-      if (tokenizer === "openai_exact") {
-        throw error;
-      }
-    }
-  } else if (tokenizer === "openai_exact" && isNonOpenAIModel3) {
-    throw new Error(
-      `Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`
-    );
+function countTokens(input) {
+  const { text, model } = input;
+  if (isNonOpenAIModel(model)) {
+    return {
+      tokens: estimate({ text, model }).estimatedTokens,
+      exact: false
+    };
   }
-
-
-
-
-
-
-
-
-
-
-
-    estimatedTokens = Math.ceil(rawTokens);
-  }
-  tokenizerModeUsed = "heuristic";
+  try {
+    return {
+      tokens: encode(text, { model, allowSpecial: "none" }).length,
+      exact: true,
+      encoding: getOpenAIEncoding({ model })
+    };
+  } catch {
+    return {
+      tokens: estimate({ text, model }).estimatedTokens,
+      exact: false
+    };
   }
-  const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
-  return {
-    model,
-    characterCount,
-    estimatedTokens,
-    estimatedInputCost,
-    charsPerToken: config.charsPerToken,
-    tokenizerMode: tokenizerModeUsed,
-    encodingUsed
-  };
 }
 
 // src/providers/anthropic.ts
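The inline estimator code that previously sat at this point in the bundle is replaced by a shared `countTokens` helper (`src/token-counter.ts`), which the new sync cost functions reuse: exact tiktoken counts for OpenAI models, heuristic fallback for everything else. An illustrative typing of its result (the interface name is hypothetical; the bundle is untyped JS):

```typescript
// Hypothetical shape of the countTokens() result seen in the bundle above.
interface TokenCountResult {
  tokens: number;    // token count used for pricing
  exact: boolean;    // true only when tiktoken encoding succeeded
  encoding?: string; // OpenAI encoding name, set only in the exact case
}
```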
@@ -404598,12 +404573,12 @@ async function countGemmaSentencePieceTokens(params) {
 }
 
 // src/estimator-async.ts
-function
+function countCodePoints(text) {
   let count = 0;
   for (const _char of text) count++;
   return count;
 }
-function
+function isNonOpenAIModel2(model) {
   return model.startsWith("claude-") || model.startsWith("gemini-");
 }
 function shouldFallbackToHeuristic(err) {
@@ -404617,9 +404592,17 @@ function shouldFallbackToHeuristic(err) {
   return false;
 }
 async function estimateAsync(input) {
-  const {
+  const {
+    text,
+    model,
+    rounding = "ceil",
+    tokenizer = "heuristic",
+    outputTokens,
+    cachedInputTokens,
+    mode
+  } = input;
   const config = getModelConfig(model);
-  const characterCount =
+  const characterCount = countCodePoints(text);
   let estimatedTokens;
   let tokenizerModeUsed = "heuristic";
   let encodingUsed;
@@ -404670,7 +404653,7 @@ async function estimateAsync(input) {
       tokenizerModeUsed = "gemma_sentencepiece";
     } else {
       const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
-      if (shouldTryExact && !
+      if (shouldTryExact && !isNonOpenAIModel2(model)) {
         try {
           estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
           tokenizerModeUsed = "openai_exact";
@@ -404678,7 +404661,7 @@ async function estimateAsync(input) {
         } catch (error) {
           if (tokenizer === "openai_exact") throw error;
         }
-      } else if (tokenizer === "openai_exact" &&
+      } else if (tokenizer === "openai_exact" && isNonOpenAIModel2(model)) {
         throw new Error(`Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`);
       }
     }
@@ -404698,6 +404681,26 @@ async function estimateAsync(input) {
     tokenizerModeUsed = "heuristic";
   }
   const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+  let estimatedOutputCost;
+  let estimatedCachedInputCost;
+  let estimatedTotalCost = estimatedInputCost;
+  const hasCostInputs = outputTokens !== void 0 || cachedInputTokens !== void 0 || mode !== void 0;
+  if (hasCostInputs) {
+    try {
+      const costResult = estimateCost({
+        model,
+        inputTokens: estimatedTokens,
+        outputTokens,
+        cachedInputTokens,
+        mode
+      });
+      estimatedOutputCost = costResult.costs.output > 0 ? costResult.costs.output : void 0;
+      estimatedCachedInputCost = costResult.costs.cachedInput > 0 ? costResult.costs.cachedInput : void 0;
+      estimatedTotalCost = costResult.costs.total;
+    } catch (error) {
+      throw error;
+    }
+  }
   return {
     model,
     characterCount,
@@ -404705,34 +404708,241 @@ async function estimateAsync(input) {
     estimatedInputCost,
     charsPerToken: config.charsPerToken,
     tokenizerMode: tokenizerModeUsed,
-    encodingUsed
+    encodingUsed,
+    outputTokens,
+    estimatedOutputCost,
+    estimatedCachedInputCost,
+    estimatedTotalCost
   };
 }
 
-// src/
-function
-
+// src/cost.ts
+function validateTokenCount(value, name) {
+  const n = value ?? 0;
+  if (!Number.isFinite(n) || n < 0 || !Number.isInteger(n)) {
+    throw new Error(`${name} must be a non-negative integer, got: ${n}`);
+  }
+  return n;
 }
-function
-  const {
-
-
-
-
-
+function estimateCost(options) {
+  const { model, mode = "standard" } = options;
+  const inputTokens = validateTokenCount(options.inputTokens, "inputTokens");
+  const outputTokens = validateTokenCount(options.outputTokens, "outputTokens");
+  const cachedInputTokens = validateTokenCount(options.cachedInputTokens, "cachedInputTokens");
+  if (cachedInputTokens > inputTokens) {
+    throw new Error(
+      `cachedInputTokens (${cachedInputTokens}) cannot exceed inputTokens (${inputTokens})`
+    );
   }
-
-
-
-
-
-
-
+  const config = getModelConfig(model);
+  if (outputTokens > 0 && config.outputCostPerMillion === void 0) {
+    throw new Error(
+      `Output pricing not available for model "${model}". Cannot estimate cost for ${outputTokens} output tokens.`
+    );
+  }
+  if (mode === "batch") {
+    if (cachedInputTokens > 0) {
+      throw new Error(
+        `Batch mode does not support cached tokens. Got cachedInputTokens: ${cachedInputTokens}. Use mode: 'standard' for cached pricing.`
+      );
+    }
+    if (config.batchInputCostPerMillion === void 0) {
+      throw new Error(
+        `Batch input pricing not available for model "${model}". Use mode: 'standard' or choose a model with batch pricing.`
+      );
+    }
+    if (outputTokens > 0 && config.batchOutputCostPerMillion === void 0) {
+      throw new Error(
+        `Batch output pricing not available for model "${model}". Cannot estimate batch cost for ${outputTokens} output tokens.`
+      );
+    }
+  }
+  const nonCachedInputTokens = inputTokens - cachedInputTokens;
+  if (mode === "batch") {
+    const inputCost2 = inputTokens * config.batchInputCostPerMillion / 1e6;
+    const outputCost2 = outputTokens > 0 ? outputTokens * config.batchOutputCostPerMillion / 1e6 : 0;
     return {
-
-
+      model,
+      mode: "batch",
+      tokens: {
+        input: inputTokens,
+        cachedInput: 0,
+        // Batch mode doesn't use cached pricing
+        nonCachedInput: inputTokens,
+        output: outputTokens
+      },
+      costs: {
+        input: inputCost2,
+        cachedInput: 0,
+        output: outputCost2,
+        total: inputCost2 + outputCost2
+      },
+      rates: {
+        // In batch mode, inputPerMillion/outputPerMillion reflect the batch rates used
+        inputPerMillion: config.batchInputCostPerMillion,
+        outputPerMillion: config.batchOutputCostPerMillion,
+        batchInputPerMillion: config.batchInputCostPerMillion,
+        batchOutputPerMillion: config.batchOutputCostPerMillion
+      }
     };
   }
+  if (cachedInputTokens > 0 && config.cachedInputCostPerMillion === void 0) {
+    throw new Error(
+      `Cached input pricing not available for model "${model}". Cannot estimate cost for ${cachedInputTokens} cached input tokens.`
+    );
+  }
+  const inputCost = nonCachedInputTokens * config.inputCostPerMillion / 1e6;
+  const cachedInputCost = cachedInputTokens > 0 ? cachedInputTokens * config.cachedInputCostPerMillion / 1e6 : 0;
+  const outputCost = outputTokens > 0 ? outputTokens * config.outputCostPerMillion / 1e6 : 0;
+  return {
+    model,
+    mode: "standard",
+    tokens: {
+      input: inputTokens,
+      cachedInput: cachedInputTokens,
+      nonCachedInput: nonCachedInputTokens,
+      output: outputTokens
+    },
+    costs: {
+      input: inputCost,
+      cachedInput: cachedInputCost,
+      output: outputCost,
+      total: inputCost + cachedInputCost + outputCost
+    },
+    rates: {
+      inputPerMillion: config.inputCostPerMillion,
+      outputPerMillion: config.outputCostPerMillion,
+      cachedInputPerMillion: config.cachedInputCostPerMillion
+    }
+  };
+}
+function estimateCostFromText(options) {
+  const { model, inputText, outputText, outputTokens: manualOutputTokens, ...rest } = options;
+  const inputTokens = countTokens({ text: inputText, model }).tokens;
+  let outputTokens = manualOutputTokens;
+  if (manualOutputTokens === void 0 && outputText !== void 0) {
+    outputTokens = countTokens({ text: outputText, model }).tokens;
+  }
+  return estimateCost({ model, inputTokens, outputTokens, ...rest });
+}
+async function estimateCostFromTextAsync(options) {
+  const {
+    inputText,
+    outputText,
+    outputTokens: manualOutputTokens,
+    cachedInputTokens,
+    mode,
+    ...providerOptions
+    // Includes model + all EstimateAsyncInput options
+  } = options;
+  const { model } = providerOptions;
+  const inputResult = await estimateAsync({ text: inputText, ...providerOptions });
+  const inputTokens = inputResult.estimatedTokens;
+  let outputTokens = manualOutputTokens;
+  if (manualOutputTokens === void 0 && outputText !== void 0) {
+    const outputResult = await estimateAsync({ text: outputText, ...providerOptions });
+    outputTokens = outputResult.estimatedTokens;
+  }
+  return estimateCost({ model, inputTokens, outputTokens, cachedInputTokens, mode });
+}
+function getTotalCost(model, inputTokens, outputTokens = 0) {
+  const estimate2 = estimateCost({ model, inputTokens, outputTokens });
+  return estimate2.costs.total;
+}
+
+// src/estimator.ts
+function countCodePoints2(text) {
+  let count = 0;
+  for (const _char of text) {
+    count++;
+  }
+  return count;
+}
+function estimate(input) {
+  const {
+    text,
+    model,
+    rounding = "ceil",
+    tokenizer = "heuristic",
+    outputTokens,
+    cachedInputTokens,
+    mode
+  } = input;
+  const config = getModelConfig(model);
+  const tokenizerStr = tokenizer;
+  if (tokenizerStr === "anthropic_count_tokens" || tokenizerStr === "gemini_count_tokens" || tokenizerStr === "gemma_sentencepiece") {
+    throw new Error(`Tokenizer mode "${tokenizerStr}" requires async execution. Use estimateAsync(...) instead.`);
+  }
+  const characterCount = countCodePoints2(text);
+  const isNonOpenAIModel3 = model.startsWith("claude-") || model.startsWith("gemini-");
+  let estimatedTokens;
+  let tokenizerModeUsed = "heuristic";
+  let encodingUsed;
+  const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
+  if (shouldTryExact && !isNonOpenAIModel3) {
+    try {
+      estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
+      tokenizerModeUsed = "openai_exact";
+      encodingUsed = getOpenAIEncoding({ model });
+    } catch (error) {
+      if (tokenizer === "openai_exact") {
+        throw error;
+      }
+    }
+  } else if (tokenizer === "openai_exact" && isNonOpenAIModel3) {
+    throw new Error(
+      `Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`
+    );
+  }
+  if (estimatedTokens === void 0) {
+    const rawTokens = characterCount / config.charsPerToken;
+    switch (rounding) {
+      case "floor":
+        estimatedTokens = Math.floor(rawTokens);
+        break;
+      case "round":
+        estimatedTokens = Math.round(rawTokens);
+        break;
+      case "ceil":
+      default:
+        estimatedTokens = Math.ceil(rawTokens);
+    }
+    tokenizerModeUsed = "heuristic";
+  }
+  const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+  let estimatedOutputCost;
+  let estimatedCachedInputCost;
+  let estimatedTotalCost = estimatedInputCost;
+  const hasCostInputs = outputTokens !== void 0 || cachedInputTokens !== void 0 || mode !== void 0;
+  if (hasCostInputs) {
+    try {
+      const costResult = estimateCost({
+        model,
+        inputTokens: estimatedTokens,
+        outputTokens,
+        cachedInputTokens,
+        mode
+      });
+      estimatedOutputCost = costResult.costs.output > 0 ? costResult.costs.output : void 0;
+      estimatedCachedInputCost = costResult.costs.cachedInput > 0 ? costResult.costs.cachedInput : void 0;
+      estimatedTotalCost = costResult.costs.total;
+    } catch (error) {
+      throw error;
+    }
+  }
+  return {
+    model,
+    characterCount,
+    estimatedTokens,
+    estimatedInputCost,
+    charsPerToken: config.charsPerToken,
+    tokenizerMode: tokenizerModeUsed,
+    encodingUsed,
+    outputTokens,
+    estimatedOutputCost,
+    estimatedCachedInputCost,
+    estimatedTotalCost
+  };
 }
 
 // src/chat-token-constants.ts
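The guards in `estimateCost` fire before any arithmetic, so invalid combinations fail fast. A sketch of the error paths from the calling side (each call below throws):

```typescript
import { estimateCost } from 'ai-token-estimator';

// cachedInputTokens cannot exceed inputTokens
estimateCost({ model: 'gpt-4o', inputTokens: 100, cachedInputTokens: 200 });

// batch mode rejects cached tokens
estimateCost({ model: 'gpt-4o', inputTokens: 100, cachedInputTokens: 50, mode: 'batch' });

// token counts must be non-negative integers
estimateCost({ model: 'gpt-4o', inputTokens: 1.5 });
```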
@@ -405019,10 +405229,14 @@ function countChatCompletionTokens(input) {
   ensureSentencePieceModel,
   estimate,
   estimateAsync,
+  estimateCost,
+  estimateCostFromText,
+  estimateCostFromTextAsync,
   getAvailableModels,
   getModelConfig,
   getOpenAIEncoding,
   getSentencePieceTokenizer,
+  getTotalCost,
   loadSentencePieceTokenizer,
   parseModelProto
 });
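For completeness, the four new names are consumable from the CJS build exactly as exported above; a minimal sketch:

```typescript
// CommonJS consumption of the new 1.4.0 exports.
const { estimateCost, getTotalCost } = require('ai-token-estimator');

console.log(getTotalCost('gpt-4o', 1_000_000, 500_000)); // 7.5
```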