ai-token-estimator 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -3
- package/dist/index.cjs +300 -86
- package/dist/index.d.cts +151 -1
- package/dist/index.d.ts +151 -1
- package/dist/index.js +296 -86
- package/package.json +1 -1
package/README.md
CHANGED
@@ -410,11 +410,17 @@ interface EstimateInput {
   model: string; // Model ID (e.g., 'gpt-4o', 'claude-opus-4.5')
   rounding?: 'ceil' | 'round' | 'floor'; // Rounding strategy (default: 'ceil')
   tokenizer?: 'heuristic' | 'openai_exact' | 'auto'; // Token counting strategy (default: 'heuristic')
+
+  // Extended cost estimation (optional)
+  outputTokens?: number; // Output tokens for cost calculation
+  cachedInputTokens?: number; // Cached input tokens (OpenAI only, must be <= estimatedTokens)
+  mode?: 'standard' | 'batch'; // Pricing mode (default: 'standard')
 }
 ```
 
 Note:
 - Provider-backed modes (`anthropic_count_tokens`, `gemini_count_tokens`, `gemma_sentencepiece`) are only supported in `estimateAsync()`.
+- When `outputTokens`, `cachedInputTokens`, or `mode` is provided, the model must have the corresponding pricing available or an error is thrown.
 
 **Returns:**
 
@@ -423,10 +429,16 @@ interface EstimateOutput {
   model: string; // The model used
   characterCount: number; // Number of Unicode code points
   estimatedTokens: number; // Estimated token count (integer)
-  estimatedInputCost: number; // Estimated cost in USD
+  estimatedInputCost: number; // Estimated input cost in USD
   charsPerToken: number; // The ratio used for this model
   tokenizerMode?: 'heuristic' | 'openai_exact' | 'auto'; // Which strategy was used
   encodingUsed?: string; // OpenAI encoding when using exact tokenization
+
+  // Extended cost fields (when cost inputs are provided)
+  outputTokens?: number; // Echoed from input
+  estimatedOutputCost?: number; // Output token cost in USD
+  estimatedCachedInputCost?: number; // Cached input cost in USD
+  estimatedTotalCost: number; // Total cost (input + output + cached)
 }
 ```
 
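Taken together, the two interfaces round-trip the new cost inputs: a caller passes `outputTokens`/`cachedInputTokens`/`mode` alongside the text and reads the extended cost fields off the result. A minimal sketch against the documented shapes (exact dollar values depend on the bundled pricing tables):

```typescript
import { estimate } from 'ai-token-estimator';

// Sketch: estimate() with the new optional cost inputs from EstimateInput.
const result = estimate({
  text: 'Hello, world!',
  model: 'gpt-4o',
  outputTokens: 200,   // expected completion size
  mode: 'standard',    // default; 'batch' uses batch rates
});

// Extended cost fields from EstimateOutput; estimatedOutputCost is
// undefined when the output cost works out to zero.
console.log(result.estimatedInputCost, result.estimatedOutputCost, result.estimatedTotalCost);
```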
@@ -555,8 +567,12 @@ Returns the configuration for a specific model. Throws if the model is not found
 
 ```typescript
 interface ModelConfig {
-  charsPerToken: number;
-  inputCostPerMillion: number;
+  charsPerToken: number; // Characters per token ratio
+  inputCostPerMillion: number; // USD per 1M input tokens
+  outputCostPerMillion?: number; // USD per 1M output tokens (when available)
+  cachedInputCostPerMillion?: number; // USD per 1M cached input tokens (OpenAI)
+  batchInputCostPerMillion?: number; // USD per 1M batch input tokens (OpenAI)
+  batchOutputCostPerMillion?: number; // USD per 1M batch output tokens (OpenAI)
 }
 ```
 
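Since every `*CostPerMillion` field is USD per one million tokens, a per-request price can be derived directly from the config. A small sketch (rates taken from the `gpt-4o` entry in the dist diff below):

```typescript
import { getModelConfig } from 'ai-token-estimator';

// cost = tokens * ratePerMillion / 1e6
const config = getModelConfig('gpt-4o');
const inputCost = 1_000_000 * config.inputCostPerMillion / 1e6;         // 2.5
const outputCost = 500_000 * (config.outputCostPerMillion ?? 0) / 1e6;  // 5.0
console.log(inputCost + outputCost);                                    // 7.5 USD
```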
@@ -564,6 +580,84 @@ interface ModelConfig {
 
 Read-only object containing all model configurations. Frozen to prevent runtime mutation.
 
+### Cost Estimation API
+
+#### `estimateCost(options): CostEstimate`
+
+Calculate cost from explicit token counts. Provides detailed cost breakdown for input, output, cached, and batch pricing.
+
+```typescript
+import { estimateCost } from 'ai-token-estimator';
+
+const result = estimateCost({
+  model: 'gpt-4o',
+  inputTokens: 1_000_000,
+  outputTokens: 500_000,
+  cachedInputTokens: 200_000, // optional
+  mode: 'standard', // or 'batch'
+});
+
+console.log(result);
+// {
+//   model: 'gpt-4o',
+//   mode: 'standard',
+//   tokens: { input: 1000000, cachedInput: 200000, nonCachedInput: 800000, output: 500000 },
+//   costs: { input: 2.0, cachedInput: 0.25, output: 5.0, total: 7.25 },
+//   rates: { inputPerMillion: 2.5, outputPerMillion: 10.0, cachedInputPerMillion: 1.25, ... }
+// }
+```
+
+Throws if:
+- Model is unknown
+- Token counts are negative or non-integer
+- `cachedInputTokens > inputTokens`
+- Required pricing is missing (output/cached/batch)
+- `mode: 'batch'` with `cachedInputTokens > 0`
+
+#### `estimateCostFromText(options): CostEstimate`
+
+Sync version that counts input tokens from text. Uses heuristic/exact tokenization based on model.
+
+```typescript
+import { estimateCostFromText } from 'ai-token-estimator';
+
+const result = estimateCostFromText({
+  model: 'gpt-4o',
+  inputText: 'Hello, world!',
+  outputText: 'Hi there!', // optional: auto-count output tokens
+  outputTokens: 100, // or: explicit output count (takes precedence)
+  cachedInputTokens: 0,
+  mode: 'standard',
+});
+```
+
+#### `estimateCostFromTextAsync(options): Promise<CostEstimate>`
+
+Async version that supports provider-backed tokenizers for accurate counts.
+
+```typescript
+import { estimateCostFromTextAsync } from 'ai-token-estimator';
+
+const result = await estimateCostFromTextAsync({
+  model: 'claude-sonnet-4',
+  inputText: 'Hello, world!',
+  outputText: 'Hi there!',
+  tokenizer: 'anthropic_count_tokens',
+  anthropic: { apiKey: process.env.ANTHROPIC_API_KEY },
+});
+```
+
+#### `getTotalCost(model, inputTokens, outputTokens?): number`
+
+Quick helper to get total cost for a model.
+
+```typescript
+import { getTotalCost } from 'ai-token-estimator';
+
+const cost = getTotalCost('gpt-4o', 1_000_000, 500_000);
+// 7.5 (USD)
+```
+
 ### SentencePiece API
 
 #### `loadSentencePieceTokenizer(options: FileOptions): Promise<SentencePieceTokenizer>`
package/dist/index.cjs
CHANGED
@@ -49,10 +49,14 @@ __export(index_exports, {
   ensureSentencePieceModel: () => ensureSentencePieceModel,
   estimate: () => estimate,
   estimateAsync: () => estimateAsync,
+  estimateCost: () => estimateCost,
+  estimateCostFromText: () => estimateCostFromText,
+  estimateCostFromTextAsync: () => estimateCostFromTextAsync,
   getAvailableModels: () => getAvailableModels,
   getModelConfig: () => getModelConfig,
   getOpenAIEncoding: () => getOpenAIEncoding,
   getSentencePieceTokenizer: () => getSentencePieceTokenizer,
+  getTotalCost: () => getTotalCost,
   loadSentencePieceTokenizer: () => loadSentencePieceTokenizer,
   parseModelProto: () => parseModelProto
 });
@@ -159,7 +163,11 @@ var models = {
   },
   "gpt-4o": {
     charsPerToken: 4,
-    inputCostPerMillion: 2.5
+    inputCostPerMillion: 2.5,
+    outputCostPerMillion: 10,
+    cachedInputCostPerMillion: 1.25,
+    batchInputCostPerMillion: 1.25,
+    batchOutputCostPerMillion: 5
   },
   "gpt-4o-2024-05-13": {
     charsPerToken: 4,
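With these rates, batch mode halves the `gpt-4o` bill: 1M input + 500k output tokens costs 2.5 + 5.0 = 7.5 USD at standard rates versus 1.25 + 2.5 = 3.75 USD at batch rates. A quick check via the new public API:

```typescript
import { estimateCost } from 'ai-token-estimator';

const standard = estimateCost({ model: 'gpt-4o', inputTokens: 1_000_000, outputTokens: 500_000 });
const batch = estimateCost({ model: 'gpt-4o', inputTokens: 1_000_000, outputTokens: 500_000, mode: 'batch' });
console.log(standard.costs.total, batch.costs.total); // 7.5 3.75
```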
@@ -171,7 +179,11 @@ var models = {
   },
   "gpt-4o-mini": {
     charsPerToken: 4,
-    inputCostPerMillion: 0.15
+    inputCostPerMillion: 0.15,
+    outputCostPerMillion: 0.6,
+    cachedInputCostPerMillion: 0.075,
+    batchInputCostPerMillion: 0.075,
+    batchOutputCostPerMillion: 0.3
   },
   "gpt-4o-mini-audio-preview": {
     charsPerToken: 4,
@@ -401921,67 +401933,30 @@ function decode(tokens, options) {
   return api.decode(tokens);
 }
 
-// src/
-function
-
-  for (const _char of text) {
-    count++;
-  }
-  return count;
+// src/token-counter.ts
+function isNonOpenAIModel(model) {
+  return model.startsWith("claude-") || model.startsWith("gemini-");
 }
-function
-  const { text, model
-
-
-
-
-
-  const characterCount = countCodePoints(text);
-  const isNonOpenAIModel3 = model.startsWith("claude-") || model.startsWith("gemini-");
-  let estimatedTokens;
-  let tokenizerModeUsed = "heuristic";
-  let encodingUsed;
-  const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
-  if (shouldTryExact && !isNonOpenAIModel3) {
-    try {
-      estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
-      tokenizerModeUsed = "openai_exact";
-      encodingUsed = getOpenAIEncoding({ model });
-    } catch (error) {
-      if (tokenizer === "openai_exact") {
-        throw error;
-      }
-    }
-  } else if (tokenizer === "openai_exact" && isNonOpenAIModel3) {
-    throw new Error(
-      `Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`
-    );
+function countTokens(input) {
+  const { text, model } = input;
+  if (isNonOpenAIModel(model)) {
+    return {
+      tokens: estimate({ text, model }).estimatedTokens,
+      exact: false
+    };
   }
-
-
-
-
-
-
-
-
-
-
-
-    estimatedTokens = Math.ceil(rawTokens);
-  }
-  tokenizerModeUsed = "heuristic";
+  try {
+    return {
+      tokens: encode(text, { model, allowSpecial: "none" }).length,
+      exact: true,
+      encoding: getOpenAIEncoding({ model })
+    };
+  } catch {
+    return {
+      tokens: estimate({ text, model }).estimatedTokens,
+      exact: false
+    };
   }
-  const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
-  return {
-    model,
-    characterCount,
-    estimatedTokens,
-    estimatedInputCost,
-    charsPerToken: config.charsPerToken,
-    tokenizerMode: tokenizerModeUsed,
-    encodingUsed
-  };
 }
 
 // src/providers/anthropic.ts
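The inline estimator code that previously sat at this point in the bundle is replaced by a shared `countTokens` helper (`src/token-counter.ts`), which the new sync cost functions reuse: exact tiktoken counts for OpenAI models, heuristic fallback for everything else. An illustrative typing of its result (the interface name is hypothetical; the bundle is untyped JS):

```typescript
// Hypothetical shape of the countTokens() result seen in the bundle above.
interface TokenCountResult {
  tokens: number;    // token count used for pricing
  exact: boolean;    // true only when tiktoken encoding succeeded
  encoding?: string; // OpenAI encoding name, set only in the exact case
}
```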
@@ -404598,12 +404573,12 @@ async function countGemmaSentencePieceTokens(params) {
 }
 
 // src/estimator-async.ts
-function
+function countCodePoints(text) {
   let count = 0;
   for (const _char of text) count++;
   return count;
 }
-function
+function isNonOpenAIModel2(model) {
   return model.startsWith("claude-") || model.startsWith("gemini-");
 }
 function shouldFallbackToHeuristic(err) {
@@ -404617,9 +404592,17 @@ function shouldFallbackToHeuristic(err) {
   return false;
 }
 async function estimateAsync(input) {
-  const {
+  const {
+    text,
+    model,
+    rounding = "ceil",
+    tokenizer = "heuristic",
+    outputTokens,
+    cachedInputTokens,
+    mode
+  } = input;
   const config = getModelConfig(model);
-  const characterCount =
+  const characterCount = countCodePoints(text);
   let estimatedTokens;
   let tokenizerModeUsed = "heuristic";
   let encodingUsed;
@@ -404670,7 +404653,7 @@ async function estimateAsync(input) {
       tokenizerModeUsed = "gemma_sentencepiece";
     } else {
       const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
-      if (shouldTryExact && !
+      if (shouldTryExact && !isNonOpenAIModel2(model)) {
         try {
           estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
           tokenizerModeUsed = "openai_exact";
@@ -404678,7 +404661,7 @@ async function estimateAsync(input) {
         } catch (error) {
           if (tokenizer === "openai_exact") throw error;
         }
-      } else if (tokenizer === "openai_exact" &&
+      } else if (tokenizer === "openai_exact" && isNonOpenAIModel2(model)) {
         throw new Error(`Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`);
       }
     }
@@ -404698,6 +404681,26 @@ async function estimateAsync(input) {
     tokenizerModeUsed = "heuristic";
   }
   const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+  let estimatedOutputCost;
+  let estimatedCachedInputCost;
+  let estimatedTotalCost = estimatedInputCost;
+  const hasCostInputs = outputTokens !== void 0 || cachedInputTokens !== void 0 || mode !== void 0;
+  if (hasCostInputs) {
+    try {
+      const costResult = estimateCost({
+        model,
+        inputTokens: estimatedTokens,
+        outputTokens,
+        cachedInputTokens,
+        mode
+      });
+      estimatedOutputCost = costResult.costs.output > 0 ? costResult.costs.output : void 0;
+      estimatedCachedInputCost = costResult.costs.cachedInput > 0 ? costResult.costs.cachedInput : void 0;
+      estimatedTotalCost = costResult.costs.total;
+    } catch (error) {
+      throw error;
+    }
+  }
   return {
     model,
     characterCount,
@@ -404705,34 +404708,241 @@ async function estimateAsync(input) {
     estimatedInputCost,
     charsPerToken: config.charsPerToken,
     tokenizerMode: tokenizerModeUsed,
-    encodingUsed
+    encodingUsed,
+    outputTokens,
+    estimatedOutputCost,
+    estimatedCachedInputCost,
+    estimatedTotalCost
   };
 }
 
-// src/
-function
-
+// src/cost.ts
+function validateTokenCount(value, name) {
+  const n = value ?? 0;
+  if (!Number.isFinite(n) || n < 0 || !Number.isInteger(n)) {
+    throw new Error(`${name} must be a non-negative integer, got: ${n}`);
+  }
+  return n;
 }
-function
-  const {
-
-
-
-
-
+function estimateCost(options) {
+  const { model, mode = "standard" } = options;
+  const inputTokens = validateTokenCount(options.inputTokens, "inputTokens");
+  const outputTokens = validateTokenCount(options.outputTokens, "outputTokens");
+  const cachedInputTokens = validateTokenCount(options.cachedInputTokens, "cachedInputTokens");
+  if (cachedInputTokens > inputTokens) {
+    throw new Error(
+      `cachedInputTokens (${cachedInputTokens}) cannot exceed inputTokens (${inputTokens})`
+    );
   }
-
-
-
-
-
-
-
+  const config = getModelConfig(model);
+  if (outputTokens > 0 && config.outputCostPerMillion === void 0) {
+    throw new Error(
+      `Output pricing not available for model "${model}". Cannot estimate cost for ${outputTokens} output tokens.`
+    );
+  }
+  if (mode === "batch") {
+    if (cachedInputTokens > 0) {
+      throw new Error(
+        `Batch mode does not support cached tokens. Got cachedInputTokens: ${cachedInputTokens}. Use mode: 'standard' for cached pricing.`
+      );
+    }
+    if (config.batchInputCostPerMillion === void 0) {
+      throw new Error(
+        `Batch input pricing not available for model "${model}". Use mode: 'standard' or choose a model with batch pricing.`
+      );
+    }
+    if (outputTokens > 0 && config.batchOutputCostPerMillion === void 0) {
+      throw new Error(
+        `Batch output pricing not available for model "${model}". Cannot estimate batch cost for ${outputTokens} output tokens.`
+      );
+    }
+  }
+  const nonCachedInputTokens = inputTokens - cachedInputTokens;
+  if (mode === "batch") {
+    const inputCost2 = inputTokens * config.batchInputCostPerMillion / 1e6;
+    const outputCost2 = outputTokens > 0 ? outputTokens * config.batchOutputCostPerMillion / 1e6 : 0;
     return {
-
-
+      model,
+      mode: "batch",
+      tokens: {
+        input: inputTokens,
+        cachedInput: 0,
+        // Batch mode doesn't use cached pricing
+        nonCachedInput: inputTokens,
+        output: outputTokens
+      },
+      costs: {
+        input: inputCost2,
+        cachedInput: 0,
+        output: outputCost2,
+        total: inputCost2 + outputCost2
+      },
+      rates: {
+        // In batch mode, inputPerMillion/outputPerMillion reflect the batch rates used
+        inputPerMillion: config.batchInputCostPerMillion,
+        outputPerMillion: config.batchOutputCostPerMillion,
+        batchInputPerMillion: config.batchInputCostPerMillion,
+        batchOutputPerMillion: config.batchOutputCostPerMillion
+      }
     };
   }
+  if (cachedInputTokens > 0 && config.cachedInputCostPerMillion === void 0) {
+    throw new Error(
+      `Cached input pricing not available for model "${model}". Cannot estimate cost for ${cachedInputTokens} cached input tokens.`
+    );
+  }
+  const inputCost = nonCachedInputTokens * config.inputCostPerMillion / 1e6;
+  const cachedInputCost = cachedInputTokens > 0 ? cachedInputTokens * config.cachedInputCostPerMillion / 1e6 : 0;
+  const outputCost = outputTokens > 0 ? outputTokens * config.outputCostPerMillion / 1e6 : 0;
+  return {
+    model,
+    mode: "standard",
+    tokens: {
+      input: inputTokens,
+      cachedInput: cachedInputTokens,
+      nonCachedInput: nonCachedInputTokens,
+      output: outputTokens
+    },
+    costs: {
+      input: inputCost,
+      cachedInput: cachedInputCost,
+      output: outputCost,
+      total: inputCost + cachedInputCost + outputCost
+    },
+    rates: {
+      inputPerMillion: config.inputCostPerMillion,
+      outputPerMillion: config.outputCostPerMillion,
+      cachedInputPerMillion: config.cachedInputCostPerMillion
+    }
+  };
+}
+function estimateCostFromText(options) {
+  const { model, inputText, outputText, outputTokens: manualOutputTokens, ...rest } = options;
+  const inputTokens = countTokens({ text: inputText, model }).tokens;
+  let outputTokens = manualOutputTokens;
+  if (manualOutputTokens === void 0 && outputText !== void 0) {
+    outputTokens = countTokens({ text: outputText, model }).tokens;
+  }
+  return estimateCost({ model, inputTokens, outputTokens, ...rest });
+}
+async function estimateCostFromTextAsync(options) {
+  const {
+    inputText,
+    outputText,
+    outputTokens: manualOutputTokens,
+    cachedInputTokens,
+    mode,
+    ...providerOptions
+    // Includes model + all EstimateAsyncInput options
+  } = options;
+  const { model } = providerOptions;
+  const inputResult = await estimateAsync({ text: inputText, ...providerOptions });
+  const inputTokens = inputResult.estimatedTokens;
+  let outputTokens = manualOutputTokens;
+  if (manualOutputTokens === void 0 && outputText !== void 0) {
+    const outputResult = await estimateAsync({ text: outputText, ...providerOptions });
+    outputTokens = outputResult.estimatedTokens;
+  }
+  return estimateCost({ model, inputTokens, outputTokens, cachedInputTokens, mode });
+}
+function getTotalCost(model, inputTokens, outputTokens = 0) {
+  const estimate2 = estimateCost({ model, inputTokens, outputTokens });
+  return estimate2.costs.total;
+}
+
+// src/estimator.ts
+function countCodePoints2(text) {
+  let count = 0;
+  for (const _char of text) {
+    count++;
+  }
+  return count;
+}
+function estimate(input) {
+  const {
+    text,
+    model,
+    rounding = "ceil",
+    tokenizer = "heuristic",
+    outputTokens,
+    cachedInputTokens,
+    mode
+  } = input;
+  const config = getModelConfig(model);
+  const tokenizerStr = tokenizer;
+  if (tokenizerStr === "anthropic_count_tokens" || tokenizerStr === "gemini_count_tokens" || tokenizerStr === "gemma_sentencepiece") {
+    throw new Error(`Tokenizer mode "${tokenizerStr}" requires async execution. Use estimateAsync(...) instead.`);
+  }
+  const characterCount = countCodePoints2(text);
+  const isNonOpenAIModel3 = model.startsWith("claude-") || model.startsWith("gemini-");
+  let estimatedTokens;
+  let tokenizerModeUsed = "heuristic";
+  let encodingUsed;
+  const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
+  if (shouldTryExact && !isNonOpenAIModel3) {
+    try {
+      estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
+      tokenizerModeUsed = "openai_exact";
+      encodingUsed = getOpenAIEncoding({ model });
+    } catch (error) {
+      if (tokenizer === "openai_exact") {
+        throw error;
+      }
+    }
+  } else if (tokenizer === "openai_exact" && isNonOpenAIModel3) {
+    throw new Error(
+      `Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`
+    );
+  }
+  if (estimatedTokens === void 0) {
+    const rawTokens = characterCount / config.charsPerToken;
+    switch (rounding) {
+      case "floor":
+        estimatedTokens = Math.floor(rawTokens);
+        break;
+      case "round":
+        estimatedTokens = Math.round(rawTokens);
+        break;
+      case "ceil":
+      default:
+        estimatedTokens = Math.ceil(rawTokens);
+    }
+    tokenizerModeUsed = "heuristic";
+  }
+  const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+  let estimatedOutputCost;
+  let estimatedCachedInputCost;
+  let estimatedTotalCost = estimatedInputCost;
+  const hasCostInputs = outputTokens !== void 0 || cachedInputTokens !== void 0 || mode !== void 0;
+  if (hasCostInputs) {
+    try {
+      const costResult = estimateCost({
+        model,
+        inputTokens: estimatedTokens,
+        outputTokens,
+        cachedInputTokens,
+        mode
+      });
+      estimatedOutputCost = costResult.costs.output > 0 ? costResult.costs.output : void 0;
+      estimatedCachedInputCost = costResult.costs.cachedInput > 0 ? costResult.costs.cachedInput : void 0;
+      estimatedTotalCost = costResult.costs.total;
+    } catch (error) {
+      throw error;
+    }
+  }
+  return {
+    model,
+    characterCount,
+    estimatedTokens,
+    estimatedInputCost,
+    charsPerToken: config.charsPerToken,
+    tokenizerMode: tokenizerModeUsed,
+    encodingUsed,
+    outputTokens,
+    estimatedOutputCost,
+    estimatedCachedInputCost,
+    estimatedTotalCost
+  };
 }
 
 // src/chat-token-constants.ts
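The guards in `estimateCost` fire before any arithmetic, so invalid combinations fail fast. A sketch of the error paths from the calling side (each call below throws):

```typescript
import { estimateCost } from 'ai-token-estimator';

// cachedInputTokens cannot exceed inputTokens
estimateCost({ model: 'gpt-4o', inputTokens: 100, cachedInputTokens: 200 });

// batch mode rejects cached tokens
estimateCost({ model: 'gpt-4o', inputTokens: 100, cachedInputTokens: 50, mode: 'batch' });

// token counts must be non-negative integers
estimateCost({ model: 'gpt-4o', inputTokens: 1.5 });
```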
@@ -405019,10 +405229,14 @@ function countChatCompletionTokens(input) {
   ensureSentencePieceModel,
   estimate,
   estimateAsync,
+  estimateCost,
+  estimateCostFromText,
+  estimateCostFromTextAsync,
   getAvailableModels,
   getModelConfig,
   getOpenAIEncoding,
   getSentencePieceTokenizer,
+  getTotalCost,
   loadSentencePieceTokenizer,
   parseModelProto
 });
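For completeness, the four new names are consumable from the CJS build exactly as exported above; a minimal sketch:

```typescript
// CommonJS consumption of the new 1.4.0 exports.
const { estimateCost, getTotalCost } = require('ai-token-estimator');

console.log(getTotalCost('gpt-4o', 1_000_000, 500_000)); // 7.5
```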