ai-token-estimator 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 BitsAndBytesAI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,167 @@
+ # ai-token-estimator
+
+ Estimate token counts and costs for LLM API calls based on character count and model-specific ratios.
+
+ > **Important:** This is a rough estimation tool for budgeting purposes, not a precise tokenizer. Actual token counts may vary by ±20% depending on:
+ > - Content type (code vs prose)
+ > - Language (CJK languages use more tokens)
+ > - API message framing overhead
+ > - Special characters and formatting
+
+ ## Installation
+
+ ```bash
+ npm install ai-token-estimator
+ ```
+
+ ## Usage
+
+ ```typescript
+ import { estimate, getAvailableModels } from 'ai-token-estimator';
+
+ // Basic usage
+ const result = estimate({
+   text: 'Hello, world! This is a test message.',
+   model: 'gpt-4o'
+ });
+
+ console.log(result);
+ // {
+ //   model: 'gpt-4o',
+ //   characterCount: 37,
+ //   estimatedTokens: 10,
+ //   estimatedInputCost: 0.000025,
+ //   charsPerToken: 4
+ // }
+
+ // List available models
+ console.log(getAvailableModels());
+ // ['gpt-5.2', 'gpt-4o', 'claude-opus-4.5', 'gemini-3-pro', ...]
+ ```
+
+ ## API Reference
+
+ ### `estimate(input: EstimateInput): EstimateOutput`
+
+ Estimates token count and cost for the given text and model.
+
+ **Parameters:**
+
+ ```typescript
+ interface EstimateInput {
+   text: string;                           // The text to estimate tokens for
+   model: string;                          // Model ID (e.g., 'gpt-4o', 'claude-opus-4.5')
+   rounding?: 'ceil' | 'round' | 'floor';  // Rounding strategy (default: 'ceil')
+ }
+ ```
+
+ **Returns:**
+
+ ```typescript
+ interface EstimateOutput {
+   model: string;               // The model used
+   characterCount: number;      // Number of Unicode code points
+   estimatedTokens: number;     // Estimated token count (integer)
+   estimatedInputCost: number;  // Estimated cost in USD
+   charsPerToken: number;       // The ratio used for this model
+ }
+ ```
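+
+ The arithmetic behind these fields is simple and matches the built output in `dist/`: the code-point count is divided by the model's ratio, rounded per the `rounding` option, and the cost scales that by the per-million price.
+
+ ```typescript
+ // Sketch of the internal calculation (default 'ceil' rounding):
+ const estimatedTokens = Math.ceil(characterCount / charsPerToken);
+ const estimatedInputCost = estimatedTokens * inputCostPerMillion / 1e6;
+ ```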
+
+ ### `getAvailableModels(): string[]`
+
+ Returns an array of all supported model IDs.
+
+ ### `getModelConfig(model: string): ModelConfig`
+
+ Returns the configuration for a specific model. Throws if the model is not found.
+
+ ```typescript
+ interface ModelConfig {
+   charsPerToken: number;        // Characters per token ratio
+   inputCostPerMillion: number;  // USD per 1M input tokens
+ }
+ ```
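+
+ For example (`gpt-9000` below is a deliberately bogus ID):
+
+ ```typescript
+ import { getModelConfig } from 'ai-token-estimator';
+
+ getModelConfig('gpt-4o');
+ // => { charsPerToken: 4, inputCostPerMillion: 2.5 }
+
+ getModelConfig('gpt-9000');
+ // => Error: Unknown model: "gpt-9000". Available models: gpt-5.2, ...
+ ```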
+
+ ### `DEFAULT_MODELS`
+
+ Read-only object containing all model configurations. Frozen to prevent runtime mutation.
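+
+ Both the top-level map and each per-model entry are frozen, so accidental mutation fails:
+
+ ```typescript
+ import { DEFAULT_MODELS } from 'ai-token-estimator';
+
+ Object.isFrozen(DEFAULT_MODELS);           // true
+ Object.isFrozen(DEFAULT_MODELS['gpt-4o']); // true
+
+ // Throws a TypeError in strict mode (silently ignored otherwise):
+ // DEFAULT_MODELS['gpt-4o'].inputCostPerMillion = 0;
+ ```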
+
+ ## Rounding Options
+
+ By default, token counts are rounded up (`ceil`) for conservative budgeting. You can override this:
+
+ ```typescript
+ // Round up (default) - conservative for budgeting
+ estimate({ text, model: 'gpt-4o', rounding: 'ceil' });
+
+ // Round down - optimistic estimate
+ estimate({ text, model: 'gpt-4o', rounding: 'floor' });
+
+ // Round to nearest - balanced estimate
+ estimate({ text, model: 'gpt-4o', rounding: 'round' });
+ ```
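+
+ To make the difference concrete, take a 10-character string against claude-opus-4.5's 3.5 chars-per-token ratio: 10 / 3.5 ≈ 2.86 raw tokens.
+
+ ```typescript
+ const text = 'Hello, AI!'; // 10 characters
+
+ estimate({ text, model: 'claude-opus-4.5' }).estimatedTokens;                    // 3 (ceil, default)
+ estimate({ text, model: 'claude-opus-4.5', rounding: 'floor' }).estimatedTokens; // 2
+ estimate({ text, model: 'claude-opus-4.5', rounding: 'round' }).estimatedTokens; // 3
+ ```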
+
+ ## Character Counting
+
+ This package counts Unicode code points, not UTF-16 code units (see the example below). This means:
+ - Emoji count as 1 character (not 2 UTF-16 code units); multi-code-point sequences still count per code point
+ - Accented characters count correctly
+ - Most source code characters count as 1
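+
+ A quick illustration (the thumbs-up emoji is one code point encoded as two UTF-16 code units):
+
+ ```typescript
+ '👍'.length;      // 2 (UTF-16 code units)
+ [...'👍'].length; // 1 (code points — what this package counts)
+
+ estimate({ text: 'Hello 👍', model: 'gpt-4o' }).characterCount; // 7, not 8
+ ```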
+
+ ## Supported Models
+
+ ### OpenAI Models
+
+ | Model | Chars/Token | Input Cost (per 1M tokens) |
+ |-------|-------------|---------------------------|
+ | gpt-5.2 | 4 | $1.75 |
+ | gpt-5.2-pro | 4 | $21.00 |
+ | gpt-5-mini | 4 | $0.25 |
+ | gpt-4.1 | 4 | $3.00 |
+ | gpt-4.1-mini | 4 | $0.80 |
+ | gpt-4.1-nano | 4 | $0.20 |
+ | gpt-4o | 4 | $2.50 |
+ | gpt-4o-mini | 4 | $0.15 |
+ | o3 | 4 | $2.00 |
+ | o4-mini | 4 | $4.00 |
+ | o1 | 4 | $15.00 |
+ | o1-pro | 4 | $150.00 |
+
+ ### Anthropic Claude Models
+
+ | Model | Chars/Token | Input Cost (per 1M tokens) |
+ |-------|-------------|---------------------------|
+ | claude-opus-4.5 | 3.5 | $5.00 |
+ | claude-sonnet-4.5 | 3.5 | $3.00 |
+ | claude-haiku-4.5 | 3.5 | $1.00 |
+ | claude-opus-4 | 3.5 | $15.00 |
+ | claude-opus-4.1 | 3.5 | $15.00 |
+ | claude-sonnet-4 | 3.5 | $3.00 |
+ | claude-opus-3 | 3.5 | $15.00 |
+ | claude-haiku-3 | 3.5 | $0.25 |
+ | claude-haiku-3.5 | 3.5 | $0.80 |
+
+ ### Google Gemini Models
+
+ | Model | Chars/Token | Input Cost (per 1M tokens) |
+ |-------|-------------|---------------------------|
+ | gemini-3-pro | 4 | $2.00 |
+ | gemini-3-flash | 4 | $0.50 |
+ | gemini-2.5-pro | 4 | $1.25 |
+ | gemini-2.5-flash | 4 | $0.30 |
+ | gemini-2.5-flash-lite | 4 | $0.10 |
+ | gemini-2.0-flash | 4 | $0.10 |
+ | gemini-2.0-flash-lite | 4 | $0.075 |
+
+ *Pricing last verified: December 2025*
+
+ ## Updating Pricing
+
+ Model configurations are embedded in the package. To update pricing:
+ 1. Modify `src/models.ts` (see the sketch below)
+ 2. Create a changeset: `npx changeset`
+ 3. Publish a new version
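+
+ `src/models.ts` is not shipped in the tarball, but judging from the built output it is a flat map of model IDs to configs, so a price update or a new model is a one-object change. A minimal sketch (the `my-new-model` entry is hypothetical):
+
+ ```typescript
+ // src/models.ts (shape inferred from dist/index.js)
+ const models = {
+   // ...existing entries...
+   'my-new-model': {
+     charsPerToken: 4,        // chars-per-token ratio
+     inputCostPerMillion: 1.5 // USD per 1M input tokens
+   }
+ };
+ ```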
+
+ ## License
+
+ MIT
package/dist/index.cjs ADDED
@@ -0,0 +1,225 @@
+ "use strict";
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+   for (var name in all)
+     __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+   if (from && typeof from === "object" || typeof from === "function") {
+     for (let key of __getOwnPropNames(from))
+       if (!__hasOwnProp.call(to, key) && key !== except)
+         __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+   }
+   return to;
+ };
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/index.ts
+ var index_exports = {};
+ __export(index_exports, {
+   DEFAULT_MODELS: () => DEFAULT_MODELS,
+   estimate: () => estimate,
+   getAvailableModels: () => getAvailableModels,
+   getModelConfig: () => getModelConfig
+ });
+ module.exports = __toCommonJS(index_exports);
+
+ // src/models.ts
+ var models = {
+   // ===================
+   // OpenAI Models
+   // ===================
+   // OpenAI uses ~4 chars per token for English text
+   // GPT-5 series (Flagship)
+   "gpt-5.2": {
+     charsPerToken: 4,
+     inputCostPerMillion: 1.75
+   },
+   "gpt-5.2-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 21
+   },
+   "gpt-5-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.25
+   },
+   // GPT-4.1 series
+   "gpt-4.1": {
+     charsPerToken: 4,
+     inputCostPerMillion: 3
+   },
+   "gpt-4.1-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.8
+   },
+   "gpt-4.1-nano": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.2
+   },
+   // GPT-4o series
+   "gpt-4o": {
+     charsPerToken: 4,
+     inputCostPerMillion: 2.5
+   },
+   "gpt-4o-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.15
+   },
+   // OpenAI Reasoning models (o-series)
+   "o3": {
+     charsPerToken: 4,
+     inputCostPerMillion: 2
+     // Estimated based on similar tier
+   },
+   "o4-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 4
+   },
+   "o1": {
+     charsPerToken: 4,
+     inputCostPerMillion: 15
+   },
+   "o1-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 150
+     // High-end reasoning
+   },
+   // ===================
+   // Anthropic Models
+   // ===================
+   // Anthropic uses ~3.5 chars per token for English text
+   // Claude 4.5 series (Latest)
+   "claude-opus-4.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 5
+   },
+   "claude-sonnet-4.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 3
+   },
+   "claude-haiku-4.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 1
+   },
+   // Claude 4 series
+   "claude-opus-4": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 15
+   },
+   "claude-opus-4.1": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 15
+   },
+   "claude-sonnet-4": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 3
+   },
+   // Claude 3 series (Legacy)
+   "claude-opus-3": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 15
+   },
+   "claude-haiku-3": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 0.25
+   },
+   "claude-haiku-3.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 0.8
+   },
+   // ===================
+   // Google Gemini Models
+   // ===================
+   // Gemini uses similar tokenization to OpenAI (~4 chars per token)
+   // Gemini 3 series (Latest)
+   "gemini-3-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 2
+   },
+   "gemini-3-flash": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.5
+   },
+   // Gemini 2.5 series
+   "gemini-2.5-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 1.25
+   },
+   "gemini-2.5-flash": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.3
+   },
+   "gemini-2.5-flash-lite": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.1
+   },
+   // Gemini 2.0 series
+   "gemini-2.0-flash": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.1
+   },
+   "gemini-2.0-flash-lite": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.075
+   }
+ };
+ Object.values(models).forEach((config) => Object.freeze(config));
+ var DEFAULT_MODELS = Object.freeze(models);
+ function getModelConfig(model) {
+   const config = DEFAULT_MODELS[model];
+   if (!config) {
+     const available = Object.keys(DEFAULT_MODELS).join(", ");
+     throw new Error(
+       `Unknown model: "${model}". Available models: ${available}`
+     );
+   }
+   return config;
+ }
+ function getAvailableModels() {
+   return Object.keys(DEFAULT_MODELS);
+ }
+
+ // src/estimator.ts
+ function countCodePoints(text) {
+   let count = 0;
+   for (const _char of text) {
+     count++;
+   }
+   return count;
+ }
+ function estimate(input) {
+   const { text, model, rounding = "ceil" } = input;
+   const config = getModelConfig(model);
+   const characterCount = countCodePoints(text);
+   const rawTokens = characterCount / config.charsPerToken;
+   let estimatedTokens;
+   switch (rounding) {
+     case "floor":
+       estimatedTokens = Math.floor(rawTokens);
+       break;
+     case "round":
+       estimatedTokens = Math.round(rawTokens);
+       break;
+     case "ceil":
+     default:
+       estimatedTokens = Math.ceil(rawTokens);
+   }
+   const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+   return {
+     model,
+     characterCount,
+     estimatedTokens,
+     estimatedInputCost,
+     charsPerToken: config.charsPerToken
+   };
+ }
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+   DEFAULT_MODELS,
+   estimate,
+   getAvailableModels,
+   getModelConfig
+ });
package/dist/index.d.cts ADDED
@@ -0,0 +1,70 @@
+ /**
+  * Configuration for a specific LLM model.
+  */
+ interface ModelConfig {
+     /** Characters per token ratio for this model */
+     charsPerToken: number;
+     /** Cost in USD per 1 million input tokens */
+     inputCostPerMillion: number;
+ }
+ /**
+  * Input parameters for the estimate function.
+  */
+ interface EstimateInput {
+     /** The text to estimate tokens for */
+     text: string;
+     /** The model ID (must exist in default config) */
+     model: string;
+     /** Rounding strategy for token count (default: 'ceil') */
+     rounding?: 'ceil' | 'round' | 'floor';
+ }
+ /**
+  * Output from the estimate function.
+  */
+ interface EstimateOutput {
+     /** The model used for estimation */
+     model: string;
+     /** Number of Unicode code points in the input */
+     characterCount: number;
+     /** Estimated token count (integer, rounded per rounding strategy) */
+     estimatedTokens: number;
+     /** Estimated input cost in USD */
+     estimatedInputCost: number;
+     /** The chars-per-token ratio used */
+     charsPerToken: number;
+ }
+
+ /**
+  * Estimate token count and cost for the given text and model.
+  *
+  * @param input - The estimation input parameters
+  * @returns The estimation output with token count and cost
+  * @throws Error if the model is not found in the configuration
+  *
+  * @example
+  * ```typescript
+  * const result = estimate({
+  *   text: 'Hello, world!',
+  *   model: 'gpt-4o'
+  * });
+  * console.log(result.estimatedTokens); // 4
+  * console.log(result.estimatedInputCost); // 0.00001
+  * ```
+  */
+ declare function estimate(input: EstimateInput): EstimateOutput;
+
+ declare const DEFAULT_MODELS: Readonly<Record<string, Readonly<ModelConfig>>>;
+ /**
+  * Get configuration for a specific model.
+  * @param model - The model ID to look up
+  * @returns The model configuration
+  * @throws Error if model is not found
+  */
+ declare function getModelConfig(model: string): ModelConfig;
+ /**
+  * Get list of all available model IDs.
+  * @returns Array of model ID strings
+  */
+ declare function getAvailableModels(): string[];
+
+ export { DEFAULT_MODELS, type EstimateInput, type EstimateOutput, type ModelConfig, estimate, getAvailableModels, getModelConfig };
package/dist/index.d.ts ADDED
@@ -0,0 +1,70 @@
+ /**
+  * Configuration for a specific LLM model.
+  */
+ interface ModelConfig {
+     /** Characters per token ratio for this model */
+     charsPerToken: number;
+     /** Cost in USD per 1 million input tokens */
+     inputCostPerMillion: number;
+ }
+ /**
+  * Input parameters for the estimate function.
+  */
+ interface EstimateInput {
+     /** The text to estimate tokens for */
+     text: string;
+     /** The model ID (must exist in default config) */
+     model: string;
+     /** Rounding strategy for token count (default: 'ceil') */
+     rounding?: 'ceil' | 'round' | 'floor';
+ }
+ /**
+  * Output from the estimate function.
+  */
+ interface EstimateOutput {
+     /** The model used for estimation */
+     model: string;
+     /** Number of Unicode code points in the input */
+     characterCount: number;
+     /** Estimated token count (integer, rounded per rounding strategy) */
+     estimatedTokens: number;
+     /** Estimated input cost in USD */
+     estimatedInputCost: number;
+     /** The chars-per-token ratio used */
+     charsPerToken: number;
+ }
+
+ /**
+  * Estimate token count and cost for the given text and model.
+  *
+  * @param input - The estimation input parameters
+  * @returns The estimation output with token count and cost
+  * @throws Error if the model is not found in the configuration
+  *
+  * @example
+  * ```typescript
+  * const result = estimate({
+  *   text: 'Hello, world!',
+  *   model: 'gpt-4o'
+  * });
+  * console.log(result.estimatedTokens); // 4
+  * console.log(result.estimatedInputCost); // 0.00001
+  * ```
+  */
+ declare function estimate(input: EstimateInput): EstimateOutput;
+
+ declare const DEFAULT_MODELS: Readonly<Record<string, Readonly<ModelConfig>>>;
+ /**
+  * Get configuration for a specific model.
+  * @param model - The model ID to look up
+  * @returns The model configuration
+  * @throws Error if model is not found
+  */
+ declare function getModelConfig(model: string): ModelConfig;
+ /**
+  * Get list of all available model IDs.
+  * @returns Array of model ID strings
+  */
+ declare function getAvailableModels(): string[];
+
+ export { DEFAULT_MODELS, type EstimateInput, type EstimateOutput, type ModelConfig, estimate, getAvailableModels, getModelConfig };
package/dist/index.js ADDED
@@ -0,0 +1,195 @@
+ // src/models.ts
+ var models = {
+   // ===================
+   // OpenAI Models
+   // ===================
+   // OpenAI uses ~4 chars per token for English text
+   // GPT-5 series (Flagship)
+   "gpt-5.2": {
+     charsPerToken: 4,
+     inputCostPerMillion: 1.75
+   },
+   "gpt-5.2-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 21
+   },
+   "gpt-5-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.25
+   },
+   // GPT-4.1 series
+   "gpt-4.1": {
+     charsPerToken: 4,
+     inputCostPerMillion: 3
+   },
+   "gpt-4.1-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.8
+   },
+   "gpt-4.1-nano": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.2
+   },
+   // GPT-4o series
+   "gpt-4o": {
+     charsPerToken: 4,
+     inputCostPerMillion: 2.5
+   },
+   "gpt-4o-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.15
+   },
+   // OpenAI Reasoning models (o-series)
+   "o3": {
+     charsPerToken: 4,
+     inputCostPerMillion: 2
+     // Estimated based on similar tier
+   },
+   "o4-mini": {
+     charsPerToken: 4,
+     inputCostPerMillion: 4
+   },
+   "o1": {
+     charsPerToken: 4,
+     inputCostPerMillion: 15
+   },
+   "o1-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 150
+     // High-end reasoning
+   },
+   // ===================
+   // Anthropic Models
+   // ===================
+   // Anthropic uses ~3.5 chars per token for English text
+   // Claude 4.5 series (Latest)
+   "claude-opus-4.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 5
+   },
+   "claude-sonnet-4.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 3
+   },
+   "claude-haiku-4.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 1
+   },
+   // Claude 4 series
+   "claude-opus-4": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 15
+   },
+   "claude-opus-4.1": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 15
+   },
+   "claude-sonnet-4": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 3
+   },
+   // Claude 3 series (Legacy)
+   "claude-opus-3": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 15
+   },
+   "claude-haiku-3": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 0.25
+   },
+   "claude-haiku-3.5": {
+     charsPerToken: 3.5,
+     inputCostPerMillion: 0.8
+   },
+   // ===================
+   // Google Gemini Models
+   // ===================
+   // Gemini uses similar tokenization to OpenAI (~4 chars per token)
+   // Gemini 3 series (Latest)
+   "gemini-3-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 2
+   },
+   "gemini-3-flash": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.5
+   },
+   // Gemini 2.5 series
+   "gemini-2.5-pro": {
+     charsPerToken: 4,
+     inputCostPerMillion: 1.25
+   },
+   "gemini-2.5-flash": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.3
+   },
+   "gemini-2.5-flash-lite": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.1
+   },
+   // Gemini 2.0 series
+   "gemini-2.0-flash": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.1
+   },
+   "gemini-2.0-flash-lite": {
+     charsPerToken: 4,
+     inputCostPerMillion: 0.075
+   }
+ };
+ Object.values(models).forEach((config) => Object.freeze(config));
+ var DEFAULT_MODELS = Object.freeze(models);
+ function getModelConfig(model) {
+   const config = DEFAULT_MODELS[model];
+   if (!config) {
+     const available = Object.keys(DEFAULT_MODELS).join(", ");
+     throw new Error(
+       `Unknown model: "${model}". Available models: ${available}`
+     );
+   }
+   return config;
+ }
+ function getAvailableModels() {
+   return Object.keys(DEFAULT_MODELS);
+ }
+
+ // src/estimator.ts
+ function countCodePoints(text) {
+   let count = 0;
+   for (const _char of text) {
+     count++;
+   }
+   return count;
+ }
+ function estimate(input) {
+   const { text, model, rounding = "ceil" } = input;
+   const config = getModelConfig(model);
+   const characterCount = countCodePoints(text);
+   const rawTokens = characterCount / config.charsPerToken;
+   let estimatedTokens;
+   switch (rounding) {
+     case "floor":
+       estimatedTokens = Math.floor(rawTokens);
+       break;
+     case "round":
+       estimatedTokens = Math.round(rawTokens);
+       break;
+     case "ceil":
+     default:
+       estimatedTokens = Math.ceil(rawTokens);
+   }
+   const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+   return {
+     model,
+     characterCount,
+     estimatedTokens,
+     estimatedInputCost,
+     charsPerToken: config.charsPerToken
+   };
+ }
+ export {
+   DEFAULT_MODELS,
+   estimate,
+   getAvailableModels,
+   getModelConfig
+ };
package/package.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "name": "ai-token-estimator",
+   "version": "1.0.0",
+   "description": "Estimate token counts and costs for LLM API calls",
+   "type": "module",
+   "main": "./dist/index.cjs",
+   "module": "./dist/index.js",
+   "types": "./dist/index.d.ts",
+   "exports": {
+     ".": {
+       "import": {
+         "types": "./dist/index.d.ts",
+         "default": "./dist/index.js"
+       },
+       "require": {
+         "types": "./dist/index.d.cts",
+         "default": "./dist/index.cjs"
+       }
+     }
+   },
+   "files": [
+     "dist",
+     "LICENSE",
+     "README.md"
+   ],
+   "scripts": {
+     "build": "tsup src/index.ts --format cjs,esm --dts",
+     "test": "vitest run",
+     "test:watch": "vitest",
+     "lint": "eslint src tests",
+     "prepublishOnly": "npm run lint && npm run test && npm run build"
+   },
+   "keywords": [
+     "llm",
+     "tokens",
+     "estimator",
+     "openai",
+     "anthropic",
+     "claude",
+     "gpt",
+     "cost"
+   ],
+   "author": "BitsAndBytesAI",
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "https://github.com/BitsAndBytesAI/ai-token-estimator.git"
+   },
+   "engines": {
+     "node": ">=18.0.0"
+   },
+   "devDependencies": {
+     "@changesets/cli": "^2.29.8",
+     "@typescript-eslint/eslint-plugin": "^8.50.1",
+     "@typescript-eslint/parser": "^8.50.1",
+     "eslint": "^9.39.2",
+     "tsup": "^8.5.1",
+     "typescript": "^5.9.3",
+     "vitest": "^4.0.16"
+   }
+ }