@huen123/llm-token-counter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 angusmhlee113
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,162 @@
1
+ # @huen123/llm-token-counter
2
+
3
+ Count prompt text and simple chat-message tokens for mainstream LLM families without making remote API calls.
4
+
5
+ `@huen123/llm-token-counter` is a Node.js-first npm package for quick token budgeting across OpenAI, Anthropic, Gemini, Mistral, Cohere, and Llama model families. Every result includes both the token count and metadata that tells you whether the count is exact or estimated.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install @huen123/llm-token-counter
11
+ ```
12
+
13
+ ## What It Counts
14
+
15
+ - Plain text with `countTokens({ model, input })`
16
+ - Structured chat messages with `countChatTokens({ model, messages })`
17
+ - Supported model metadata with `getModelInfo(model)`
18
+ - Curated canonical model families with `listSupportedModels()`
19
+
20
+ ## Supported Precision
21
+
22
+ | Provider family | Plain text | Chat messages | Strategy |
23
+ | --- | --- | --- | --- |
24
+ | OpenAI (`gpt-*`, `o*`, `chatgpt-*`) | Exact | Estimated | `openai-tiktoken` |
25
+ | Anthropic (`claude-*`) | Estimated | Estimated | `anthropic-tokenizer` |
26
+ | Google (`gemini-*`) | Estimated | Estimated | `gemini-char4` |
27
+ | Mistral (`mistral-*`) | Estimated | Estimated | `cl100k-heuristic` |
28
+ | Cohere (`command-*`, `aya-*`) | Estimated | Estimated | `cl100k-heuristic` |
29
+ | Meta (`llama-*`) | Estimated | Estimated | `cl100k-heuristic` |
30
+
31
+ OpenAI plain-text counts use the model tokenizer directly. Chat-message counts are estimated for every provider because message wrappers vary across APIs and versions.
32
+
33
+ ## Quick Start
34
+
35
+ ```ts
36
+ import {
37
+ countChatTokens,
38
+ countTokens,
39
+ getModelInfo,
40
+ listSupportedModels,
41
+ } from "@huen123/llm-token-counter";
42
+
43
+ const promptResult = countTokens({
44
+ model: "gpt-4o",
45
+ input: "hello world!",
46
+ });
47
+
48
+ console.log(promptResult);
49
+ // {
50
+ // requestedModel: 'gpt-4o',
51
+ // resolvedModel: 'gpt-4o',
52
+ // provider: 'openai',
53
+ // family: 'gpt-4o',
54
+ // tokenCount: 3,
55
+ // precision: 'exact',
56
+ // strategy: 'openai-tiktoken'
57
+ // }
58
+
59
+ const chatResult = countChatTokens({
60
+ model: "claude-3-5-sonnet-latest",
61
+ messages: [
62
+ { role: "system", content: "Be concise." },
63
+ { role: "user", content: "Summarize this paragraph." },
64
+ ],
65
+ });
66
+
67
+ console.log(chatResult.precision); // "estimated"
68
+
69
+ console.log(getModelInfo("chatgpt-4o-latest"));
70
+ console.log(listSupportedModels());
71
+ ```
72
+
73
+ ## API
74
+
75
+ ### `countTokens({ model, input })`
76
+
77
+ Counts tokens for a plain string and returns:
78
+
79
+ ```ts
80
+ type TokenCountResult = {
81
+ requestedModel: string;
82
+ resolvedModel: string;
83
+ provider: "openai" | "anthropic" | "google" | "mistral" | "cohere" | "meta";
84
+ family: string;
85
+ tokenCount: number;
86
+ precision: "exact" | "estimated";
87
+ strategy:
88
+ | "openai-tiktoken"
89
+ | "anthropic-tokenizer"
90
+ | "gemini-char4"
91
+ | "cl100k-heuristic";
92
+ };
93
+ ```
94
+
95
+ ### `countChatTokens({ model, messages })`
96
+
97
+ Accepts only simple string chat messages:
98
+
99
+ ```ts
100
+ type ChatMessage = {
101
+ role: "system" | "user" | "assistant";
102
+ content: string;
103
+ };
104
+ ```
105
+
106
+ Chat counts add a lightweight per-message overhead estimate on top of each message content count.
107
+
108
+ ### `getModelInfo(model)`
109
+
110
+ Resolves aliases to canonical families and returns:
111
+
112
+ ```ts
113
+ type ModelInfo = {
114
+ resolvedModel: string;
115
+ provider: "openai" | "anthropic" | "google" | "mistral" | "cohere" | "meta";
116
+ family: string;
117
+ precision: "exact" | "estimated";
118
+ strategy:
119
+ | "openai-tiktoken"
120
+ | "anthropic-tokenizer"
121
+ | "gemini-char4"
122
+ | "cl100k-heuristic";
123
+ aliases: string[];
124
+ };
125
+ ```
126
+
127
+ ### `listSupportedModels()`
128
+
129
+ Returns the curated canonical families bundled in the current package release.
130
+
131
+ ## Aliases and Unknown Models
132
+
133
+ - Known aliases are normalized to canonical families before counting.
134
+ - Unknown models throw an error with close suggestions instead of silently guessing providers.
135
+ - New provider releases usually only need registry updates in a new package version.
136
+
137
+ ## Unsupported in v1
138
+
139
+ - Tool-call accounting
140
+ - Multimodal inputs such as images, files, and audio
141
+ - Full provider request-body parsing
142
+ - Pricing estimates
143
+ - Remote model-catalog updates
144
+ - Browser runtime support
145
+
146
+ ## Development
147
+
148
+ ```bash
149
+ npm test
150
+ npm run build
151
+ npm pack
152
+ ```
153
+
154
+ ## Publish Checklist
155
+
156
+ ```bash
157
+ npm test
158
+ npm run build
159
+ npm publish --access public
160
+ ```
161
+
162
+ `prepublishOnly` already runs the build and test steps before `npm publish`.
package/dist/index.cjs ADDED
@@ -0,0 +1,316 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
+ // src/index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ countChatTokens: () => countChatTokens,
34
+ countTokens: () => countTokens2,
35
+ getModelInfo: () => getModelInfo,
36
+ listSupportedModels: () => listSupportedModels
37
+ });
38
+ module.exports = __toCommonJS(index_exports);
39
+
40
+ // src/adapters/count-text.ts
41
+ var anthropicTokenizer = __toESM(require("@anthropic-ai/tokenizer"), 1);
42
+ var import_js_tiktoken = require("js-tiktoken");
43
+ var CL100K = (0, import_js_tiktoken.getEncoding)("cl100k_base");
44
+ var normalizeGeminiText = (input) => input.normalize("NFC").replace(/\r\n/g, "\n");
45
+ var countOpenAiTokens = (model, input) => (0, import_js_tiktoken.encodingForModel)(model).encode(input).length;
46
+ var countAnthropicTokens = (input) => anthropicTokenizer.countTokens(input);
47
+ var countGeminiTokens = (input) => Math.ceil(normalizeGeminiText(input).length / 4);
48
+ var countCl100kTokens = (input) => CL100K.encode(input).length;
49
+ var countTextWithEntry = (entry, input) => {
50
+ switch (entry.strategy) {
51
+ case "openai-tiktoken":
52
+ return countOpenAiTokens(entry.resolvedModel, input);
53
+ case "anthropic-tokenizer":
54
+ return countAnthropicTokens(input);
55
+ case "gemini-char4":
56
+ return countGeminiTokens(input);
57
+ case "cl100k-heuristic":
58
+ return countCl100kTokens(input);
59
+ default: {
60
+ const exhaustive = entry.strategy;
61
+ throw new Error(`Unsupported token strategy: ${exhaustive}`);
62
+ }
63
+ }
64
+ };
65
+ var countChatWithEntry = (entry, messages) => {
66
+ const textCount = messages.reduce((sum, message) => sum + countTextWithEntry(entry, message.content), 0);
67
+ return textCount + messages.length * 4 + 2;
68
+ };
69
+
70
+ // src/registry/models.ts
71
+ var MODEL_REGISTRY = [
72
+ {
73
+ resolvedModel: "gpt-4",
74
+ provider: "openai",
75
+ family: "gpt-4",
76
+ precision: "exact",
77
+ strategy: "openai-tiktoken",
78
+ aliases: [],
79
+ prefixes: []
80
+ },
81
+ {
82
+ resolvedModel: "gpt-4o",
83
+ provider: "openai",
84
+ family: "gpt-4o",
85
+ precision: "exact",
86
+ strategy: "openai-tiktoken",
87
+ aliases: ["chatgpt-4o-latest"],
88
+ prefixes: ["gpt-4o-", "chatgpt-4o-"]
89
+ },
90
+ {
91
+ resolvedModel: "gpt-4o-mini",
92
+ provider: "openai",
93
+ family: "gpt-4o-mini",
94
+ precision: "exact",
95
+ strategy: "openai-tiktoken",
96
+ aliases: [],
97
+ prefixes: ["gpt-4o-mini-"]
98
+ },
99
+ {
100
+ resolvedModel: "gpt-3.5-turbo",
101
+ provider: "openai",
102
+ family: "gpt-3.5-turbo",
103
+ precision: "exact",
104
+ strategy: "openai-tiktoken",
105
+ aliases: [],
106
+ prefixes: ["gpt-3.5-turbo-"]
107
+ },
108
+ {
109
+ resolvedModel: "o1",
110
+ provider: "openai",
111
+ family: "o1",
112
+ precision: "exact",
113
+ strategy: "openai-tiktoken",
114
+ aliases: [],
115
+ prefixes: ["o1-"]
116
+ },
117
+ {
118
+ resolvedModel: "o3-mini",
119
+ provider: "openai",
120
+ family: "o3-mini",
121
+ precision: "exact",
122
+ strategy: "openai-tiktoken",
123
+ aliases: [],
124
+ prefixes: ["o3-mini-"]
125
+ },
126
+ {
127
+ resolvedModel: "claude-3-5-sonnet",
128
+ provider: "anthropic",
129
+ family: "claude-3-5-sonnet",
130
+ precision: "estimated",
131
+ strategy: "anthropic-tokenizer",
132
+ aliases: ["claude-3-5-sonnet-latest"],
133
+ prefixes: ["claude-3-5-sonnet-"]
134
+ },
135
+ {
136
+ resolvedModel: "claude-3-haiku",
137
+ provider: "anthropic",
138
+ family: "claude-3-haiku",
139
+ precision: "estimated",
140
+ strategy: "anthropic-tokenizer",
141
+ aliases: ["claude-3-haiku-latest"],
142
+ prefixes: ["claude-3-haiku-"]
143
+ },
144
+ {
145
+ resolvedModel: "gemini-2.5-pro",
146
+ provider: "google",
147
+ family: "gemini-2.5-pro",
148
+ precision: "estimated",
149
+ strategy: "gemini-char4",
150
+ aliases: [],
151
+ prefixes: ["gemini-2.5-pro-"]
152
+ },
153
+ {
154
+ resolvedModel: "gemini-2.5-flash",
155
+ provider: "google",
156
+ family: "gemini-2.5-flash",
157
+ precision: "estimated",
158
+ strategy: "gemini-char4",
159
+ aliases: [],
160
+ prefixes: ["gemini-2.5-flash-"]
161
+ },
162
+ {
163
+ resolvedModel: "mistral-large",
164
+ provider: "mistral",
165
+ family: "mistral-large",
166
+ precision: "estimated",
167
+ strategy: "cl100k-heuristic",
168
+ aliases: ["mistral-large-latest"],
169
+ prefixes: ["mistral-large-"]
170
+ },
171
+ {
172
+ resolvedModel: "command-r-plus",
173
+ provider: "cohere",
174
+ family: "command-r-plus",
175
+ precision: "estimated",
176
+ strategy: "cl100k-heuristic",
177
+ aliases: [],
178
+ prefixes: ["command-r-plus-"]
179
+ },
180
+ {
181
+ resolvedModel: "aya-expanse-32b",
182
+ provider: "cohere",
183
+ family: "aya-expanse-32b",
184
+ precision: "estimated",
185
+ strategy: "cl100k-heuristic",
186
+ aliases: [],
187
+ prefixes: ["aya-expanse-32b-"]
188
+ },
189
+ {
190
+ resolvedModel: "llama-3.1-70b",
191
+ provider: "meta",
192
+ family: "llama-3.1-70b",
193
+ precision: "estimated",
194
+ strategy: "cl100k-heuristic",
195
+ aliases: ["llama-3.1-70b-instruct"],
196
+ prefixes: ["llama-3.1-70b-"]
197
+ }
198
+ ];
199
+
200
+ // src/registry/resolve-model.ts
201
+ var normalizeModelId = (model) => model.trim().toLowerCase();
202
+ var levenshtein = (left, right) => {
203
+ const rows = left.length + 1;
204
+ const cols = right.length + 1;
205
+ const matrix = Array.from({ length: rows }, () => Array(cols).fill(0));
206
+ for (let row = 0; row < rows; row += 1) {
207
+ matrix[row][0] = row;
208
+ }
209
+ for (let col = 0; col < cols; col += 1) {
210
+ matrix[0][col] = col;
211
+ }
212
+ for (let row = 1; row < rows; row += 1) {
213
+ for (let col = 1; col < cols; col += 1) {
214
+ const cost = left[row - 1] === right[col - 1] ? 0 : 1;
215
+ matrix[row][col] = Math.min(
216
+ matrix[row - 1][col] + 1,
217
+ matrix[row][col - 1] + 1,
218
+ matrix[row - 1][col - 1] + cost
219
+ );
220
+ }
221
+ }
222
+ return matrix[left.length][right.length];
223
+ };
224
+ var normalizedName = (value) => value.replace(/[^a-z0-9]/g, "");
225
+ var findClosestModels = (model) => {
226
+ const normalizedInput = normalizedName(model);
227
+ const candidates = MODEL_REGISTRY.flatMap((entry) => [entry.resolvedModel, ...entry.aliases]);
228
+ return candidates.map((candidate) => ({
229
+ candidate,
230
+ distance: levenshtein(normalizedInput, normalizedName(candidate)),
231
+ lengthDifference: Math.abs(normalizedInput.length - normalizedName(candidate).length)
232
+ })).sort(
233
+ (left, right) => left.distance - right.distance || left.lengthDifference - right.lengthDifference || left.candidate.localeCompare(right.candidate)
234
+ ).slice(0, 3).map(({ candidate }) => candidate);
235
+ };
236
+ var findEntry = (normalizedModel) => {
237
+ const exactMatch = MODEL_REGISTRY.find(
238
+ (entry) => entry.resolvedModel === normalizedModel || entry.aliases.includes(normalizedModel)
239
+ );
240
+ if (exactMatch) {
241
+ return exactMatch;
242
+ }
243
+ return MODEL_REGISTRY.find(
244
+ (entry) => entry.prefixes.some((prefix) => normalizedModel.startsWith(prefix))
245
+ );
246
+ };
247
+ var resolveModel = (model) => {
248
+ const normalizedModel = normalizeModelId(model);
249
+ const entry = findEntry(normalizedModel);
250
+ if (!entry) {
251
+ const suggestions = findClosestModels(normalizedModel);
252
+ const suggestionText = suggestions.length > 0 ? ` Did you mean: ${suggestions.join(", ")}?` : "";
253
+ throw new Error(`Unsupported model "${model}".${suggestionText}`);
254
+ }
255
+ return entry;
256
+ };
257
+ var toModelInfo = (entry) => ({
258
+ resolvedModel: entry.resolvedModel,
259
+ provider: entry.provider,
260
+ family: entry.family,
261
+ precision: entry.precision,
262
+ strategy: entry.strategy,
263
+ aliases: entry.aliases
264
+ });
265
+
266
+ // src/index.ts
267
+ var VALID_CHAT_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant"]);
268
+ function assertStringInput(input, label) {
269
+ if (typeof input !== "string") {
270
+ throw new TypeError(`${label} must be a string`);
271
+ }
272
+ }
273
+ function assertChatMessages(messages) {
274
+ for (const message of messages) {
275
+ if (!VALID_CHAT_ROLES.has(message.role)) {
276
+ throw new TypeError(`Unsupported chat role "${String(message.role)}"`);
277
+ }
278
+ if (typeof message.content !== "string") {
279
+ throw new TypeError("Chat messages must use string content");
280
+ }
281
+ }
282
+ }
283
+ var toResult = (requestedModel, tokenCount, resolved) => ({
284
+ requestedModel,
285
+ resolvedModel: resolved.resolvedModel,
286
+ provider: resolved.provider,
287
+ family: resolved.family,
288
+ tokenCount,
289
+ precision: resolved.precision,
290
+ strategy: resolved.strategy
291
+ });
292
+ var countTokens2 = ({ model, input }) => {
293
+ assertStringInput(input, "Input");
294
+ const resolved = resolveModel(model);
295
+ return toResult(model, countTextWithEntry(resolved, input), resolved);
296
+ };
297
+ var countChatTokens = ({
298
+ model,
299
+ messages
300
+ }) => {
301
+ assertChatMessages(messages);
302
+ const resolved = resolveModel(model);
303
+ return toResult(model, countChatWithEntry(resolved, messages), {
304
+ ...resolved,
305
+ precision: "estimated"
306
+ });
307
+ };
308
+ var getModelInfo = (model) => toModelInfo(resolveModel(model));
309
+ var listSupportedModels = () => MODEL_REGISTRY.map(toModelInfo);
310
+ // Annotate the CommonJS export names for ESM import in node:
311
+ 0 && (module.exports = {
312
+ countChatTokens,
313
+ countTokens,
314
+ getModelInfo,
315
+ listSupportedModels
316
+ });
package/dist/index.d.cts ADDED (filename header missing in original report; inferred from tsup `--dts --format esm,cjs` output and the report's alphabetical file ordering)
@@ -0,0 +1,40 @@
1
+ type Provider = "openai" | "anthropic" | "google" | "mistral" | "cohere" | "meta";
2
+ type Precision = "exact" | "estimated";
3
+ type Strategy = "openai-tiktoken" | "anthropic-tokenizer" | "gemini-char4" | "cl100k-heuristic";
4
+ type ChatRole = "system" | "user" | "assistant";
5
+ interface ChatMessage {
6
+ role: ChatRole;
7
+ content: string;
8
+ }
9
+ interface CountTokensParams {
10
+ model: string;
11
+ input: string;
12
+ }
13
+ interface CountChatTokensParams {
14
+ model: string;
15
+ messages: ChatMessage[];
16
+ }
17
+ interface TokenCountResult {
18
+ requestedModel: string;
19
+ resolvedModel: string;
20
+ provider: Provider;
21
+ family: string;
22
+ tokenCount: number;
23
+ precision: Precision;
24
+ strategy: Strategy;
25
+ }
26
+ interface ModelInfo {
27
+ resolvedModel: string;
28
+ provider: Provider;
29
+ family: string;
30
+ precision: Precision;
31
+ strategy: Strategy;
32
+ aliases: string[];
33
+ }
34
+
35
+ declare const countTokens: ({ model, input }: CountTokensParams) => TokenCountResult;
36
+ declare const countChatTokens: ({ model, messages, }: CountChatTokensParams) => TokenCountResult;
37
+ declare const getModelInfo: (model: string) => ModelInfo;
38
+ declare const listSupportedModels: () => ModelInfo[];
39
+
40
+ export { type ChatMessage, type ChatRole, type CountChatTokensParams, type CountTokensParams, type ModelInfo, type Precision, type Provider, type Strategy, type TokenCountResult, countChatTokens, countTokens, getModelInfo, listSupportedModels };
package/dist/index.d.ts ADDED (filename header missing in original report; inferred — this is the file referenced by `"types": "./dist/index.d.ts"` in package.json)
@@ -0,0 +1,40 @@
1
+ type Provider = "openai" | "anthropic" | "google" | "mistral" | "cohere" | "meta";
2
+ type Precision = "exact" | "estimated";
3
+ type Strategy = "openai-tiktoken" | "anthropic-tokenizer" | "gemini-char4" | "cl100k-heuristic";
4
+ type ChatRole = "system" | "user" | "assistant";
5
+ interface ChatMessage {
6
+ role: ChatRole;
7
+ content: string;
8
+ }
9
+ interface CountTokensParams {
10
+ model: string;
11
+ input: string;
12
+ }
13
+ interface CountChatTokensParams {
14
+ model: string;
15
+ messages: ChatMessage[];
16
+ }
17
+ interface TokenCountResult {
18
+ requestedModel: string;
19
+ resolvedModel: string;
20
+ provider: Provider;
21
+ family: string;
22
+ tokenCount: number;
23
+ precision: Precision;
24
+ strategy: Strategy;
25
+ }
26
+ interface ModelInfo {
27
+ resolvedModel: string;
28
+ provider: Provider;
29
+ family: string;
30
+ precision: Precision;
31
+ strategy: Strategy;
32
+ aliases: string[];
33
+ }
34
+
35
+ declare const countTokens: ({ model, input }: CountTokensParams) => TokenCountResult;
36
+ declare const countChatTokens: ({ model, messages, }: CountChatTokensParams) => TokenCountResult;
37
+ declare const getModelInfo: (model: string) => ModelInfo;
38
+ declare const listSupportedModels: () => ModelInfo[];
39
+
40
+ export { type ChatMessage, type ChatRole, type CountChatTokensParams, type CountTokensParams, type ModelInfo, type Precision, type Provider, type Strategy, type TokenCountResult, countChatTokens, countTokens, getModelInfo, listSupportedModels };
package/dist/index.js ADDED
@@ -0,0 +1,276 @@
1
+ // src/adapters/count-text.ts
2
+ import * as anthropicTokenizer from "@anthropic-ai/tokenizer";
3
+ import { encodingForModel, getEncoding } from "js-tiktoken";
4
+ var CL100K = getEncoding("cl100k_base");
5
+ var normalizeGeminiText = (input) => input.normalize("NFC").replace(/\r\n/g, "\n");
6
+ var countOpenAiTokens = (model, input) => encodingForModel(model).encode(input).length;
7
+ var countAnthropicTokens = (input) => anthropicTokenizer.countTokens(input);
8
+ var countGeminiTokens = (input) => Math.ceil(normalizeGeminiText(input).length / 4);
9
+ var countCl100kTokens = (input) => CL100K.encode(input).length;
10
+ var countTextWithEntry = (entry, input) => {
11
+ switch (entry.strategy) {
12
+ case "openai-tiktoken":
13
+ return countOpenAiTokens(entry.resolvedModel, input);
14
+ case "anthropic-tokenizer":
15
+ return countAnthropicTokens(input);
16
+ case "gemini-char4":
17
+ return countGeminiTokens(input);
18
+ case "cl100k-heuristic":
19
+ return countCl100kTokens(input);
20
+ default: {
21
+ const exhaustive = entry.strategy;
22
+ throw new Error(`Unsupported token strategy: ${exhaustive}`);
23
+ }
24
+ }
25
+ };
26
+ var countChatWithEntry = (entry, messages) => {
27
+ const textCount = messages.reduce((sum, message) => sum + countTextWithEntry(entry, message.content), 0);
28
+ return textCount + messages.length * 4 + 2;
29
+ };
30
+
31
+ // src/registry/models.ts
32
+ var MODEL_REGISTRY = [
33
+ {
34
+ resolvedModel: "gpt-4",
35
+ provider: "openai",
36
+ family: "gpt-4",
37
+ precision: "exact",
38
+ strategy: "openai-tiktoken",
39
+ aliases: [],
40
+ prefixes: []
41
+ },
42
+ {
43
+ resolvedModel: "gpt-4o",
44
+ provider: "openai",
45
+ family: "gpt-4o",
46
+ precision: "exact",
47
+ strategy: "openai-tiktoken",
48
+ aliases: ["chatgpt-4o-latest"],
49
+ prefixes: ["gpt-4o-", "chatgpt-4o-"]
50
+ },
51
+ {
52
+ resolvedModel: "gpt-4o-mini",
53
+ provider: "openai",
54
+ family: "gpt-4o-mini",
55
+ precision: "exact",
56
+ strategy: "openai-tiktoken",
57
+ aliases: [],
58
+ prefixes: ["gpt-4o-mini-"]
59
+ },
60
+ {
61
+ resolvedModel: "gpt-3.5-turbo",
62
+ provider: "openai",
63
+ family: "gpt-3.5-turbo",
64
+ precision: "exact",
65
+ strategy: "openai-tiktoken",
66
+ aliases: [],
67
+ prefixes: ["gpt-3.5-turbo-"]
68
+ },
69
+ {
70
+ resolvedModel: "o1",
71
+ provider: "openai",
72
+ family: "o1",
73
+ precision: "exact",
74
+ strategy: "openai-tiktoken",
75
+ aliases: [],
76
+ prefixes: ["o1-"]
77
+ },
78
+ {
79
+ resolvedModel: "o3-mini",
80
+ provider: "openai",
81
+ family: "o3-mini",
82
+ precision: "exact",
83
+ strategy: "openai-tiktoken",
84
+ aliases: [],
85
+ prefixes: ["o3-mini-"]
86
+ },
87
+ {
88
+ resolvedModel: "claude-3-5-sonnet",
89
+ provider: "anthropic",
90
+ family: "claude-3-5-sonnet",
91
+ precision: "estimated",
92
+ strategy: "anthropic-tokenizer",
93
+ aliases: ["claude-3-5-sonnet-latest"],
94
+ prefixes: ["claude-3-5-sonnet-"]
95
+ },
96
+ {
97
+ resolvedModel: "claude-3-haiku",
98
+ provider: "anthropic",
99
+ family: "claude-3-haiku",
100
+ precision: "estimated",
101
+ strategy: "anthropic-tokenizer",
102
+ aliases: ["claude-3-haiku-latest"],
103
+ prefixes: ["claude-3-haiku-"]
104
+ },
105
+ {
106
+ resolvedModel: "gemini-2.5-pro",
107
+ provider: "google",
108
+ family: "gemini-2.5-pro",
109
+ precision: "estimated",
110
+ strategy: "gemini-char4",
111
+ aliases: [],
112
+ prefixes: ["gemini-2.5-pro-"]
113
+ },
114
+ {
115
+ resolvedModel: "gemini-2.5-flash",
116
+ provider: "google",
117
+ family: "gemini-2.5-flash",
118
+ precision: "estimated",
119
+ strategy: "gemini-char4",
120
+ aliases: [],
121
+ prefixes: ["gemini-2.5-flash-"]
122
+ },
123
+ {
124
+ resolvedModel: "mistral-large",
125
+ provider: "mistral",
126
+ family: "mistral-large",
127
+ precision: "estimated",
128
+ strategy: "cl100k-heuristic",
129
+ aliases: ["mistral-large-latest"],
130
+ prefixes: ["mistral-large-"]
131
+ },
132
+ {
133
+ resolvedModel: "command-r-plus",
134
+ provider: "cohere",
135
+ family: "command-r-plus",
136
+ precision: "estimated",
137
+ strategy: "cl100k-heuristic",
138
+ aliases: [],
139
+ prefixes: ["command-r-plus-"]
140
+ },
141
+ {
142
+ resolvedModel: "aya-expanse-32b",
143
+ provider: "cohere",
144
+ family: "aya-expanse-32b",
145
+ precision: "estimated",
146
+ strategy: "cl100k-heuristic",
147
+ aliases: [],
148
+ prefixes: ["aya-expanse-32b-"]
149
+ },
150
+ {
151
+ resolvedModel: "llama-3.1-70b",
152
+ provider: "meta",
153
+ family: "llama-3.1-70b",
154
+ precision: "estimated",
155
+ strategy: "cl100k-heuristic",
156
+ aliases: ["llama-3.1-70b-instruct"],
157
+ prefixes: ["llama-3.1-70b-"]
158
+ }
159
+ ];
160
+
161
+ // src/registry/resolve-model.ts
162
+ var normalizeModelId = (model) => model.trim().toLowerCase();
163
+ var levenshtein = (left, right) => {
164
+ const rows = left.length + 1;
165
+ const cols = right.length + 1;
166
+ const matrix = Array.from({ length: rows }, () => Array(cols).fill(0));
167
+ for (let row = 0; row < rows; row += 1) {
168
+ matrix[row][0] = row;
169
+ }
170
+ for (let col = 0; col < cols; col += 1) {
171
+ matrix[0][col] = col;
172
+ }
173
+ for (let row = 1; row < rows; row += 1) {
174
+ for (let col = 1; col < cols; col += 1) {
175
+ const cost = left[row - 1] === right[col - 1] ? 0 : 1;
176
+ matrix[row][col] = Math.min(
177
+ matrix[row - 1][col] + 1,
178
+ matrix[row][col - 1] + 1,
179
+ matrix[row - 1][col - 1] + cost
180
+ );
181
+ }
182
+ }
183
+ return matrix[left.length][right.length];
184
+ };
185
+ var normalizedName = (value) => value.replace(/[^a-z0-9]/g, "");
186
+ var findClosestModels = (model) => {
187
+ const normalizedInput = normalizedName(model);
188
+ const candidates = MODEL_REGISTRY.flatMap((entry) => [entry.resolvedModel, ...entry.aliases]);
189
+ return candidates.map((candidate) => ({
190
+ candidate,
191
+ distance: levenshtein(normalizedInput, normalizedName(candidate)),
192
+ lengthDifference: Math.abs(normalizedInput.length - normalizedName(candidate).length)
193
+ })).sort(
194
+ (left, right) => left.distance - right.distance || left.lengthDifference - right.lengthDifference || left.candidate.localeCompare(right.candidate)
195
+ ).slice(0, 3).map(({ candidate }) => candidate);
196
+ };
197
+ var findEntry = (normalizedModel) => {
198
+ const exactMatch = MODEL_REGISTRY.find(
199
+ (entry) => entry.resolvedModel === normalizedModel || entry.aliases.includes(normalizedModel)
200
+ );
201
+ if (exactMatch) {
202
+ return exactMatch;
203
+ }
204
+ return MODEL_REGISTRY.find(
205
+ (entry) => entry.prefixes.some((prefix) => normalizedModel.startsWith(prefix))
206
+ );
207
+ };
208
+ var resolveModel = (model) => {
209
+ const normalizedModel = normalizeModelId(model);
210
+ const entry = findEntry(normalizedModel);
211
+ if (!entry) {
212
+ const suggestions = findClosestModels(normalizedModel);
213
+ const suggestionText = suggestions.length > 0 ? ` Did you mean: ${suggestions.join(", ")}?` : "";
214
+ throw new Error(`Unsupported model "${model}".${suggestionText}`);
215
+ }
216
+ return entry;
217
+ };
218
+ var toModelInfo = (entry) => ({
219
+ resolvedModel: entry.resolvedModel,
220
+ provider: entry.provider,
221
+ family: entry.family,
222
+ precision: entry.precision,
223
+ strategy: entry.strategy,
224
+ aliases: entry.aliases
225
+ });
226
+
227
+ // src/index.ts
228
+ var VALID_CHAT_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant"]);
229
+ function assertStringInput(input, label) {
230
+ if (typeof input !== "string") {
231
+ throw new TypeError(`${label} must be a string`);
232
+ }
233
+ }
234
+ function assertChatMessages(messages) {
235
+ for (const message of messages) {
236
+ if (!VALID_CHAT_ROLES.has(message.role)) {
237
+ throw new TypeError(`Unsupported chat role "${String(message.role)}"`);
238
+ }
239
+ if (typeof message.content !== "string") {
240
+ throw new TypeError("Chat messages must use string content");
241
+ }
242
+ }
243
+ }
244
+ var toResult = (requestedModel, tokenCount, resolved) => ({
245
+ requestedModel,
246
+ resolvedModel: resolved.resolvedModel,
247
+ provider: resolved.provider,
248
+ family: resolved.family,
249
+ tokenCount,
250
+ precision: resolved.precision,
251
+ strategy: resolved.strategy
252
+ });
253
+ var countTokens2 = ({ model, input }) => {
254
+ assertStringInput(input, "Input");
255
+ const resolved = resolveModel(model);
256
+ return toResult(model, countTextWithEntry(resolved, input), resolved);
257
+ };
258
+ var countChatTokens = ({
259
+ model,
260
+ messages
261
+ }) => {
262
+ assertChatMessages(messages);
263
+ const resolved = resolveModel(model);
264
+ return toResult(model, countChatWithEntry(resolved, messages), {
265
+ ...resolved,
266
+ precision: "estimated"
267
+ });
268
+ };
269
+ var getModelInfo = (model) => toModelInfo(resolveModel(model));
270
+ var listSupportedModels = () => MODEL_REGISTRY.map(toModelInfo);
271
+ export {
272
+ countChatTokens,
273
+ countTokens2 as countTokens,
274
+ getModelInfo,
275
+ listSupportedModels
276
+ };
package/package.json ADDED
@@ -0,0 +1,53 @@
1
+ {
2
+ "name": "@huen123/llm-token-counter",
3
+ "version": "0.1.0",
4
+ "description": "Count prompt and chat message tokens for mainstream LLM families with explicit precision metadata.",
5
+ "keywords": [
6
+ "llm",
7
+ "tokens",
8
+ "tokenizer",
9
+ "openai",
10
+ "anthropic",
11
+ "gemini",
12
+ "claude"
13
+ ],
14
+ "license": "MIT",
15
+ "author": "angusmhlee113",
16
+ "type": "module",
17
+ "sideEffects": false,
18
+ "main": "./dist/index.cjs",
19
+ "module": "./dist/index.js",
20
+ "types": "./dist/index.d.ts",
21
+ "exports": {
22
+ ".": {
23
+ "types": "./dist/index.d.ts",
24
+ "import": "./dist/index.js",
25
+ "require": "./dist/index.cjs"
26
+ }
27
+ },
28
+ "files": [
29
+ "dist"
30
+ ],
31
+ "engines": {
32
+ "node": ">=18"
33
+ },
34
+ "publishConfig": {
35
+ "access": "public"
36
+ },
37
+ "scripts": {
38
+ "build": "tsup src/index.ts --format esm,cjs --dts --clean",
39
+ "test": "vitest run",
40
+ "test:watch": "vitest",
41
+ "prepublishOnly": "npm run build && npm test"
42
+ },
43
+ "dependencies": {
44
+ "@anthropic-ai/tokenizer": "^0.0.4",
45
+ "js-tiktoken": "^1.0.21"
46
+ },
47
+ "devDependencies": {
48
+ "@types/node": "^25.5.2",
49
+ "tsup": "^8.5.1",
50
+ "typescript": "^6.0.2",
51
+ "vitest": "^4.1.2"
52
+ }
53
+ }