token-warden 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,133 @@
1
+ # Token Warden
2
+
3
+ Track LLM costs per feature, team, and user — with budget alerts and anomaly detection.
4
+
5
+ [![npm version](https://img.shields.io/npm/v/token-warden)](https://www.npmjs.com/package/token-warden)
6
+ [![CI](https://github.com/Flagship-Software/token-warden/actions/workflows/ci.yml/badge.svg)](https://github.com/Flagship-Software/token-warden/actions/workflows/ci.yml)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ ## Why
10
+
11
+ LLM API costs are opaque and hard to attribute. When multiple features share the same API keys, you can't tell which feature is responsible for cost spikes. Token Warden wraps your LLM clients to automatically capture token usage and attribute costs per feature.
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ npm install token-warden
17
+ ```
18
+
19
+ ```typescript
20
+ import { warden } from "token-warden";
21
+ import OpenAI from "openai";
22
+
23
+ // Initialize Token Warden
24
+ warden.init({
25
+ apiKey: "tw_your_api_key",
26
+ endpoint: "https://your-token-warden-endpoint.com/v1/events",
27
+ });
28
+
29
+ // Wrap your OpenAI client — tracking happens automatically
30
+ const openai = warden.wrap(new OpenAI(), {
31
+ feature: "chat-support",
32
+ team: "cx-team",
33
+ });
34
+
35
+ // Use the client as usual
36
+ const response = await openai.chat.completions.create({
37
+ model: "gpt-4o",
38
+ messages: [{ role: "user", content: "Hello!" }],
39
+ });
40
+
41
+ // In serverless environments, flush before the function exits
42
+ await warden.flush();
43
+ ```
44
+
45
+ ## Manual Tracking
46
+
47
+ For custom providers or direct HTTP calls that bypass SDK clients, use `warden.track()` to record usage manually.
48
+
49
+ ```typescript
50
+ warden.track({
51
+ provider: "together",
52
+ model: "llama-3.1-70b",
53
+ feature: "document-summary",
54
+ inputTokens: 1200,
55
+ outputTokens: 350,
56
+ latencyMs: 820,
57
+ });
58
+ ```
59
+
60
+ ## Supported Providers
61
+
62
+ | Provider | SDK Pattern | Auto-detected |
63
+ | -------------------- | ----------------------- | ------------- |
64
+ | OpenAI | `new OpenAI()` | ✓ |
65
+ | Anthropic | `new Anthropic()` | ✓ |
66
+ | Google Generative AI | `new GoogleGenerativeAI()` | ✓ |
67
+ | Amazon Bedrock | `new BedrockRuntimeClient()` | ✓ |
68
+ | DeepSeek | `new OpenAI()` (compatible) | ✓ |
69
+ | Mistral | `new Mistral()` | ✓ |
70
+ | xAI | `new OpenAI()` (compatible) | ✓ |
71
+ | Cohere | `new CohereClient()` | ✓ |
72
+
73
+ ## API Reference
74
+
75
+ ### `warden.init(config)`
76
+
77
+ Initialize Token Warden. Call once at application startup.
78
+
79
+ | Parameter | Type | Required | Description |
80
+ | ----------- | -------- | -------- | ------------------------------------------------ |
81
+ | `apiKey` | `string` | Yes | Your Token Warden API key |
82
+ | `endpoint` | `string` | Yes | URL of the Token Warden ingestion endpoint |
83
+ | `flushIntervalMs` | `number` | No | How often to flush events (default: `5000`) |
84
+ | `batchSize` | `number` | No | Max events per batch (default: `50`) |
85
+ | `debug` | `boolean` | No | Enable debug logging (default: `false`) |
86
+
87
+ ### `warden.wrap(client, opts)`
88
+
89
+ Wrap an LLM SDK client to automatically capture usage.
90
+
91
+ | Parameter | Type | Required | Description |
92
+ | ----------- | -------- | -------- | ------------------------------------------------ |
93
+ | `client` | `object` | Yes | An LLM SDK client instance |
94
+ | `opts.feature` | `string` | Yes | Feature name for cost attribution |
95
+ | `opts.team` | `string` | No | Team name for cost attribution |
96
+ | `opts.userId` | `string` | No | User identifier for cost attribution |
97
+ | `opts.metadata` | `Record<string, string>` | No | Additional key-value metadata |
98
+
99
+ ### `warden.track(event)`
100
+
101
+ Manually record a usage event.
102
+
103
+ | Parameter | Type | Required | Description |
104
+ | --------------- | -------- | -------- | -------------------------------------------- |
105
+ | `provider` | `string` | Yes | LLM provider name |
106
+ | `model` | `string` | Yes | Model identifier |
107
+ | `feature` | `string` | Yes | Feature name for cost attribution |
108
+ | `team` | `string` | No | Team name for cost attribution |
109
+ | `userId` | `string` | No | User identifier for cost attribution |
110
+ | `inputTokens` | `number` | Yes | Number of input tokens consumed |
111
+ | `outputTokens` | `number` | Yes | Number of output tokens generated |
112
+ | `latencyMs` | `number` | Yes | Request duration in milliseconds |
113
+ | `metadata` | `Record<string, unknown>` | No | Additional key-value metadata |
114
+
115
+ ### `warden.flush()`
116
+
117
+ Flush all pending events to the ingestion endpoint. Returns a promise that resolves when all events have been sent. Call this before process exit in serverless environments.
118
+
119
+ ### `warden.shutdown()`
120
+
121
+ Flush pending events and stop the background flush interval. Call this during graceful application shutdown.
122
+
123
+ ## Dashboard (Coming Soon)
124
+
125
+ A full-featured dashboard with budget alerts, anomaly detection, and per-feature cost drill-down is in development. Star this repo to get notified when it launches.
126
+
127
+ ## Contributing
128
+
129
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
130
+
131
+ ## License
132
+
133
+ MIT — see [LICENSE](LICENSE) for details.
package/dist/index.cjs ADDED
@@ -0,0 +1,331 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/index.ts
21
+ var index_exports = {};
22
+ __export(index_exports, {
23
+ warden: () => warden
24
+ });
25
+ module.exports = __toCommonJS(index_exports);
26
+
27
+ // src/providers.ts
28
+ function detectProvider(client) {
29
+ const proto = Object.getPrototypeOf(client);
30
+ const name = proto?.constructor?.name?.toLowerCase() ?? "";
31
+ if (name.includes("openai")) return "openai";
32
+ if (name.includes("anthropic")) return "anthropic";
33
+ if (name.includes("google") || name.includes("generativeai")) return "google";
34
+ if (name.includes("deepseek")) return "deepseek";
35
+ if (name.includes("mistral")) return "mistral";
36
+ if (name.includes("xai")) return "xai";
37
+ if (name.includes("cohere")) return "cohere";
38
+ if (name.includes("bedrock")) return "amazon";
39
+ return "unknown";
40
+ }
41
+
42
+ // src/proxy.ts
43
+ function extractCompletionTokens(result) {
44
+ const usage = result.usage;
45
+ const inputTokens = usage?.prompt_tokens ?? usage?.input_tokens ?? 0;
46
+ const outputTokens = usage?.completion_tokens ?? usage?.output_tokens ?? 0;
47
+ return { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
48
+ }
49
+ function extractGoogleTokens(result) {
50
+ const usageMetadata = result.response?.usageMetadata;
51
+ const inputTokens = usageMetadata?.promptTokenCount ?? 0;
52
+ const outputTokens = usageMetadata?.candidatesTokenCount ?? 0;
53
+ const totalTokens = usageMetadata?.totalTokenCount ?? inputTokens + outputTokens;
54
+ return { inputTokens, outputTokens, totalTokens };
55
+ }
56
+ function modelFromArgs(args) {
57
+ return args[0]?.model ?? "unknown";
58
+ }
59
+ function createMethodProxy(original, cfg) {
60
+ return new Proxy(original, {
61
+ get(target, prop) {
62
+ if (prop === cfg.methodName) {
63
+ return async (...args) => {
64
+ const start = Date.now();
65
+ const model = cfg.modelExtractor(args);
66
+ try {
67
+ const fn = target[cfg.methodName];
68
+ const result = await fn.apply(target, args);
69
+ const usage = cfg.tokenExtractor(result);
70
+ cfg.pushEvent({
71
+ feature: cfg.opts.feature,
72
+ team: cfg.opts.team,
73
+ userId: cfg.opts.userId,
74
+ provider: cfg.provider,
75
+ model,
76
+ ...usage,
77
+ latencyMs: Date.now() - start,
78
+ status: "success",
79
+ timestamp: Date.now()
80
+ });
81
+ if (cfg.shouldFlush()) cfg.triggerFlush();
82
+ return result;
83
+ } catch (err) {
84
+ cfg.pushEvent({
85
+ feature: cfg.opts.feature,
86
+ team: cfg.opts.team,
87
+ userId: cfg.opts.userId,
88
+ provider: cfg.provider,
89
+ model,
90
+ inputTokens: 0,
91
+ outputTokens: 0,
92
+ totalTokens: 0,
93
+ latencyMs: Date.now() - start,
94
+ status: "error",
95
+ timestamp: Date.now()
96
+ });
97
+ throw err;
98
+ }
99
+ };
100
+ }
101
+ return target[prop];
102
+ }
103
+ });
104
+ }
105
+
106
+ // src/index.ts
107
+ var MAX_BUFFER_SIZE = 1e3;
108
+ var config = null;
109
+ var buffer = [];
110
+ var flushTimer = null;
111
+ var initialized = false;
112
+ var flushing = false;
113
+ function pushEvent(event) {
114
+ if (buffer.length >= MAX_BUFFER_SIZE) {
115
+ buffer.shift();
116
+ }
117
+ buffer.push(event);
118
+ }
119
+ function shouldFlush() {
120
+ return buffer.length >= (config?.batchSize ?? 50);
121
+ }
122
+ function triggerFlush() {
123
+ void flush();
124
+ }
125
+ async function flush() {
126
+ if (!config || buffer.length === 0 || flushing) return;
127
+ flushing = true;
128
+ const batch = buffer.slice();
129
+ try {
130
+ const response = await fetch(config.endpoint, {
131
+ method: "POST",
132
+ headers: {
133
+ "Content-Type": "application/json",
134
+ Authorization: `Bearer ${config.apiKey}`
135
+ },
136
+ body: JSON.stringify({ events: batch })
137
+ });
138
+ if (!response.ok) {
139
+ const error = new Error(
140
+ `Token Warden: flush failed with status ${response.status}`
141
+ );
142
+ config.onError?.(error);
143
+ return;
144
+ }
145
+ buffer.splice(0, batch.length);
146
+ } catch (err) {
147
+ config.onError?.(err);
148
+ } finally {
149
+ flushing = false;
150
+ }
151
+ }
152
+ var beforeExitHandler = () => void flush();
153
+ function scheduleFlush() {
154
+ if (flushTimer) return;
155
+ const interval = config?.flushIntervalMs ?? 5e3;
156
+ flushTimer = setInterval(() => {
157
+ void flush();
158
+ }, interval);
159
+ flushTimer.unref();
160
+ if (typeof process !== "undefined" && process.once) {
161
+ process.once("beforeExit", beforeExitHandler);
162
+ }
163
+ }
164
+ var warden = {
165
+ init(cfg) {
166
+ if (initialized) {
167
+ throw new Error(
168
+ "Token Warden: already initialized. Call warden.shutdown() before re-initializing."
169
+ );
170
+ }
171
+ if (!cfg.endpoint) {
172
+ throw new Error("Token Warden: endpoint is required in config");
173
+ }
174
+ config = cfg;
175
+ initialized = true;
176
+ scheduleFlush();
177
+ },
178
+ wrap(client, opts) {
179
+ if (!initialized || !config) {
180
+ throw new Error(
181
+ "Token Warden: call warden.init() before guardian.wrap()"
182
+ );
183
+ }
184
+ const provider = detectProvider(client);
185
+ return new Proxy(client, {
186
+ get(target, prop) {
187
+ const value = target[prop];
188
+ if (prop === "chat" && value && typeof value === "object") {
189
+ return new Proxy(value, {
190
+ get(chatTarget, chatProp) {
191
+ if (chatProp === "completions") {
192
+ const completions = chatTarget.completions;
193
+ if (completions && typeof completions === "object") {
194
+ return createMethodProxy(
195
+ completions,
196
+ {
197
+ methodName: "create",
198
+ provider,
199
+ modelExtractor: modelFromArgs,
200
+ tokenExtractor: extractCompletionTokens,
201
+ opts,
202
+ pushEvent,
203
+ shouldFlush,
204
+ triggerFlush
205
+ }
206
+ );
207
+ }
208
+ }
209
+ return chatTarget[chatProp];
210
+ }
211
+ });
212
+ }
213
+ if (prop === "messages" && value && typeof value === "object") {
214
+ return createMethodProxy(value, {
215
+ methodName: "create",
216
+ provider: provider === "unknown" ? "anthropic" : provider,
217
+ modelExtractor: modelFromArgs,
218
+ tokenExtractor: extractCompletionTokens,
219
+ opts,
220
+ pushEvent,
221
+ shouldFlush,
222
+ triggerFlush
223
+ });
224
+ }
225
+ if (prop === "getGenerativeModel" && typeof value === "function") {
226
+ return (...args) => {
227
+ const modelInstance = value.apply(target, args);
228
+ const modelName = args[0]?.model ?? "unknown";
229
+ return createMethodProxy(
230
+ modelInstance,
231
+ {
232
+ methodName: "generateContent",
233
+ provider: provider === "unknown" ? "google" : provider,
234
+ modelExtractor: () => modelName,
235
+ tokenExtractor: extractGoogleTokens,
236
+ opts,
237
+ pushEvent,
238
+ shouldFlush,
239
+ triggerFlush
240
+ }
241
+ );
242
+ };
243
+ }
244
+ if (prop === "send" && typeof value === "function" && provider === "amazon") {
245
+ return async (...args) => {
246
+ const start = Date.now();
247
+ const command = args[0];
248
+ const modelId = command?.modelId ?? command?.input?.modelId ?? "unknown";
249
+ try {
250
+ const result = await value.apply(target, args);
251
+ const usage = result.usage;
252
+ const inputTokens = usage?.inputTokens ?? 0;
253
+ const outputTokens = usage?.outputTokens ?? 0;
254
+ pushEvent({
255
+ feature: opts.feature,
256
+ team: opts.team,
257
+ userId: opts.userId,
258
+ provider,
259
+ model: modelId,
260
+ inputTokens,
261
+ outputTokens,
262
+ totalTokens: inputTokens + outputTokens,
263
+ latencyMs: Date.now() - start,
264
+ status: "success",
265
+ timestamp: Date.now()
266
+ });
267
+ if (shouldFlush()) triggerFlush();
268
+ return result;
269
+ } catch (err) {
270
+ pushEvent({
271
+ feature: opts.feature,
272
+ team: opts.team,
273
+ userId: opts.userId,
274
+ provider,
275
+ model: modelId,
276
+ inputTokens: 0,
277
+ outputTokens: 0,
278
+ totalTokens: 0,
279
+ latencyMs: Date.now() - start,
280
+ status: "error",
281
+ timestamp: Date.now()
282
+ });
283
+ throw err;
284
+ }
285
+ };
286
+ }
287
+ return value;
288
+ }
289
+ });
290
+ },
291
+ track(event) {
292
+ if (!initialized || !config) {
293
+ throw new Error(
294
+ "Token Warden: call warden.init() before guardian.track()"
295
+ );
296
+ }
297
+ if (!event.feature) {
298
+ throw new Error("Token Warden: feature is required in track event");
299
+ }
300
+ if (!event.provider) {
301
+ throw new Error("Token Warden: provider is required in track event");
302
+ }
303
+ if (!event.model) {
304
+ throw new Error("Token Warden: model is required in track event");
305
+ }
306
+ pushEvent({
307
+ ...event,
308
+ timestamp: Date.now()
309
+ });
310
+ if (shouldFlush()) triggerFlush();
311
+ },
312
+ flush,
313
+ async shutdown() {
314
+ if (flushTimer) {
315
+ clearInterval(flushTimer);
316
+ flushTimer = null;
317
+ }
318
+ if (typeof process !== "undefined") {
319
+ process.removeListener("beforeExit", beforeExitHandler);
320
+ }
321
+ flushing = false;
322
+ await flush();
323
+ buffer.length = 0;
324
+ config = null;
325
+ initialized = false;
326
+ }
327
+ };
328
+ // Annotate the CommonJS export names for ESM import in node:
329
+ 0 && (module.exports = {
330
+ warden
331
+ });
@@ -0,0 +1,43 @@
1
+ type WardenConfig = {
2
+ apiKey: string;
3
+ endpoint: string;
4
+ batchSize?: number;
5
+ flushIntervalMs?: number;
6
+ onError?: (error: unknown) => void;
7
+ };
8
+ type WrapOptions = {
9
+ feature: string;
10
+ team?: string;
11
+ userId?: string;
12
+ };
13
+ type CostEvent = {
14
+ feature: string;
15
+ team?: string;
16
+ userId?: string;
17
+ provider: string;
18
+ model: string;
19
+ inputTokens: number;
20
+ outputTokens: number;
21
+ totalTokens: number;
22
+ latencyMs: number;
23
+ status: "success" | "error";
24
+ timestamp: number;
25
+ metadata?: Record<string, unknown>;
26
+ };
27
+ type TrackEvent = Omit<CostEvent, "timestamp">;
28
+ type TokenUsage = {
29
+ inputTokens: number;
30
+ outputTokens: number;
31
+ totalTokens: number;
32
+ };
33
+
34
+ declare function flush(): Promise<void>;
35
+ declare const warden: {
36
+ init(cfg: WardenConfig): void;
37
+ wrap<T>(client: T, opts: WrapOptions): T;
38
+ track(event: TrackEvent): void;
39
+ flush: typeof flush;
40
+ shutdown(): Promise<void>;
41
+ };
42
+
43
+ export { type CostEvent, type TokenUsage, type TrackEvent, type WardenConfig, type WrapOptions, warden };
@@ -0,0 +1,43 @@
1
+ type WardenConfig = {
2
+ apiKey: string;
3
+ endpoint: string;
4
+ batchSize?: number;
5
+ flushIntervalMs?: number;
6
+ onError?: (error: unknown) => void;
7
+ };
8
+ type WrapOptions = {
9
+ feature: string;
10
+ team?: string;
11
+ userId?: string;
12
+ };
13
+ type CostEvent = {
14
+ feature: string;
15
+ team?: string;
16
+ userId?: string;
17
+ provider: string;
18
+ model: string;
19
+ inputTokens: number;
20
+ outputTokens: number;
21
+ totalTokens: number;
22
+ latencyMs: number;
23
+ status: "success" | "error";
24
+ timestamp: number;
25
+ metadata?: Record<string, unknown>;
26
+ };
27
+ type TrackEvent = Omit<CostEvent, "timestamp">;
28
+ type TokenUsage = {
29
+ inputTokens: number;
30
+ outputTokens: number;
31
+ totalTokens: number;
32
+ };
33
+
34
+ declare function flush(): Promise<void>;
35
+ declare const warden: {
36
+ init(cfg: WardenConfig): void;
37
+ wrap<T>(client: T, opts: WrapOptions): T;
38
+ track(event: TrackEvent): void;
39
+ flush: typeof flush;
40
+ shutdown(): Promise<void>;
41
+ };
42
+
43
+ export { type CostEvent, type TokenUsage, type TrackEvent, type WardenConfig, type WrapOptions, warden };
package/dist/index.js ADDED
@@ -0,0 +1,304 @@
1
+ // src/providers.ts
2
+ function detectProvider(client) {
3
+ const proto = Object.getPrototypeOf(client);
4
+ const name = proto?.constructor?.name?.toLowerCase() ?? "";
5
+ if (name.includes("openai")) return "openai";
6
+ if (name.includes("anthropic")) return "anthropic";
7
+ if (name.includes("google") || name.includes("generativeai")) return "google";
8
+ if (name.includes("deepseek")) return "deepseek";
9
+ if (name.includes("mistral")) return "mistral";
10
+ if (name.includes("xai")) return "xai";
11
+ if (name.includes("cohere")) return "cohere";
12
+ if (name.includes("bedrock")) return "amazon";
13
+ return "unknown";
14
+ }
15
+
16
+ // src/proxy.ts
17
+ function extractCompletionTokens(result) {
18
+ const usage = result.usage;
19
+ const inputTokens = usage?.prompt_tokens ?? usage?.input_tokens ?? 0;
20
+ const outputTokens = usage?.completion_tokens ?? usage?.output_tokens ?? 0;
21
+ return { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
22
+ }
23
+ function extractGoogleTokens(result) {
24
+ const usageMetadata = result.response?.usageMetadata;
25
+ const inputTokens = usageMetadata?.promptTokenCount ?? 0;
26
+ const outputTokens = usageMetadata?.candidatesTokenCount ?? 0;
27
+ const totalTokens = usageMetadata?.totalTokenCount ?? inputTokens + outputTokens;
28
+ return { inputTokens, outputTokens, totalTokens };
29
+ }
30
+ function modelFromArgs(args) {
31
+ return args[0]?.model ?? "unknown";
32
+ }
33
+ function createMethodProxy(original, cfg) {
34
+ return new Proxy(original, {
35
+ get(target, prop) {
36
+ if (prop === cfg.methodName) {
37
+ return async (...args) => {
38
+ const start = Date.now();
39
+ const model = cfg.modelExtractor(args);
40
+ try {
41
+ const fn = target[cfg.methodName];
42
+ const result = await fn.apply(target, args);
43
+ const usage = cfg.tokenExtractor(result);
44
+ cfg.pushEvent({
45
+ feature: cfg.opts.feature,
46
+ team: cfg.opts.team,
47
+ userId: cfg.opts.userId,
48
+ provider: cfg.provider,
49
+ model,
50
+ ...usage,
51
+ latencyMs: Date.now() - start,
52
+ status: "success",
53
+ timestamp: Date.now()
54
+ });
55
+ if (cfg.shouldFlush()) cfg.triggerFlush();
56
+ return result;
57
+ } catch (err) {
58
+ cfg.pushEvent({
59
+ feature: cfg.opts.feature,
60
+ team: cfg.opts.team,
61
+ userId: cfg.opts.userId,
62
+ provider: cfg.provider,
63
+ model,
64
+ inputTokens: 0,
65
+ outputTokens: 0,
66
+ totalTokens: 0,
67
+ latencyMs: Date.now() - start,
68
+ status: "error",
69
+ timestamp: Date.now()
70
+ });
71
+ throw err;
72
+ }
73
+ };
74
+ }
75
+ return target[prop];
76
+ }
77
+ });
78
+ }
79
+
80
+ // src/index.ts
81
+ var MAX_BUFFER_SIZE = 1e3;
82
+ var config = null;
83
+ var buffer = [];
84
+ var flushTimer = null;
85
+ var initialized = false;
86
+ var flushing = false;
87
+ function pushEvent(event) {
88
+ if (buffer.length >= MAX_BUFFER_SIZE) {
89
+ buffer.shift();
90
+ }
91
+ buffer.push(event);
92
+ }
93
+ function shouldFlush() {
94
+ return buffer.length >= (config?.batchSize ?? 50);
95
+ }
96
+ function triggerFlush() {
97
+ void flush();
98
+ }
99
+ async function flush() {
100
+ if (!config || buffer.length === 0 || flushing) return;
101
+ flushing = true;
102
+ const batch = buffer.slice();
103
+ try {
104
+ const response = await fetch(config.endpoint, {
105
+ method: "POST",
106
+ headers: {
107
+ "Content-Type": "application/json",
108
+ Authorization: `Bearer ${config.apiKey}`
109
+ },
110
+ body: JSON.stringify({ events: batch })
111
+ });
112
+ if (!response.ok) {
113
+ const error = new Error(
114
+ `Token Warden: flush failed with status ${response.status}`
115
+ );
116
+ config.onError?.(error);
117
+ return;
118
+ }
119
+ buffer.splice(0, batch.length);
120
+ } catch (err) {
121
+ config.onError?.(err);
122
+ } finally {
123
+ flushing = false;
124
+ }
125
+ }
126
+ var beforeExitHandler = () => void flush();
127
+ function scheduleFlush() {
128
+ if (flushTimer) return;
129
+ const interval = config?.flushIntervalMs ?? 5e3;
130
+ flushTimer = setInterval(() => {
131
+ void flush();
132
+ }, interval);
133
+ flushTimer.unref();
134
+ if (typeof process !== "undefined" && process.once) {
135
+ process.once("beforeExit", beforeExitHandler);
136
+ }
137
+ }
138
+ var warden = {
139
+ init(cfg) {
140
+ if (initialized) {
141
+ throw new Error(
142
+ "Token Warden: already initialized. Call warden.shutdown() before re-initializing."
143
+ );
144
+ }
145
+ if (!cfg.endpoint) {
146
+ throw new Error("Token Warden: endpoint is required in config");
147
+ }
148
+ config = cfg;
149
+ initialized = true;
150
+ scheduleFlush();
151
+ },
152
+ wrap(client, opts) {
153
+ if (!initialized || !config) {
154
+ throw new Error(
155
+ "Token Warden: call warden.init() before guardian.wrap()"
156
+ );
157
+ }
158
+ const provider = detectProvider(client);
159
+ return new Proxy(client, {
160
+ get(target, prop) {
161
+ const value = target[prop];
162
+ if (prop === "chat" && value && typeof value === "object") {
163
+ return new Proxy(value, {
164
+ get(chatTarget, chatProp) {
165
+ if (chatProp === "completions") {
166
+ const completions = chatTarget.completions;
167
+ if (completions && typeof completions === "object") {
168
+ return createMethodProxy(
169
+ completions,
170
+ {
171
+ methodName: "create",
172
+ provider,
173
+ modelExtractor: modelFromArgs,
174
+ tokenExtractor: extractCompletionTokens,
175
+ opts,
176
+ pushEvent,
177
+ shouldFlush,
178
+ triggerFlush
179
+ }
180
+ );
181
+ }
182
+ }
183
+ return chatTarget[chatProp];
184
+ }
185
+ });
186
+ }
187
+ if (prop === "messages" && value && typeof value === "object") {
188
+ return createMethodProxy(value, {
189
+ methodName: "create",
190
+ provider: provider === "unknown" ? "anthropic" : provider,
191
+ modelExtractor: modelFromArgs,
192
+ tokenExtractor: extractCompletionTokens,
193
+ opts,
194
+ pushEvent,
195
+ shouldFlush,
196
+ triggerFlush
197
+ });
198
+ }
199
+ if (prop === "getGenerativeModel" && typeof value === "function") {
200
+ return (...args) => {
201
+ const modelInstance = value.apply(target, args);
202
+ const modelName = args[0]?.model ?? "unknown";
203
+ return createMethodProxy(
204
+ modelInstance,
205
+ {
206
+ methodName: "generateContent",
207
+ provider: provider === "unknown" ? "google" : provider,
208
+ modelExtractor: () => modelName,
209
+ tokenExtractor: extractGoogleTokens,
210
+ opts,
211
+ pushEvent,
212
+ shouldFlush,
213
+ triggerFlush
214
+ }
215
+ );
216
+ };
217
+ }
218
+ if (prop === "send" && typeof value === "function" && provider === "amazon") {
219
+ return async (...args) => {
220
+ const start = Date.now();
221
+ const command = args[0];
222
+ const modelId = command?.modelId ?? command?.input?.modelId ?? "unknown";
223
+ try {
224
+ const result = await value.apply(target, args);
225
+ const usage = result.usage;
226
+ const inputTokens = usage?.inputTokens ?? 0;
227
+ const outputTokens = usage?.outputTokens ?? 0;
228
+ pushEvent({
229
+ feature: opts.feature,
230
+ team: opts.team,
231
+ userId: opts.userId,
232
+ provider,
233
+ model: modelId,
234
+ inputTokens,
235
+ outputTokens,
236
+ totalTokens: inputTokens + outputTokens,
237
+ latencyMs: Date.now() - start,
238
+ status: "success",
239
+ timestamp: Date.now()
240
+ });
241
+ if (shouldFlush()) triggerFlush();
242
+ return result;
243
+ } catch (err) {
244
+ pushEvent({
245
+ feature: opts.feature,
246
+ team: opts.team,
247
+ userId: opts.userId,
248
+ provider,
249
+ model: modelId,
250
+ inputTokens: 0,
251
+ outputTokens: 0,
252
+ totalTokens: 0,
253
+ latencyMs: Date.now() - start,
254
+ status: "error",
255
+ timestamp: Date.now()
256
+ });
257
+ throw err;
258
+ }
259
+ };
260
+ }
261
+ return value;
262
+ }
263
+ });
264
+ },
265
+ track(event) {
266
+ if (!initialized || !config) {
267
+ throw new Error(
268
+ "Token Warden: call warden.init() before guardian.track()"
269
+ );
270
+ }
271
+ if (!event.feature) {
272
+ throw new Error("Token Warden: feature is required in track event");
273
+ }
274
+ if (!event.provider) {
275
+ throw new Error("Token Warden: provider is required in track event");
276
+ }
277
+ if (!event.model) {
278
+ throw new Error("Token Warden: model is required in track event");
279
+ }
280
+ pushEvent({
281
+ ...event,
282
+ timestamp: Date.now()
283
+ });
284
+ if (shouldFlush()) triggerFlush();
285
+ },
286
+ flush,
287
+ async shutdown() {
288
+ if (flushTimer) {
289
+ clearInterval(flushTimer);
290
+ flushTimer = null;
291
+ }
292
+ if (typeof process !== "undefined") {
293
+ process.removeListener("beforeExit", beforeExitHandler);
294
+ }
295
+ flushing = false;
296
+ await flush();
297
+ buffer.length = 0;
298
+ config = null;
299
+ initialized = false;
300
+ }
301
+ };
302
+ export {
303
+ warden
304
+ };
package/package.json ADDED
@@ -0,0 +1,60 @@
1
+ {
2
+ "name": "token-warden",
3
+ "version": "1.0.0",
4
+ "description": "Lightweight LLM cost tracking SDK — auto-instrument OpenAI, Anthropic, Google, Bedrock, and more",
5
+ "license": "MIT",
6
+ "type": "module",
7
+ "sideEffects": false,
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js",
12
+ "require": "./dist/index.cjs"
13
+ }
14
+ },
15
+ "main": "./dist/index.cjs",
16
+ "module": "./dist/index.js",
17
+ "types": "./dist/index.d.ts",
18
+ "files": [
19
+ "dist"
20
+ ],
21
+ "repository": {
22
+ "type": "git",
23
+ "url": "https://github.com/Flagship-Software/token-warden.git",
24
+ "directory": "packages/sdk"
25
+ },
26
+ "homepage": "https://github.com/Flagship-Software/token-warden#readme",
27
+ "bugs": {
28
+ "url": "https://github.com/Flagship-Software/token-warden/issues"
29
+ },
30
+ "keywords": [
31
+ "llm",
32
+ "cost",
33
+ "monitoring",
34
+ "openai",
35
+ "anthropic",
36
+ "google",
37
+ "bedrock",
38
+ "ai",
39
+ "tokens",
40
+ "usage",
41
+ "budget",
42
+ "tracking"
43
+ ],
44
+ "engines": {
45
+ "node": ">=18"
46
+ },
47
+ "scripts": {
48
+ "build": "tsup",
49
+ "test": "vitest run",
50
+ "test:watch": "vitest",
51
+ "typecheck": "tsc --noEmit",
52
+ "prepublishOnly": "cp ../../README.md ./README.md && npm run build"
53
+ },
54
+ "devDependencies": {
55
+ "@types/node": "^25.5.0",
56
+ "tsup": "^8.0.0",
57
+ "typescript": "^5.7.0",
58
+ "vitest": "^3.2.0"
59
+ }
60
+ }