token-warden 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +133 -0
- package/dist/index.cjs +331 -0
- package/dist/index.d.cts +43 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.js +304 -0
- package/package.json +60 -0
package/README.md
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Token Warden
|
|
2
|
+
|
|
3
|
+
Track LLM costs per feature, team, and user — with budget alerts and anomaly detection.
|
|
4
|
+
|
|
5
|
+
[](https://www.npmjs.com/package/token-warden)
|
|
6
|
+
[](https://github.com/Flagship-Software/token-warden/actions/workflows/ci.yml)
|
|
7
|
+
[](https://opensource.org/licenses/MIT)
|
|
8
|
+
|
|
9
|
+
## Why
|
|
10
|
+
|
|
11
|
+
LLM API costs are opaque and hard to attribute. When multiple features share the same API keys, you can't tell which feature is responsible for cost spikes. Token Warden wraps your LLM clients to automatically capture token usage and attribute costs per feature.
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm install token-warden
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
```typescript
|
|
20
|
+
import { warden } from "token-warden";
|
|
21
|
+
import OpenAI from "openai";
|
|
22
|
+
|
|
23
|
+
// Initialize Token Warden
|
|
24
|
+
warden.init({
|
|
25
|
+
apiKey: "tw_your_api_key",
|
|
26
|
+
endpoint: "https://your-token-warden-endpoint.com/v1/events",
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
// Wrap your OpenAI client — tracking happens automatically
|
|
30
|
+
const openai = warden.wrap(new OpenAI(), {
|
|
31
|
+
feature: "chat-support",
|
|
32
|
+
team: "cx-team",
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
// Use the client as usual
|
|
36
|
+
const response = await openai.chat.completions.create({
|
|
37
|
+
model: "gpt-4o",
|
|
38
|
+
messages: [{ role: "user", content: "Hello!" }],
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
// In serverless environments, flush before the function exits
|
|
42
|
+
await warden.flush();
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Manual Tracking
|
|
46
|
+
|
|
47
|
+
For custom providers or direct HTTP calls that bypass SDK clients, use `warden.track()` to record usage manually.
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
warden.track({
|
|
51
|
+
provider: "together",
|
|
52
|
+
model: "llama-3.1-70b",
|
|
53
|
+
feature: "document-summary",
|
|
54
|
+
inputTokens: 1200,
|
|
55
|
+
outputTokens: 350,
|
|
56
|
+
durationMs: 820,
|
|
57
|
+
});
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Supported Providers
|
|
61
|
+
|
|
62
|
+
| Provider | SDK Pattern | Auto-detected |
|
|
63
|
+
| -------------------- | ----------------------- | ------------- |
|
|
64
|
+
| OpenAI | `new OpenAI()` | ✓ |
|
|
65
|
+
| Anthropic | `new Anthropic()` | ✓ |
|
|
66
|
+
| Google Generative AI | `new GoogleGenerativeAI()` | ✓ |
|
|
67
|
+
| Amazon Bedrock | `new BedrockRuntimeClient()` | ✓ |
|
|
68
|
+
| DeepSeek | `new OpenAI()` (compatible) | ✓ |
|
|
69
|
+
| Mistral | `new Mistral()` | ✓ |
|
|
70
|
+
| xAI | `new OpenAI()` (compatible) | ✓ |
|
|
71
|
+
| Cohere | `new CohereClient()` | ✓ |
|
|
72
|
+
|
|
73
|
+
## API Reference
|
|
74
|
+
|
|
75
|
+
### `warden.init(config)`
|
|
76
|
+
|
|
77
|
+
Initialize Token Warden. Call once at application startup.
|
|
78
|
+
|
|
79
|
+
| Parameter | Type | Required | Description |
|
|
80
|
+
| ----------- | -------- | -------- | ------------------------------------------------ |
|
|
81
|
+
| `apiKey` | `string` | Yes | Your Token Warden API key |
|
|
82
|
+
| `endpoint` | `string` | Yes | URL of the Token Warden ingestion endpoint |
|
|
83
|
+
| `flushIntervalMs` | `number` | No | How often to flush events (default: `5000`) |
|
|
84
|
+
| `batchSize` | `number` | No | Events queued before an automatic flush (default: `50`) |
|
|
85
|
+
| `debug` | `boolean` | No | Enable debug logging (default: `false`) |
|
|
86
|
+
|
|
87
|
+
### `warden.wrap(client, opts)`
|
|
88
|
+
|
|
89
|
+
Wrap an LLM SDK client to automatically capture usage.
|
|
90
|
+
|
|
91
|
+
| Parameter | Type | Required | Description |
|
|
92
|
+
| ----------- | -------- | -------- | ------------------------------------------------ |
|
|
93
|
+
| `client` | `object` | Yes | An LLM SDK client instance |
|
|
94
|
+
| `opts.feature` | `string` | Yes | Feature name for cost attribution |
|
|
95
|
+
| `opts.team` | `string` | No | Team name for cost attribution |
|
|
96
|
+
| `opts.userId` | `string` | No | User identifier for cost attribution |
|
|
97
|
+
| `opts.metadata` | `Record<string, string>` | No | Additional key-value metadata |
|
|
98
|
+
|
|
99
|
+
### `warden.track(event)`
|
|
100
|
+
|
|
101
|
+
Manually record a usage event.
|
|
102
|
+
|
|
103
|
+
| Parameter | Type | Required | Description |
|
|
104
|
+
| --------------- | -------- | -------- | -------------------------------------------- |
|
|
105
|
+
| `provider` | `string` | Yes | LLM provider name |
|
|
106
|
+
| `model` | `string` | Yes | Model identifier |
|
|
107
|
+
| `feature` | `string` | Yes | Feature name for cost attribution |
|
|
108
|
+
| `team` | `string` | No | Team name for cost attribution |
|
|
109
|
+
| `userId` | `string` | No | User identifier for cost attribution |
|
|
110
|
+
| `inputTokens` | `number` | Yes | Number of input tokens consumed |
|
|
111
|
+
| `outputTokens` | `number` | Yes | Number of output tokens generated |
|
|
112
|
+
| `durationMs` | `number` | No | Request duration in milliseconds |
|
|
113
|
+
| `metadata` | `Record<string, string>` | No | Additional key-value metadata |
|
|
114
|
+
|
|
115
|
+
### `warden.flush()`
|
|
116
|
+
|
|
117
|
+
Flush all pending events to the ingestion endpoint. Returns a promise that resolves when all events have been sent. Call this before process exit in serverless environments.
|
|
118
|
+
|
|
119
|
+
### `warden.shutdown()`
|
|
120
|
+
|
|
121
|
+
Flush pending events and stop the background flush interval. Call this during graceful application shutdown.
|
|
122
|
+
|
|
123
|
+
## Dashboard (Coming Soon)
|
|
124
|
+
|
|
125
|
+
A full-featured dashboard with budget alerts, anomaly detection, and per-feature cost drill-down is in development. Star this repo to get notified when it launches.
|
|
126
|
+
|
|
127
|
+
## Contributing
|
|
128
|
+
|
|
129
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
|
|
133
|
+
MIT — see [LICENSE](LICENSE) for details.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"use strict";
// esbuild-generated CommonJS interop helpers: expose the module's ESM
// exports through lazy, enumerable getters on module.exports.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Define each export as a getter so values declared further down the file
// (e.g. `warden`) are resolved lazily at access time.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copy own properties from `from` onto `to` (skipping `except`), preserving
// enumerability by re-exposing each property through a getter.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Tag the namespace object as an ES module and hand it to module.exports so
// `import` interop in Node resolves named exports correctly.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var index_exports = {};
__export(index_exports, {
  warden: () => warden
});
module.exports = __toCommonJS(index_exports);
|
|
26
|
+
|
|
27
|
+
// src/providers.ts
|
|
28
|
+
/**
 * Best-effort identification of the LLM vendor behind an SDK client,
 * keyed off the client's constructor name (case-insensitive substring match).
 * Returns "unknown" when no marker matches or the client has no prototype.
 */
function detectProvider(client) {
  const ctorName =
    Object.getPrototypeOf(client)?.constructor?.name?.toLowerCase() ?? "";
  // Ordered marker table — earlier entries win, matching the original
  // if-chain priority.
  const markers = [
    ["openai", "openai"],
    ["anthropic", "anthropic"],
    ["google", "google"],
    ["generativeai", "google"],
    ["deepseek", "deepseek"],
    ["mistral", "mistral"],
    ["xai", "xai"],
    ["cohere", "cohere"],
    ["bedrock", "amazon"],
  ];
  for (const [needle, provider] of markers) {
    if (ctorName.includes(needle)) {
      return provider;
    }
  }
  return "unknown";
}
|
|
41
|
+
|
|
42
|
+
// src/proxy.ts
|
|
43
|
+
/**
 * Pull token counts out of an OpenAI/Anthropic-style completion response.
 * Accepts either snake_case naming convention (`prompt_tokens`/`completion_tokens`
 * or `input_tokens`/`output_tokens`); missing counts default to 0.
 */
function extractCompletionTokens(result) {
  const { usage } = result;
  const prompt = usage?.prompt_tokens ?? usage?.input_tokens ?? 0;
  const completion = usage?.completion_tokens ?? usage?.output_tokens ?? 0;
  return {
    inputTokens: prompt,
    outputTokens: completion,
    totalTokens: prompt + completion,
  };
}
|
|
49
|
+
/**
 * Pull token counts out of a Google Generative AI `generateContent` result.
 * Prefers the SDK-reported totalTokenCount; falls back to the sum of parts.
 * Missing metadata yields all-zero counts.
 */
function extractGoogleTokens(result) {
  const meta = result.response?.usageMetadata ?? {};
  const inputTokens = meta.promptTokenCount ?? 0;
  const outputTokens = meta.candidatesTokenCount ?? 0;
  return {
    inputTokens,
    outputTokens,
    totalTokens: meta.totalTokenCount ?? inputTokens + outputTokens,
  };
}
|
|
56
|
+
/** Read the `model` field from the first call argument, or "unknown". */
function modelFromArgs(args) {
  const [request] = args;
  return request?.model ?? "unknown";
}
|
|
59
|
+
/**
 * Wrap `original` in a Proxy that intercepts `cfg.methodName`, timing the
 * call and recording a usage event (via cfg.pushEvent) on both success and
 * failure. All other properties pass through untouched. The wrapped method
 * re-throws the provider's error after recording an all-zero error event.
 */
function createMethodProxy(original, cfg) {
  // Builds the instrumented async replacement for cfg.methodName.
  const instrument = (target) => async (...callArgs) => {
    const startedAt = Date.now();
    const model = cfg.modelExtractor(callArgs);
    // Attribution fields shared by success and error events.
    const baseEvent = () => ({
      feature: cfg.opts.feature,
      team: cfg.opts.team,
      userId: cfg.opts.userId,
      provider: cfg.provider,
      model,
    });
    try {
      const result = await target[cfg.methodName].apply(target, callArgs);
      cfg.pushEvent({
        ...baseEvent(),
        ...cfg.tokenExtractor(result),
        latencyMs: Date.now() - startedAt,
        status: "success",
        timestamp: Date.now(),
      });
      if (cfg.shouldFlush()) cfg.triggerFlush();
      return result;
    } catch (err) {
      cfg.pushEvent({
        ...baseEvent(),
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        latencyMs: Date.now() - startedAt,
        status: "error",
        timestamp: Date.now(),
      });
      throw err;
    }
  };
  return new Proxy(original, {
    get(target, prop) {
      return prop === cfg.methodName ? instrument(target) : target[prop];
    },
  });
}
|
|
105
|
+
|
|
106
|
+
// src/index.ts
|
|
107
|
+
var MAX_BUFFER_SIZE = 1e3;
var config = null;
var buffer = [];
var flushTimer = null;
var initialized = false;
var flushing = false;
// Queue an event, evicting the oldest entry once the hard cap is hit so an
// unreachable endpoint can never grow memory without bound.
function pushEvent(event) {
  if (buffer.length >= MAX_BUFFER_SIZE) {
    buffer.shift();
  }
  buffer.push(event);
}
// True when enough events are queued to warrant an immediate flush.
function shouldFlush() {
  return buffer.length >= (config?.batchSize ?? 50);
}
// Fire-and-forget flush; failures surface through config.onError inside flush().
function triggerFlush() {
  void flush();
}
// POST all queued events to the ingestion endpoint. Events are removed from
// the queue only after a 2xx response, so a failed batch is retried on the
// next flush. Concurrent calls are coalesced via the `flushing` latch.
async function flush() {
  if (!config || buffer.length === 0 || flushing) return;
  flushing = true;
  const batch = buffer.slice();
  try {
    const response = await fetch(config.endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${config.apiKey}`
      },
      body: JSON.stringify({ events: batch })
    });
    if (!response.ok) {
      const error = new Error(
        `Token Warden: flush failed with status ${response.status}`
      );
      config.onError?.(error);
      return;
    }
    buffer.splice(0, batch.length);
  } catch (err) {
    config.onError?.(err);
  } finally {
    flushing = false;
  }
}
var beforeExitHandler = () => void flush();
// Start the periodic background flush and register a best-effort flush on
// process exit. Idempotent while a timer is already running.
function scheduleFlush() {
  if (flushTimer) return;
  const interval = config?.flushIntervalMs ?? 5e3;
  flushTimer = setInterval(() => {
    void flush();
  }, interval);
  // FIX: guard unref — in browsers/workers setInterval returns a number,
  // which has no unref() method; calling it unconditionally threw there.
  flushTimer.unref?.();
  if (typeof process !== "undefined" && process.once) {
    process.once("beforeExit", beforeExitHandler);
  }
}
var warden = {
  // Initialize the SDK. Must run before wrap()/track(); throws when called
  // twice without an intervening shutdown(), or when required config is missing.
  init(cfg) {
    if (initialized) {
      throw new Error(
        "Token Warden: already initialized. Call warden.shutdown() before re-initializing."
      );
    }
    if (!cfg.endpoint) {
      throw new Error("Token Warden: endpoint is required in config");
    }
    if (!cfg.apiKey) {
      // FIX: apiKey is documented and typed as required but was never
      // validated, silently producing "Authorization: Bearer undefined".
      throw new Error("Token Warden: apiKey is required in config");
    }
    config = cfg;
    initialized = true;
    scheduleFlush();
  },
  // Wrap an LLM SDK client in a Proxy that records token usage per call.
  // The wrapped client behaves exactly like the original otherwise.
  wrap(client, opts) {
    if (!initialized || !config) {
      // FIX: the message referenced a non-existent "guardian" object.
      throw new Error(
        "Token Warden: call warden.init() before warden.wrap()"
      );
    }
    const provider = detectProvider(client);
    return new Proxy(client, {
      get(target, prop) {
        const value = target[prop];
        // OpenAI-style clients: client.chat.completions.create(...)
        if (prop === "chat" && value && typeof value === "object") {
          return new Proxy(value, {
            get(chatTarget, chatProp) {
              if (chatProp === "completions") {
                const completions = chatTarget.completions;
                if (completions && typeof completions === "object") {
                  return createMethodProxy(
                    completions,
                    {
                      methodName: "create",
                      provider,
                      modelExtractor: modelFromArgs,
                      tokenExtractor: extractCompletionTokens,
                      opts,
                      pushEvent,
                      shouldFlush,
                      triggerFlush
                    }
                  );
                }
              }
              return chatTarget[chatProp];
            }
          });
        }
        // Anthropic-style clients: client.messages.create(...)
        if (prop === "messages" && value && typeof value === "object") {
          return createMethodProxy(value, {
            methodName: "create",
            provider: provider === "unknown" ? "anthropic" : provider,
            modelExtractor: modelFromArgs,
            tokenExtractor: extractCompletionTokens,
            opts,
            pushEvent,
            shouldFlush,
            triggerFlush
          });
        }
        // Google Generative AI: client.getGenerativeModel({...}).generateContent(...)
        if (prop === "getGenerativeModel" && typeof value === "function") {
          return (...args) => {
            const modelInstance = value.apply(target, args);
            const modelName = args[0]?.model ?? "unknown";
            return createMethodProxy(
              modelInstance,
              {
                methodName: "generateContent",
                provider: provider === "unknown" ? "google" : provider,
                modelExtractor: () => modelName,
                tokenExtractor: extractGoogleTokens,
                opts,
                pushEvent,
                shouldFlush,
                triggerFlush
              }
            );
          };
        }
        // Amazon Bedrock: client.send(command) — instrumented inline because
        // the command object (not the call args) carries the model id.
        if (prop === "send" && typeof value === "function" && provider === "amazon") {
          return async (...args) => {
            const start = Date.now();
            const command = args[0];
            const modelId = command?.modelId ?? command?.input?.modelId ?? "unknown";
            try {
              const result = await value.apply(target, args);
              const usage = result.usage;
              const inputTokens = usage?.inputTokens ?? 0;
              const outputTokens = usage?.outputTokens ?? 0;
              pushEvent({
                feature: opts.feature,
                team: opts.team,
                userId: opts.userId,
                provider,
                model: modelId,
                inputTokens,
                outputTokens,
                totalTokens: inputTokens + outputTokens,
                latencyMs: Date.now() - start,
                status: "success",
                timestamp: Date.now()
              });
              if (shouldFlush()) triggerFlush();
              return result;
            } catch (err) {
              pushEvent({
                feature: opts.feature,
                team: opts.team,
                userId: opts.userId,
                provider,
                model: modelId,
                inputTokens: 0,
                outputTokens: 0,
                totalTokens: 0,
                latencyMs: Date.now() - start,
                status: "error",
                timestamp: Date.now()
              });
              throw err;
            }
          };
        }
        return value;
      }
    });
  },
  // Manually record a usage event for providers without SDK wrapping.
  track(event) {
    if (!initialized || !config) {
      // FIX: the message referenced a non-existent "guardian" object.
      throw new Error(
        "Token Warden: call warden.init() before warden.track()"
      );
    }
    if (!event.feature) {
      throw new Error("Token Warden: feature is required in track event");
    }
    if (!event.provider) {
      throw new Error("Token Warden: provider is required in track event");
    }
    if (!event.model) {
      throw new Error("Token Warden: model is required in track event");
    }
    pushEvent({
      ...event,
      timestamp: Date.now()
    });
    if (shouldFlush()) triggerFlush();
  },
  // Flush all pending events; resolves once the batch attempt completes.
  flush,
  // Stop the background timer, flush remaining events, and reset all state
  // so init() may be called again.
  async shutdown() {
    if (flushTimer) {
      clearInterval(flushTimer);
      flushTimer = null;
    }
    // FIX: guard removeListener for non-Node runtimes exposing a partial
    // `process` shim (the old code only checked that `process` existed).
    if (typeof process !== "undefined" && typeof process.removeListener === "function") {
      process.removeListener("beforeExit", beforeExitHandler);
    }
    flushing = false;
    await flush();
    buffer.length = 0;
    config = null;
    initialized = false;
  }
};
|
|
328
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
329
|
+
0 && (module.exports = {
|
|
330
|
+
warden
|
|
331
|
+
});
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/** Configuration accepted by warden.init(). */
type WardenConfig = {
  /** API key sent as a Bearer token with every ingestion request. */
  apiKey: string;
  /** URL of the Token Warden ingestion endpoint (receives POSTed event batches). */
  endpoint: string;
  /** Queued-event count that triggers an automatic flush (default: 50). */
  batchSize?: number;
  /** Background flush period in milliseconds (default: 5000). */
  flushIntervalMs?: number;
  /** Invoked with any flush/network error; without it, errors are dropped silently. */
  onError?: (error: unknown) => void;
};
/** Attribution labels applied to every event captured by a wrapped client. */
type WrapOptions = {
  feature: string;
  team?: string;
  userId?: string;
};
/** A single usage/cost record as sent to the ingestion endpoint. */
type CostEvent = {
  feature: string;
  team?: string;
  userId?: string;
  provider: string;
  model: string;
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  latencyMs: number;
  status: "success" | "error";
  /** Epoch milliseconds, assigned by the SDK when the event is recorded. */
  timestamp: number;
  metadata?: Record<string, unknown>;
};
/** Event shape accepted by warden.track(); the timestamp is added automatically. */
type TrackEvent = Omit<CostEvent, "timestamp">;
/** Token counts extracted from a provider response. */
type TokenUsage = {
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
};

declare function flush(): Promise<void>;
declare const warden: {
  init(cfg: WardenConfig): void;
  wrap<T>(client: T, opts: WrapOptions): T;
  track(event: TrackEvent): void;
  flush: typeof flush;
  shutdown(): Promise<void>;
};

export { type CostEvent, type TokenUsage, type TrackEvent, type WardenConfig, type WrapOptions, warden };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/** Configuration accepted by warden.init(). */
type WardenConfig = {
  /** API key sent as a Bearer token with every ingestion request. */
  apiKey: string;
  /** URL of the Token Warden ingestion endpoint (receives POSTed event batches). */
  endpoint: string;
  /** Queued-event count that triggers an automatic flush (default: 50). */
  batchSize?: number;
  /** Background flush period in milliseconds (default: 5000). */
  flushIntervalMs?: number;
  /** Invoked with any flush/network error; without it, errors are dropped silently. */
  onError?: (error: unknown) => void;
};
/** Attribution labels applied to every event captured by a wrapped client. */
type WrapOptions = {
  feature: string;
  team?: string;
  userId?: string;
};
/** A single usage/cost record as sent to the ingestion endpoint. */
type CostEvent = {
  feature: string;
  team?: string;
  userId?: string;
  provider: string;
  model: string;
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  latencyMs: number;
  status: "success" | "error";
  /** Epoch milliseconds, assigned by the SDK when the event is recorded. */
  timestamp: number;
  metadata?: Record<string, unknown>;
};
/** Event shape accepted by warden.track(); the timestamp is added automatically. */
type TrackEvent = Omit<CostEvent, "timestamp">;
/** Token counts extracted from a provider response. */
type TokenUsage = {
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
};

declare function flush(): Promise<void>;
declare const warden: {
  init(cfg: WardenConfig): void;
  wrap<T>(client: T, opts: WrapOptions): T;
  track(event: TrackEvent): void;
  flush: typeof flush;
  shutdown(): Promise<void>;
};

export { type CostEvent, type TokenUsage, type TrackEvent, type WardenConfig, type WrapOptions, warden };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
// src/providers.ts
|
|
2
|
+
/**
 * Best-effort identification of the LLM vendor behind an SDK client,
 * keyed off the client's constructor name (case-insensitive substring match).
 * Returns "unknown" when no marker matches or the client has no prototype.
 */
function detectProvider(client) {
  const ctorName =
    Object.getPrototypeOf(client)?.constructor?.name?.toLowerCase() ?? "";
  // Ordered marker table — earlier entries win, matching the original
  // if-chain priority.
  const markers = [
    ["openai", "openai"],
    ["anthropic", "anthropic"],
    ["google", "google"],
    ["generativeai", "google"],
    ["deepseek", "deepseek"],
    ["mistral", "mistral"],
    ["xai", "xai"],
    ["cohere", "cohere"],
    ["bedrock", "amazon"],
  ];
  for (const [needle, provider] of markers) {
    if (ctorName.includes(needle)) {
      return provider;
    }
  }
  return "unknown";
}
|
|
15
|
+
|
|
16
|
+
// src/proxy.ts
|
|
17
|
+
/**
 * Pull token counts out of an OpenAI/Anthropic-style completion response.
 * Accepts either snake_case naming convention (`prompt_tokens`/`completion_tokens`
 * or `input_tokens`/`output_tokens`); missing counts default to 0.
 */
function extractCompletionTokens(result) {
  const { usage } = result;
  const prompt = usage?.prompt_tokens ?? usage?.input_tokens ?? 0;
  const completion = usage?.completion_tokens ?? usage?.output_tokens ?? 0;
  return {
    inputTokens: prompt,
    outputTokens: completion,
    totalTokens: prompt + completion,
  };
}
|
|
23
|
+
/**
 * Pull token counts out of a Google Generative AI `generateContent` result.
 * Prefers the SDK-reported totalTokenCount; falls back to the sum of parts.
 * Missing metadata yields all-zero counts.
 */
function extractGoogleTokens(result) {
  const meta = result.response?.usageMetadata ?? {};
  const inputTokens = meta.promptTokenCount ?? 0;
  const outputTokens = meta.candidatesTokenCount ?? 0;
  return {
    inputTokens,
    outputTokens,
    totalTokens: meta.totalTokenCount ?? inputTokens + outputTokens,
  };
}
|
|
30
|
+
/** Read the `model` field from the first call argument, or "unknown". */
function modelFromArgs(args) {
  const [request] = args;
  return request?.model ?? "unknown";
}
|
|
33
|
+
/**
 * Wrap `original` in a Proxy that intercepts `cfg.methodName`, timing the
 * call and recording a usage event (via cfg.pushEvent) on both success and
 * failure. All other properties pass through untouched. The wrapped method
 * re-throws the provider's error after recording an all-zero error event.
 */
function createMethodProxy(original, cfg) {
  // Builds the instrumented async replacement for cfg.methodName.
  const instrument = (target) => async (...callArgs) => {
    const startedAt = Date.now();
    const model = cfg.modelExtractor(callArgs);
    // Attribution fields shared by success and error events.
    const baseEvent = () => ({
      feature: cfg.opts.feature,
      team: cfg.opts.team,
      userId: cfg.opts.userId,
      provider: cfg.provider,
      model,
    });
    try {
      const result = await target[cfg.methodName].apply(target, callArgs);
      cfg.pushEvent({
        ...baseEvent(),
        ...cfg.tokenExtractor(result),
        latencyMs: Date.now() - startedAt,
        status: "success",
        timestamp: Date.now(),
      });
      if (cfg.shouldFlush()) cfg.triggerFlush();
      return result;
    } catch (err) {
      cfg.pushEvent({
        ...baseEvent(),
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
        latencyMs: Date.now() - startedAt,
        status: "error",
        timestamp: Date.now(),
      });
      throw err;
    }
  };
  return new Proxy(original, {
    get(target, prop) {
      return prop === cfg.methodName ? instrument(target) : target[prop];
    },
  });
}
|
|
79
|
+
|
|
80
|
+
// src/index.ts
|
|
81
|
+
var MAX_BUFFER_SIZE = 1e3;
var config = null;
var buffer = [];
var flushTimer = null;
var initialized = false;
var flushing = false;
// Queue an event, evicting the oldest entry once the hard cap is hit so an
// unreachable endpoint can never grow memory without bound.
function pushEvent(event) {
  if (buffer.length >= MAX_BUFFER_SIZE) {
    buffer.shift();
  }
  buffer.push(event);
}
// True when enough events are queued to warrant an immediate flush.
function shouldFlush() {
  return buffer.length >= (config?.batchSize ?? 50);
}
// Fire-and-forget flush; failures surface through config.onError inside flush().
function triggerFlush() {
  void flush();
}
// POST all queued events to the ingestion endpoint. Events are removed from
// the queue only after a 2xx response, so a failed batch is retried on the
// next flush. Concurrent calls are coalesced via the `flushing` latch.
async function flush() {
  if (!config || buffer.length === 0 || flushing) return;
  flushing = true;
  const batch = buffer.slice();
  try {
    const response = await fetch(config.endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${config.apiKey}`
      },
      body: JSON.stringify({ events: batch })
    });
    if (!response.ok) {
      const error = new Error(
        `Token Warden: flush failed with status ${response.status}`
      );
      config.onError?.(error);
      return;
    }
    buffer.splice(0, batch.length);
  } catch (err) {
    config.onError?.(err);
  } finally {
    flushing = false;
  }
}
var beforeExitHandler = () => void flush();
// Start the periodic background flush and register a best-effort flush on
// process exit. Idempotent while a timer is already running.
function scheduleFlush() {
  if (flushTimer) return;
  const interval = config?.flushIntervalMs ?? 5e3;
  flushTimer = setInterval(() => {
    void flush();
  }, interval);
  // FIX: guard unref — in browsers/workers setInterval returns a number,
  // which has no unref() method; calling it unconditionally threw there.
  flushTimer.unref?.();
  if (typeof process !== "undefined" && process.once) {
    process.once("beforeExit", beforeExitHandler);
  }
}
var warden = {
  // Initialize the SDK. Must run before wrap()/track(); throws when called
  // twice without an intervening shutdown(), or when required config is missing.
  init(cfg) {
    if (initialized) {
      throw new Error(
        "Token Warden: already initialized. Call warden.shutdown() before re-initializing."
      );
    }
    if (!cfg.endpoint) {
      throw new Error("Token Warden: endpoint is required in config");
    }
    if (!cfg.apiKey) {
      // FIX: apiKey is documented and typed as required but was never
      // validated, silently producing "Authorization: Bearer undefined".
      throw new Error("Token Warden: apiKey is required in config");
    }
    config = cfg;
    initialized = true;
    scheduleFlush();
  },
  // Wrap an LLM SDK client in a Proxy that records token usage per call.
  // The wrapped client behaves exactly like the original otherwise.
  wrap(client, opts) {
    if (!initialized || !config) {
      // FIX: the message referenced a non-existent "guardian" object.
      throw new Error(
        "Token Warden: call warden.init() before warden.wrap()"
      );
    }
    const provider = detectProvider(client);
    return new Proxy(client, {
      get(target, prop) {
        const value = target[prop];
        // OpenAI-style clients: client.chat.completions.create(...)
        if (prop === "chat" && value && typeof value === "object") {
          return new Proxy(value, {
            get(chatTarget, chatProp) {
              if (chatProp === "completions") {
                const completions = chatTarget.completions;
                if (completions && typeof completions === "object") {
                  return createMethodProxy(
                    completions,
                    {
                      methodName: "create",
                      provider,
                      modelExtractor: modelFromArgs,
                      tokenExtractor: extractCompletionTokens,
                      opts,
                      pushEvent,
                      shouldFlush,
                      triggerFlush
                    }
                  );
                }
              }
              return chatTarget[chatProp];
            }
          });
        }
        // Anthropic-style clients: client.messages.create(...)
        if (prop === "messages" && value && typeof value === "object") {
          return createMethodProxy(value, {
            methodName: "create",
            provider: provider === "unknown" ? "anthropic" : provider,
            modelExtractor: modelFromArgs,
            tokenExtractor: extractCompletionTokens,
            opts,
            pushEvent,
            shouldFlush,
            triggerFlush
          });
        }
        // Google Generative AI: client.getGenerativeModel({...}).generateContent(...)
        if (prop === "getGenerativeModel" && typeof value === "function") {
          return (...args) => {
            const modelInstance = value.apply(target, args);
            const modelName = args[0]?.model ?? "unknown";
            return createMethodProxy(
              modelInstance,
              {
                methodName: "generateContent",
                provider: provider === "unknown" ? "google" : provider,
                modelExtractor: () => modelName,
                tokenExtractor: extractGoogleTokens,
                opts,
                pushEvent,
                shouldFlush,
                triggerFlush
              }
            );
          };
        }
        // Amazon Bedrock: client.send(command) — instrumented inline because
        // the command object (not the call args) carries the model id.
        if (prop === "send" && typeof value === "function" && provider === "amazon") {
          return async (...args) => {
            const start = Date.now();
            const command = args[0];
            const modelId = command?.modelId ?? command?.input?.modelId ?? "unknown";
            try {
              const result = await value.apply(target, args);
              const usage = result.usage;
              const inputTokens = usage?.inputTokens ?? 0;
              const outputTokens = usage?.outputTokens ?? 0;
              pushEvent({
                feature: opts.feature,
                team: opts.team,
                userId: opts.userId,
                provider,
                model: modelId,
                inputTokens,
                outputTokens,
                totalTokens: inputTokens + outputTokens,
                latencyMs: Date.now() - start,
                status: "success",
                timestamp: Date.now()
              });
              if (shouldFlush()) triggerFlush();
              return result;
            } catch (err) {
              pushEvent({
                feature: opts.feature,
                team: opts.team,
                userId: opts.userId,
                provider,
                model: modelId,
                inputTokens: 0,
                outputTokens: 0,
                totalTokens: 0,
                latencyMs: Date.now() - start,
                status: "error",
                timestamp: Date.now()
              });
              throw err;
            }
          };
        }
        return value;
      }
    });
  },
  // Manually record a usage event for providers without SDK wrapping.
  track(event) {
    if (!initialized || !config) {
      // FIX: the message referenced a non-existent "guardian" object.
      throw new Error(
        "Token Warden: call warden.init() before warden.track()"
      );
    }
    if (!event.feature) {
      throw new Error("Token Warden: feature is required in track event");
    }
    if (!event.provider) {
      throw new Error("Token Warden: provider is required in track event");
    }
    if (!event.model) {
      throw new Error("Token Warden: model is required in track event");
    }
    pushEvent({
      ...event,
      timestamp: Date.now()
    });
    if (shouldFlush()) triggerFlush();
  },
  // Flush all pending events; resolves once the batch attempt completes.
  flush,
  // Stop the background timer, flush remaining events, and reset all state
  // so init() may be called again.
  async shutdown() {
    if (flushTimer) {
      clearInterval(flushTimer);
      flushTimer = null;
    }
    // FIX: guard removeListener for non-Node runtimes exposing a partial
    // `process` shim (the old code only checked that `process` existed).
    if (typeof process !== "undefined" && typeof process.removeListener === "function") {
      process.removeListener("beforeExit", beforeExitHandler);
    }
    flushing = false;
    await flush();
    buffer.length = 0;
    config = null;
    initialized = false;
  }
};
|
|
302
|
+
export {
|
|
303
|
+
warden
|
|
304
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "token-warden",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Lightweight LLM cost tracking SDK — auto-instrument OpenAI, Anthropic, Google, Bedrock, and more",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"sideEffects": false,
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"require": "./dist/index.cjs"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"main": "./dist/index.cjs",
|
|
16
|
+
"module": "./dist/index.js",
|
|
17
|
+
"types": "./dist/index.d.ts",
|
|
18
|
+
"files": [
|
|
19
|
+
"dist"
|
|
20
|
+
],
|
|
21
|
+
"repository": {
|
|
22
|
+
"type": "git",
|
|
23
|
+
"url": "https://github.com/Flagship-Software/token-warden.git",
|
|
24
|
+
"directory": "packages/sdk"
|
|
25
|
+
},
|
|
26
|
+
"homepage": "https://github.com/Flagship-Software/token-warden#readme",
|
|
27
|
+
"bugs": {
|
|
28
|
+
"url": "https://github.com/Flagship-Software/token-warden/issues"
|
|
29
|
+
},
|
|
30
|
+
"keywords": [
|
|
31
|
+
"llm",
|
|
32
|
+
"cost",
|
|
33
|
+
"monitoring",
|
|
34
|
+
"openai",
|
|
35
|
+
"anthropic",
|
|
36
|
+
"google",
|
|
37
|
+
"bedrock",
|
|
38
|
+
"ai",
|
|
39
|
+
"tokens",
|
|
40
|
+
"usage",
|
|
41
|
+
"budget",
|
|
42
|
+
"tracking"
|
|
43
|
+
],
|
|
44
|
+
"engines": {
|
|
45
|
+
"node": ">=18"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"build": "tsup",
|
|
49
|
+
"test": "vitest run",
|
|
50
|
+
"test:watch": "vitest",
|
|
51
|
+
"typecheck": "tsc --noEmit",
|
|
52
|
+
"prepublishOnly": "cp ../../README.md ./README.md && npm run build"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"@types/node": "^25.5.0",
|
|
56
|
+
"tsup": "^8.0.0",
|
|
57
|
+
"typescript": "^5.7.0",
|
|
58
|
+
"vitest": "^3.2.0"
|
|
59
|
+
}
|
|
60
|
+
}
|