@tollgateai/sdk 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,72 +1,106 @@
1
1
  # @tollgateai/sdk
2
2
 
3
- Track **real** LLM model usage and compute live gross margin with
4
- [Tollgate](https://tollgateai.vercel.app). The SDK reads the actual `usage`
5
- object off each provider response — you never hand-count tokens.
3
+ > Real-time gross-margin observability for AI agents. Track every LLM call's cost, attribute it to a customer, and see whether you're making money — before the invoice goes out.
6
4
 
7
- Published on npm: [@tollgateai/sdk](https://www.npmjs.com/package/@tollgateai/sdk) (v0.2.0).
5
+ **v0.3.0** · [npm](https://www.npmjs.com/package/@tollgateai/sdk) · [Dashboard](https://tollgateai.vercel.app)
8
6
 
9
- Works with **OpenAI**, **Anthropic**, **AWS Bedrock**, and **every OpenAI-compatible
10
- gateway** (Vercel AI Gateway, OpenRouter, Groq, Together, Nebius, local vLLM, …) —
11
- streaming and non-streaming. Cost is computed server-side from the token counts the
12
- wrappers capture, so no provider has to return a dollar figure.
7
+ ---
8
+
9
+ ## Why Tollgate
10
+
11
+ You sell an AI-powered product. Each customer interaction triggers LLM calls that cost you real money — input tokens, output tokens, reasoning tokens, cached tokens, tool calls. Tollgate captures that cost automatically from provider responses, joins it with the revenue your pricing model defines, and shows you per-customer, per-agent, per-run gross margin in real time.
12
+
13
+ ## Installation
13
14
 
14
15
  ```bash
15
16
  npm install @tollgateai/sdk
16
- # or: pnpm add @tollgateai/sdk / yarn add @tollgateai/sdk
17
17
  ```
18
18
 
19
- Create an API key in **Tollgate → Integrations**, then set:
20
-
21
19
  ```bash
22
- TOLLGATE_API_KEY=tg_live_xxx
23
- # optional, defaults to the hosted app:
24
- TOLLGATE_BASE_URL=https://tollgateai.vercel.app
20
+ pnpm add @tollgateai/sdk # or yarn add @tollgateai/sdk
25
21
  ```
26
22
 
27
- ## Auto-instrumentation (recommended)
23
+ Requires Node.js 18+. Zero runtime dependencies.
28
24
 
29
- Wrap your provider client once; every call reports real usage in the background.
30
-
31
- ### Anthropic
25
+ ## Quick Start
32
26
 
33
27
  ```ts
34
28
  import Anthropic from '@anthropic-ai/sdk';
35
29
  import { createTollgateClient, wrapAnthropic } from '@tollgateai/sdk';
36
30
 
37
- const tollgate = createTollgateClient(); // reads TOLLGATE_API_KEY
38
-
39
- // Pin a runId so every call in this run is grouped and reports cost only.
40
- const runId = 'ticket_8842';
31
+ const tollgate = createTollgateClient(); // reads TOLLGATE_API_KEY from env
41
32
  const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
42
- customerId: 'cust_A', // your end customer
43
- runId,
33
+ customerId: 'cust_acme',
34
+ runId: 'ticket_8842',
44
35
  });
45
36
 
46
- // Use the client normally — usage is tracked automatically.
47
- await anthropic.messages.create({
37
+ // Every call is tracked automatically — tokens, cost, tool calls.
38
+ const msg = await anthropic.messages.create({
48
39
  model: 'claude-sonnet-4-6',
49
- max_tokens: 512,
50
- messages: [{ role: 'user', content: 'Resolve this ticket…' }],
40
+ max_tokens: 1024,
41
+ messages: [{ role: 'user', content: 'Resolve this billing dispute…' }],
51
42
  });
52
43
 
53
- // Book revenue once, when the run finishes — "no outcome, no charge".
44
+ // Close the run and book revenue.
54
45
  await tollgate.resolve({
55
- runId,
56
- customerId: 'cust_A',
57
- outcome: 'resolved', // 'resolved' | 'escalated' | 'failed'
58
- revenueUnitCents: 50, // charge for this resolved unit ($0.50)
46
+ runId: 'ticket_8842',
47
+ customerId: 'cust_acme',
48
+ outcome: 'resolved',
49
+ revenueUnitCents: 50, // $0.50 per resolved ticket
50
+ });
51
+ ```
52
+
53
+ ## Provider Support
54
+
55
+ | Provider | Wrapper | Streaming | Tool-Call Tracking |
56
+ |---|---|---|---|
57
+ | Anthropic | `wrapAnthropic` | Automatic | Counts `tool_use` content blocks |
58
+ | OpenAI | `wrapOpenAI` | Needs `stream_options: { include_usage: true }` | Counts `tool_calls` on choices |
59
+ | OpenAI-compatible (Groq, OpenRouter, Together, Nebius, vLLM, …) | `wrapOpenAI` with `provider: 'openai_compatible'` | Same as OpenAI | Same as OpenAI |
60
+ | AWS Bedrock | `wrapBedrock` | Automatic | Counts `toolUse` content blocks |
61
+
62
+ ## Configuration
63
+
64
+ | Environment Variable | Required | Default |
65
+ |---|---|---|
66
+ | `TOLLGATE_API_KEY` | Yes | — |
67
+ | `TOLLGATE_BASE_URL` | No | `https://tollgateai.vercel.app` |
68
+
69
+ Or pass them directly:
70
+
71
+ ```ts
72
+ const tollgate = createTollgateClient({
73
+ apiKey: 'tg_live_xxx',
74
+ baseUrl: 'https://tollgateai.vercel.app',
75
+ timeoutMs: 10_000, // per-request timeout (default 10s)
76
+ maxRetries: 2, // retries on 5xx/429/network (default 2)
59
77
  });
60
78
  ```
61
79
 
62
- ### Outcome-based pricing
80
+ ---
81
+
82
+ ## Auto-Instrumentation
83
+
84
+ Wrap your provider client once. Every `create` call reports usage in the background — non-blocking, fire-and-forget. Failures go to `onError` (default: `console.warn`) and never break your LLM call.
85
+
86
+ ### Anthropic
87
+
88
+ ```ts
89
+ import Anthropic from '@anthropic-ai/sdk';
90
+ import { createTollgateClient, wrapAnthropic } from '@tollgateai/sdk';
63
91
 
64
- Under per-resolution / outcome pricing, only a **resolved** run earns revenue —
65
- an `escalated`/`failed` run earns $0 but its provider cost still counts against
66
- you. Wrap your client to meter cost on every call, then call `resolve()` once at
67
- the end of the run to book the outcome (and, if resolved, its revenue). For
68
- simple per-call billing you can instead pass `revenueUnitCents` in the wrap
69
- options and skip `resolve()`.
92
+ const tollgate = createTollgateClient();
93
+ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
94
+ customerId: 'cust_acme',
95
+ runId: 'ticket_8842',
96
+ });
97
+
98
+ await anthropic.messages.create({
99
+ model: 'claude-sonnet-4-6',
100
+ max_tokens: 512,
101
+ messages: [{ role: 'user', content: 'Summarize this ticket…' }],
102
+ });
103
+ ```
70
104
 
71
105
  ### OpenAI
72
106
 
@@ -75,7 +109,7 @@ import OpenAI from 'openai';
75
109
  import { createTollgateClient, wrapOpenAI } from '@tollgateai/sdk';
76
110
 
77
111
  const tollgate = createTollgateClient();
78
- const openai = wrapOpenAI(new OpenAI(), tollgate, { customerId: 'cust_A' });
112
+ const openai = wrapOpenAI(new OpenAI(), tollgate, { customerId: 'cust_acme' });
79
113
 
80
114
  await openai.chat.completions.create({
81
115
  model: 'gpt-4o',
@@ -83,66 +117,140 @@ await openai.chat.completions.create({
83
117
  });
84
118
  ```
85
119
 
86
- `revenueUnitCents` may also be a function of the response, e.g.
87
- `revenueUnitCents: (res) => res.someField ? 50 : 0`.
88
-
89
- ### OpenAI-compatible gateways
120
+ ### OpenAI-Compatible Gateways
90
121
 
91
- Point the OpenAI SDK at any compatible endpoint and set `provider:
92
- 'openai_compatible'` so the server prices it from the gateway-echoed model name:
122
+ Point the OpenAI SDK at any compatible endpoint and set `provider: 'openai_compatible'`:
93
123
 
94
124
  ```ts
95
- const openai = new OpenAI({ apiKey: process.env.GROQ_API_KEY, baseURL: 'https://api.groq.com/openai/v1' });
96
- const client = wrapOpenAI(openai, tollgate, {
97
- customerId: 'cust_A',
98
- provider: 'openai_compatible', // Groq / OpenRouter / Together / Nebius / vLLM …
125
+ import OpenAI from 'openai';
126
+ import { createTollgateClient, wrapOpenAI } from '@tollgateai/sdk';
127
+
128
+ const tollgate = createTollgateClient();
129
+ const groq = wrapOpenAI(
130
+ new OpenAI({ apiKey: process.env.GROQ_API_KEY, baseURL: 'https://api.groq.com/openai/v1' }),
131
+ tollgate,
132
+ { customerId: 'cust_acme', provider: 'openai_compatible' },
133
+ );
134
+
135
+ await groq.chat.completions.create({
136
+ model: 'llama-3.3-70b-versatile',
137
+ messages: [{ role: 'user', content: 'Hello' }],
99
138
  });
100
- await client.chat.completions.create({ model: 'llama-3.3-70b-versatile', messages: [...] });
139
+ ```
140
+
141
+ ### AWS Bedrock
142
+
143
+ ```ts
144
+ import { BedrockRuntimeClient, ConverseCommand } from '@aws-sdk/client-bedrock-runtime';
145
+ import { createTollgateClient, wrapBedrock } from '@tollgateai/sdk';
146
+
147
+ const tollgate = createTollgateClient();
148
+ const bedrock = wrapBedrock(
149
+ new BedrockRuntimeClient({ region: 'us-east-1' }),
150
+ tollgate,
151
+ { customerId: 'cust_acme' },
152
+ );
153
+
154
+ await bedrock.send(new ConverseCommand({
155
+ modelId: 'anthropic.claude-3-5-sonnet-20241022-v2:0',
156
+ messages: [{ role: 'user', content: [{ text: 'Hello' }] }],
157
+ }));
101
158
  ```
102
159
 
103
160
  ### Streaming
104
161
 
105
- Streaming is captured automatically. For **OpenAI / compatible**, pass
106
- `stream_options: { include_usage: true }` (required for a final usage chunk); for
107
- **Anthropic** no flag is needed. Just iterate the stream as usual:
162
+ Streaming is captured automatically — iterate the stream as usual and usage is reported when the stream ends.
163
+
164
+ **OpenAI / compatible** requires `stream_options: { include_usage: true }` for the final usage chunk. **Anthropic** and **Bedrock** need no extra flags.
108
165
 
109
166
  ```ts
110
- const stream = await client.chat.completions.create({
111
- model: 'gpt-4o', stream: true, stream_options: { include_usage: true },
167
+ const stream = await openai.chat.completions.create({
168
+ model: 'gpt-4o',
169
+ stream: true,
170
+ stream_options: { include_usage: true },
112
171
  messages: [{ role: 'user', content: 'Hello' }],
113
172
  });
114
- for await (const chunk of stream) { /* … */ } // usage is reported when the stream ends
173
+ for await (const chunk of stream) { /* render to UI */ }
174
+ // Usage reported automatically when stream ends.
115
175
  ```
116
176
 
117
- ### AWS Bedrock
177
+ ---
178
+
179
+ ## What Gets Tracked
180
+
181
+ Every auto-instrumented call captures the following from the provider response:
182
+
183
+ | Field | Source | Description |
184
+ |---|---|---|
185
+ | `tokensIn` | `usage.input_tokens` / `prompt_tokens` | Input tokens consumed |
186
+ | `tokensOut` | `usage.output_tokens` / `completion_tokens` | Output tokens generated |
187
+ | `reasoningTokens` | `completion_tokens_details.reasoning_tokens` | Reasoning/chain-of-thought tokens (OpenAI) |
188
+ | `cachedTokens` | `cache_read_input_tokens` / `cached_tokens` | Prompt cache read tokens |
189
+ | `cacheWrite5mTokens` | `cache_creation_input_tokens` | 5-min TTL cache write tokens |
190
+ | `cacheWrite1hTokens` | `cache_creation.ephemeral_1h_input_tokens` | 1-hour TTL cache write tokens |
191
+ | `toolCalls` | Content block / choice inspection | Number of tool calls in the response |
192
+ | `provider` | Wrapper default or override | `anthropic`, `openai`, `openai_compatible`, `bedrock` |
193
+ | `model` | Response object | Model identifier as reported by the provider |
194
+
195
+ Cost is computed **server-side** from token counts and a rate card that auto-syncs daily from the public LiteLLM registry. Unknown models are priced at $0 and flagged in logs.
118
196
 
119
- Wrap a `BedrockRuntimeClient` so `ConverseCommand` / `ConverseStreamCommand`
120
- auto-report usage (the model id is read from the command):
197
+ ---
198
+
199
+ ## Outcome-Based Pricing
200
+
201
+ Under per-resolution pricing, only a **resolved** run earns revenue. An escalated or failed run earns $0 but its provider cost still counts. The pattern:
202
+
203
+ 1. **Wrap** to meter cost on every LLM call (automatic).
204
+ 2. **Resolve** once at the end to book the outcome.
121
205
 
122
206
  ```ts
123
- import { BedrockRuntimeClient, ConverseCommand } from '@aws-sdk/client-bedrock-runtime';
124
- import { wrapBedrock } from '@tollgateai/sdk';
207
+ const runId = 'ticket_8842';
208
+ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
209
+ customerId: 'cust_acme',
210
+ runId,
211
+ });
125
212
 
126
- const bedrock = wrapBedrock(new BedrockRuntimeClient({ region: 'us-east-1' }), tollgate, { customerId: 'cust_A' });
127
- await bedrock.send(new ConverseCommand({ modelId: 'anthropic.claude-3-5-sonnet-20241022-v2:0', messages: [...] }));
213
+ // … multiple LLM calls within this run …
214
+
215
+ await tollgate.resolve({
216
+ runId,
217
+ customerId: 'cust_acme',
218
+ outcome: 'resolved', // 'resolved' | 'escalated' | 'failed'
219
+ revenueUnitCents: 50,
220
+ });
128
221
  ```
129
222
 
130
- ### Already have an exact cost?
223
+ For simple per-call billing, pass `revenueUnitCents` in the wrap options and skip `resolve()`.
131
224
 
132
- Pass `providerCostCents` (a number or a function of the response) and the server
133
- uses it verbatim, skipping the rate card entirely.
225
+ ---
134
226
 
135
- ## Manual tracking
227
+ ## Customer & Plan Setup
136
228
 
137
- For full control or unusual providers:
229
+ Create customers and assign plans **before** sending usage so plan-priced revenue is recognized from the first event. Idempotent — safe to run on every boot.
138
230
 
139
231
  ```ts
140
- import { createTollgateClient } from '@tollgateai/sdk';
232
+ await tollgate.upsertCustomer({
233
+ customerId: 'cust_acme',
234
+ name: 'Acme Corp',
235
+ company: 'Acme Corp',
236
+ seats: 5,
237
+ plan: {
238
+ name: 'Pro Plan',
239
+ pricingModel: 'usage_based', // per_unit | per_resolution | usage_based | per_seat | flat | hybrid
240
+ unitRevenueCents: 10,
241
+ },
242
+ });
243
+ ```
141
244
 
142
- const tollgate = createTollgateClient({ apiKey: process.env.TOLLGATE_API_KEY });
245
+ ---
143
246
 
247
+ ## Manual Tracking
248
+
249
+ For full control, unusual providers, or non-LLM cost events:
250
+
251
+ ```ts
144
252
  await tollgate.track({
145
- customerId: 'cust_A',
253
+ customerId: 'cust_acme',
146
254
  runId: 'run_12345',
147
255
  provider: 'anthropic',
148
256
  model: 'claude-sonnet-4-6',
@@ -150,35 +258,83 @@ await tollgate.track({
150
258
  tokensOut: 450,
151
259
  reasoningTokens: 0,
152
260
  cachedTokens: 0,
261
+ toolCalls: 2,
153
262
  revenueUnitCents: 50,
154
- idempotencyKey: 'run_12345#step_1', // exactly-once: safe to retry
263
+ idempotencyKey: 'run_12345#step_1',
155
264
  });
156
265
  ```
157
266
 
158
- ## Notes
159
-
160
- - **Idempotent.** Events are deduplicated on `idempotencyKey` (auto-set to the
161
- provider response id by the wrappers), so retries never double-count.
162
- - **No prompt content is ever sent** — only token counts and metadata.
163
- - **Streaming is auto-tracked** (OpenAI needs `stream_options.include_usage`).
164
- - **Cost from tokens.** The server prices every event from token counts × a rate
165
- card that auto-syncs daily from the public LiteLLM registry — unknown models are
166
- priced at $0 and flagged in logs. See [docs/PRICING.md](../../docs/PRICING.md).
167
- - **Non-blocking.** Auto-instrumented tracking runs in the background; failures
168
- are passed to `onError` (default `console.warn`) and never break your call.
169
-
170
- ## API
171
-
172
- - `createTollgateClient(options?)` → `{ track(event), resolve(input) }`
173
- - `resolve({ runId, customerId, outcome, revenueUnitCents? })` → close a run with
174
- its outcome; books revenue once, only when `outcome` is `'resolved'`
175
- - `wrapAnthropic(client, tollgate, options)` → instrumented Anthropic client
176
- - `wrapOpenAI(client, tollgate, options)` → instrumented OpenAI / compatible client
177
- - `wrapBedrock(client, tollgate, options)` → instrumented Bedrock Runtime client
178
- - `anthropicEventFrom` / `openAIEventFrom` / `bedrockEventFrom` → build a track
179
- payload manually from a provider response
180
-
181
- `options` accepts `customerId`, `agentId`, `runId`, `revenueUnitCents`,
182
- `provider` (override; e.g. `'openai_compatible'`), `providerCostCents`, and `onError`.
183
-
184
- Licensed for use with Tollgate. Not open source.
267
+ ### Already have an exact cost?
268
+
269
+ Pass `providerCostCents` (a number or a function of the response) and the server uses it verbatim, skipping the rate card entirely:
270
+
271
+ ```ts
272
+ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
273
+ customerId: 'cust_acme',
274
+ providerCostCents: 3.5, // or: (response) => computeMyOwnCost(response)
275
+ });
276
+ ```
277
+
278
+ ---
279
+
280
+ ## API Reference
281
+
282
+ ### Exports
283
+
284
+ ```ts
285
+ // Client
286
+ createTollgateClient(options?) // → TollgateClient
287
+ TollgateError // Custom error with status & body
288
+
289
+ // Auto-instrumentation wrappers
290
+ wrapAnthropic(client, tollgate, options) // → instrumented Anthropic client
291
+ wrapOpenAI(client, tollgate, options) // → instrumented OpenAI / compatible client
292
+ wrapBedrock(client, tollgate, options) // → instrumented Bedrock Runtime client
293
+
294
+ // Low-level event builders (for manual track payloads)
295
+ anthropicEventFrom(msg, options) // → TrackEventInput | null
296
+ openAIEventFrom(completion, options) // → TrackEventInput | null
297
+ bedrockEventFrom(usage, model, options) // → TrackEventInput | null
298
+ ```
299
+
300
+ ### TollgateClient
301
+
302
+ | Method | Description |
303
+ |---|---|
304
+ | `track(event)` | Report a single usage event. Idempotent on `idempotencyKey`. |
305
+ | `resolve(input)` | Close a run with an outcome. Books revenue only when `outcome` is `'resolved'`. |
306
+ | `upsertCustomer(input)` | Create or update a customer and optionally assign a plan. |
307
+
308
+ ### InstrumentOptions
309
+
310
+ | Field | Type | Required | Description |
311
+ |---|---|---|---|
312
+ | `customerId` | `string` | Yes | Your end customer's stable identifier. |
313
+ | `agentId` | `string` | No | Agent or workflow identifier. |
314
+ | `runId` | `string \| () => string` | No | Logical run ID. Defaults to the provider response ID. |
315
+ | `provider` | `Provider` | No | Override the reported provider (e.g. `'openai_compatible'`). |
316
+ | `revenueUnitCents` | `number \| (response) => number` | No | Revenue per call in cents. |
317
+ | `providerCostCents` | `number \| (response) => number` | No | Exact cost override — skips rate card. |
318
+ | `onError` | `(err) => void` | No | Error handler for background tracking (default: `console.warn`). |
319
+
320
+ ---
321
+
322
+ ## How It Works
323
+
324
+ 1. **Proxy wrappers** intercept `messages.create` / `chat.completions.create` / `send` without modifying the request or response.
325
+ 2. After the provider responds, the wrapper extracts token counts, tool call counts, and metadata from the response's `usage` object and content blocks.
326
+ 3. A `POST /api/track` is fired **in the background** — non-blocking, with automatic retries on transient failures.
327
+ 4. The server computes cost from tokens via rate cards, joins it with your plan-configured revenue, and updates real-time margin rollups.
328
+ 5. Events are **idempotent** on `idempotencyKey` (auto-set to the provider response ID), so retries and stream replays never double-count.
329
+
330
+ ## Privacy & Security
331
+
332
+ - **No prompt content is ever sent.** Only token counts, model name, and metadata.
333
+ - Events are deduplicated server-side — safe to retry.
334
+ - Background tracking never throws into your application code.
335
+
336
+ ---
337
+
338
+ ## License
339
+
340
+ Licensed for use with Tollgate.
package/dist/index.cjs CHANGED
@@ -23,7 +23,7 @@ function createTollgateClient(opts = {}) {
23
23
  if (typeof doFetch !== "function") {
24
24
  throw new TollgateError("No fetch implementation available \u2014 pass `fetch` in options.");
25
25
  }
26
- async function track(event) {
26
+ async function postJson(path, body) {
27
27
  if (!apiKey) {
28
28
  throw new TollgateError("Missing API key \u2014 set opts.apiKey or TOLLGATE_API_KEY.");
29
29
  }
@@ -32,23 +32,23 @@ function createTollgateClient(opts = {}) {
32
32
  const controller = new AbortController();
33
33
  const timer = setTimeout(() => controller.abort(), timeoutMs);
34
34
  try {
35
- const res = await doFetch(`${baseUrl}/api/track`, {
35
+ const res = await doFetch(`${baseUrl}${path}`, {
36
36
  method: "POST",
37
37
  headers: {
38
38
  "Content-Type": "application/json",
39
39
  Authorization: `Bearer ${apiKey}`
40
40
  },
41
- body: JSON.stringify(event),
41
+ body: JSON.stringify(body),
42
42
  signal: controller.signal
43
43
  });
44
44
  if (res.ok) {
45
45
  return await res.json();
46
46
  }
47
47
  if (res.status >= 500 || res.status === 429) {
48
- lastErr = new TollgateError(`Tollgate track failed (${res.status})`, res.status);
48
+ lastErr = new TollgateError(`Tollgate request failed (${res.status})`, res.status);
49
49
  } else {
50
- const body = await res.json().catch(() => ({}));
51
- throw new TollgateError(`Tollgate track failed (${res.status})`, res.status, body);
50
+ const errBody = await res.json().catch(() => ({}));
51
+ throw new TollgateError(`Tollgate request failed (${res.status})`, res.status, errBody);
52
52
  }
53
53
  } catch (err) {
54
54
  if (err instanceof TollgateError && err.status && err.status < 500 && err.status !== 429) {
@@ -62,7 +62,13 @@ function createTollgateClient(opts = {}) {
62
62
  await sleep(2 ** attempt * 200);
63
63
  }
64
64
  }
65
- throw lastErr instanceof Error ? lastErr : new TollgateError("Tollgate track failed after retries");
65
+ throw lastErr instanceof Error ? lastErr : new TollgateError("Tollgate request failed after retries");
66
+ }
67
+ function track(event) {
68
+ return postJson("/api/track", event);
69
+ }
70
+ function upsertCustomer(input) {
71
+ return postJson("/api/sdk/customer", input);
66
72
  }
67
73
  function resolve(input) {
68
74
  return track({
@@ -80,7 +86,7 @@ function createTollgateClient(opts = {}) {
80
86
  ts: input.ts
81
87
  });
82
88
  }
83
- return { track, resolve };
89
+ return { track, resolve, upsertCustomer };
84
90
  }
85
91
 
86
92
  // src/instrument.ts
@@ -155,6 +161,7 @@ function anthropicEventFrom(msg, opts) {
155
161
  const fivem = usage.cache_creation?.ephemeral_5m_input_tokens;
156
162
  const oneh = usage.cache_creation?.ephemeral_1h_input_tokens;
157
163
  const hasSplit = fivem !== void 0 || oneh !== void 0;
164
+ const toolCalls = Array.isArray(msg.content) ? msg.content.filter((b) => b.type === "tool_use").length : 0;
158
165
  const event = {
159
166
  customerId: opts.customerId,
160
167
  agentId: opts.agentId,
@@ -166,6 +173,7 @@ function anthropicEventFrom(msg, opts) {
166
173
  cachedTokens: usage.cache_read_input_tokens ?? 0,
167
174
  cacheWrite5mTokens: hasSplit ? fivem ?? 0 : usage.cache_creation_input_tokens ?? 0,
168
175
  cacheWrite1hTokens: hasSplit ? oneh ?? 0 : 0,
176
+ toolCalls,
169
177
  revenueUnitCents: resolveRevenue(opts, msg),
170
178
  idempotencyKey: msg.id ?? `${runId}#${randomId()}`
171
179
  };
@@ -178,6 +186,7 @@ function wrapAnthropic(client, tollgate, opts) {
178
186
  const result = await original(...args);
179
187
  if (isAsyncIterable(result)) {
180
188
  const msg = {};
189
+ const toolUseBlocks = [];
181
190
  return instrumentStream(
182
191
  result,
183
192
  (ev) => {
@@ -187,9 +196,12 @@ function wrapAnthropic(client, tollgate, opts) {
187
196
  msg.usage = { ...ev.message.usage };
188
197
  } else if (ev.type === "message_delta" && ev.usage) {
189
198
  msg.usage = { ...msg.usage ?? {}, output_tokens: ev.usage.output_tokens };
199
+ } else if (ev.type === "content_block_start" && ev.content_block?.type === "tool_use") {
200
+ toolUseBlocks.push(ev.content_block);
190
201
  }
191
202
  },
192
203
  () => {
204
+ msg.content = toolUseBlocks;
193
205
  const event2 = anthropicEventFrom(msg, opts);
194
206
  if (event2) fireAndForget(tollgate.track(event2), opts.onError);
195
207
  }
@@ -214,6 +226,7 @@ function openAIEventFrom(completion, opts) {
214
226
  const usage = completion?.usage;
215
227
  if (!usage) return null;
216
228
  const runId = resolveRunId(opts, completion.id);
229
+ const toolCalls = completion.choices?.[0]?.message?.tool_calls?.length ?? 0;
217
230
  const event = {
218
231
  customerId: opts.customerId,
219
232
  agentId: opts.agentId,
@@ -224,6 +237,7 @@ function openAIEventFrom(completion, opts) {
224
237
  tokensOut: usage.completion_tokens ?? 0,
225
238
  reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
226
239
  cachedTokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
240
+ toolCalls,
227
241
  revenueUnitCents: resolveRevenue(opts, completion),
228
242
  idempotencyKey: completion.id ?? `${runId}#${randomId()}`
229
243
  };
@@ -238,16 +252,26 @@ function wrapOpenAI(client, tollgate, opts) {
238
252
  let id;
239
253
  let model;
240
254
  let usage;
255
+ const toolCallIndices = /* @__PURE__ */ new Set();
241
256
  return instrumentStream(
242
257
  result,
243
258
  (chunk) => {
244
259
  if (chunk.id) id = chunk.id;
245
260
  if (chunk.model) model = chunk.model;
246
261
  if (chunk.usage) usage = chunk.usage;
262
+ for (const c of chunk.choices ?? []) {
263
+ for (const tc of c.delta?.tool_calls ?? []) {
264
+ if (tc.index !== void 0) toolCallIndices.add(tc.index);
265
+ }
266
+ }
247
267
  },
248
268
  () => {
249
269
  if (!usage) return;
250
- const event2 = openAIEventFrom({ id, model, usage }, opts);
270
+ const synth = { id, model, usage };
271
+ if (toolCallIndices.size > 0) {
272
+ synth.choices = [{ message: { tool_calls: new Array(toolCallIndices.size) } }];
273
+ }
274
+ const event2 = openAIEventFrom(synth, opts);
251
275
  if (event2) fireAndForget(tollgate.track(event2), opts.onError);
252
276
  }
253
277
  );
@@ -270,7 +294,7 @@ function wrapOpenAI(client, tollgate, opts) {
270
294
  }
271
295
  });
272
296
  }
273
- function bedrockEventFrom(usage, model, opts, response = void 0) {
297
+ function bedrockEventFrom(usage, model, opts, response = void 0, toolCalls = 0) {
274
298
  if (!usage) return null;
275
299
  const runId = resolveRunId(opts, void 0);
276
300
  const event = {
@@ -283,6 +307,7 @@ function bedrockEventFrom(usage, model, opts, response = void 0) {
283
307
  tokensOut: usage.outputTokens ?? 0,
284
308
  cachedTokens: usage.cacheReadInputTokens ?? 0,
285
309
  cacheWrite5mTokens: usage.cacheWriteInputTokens ?? 0,
310
+ toolCalls,
286
311
  revenueUnitCents: resolveRevenue(opts, response),
287
312
  idempotencyKey: `${runId}#${randomId()}`
288
313
  };
@@ -295,20 +320,23 @@ function wrapBedrock(client, tollgate, opts) {
295
320
  const model = command?.input?.modelId ?? "unknown";
296
321
  if (result?.stream && isAsyncIterable(result.stream)) {
297
322
  let usage;
323
+ let streamToolCalls = 0;
298
324
  result.stream = instrumentStream(
299
325
  result.stream,
300
326
  (ev) => {
301
327
  if (ev.metadata?.usage) usage = ev.metadata.usage;
328
+ if (ev.contentBlockStart?.start?.toolUse) streamToolCalls++;
302
329
  },
303
330
  () => {
304
- const event = bedrockEventFrom(usage, model, opts, result);
331
+ const event = bedrockEventFrom(usage, model, opts, result, streamToolCalls);
305
332
  if (event) fireAndForget(tollgate.track(event), opts.onError);
306
333
  }
307
334
  );
308
335
  return result;
309
336
  }
310
337
  if (result?.usage) {
311
- const event = bedrockEventFrom(result.usage, model, opts, result);
338
+ const tc = result.output?.message?.content?.filter((b) => b.toolUse != null).length ?? 0;
339
+ const event = bedrockEventFrom(result.usage, model, opts, result, tc);
312
340
  if (event) fireAndForget(tollgate.track(event), opts.onError);
313
341
  }
314
342
  return result;