@tollgateai/sdk 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +257 -101
- package/dist/index.cjs +40 -12
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +55 -2
- package/dist/index.d.ts +55 -2
- package/dist/index.js +40 -12
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,72 +1,106 @@
|
|
|
1
1
|
# @tollgateai/sdk
|
|
2
2
|
|
|
3
|
-
Track
|
|
4
|
-
[Tollgate](https://tollgateai.vercel.app). The SDK reads the actual `usage`
|
|
5
|
-
object off each provider response — you never hand-count tokens.
|
|
3
|
+
> Real-time gross-margin observability for AI agents. Track every LLM call's cost, attribute it to a customer, and see whether you're making money — before the invoice goes out.
|
|
6
4
|
|
|
7
|
-
|
|
5
|
+
**v0.3.0** · [npm](https://www.npmjs.com/package/@tollgateai/sdk) · [Dashboard](https://tollgateai.vercel.app)
|
|
8
6
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Why Tollgate
|
|
10
|
+
|
|
11
|
+
You sell an AI-powered product. Each customer interaction triggers LLM calls that cost you real money — input tokens, output tokens, reasoning tokens, cached tokens, tool calls. Tollgate captures that cost automatically from provider responses, joins it with the revenue your pricing model defines, and shows you per-customer, per-agent, per-run gross margin in real time.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
13
14
|
|
|
14
15
|
```bash
|
|
15
16
|
npm install @tollgateai/sdk
|
|
16
|
-
# or: pnpm add @tollgateai/sdk / yarn add @tollgateai/sdk
|
|
17
17
|
```
|
|
18
18
|
|
|
19
|
-
Create an API key in **Tollgate → Integrations**, then set:
|
|
20
|
-
|
|
21
19
|
```bash
|
|
22
|
-
|
|
23
|
-
# optional, defaults to the hosted app:
|
|
24
|
-
TOLLGATE_BASE_URL=https://tollgateai.vercel.app
|
|
20
|
+
pnpm add @tollgateai/sdk # or yarn add @tollgateai/sdk
|
|
25
21
|
```
|
|
26
22
|
|
|
27
|
-
|
|
23
|
+
Requires Node.js 18+. Zero runtime dependencies.
|
|
28
24
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
### Anthropic
|
|
25
|
+
## Quick Start
|
|
32
26
|
|
|
33
27
|
```ts
|
|
34
28
|
import Anthropic from '@anthropic-ai/sdk';
|
|
35
29
|
import { createTollgateClient, wrapAnthropic } from '@tollgateai/sdk';
|
|
36
30
|
|
|
37
|
-
const tollgate = createTollgateClient();
|
|
38
|
-
|
|
39
|
-
// Pin a runId so every call in this run is grouped and reports cost only.
|
|
40
|
-
const runId = 'ticket_8842';
|
|
31
|
+
const tollgate = createTollgateClient(); // reads TOLLGATE_API_KEY from env
|
|
41
32
|
const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
42
|
-
customerId: '
|
|
43
|
-
runId,
|
|
33
|
+
customerId: 'cust_acme',
|
|
34
|
+
runId: 'ticket_8842',
|
|
44
35
|
});
|
|
45
36
|
|
|
46
|
-
//
|
|
47
|
-
await anthropic.messages.create({
|
|
37
|
+
// Every call is tracked automatically — tokens, cost, tool calls.
|
|
38
|
+
const msg = await anthropic.messages.create({
|
|
48
39
|
model: 'claude-sonnet-4-6',
|
|
49
|
-
max_tokens:
|
|
50
|
-
messages: [{ role: 'user', content: 'Resolve this
|
|
40
|
+
max_tokens: 1024,
|
|
41
|
+
messages: [{ role: 'user', content: 'Resolve this billing dispute…' }],
|
|
51
42
|
});
|
|
52
43
|
|
|
53
|
-
//
|
|
44
|
+
// Close the run and book revenue.
|
|
54
45
|
await tollgate.resolve({
|
|
55
|
-
runId,
|
|
56
|
-
customerId: '
|
|
57
|
-
outcome: 'resolved',
|
|
58
|
-
revenueUnitCents: 50,
|
|
46
|
+
runId: 'ticket_8842',
|
|
47
|
+
customerId: 'cust_acme',
|
|
48
|
+
outcome: 'resolved',
|
|
49
|
+
revenueUnitCents: 50, // $0.50 per resolved ticket
|
|
50
|
+
});
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Provider Support
|
|
54
|
+
|
|
55
|
+
| Provider | Wrapper | Streaming | Tool-Call Tracking |
|
|
56
|
+
|---|---|---|---|
|
|
57
|
+
| Anthropic | `wrapAnthropic` | Automatic | Counts `tool_use` content blocks |
|
|
58
|
+
| OpenAI | `wrapOpenAI` | Needs `stream_options: { include_usage: true }` | Counts `tool_calls` on choices |
|
|
59
|
+
| OpenAI-compatible (Groq, OpenRouter, Together, Nebius, vLLM, …) | `wrapOpenAI` with `provider: 'openai_compatible'` | Same as OpenAI | Same as OpenAI |
|
|
60
|
+
| AWS Bedrock | `wrapBedrock` | Automatic | Counts `toolUse` content blocks |
|
|
61
|
+
|
|
62
|
+
## Configuration
|
|
63
|
+
|
|
64
|
+
| Environment Variable | Required | Default |
|
|
65
|
+
|---|---|---|
|
|
66
|
+
| `TOLLGATE_API_KEY` | Yes | — |
|
|
67
|
+
| `TOLLGATE_BASE_URL` | No | `https://tollgateai.vercel.app` |
|
|
68
|
+
|
|
69
|
+
Or pass them directly:
|
|
70
|
+
|
|
71
|
+
```ts
|
|
72
|
+
const tollgate = createTollgateClient({
|
|
73
|
+
apiKey: 'tg_live_xxx',
|
|
74
|
+
baseUrl: 'https://tollgateai.vercel.app',
|
|
75
|
+
timeoutMs: 10_000, // per-request timeout (default 10s)
|
|
76
|
+
maxRetries: 2, // retries on 5xx/429/network (default 2)
|
|
59
77
|
});
|
|
60
78
|
```
|
|
61
79
|
|
|
62
|
-
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Auto-Instrumentation
|
|
83
|
+
|
|
84
|
+
Wrap your provider client once. Every instrumented call (`messages.create`, `chat.completions.create`, or Bedrock `send`) reports usage in the background — non-blocking, fire-and-forget. Failures go to `onError` (default: `console.warn`) and never break your LLM call.
|
|
85
|
+
|
|
86
|
+
### Anthropic
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
90
|
+
import { createTollgateClient, wrapAnthropic } from '@tollgateai/sdk';
|
|
63
91
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
92
|
+
const tollgate = createTollgateClient();
|
|
93
|
+
const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
94
|
+
customerId: 'cust_acme',
|
|
95
|
+
runId: 'ticket_8842',
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
await anthropic.messages.create({
|
|
99
|
+
model: 'claude-sonnet-4-6',
|
|
100
|
+
max_tokens: 512,
|
|
101
|
+
messages: [{ role: 'user', content: 'Summarize this ticket…' }],
|
|
102
|
+
});
|
|
103
|
+
```
|
|
70
104
|
|
|
71
105
|
### OpenAI
|
|
72
106
|
|
|
@@ -75,7 +109,7 @@ import OpenAI from 'openai';
|
|
|
75
109
|
import { createTollgateClient, wrapOpenAI } from '@tollgateai/sdk';
|
|
76
110
|
|
|
77
111
|
const tollgate = createTollgateClient();
|
|
78
|
-
const openai = wrapOpenAI(new OpenAI(), tollgate, { customerId: '
|
|
112
|
+
const openai = wrapOpenAI(new OpenAI(), tollgate, { customerId: 'cust_acme' });
|
|
79
113
|
|
|
80
114
|
await openai.chat.completions.create({
|
|
81
115
|
model: 'gpt-4o',
|
|
@@ -83,66 +117,140 @@ await openai.chat.completions.create({
|
|
|
83
117
|
});
|
|
84
118
|
```
|
|
85
119
|
|
|
86
|
-
|
|
87
|
-
`revenueUnitCents: (res) => res.someField ? 50 : 0`.
|
|
88
|
-
|
|
89
|
-
### OpenAI-compatible gateways
|
|
120
|
+
### OpenAI-Compatible Gateways
|
|
90
121
|
|
|
91
|
-
Point the OpenAI SDK at any compatible endpoint and set `provider:
|
|
92
|
-
'openai_compatible'` so the server prices it from the gateway-echoed model name:
|
|
122
|
+
Point the OpenAI SDK at any compatible endpoint and set `provider: 'openai_compatible'`:
|
|
93
123
|
|
|
94
124
|
```ts
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
125
|
+
import OpenAI from 'openai';
|
|
126
|
+
import { createTollgateClient, wrapOpenAI } from '@tollgateai/sdk';
|
|
127
|
+
|
|
128
|
+
const tollgate = createTollgateClient();
|
|
129
|
+
const groq = wrapOpenAI(
|
|
130
|
+
new OpenAI({ apiKey: process.env.GROQ_API_KEY, baseURL: 'https://api.groq.com/openai/v1' }),
|
|
131
|
+
tollgate,
|
|
132
|
+
{ customerId: 'cust_acme', provider: 'openai_compatible' },
|
|
133
|
+
);
|
|
134
|
+
|
|
135
|
+
await groq.chat.completions.create({
|
|
136
|
+
model: 'llama-3.3-70b-versatile',
|
|
137
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
99
138
|
});
|
|
100
|
-
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### AWS Bedrock
|
|
142
|
+
|
|
143
|
+
```ts
|
|
144
|
+
import { BedrockRuntimeClient, ConverseCommand } from '@aws-sdk/client-bedrock-runtime';
|
|
145
|
+
import { createTollgateClient, wrapBedrock } from '@tollgateai/sdk';
|
|
146
|
+
|
|
147
|
+
const tollgate = createTollgateClient();
|
|
148
|
+
const bedrock = wrapBedrock(
|
|
149
|
+
new BedrockRuntimeClient({ region: 'us-east-1' }),
|
|
150
|
+
tollgate,
|
|
151
|
+
{ customerId: 'cust_acme' },
|
|
152
|
+
);
|
|
153
|
+
|
|
154
|
+
await bedrock.send(new ConverseCommand({
|
|
155
|
+
modelId: 'anthropic.claude-3-5-sonnet-20241022-v2:0',
|
|
156
|
+
messages: [{ role: 'user', content: [{ text: 'Hello' }] }],
|
|
157
|
+
}));
|
|
101
158
|
```
|
|
102
159
|
|
|
103
160
|
### Streaming
|
|
104
161
|
|
|
105
|
-
Streaming is captured automatically
|
|
106
|
-
|
|
107
|
-
**
|
|
162
|
+
Streaming is captured automatically — iterate the stream as usual and usage is reported when the stream ends.
|
|
163
|
+
|
|
164
|
+
**OpenAI / compatible** requires `stream_options: { include_usage: true }` for the final usage chunk. **Anthropic** and **Bedrock** need no extra flags.
|
|
108
165
|
|
|
109
166
|
```ts
|
|
110
|
-
const stream = await
|
|
111
|
-
model: 'gpt-4o',
|
|
167
|
+
const stream = await openai.chat.completions.create({
|
|
168
|
+
model: 'gpt-4o',
|
|
169
|
+
stream: true,
|
|
170
|
+
stream_options: { include_usage: true },
|
|
112
171
|
messages: [{ role: 'user', content: 'Hello' }],
|
|
113
172
|
});
|
|
114
|
-
for await (const chunk of stream) { /*
|
|
173
|
+
for await (const chunk of stream) { /* render to UI */ }
|
|
174
|
+
// Usage reported automatically when stream ends.
|
|
115
175
|
```
|
|
116
176
|
|
|
117
|
-
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## What Gets Tracked
|
|
180
|
+
|
|
181
|
+
Every auto-instrumented call captures the following from the provider response:
|
|
182
|
+
|
|
183
|
+
| Field | Source | Description |
|
|
184
|
+
|---|---|---|
|
|
185
|
+
| `tokensIn` | `usage.input_tokens` / `prompt_tokens` | Input tokens consumed |
|
|
186
|
+
| `tokensOut` | `usage.output_tokens` / `completion_tokens` | Output tokens generated |
|
|
187
|
+
| `reasoningTokens` | `completion_tokens_details.reasoning_tokens` | Reasoning/chain-of-thought tokens (OpenAI) |
|
|
188
|
+
| `cachedTokens` | `cache_read_input_tokens` / `cached_tokens` | Prompt cache read tokens |
|
|
189
|
+
| `cacheWrite5mTokens` | `cache_creation.ephemeral_5m_input_tokens` (falls back to `cache_creation_input_tokens`) / `cacheWriteInputTokens` | 5-min TTL cache write tokens |
|
|
190
|
+
| `cacheWrite1hTokens` | `cache_creation.ephemeral_1h_input_tokens` | 1-hour TTL cache write tokens |
|
|
191
|
+
| `toolCalls` | Content block / choice inspection | Number of tool calls in the response |
|
|
192
|
+
| `provider` | Wrapper default or override | `anthropic`, `openai`, `openai_compatible`, `bedrock` |
|
|
193
|
+
| `model` | Response object | Model identifier as reported by the provider |
|
|
194
|
+
|
|
195
|
+
Cost is computed **server-side** from token counts and a rate card that auto-syncs daily from the public LiteLLM registry. Unknown models are priced at $0 and flagged in logs.
|
|
118
196
|
|
|
119
|
-
|
|
120
|
-
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Outcome-Based Pricing
|
|
200
|
+
|
|
201
|
+
Under per-resolution pricing, only a **resolved** run earns revenue. An escalated or failed run earns $0 but its provider cost still counts. The pattern:
|
|
202
|
+
|
|
203
|
+
1. **Wrap** to meter cost on every LLM call (automatic).
|
|
204
|
+
2. **Resolve** once at the end to book the outcome.
|
|
121
205
|
|
|
122
206
|
```ts
|
|
123
|
-
|
|
124
|
-
|
|
207
|
+
const runId = 'ticket_8842';
|
|
208
|
+
const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
209
|
+
customerId: 'cust_acme',
|
|
210
|
+
runId,
|
|
211
|
+
});
|
|
125
212
|
|
|
126
|
-
|
|
127
|
-
|
|
213
|
+
// … multiple LLM calls within this run …
|
|
214
|
+
|
|
215
|
+
await tollgate.resolve({
|
|
216
|
+
runId,
|
|
217
|
+
customerId: 'cust_acme',
|
|
218
|
+
outcome: 'resolved', // 'resolved' | 'escalated' | 'failed'
|
|
219
|
+
revenueUnitCents: 50,
|
|
220
|
+
});
|
|
128
221
|
```
|
|
129
222
|
|
|
130
|
-
|
|
223
|
+
For simple per-call billing, pass `revenueUnitCents` in the wrap options and skip `resolve()`.
|
|
131
224
|
|
|
132
|
-
|
|
133
|
-
uses it verbatim, skipping the rate card entirely.
|
|
225
|
+
---
|
|
134
226
|
|
|
135
|
-
##
|
|
227
|
+
## Customer & Plan Setup
|
|
136
228
|
|
|
137
|
-
|
|
229
|
+
Create customers and assign plans **before** sending usage so plan-priced revenue is recognized from the first event. Idempotent — safe to run on every boot.
|
|
138
230
|
|
|
139
231
|
```ts
|
|
140
|
-
|
|
232
|
+
await tollgate.upsertCustomer({
|
|
233
|
+
customerId: 'cust_acme',
|
|
234
|
+
name: 'Acme Corp',
|
|
235
|
+
company: 'Acme Corp',
|
|
236
|
+
seats: 5,
|
|
237
|
+
plan: {
|
|
238
|
+
name: 'Pro Plan',
|
|
239
|
+
pricingModel: 'usage_based', // per_unit | per_resolution | usage_based | per_seat | flat | hybrid
|
|
240
|
+
unitRevenueCents: 10,
|
|
241
|
+
},
|
|
242
|
+
});
|
|
243
|
+
```
|
|
141
244
|
|
|
142
|
-
|
|
245
|
+
---
|
|
143
246
|
|
|
247
|
+
## Manual Tracking
|
|
248
|
+
|
|
249
|
+
For full control, unusual providers, or non-LLM cost events:
|
|
250
|
+
|
|
251
|
+
```ts
|
|
144
252
|
await tollgate.track({
|
|
145
|
-
customerId: '
|
|
253
|
+
customerId: 'cust_acme',
|
|
146
254
|
runId: 'run_12345',
|
|
147
255
|
provider: 'anthropic',
|
|
148
256
|
model: 'claude-sonnet-4-6',
|
|
@@ -150,35 +258,83 @@ await tollgate.track({
|
|
|
150
258
|
tokensOut: 450,
|
|
151
259
|
reasoningTokens: 0,
|
|
152
260
|
cachedTokens: 0,
|
|
261
|
+
toolCalls: 2,
|
|
153
262
|
revenueUnitCents: 50,
|
|
154
|
-
idempotencyKey: 'run_12345#step_1',
|
|
263
|
+
idempotencyKey: 'run_12345#step_1',
|
|
155
264
|
});
|
|
156
265
|
```
|
|
157
266
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
267
|
+
### Already have an exact cost?
|
|
268
|
+
|
|
269
|
+
Pass `providerCostCents` (a number or a function of the response) and the server uses it verbatim, skipping the rate card entirely:
|
|
270
|
+
|
|
271
|
+
```ts
|
|
272
|
+
const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
273
|
+
customerId: 'cust_acme',
|
|
274
|
+
providerCostCents: 3.5, // or: (response) => computeMyOwnCost(response)
|
|
275
|
+
});
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## API Reference
|
|
281
|
+
|
|
282
|
+
### Exports
|
|
283
|
+
|
|
284
|
+
```ts
|
|
285
|
+
// Client
|
|
286
|
+
createTollgateClient(options?) // → TollgateClient
|
|
287
|
+
TollgateError // Custom error with status & body
|
|
288
|
+
|
|
289
|
+
// Auto-instrumentation wrappers
|
|
290
|
+
wrapAnthropic(client, tollgate, options) // → instrumented Anthropic client
|
|
291
|
+
wrapOpenAI(client, tollgate, options) // → instrumented OpenAI / compatible client
|
|
292
|
+
wrapBedrock(client, tollgate, options) // → instrumented Bedrock Runtime client
|
|
293
|
+
|
|
294
|
+
// Low-level event builders (for manual track payloads)
|
|
295
|
+
anthropicEventFrom(msg, options) // → TrackEventInput | null
|
|
296
|
+
openAIEventFrom(completion, options) // → TrackEventInput | null
|
|
297
|
+
bedrockEventFrom(usage, model, options) // → TrackEventInput | null
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
### TollgateClient
|
|
301
|
+
|
|
302
|
+
| Method | Description |
|
|
303
|
+
|---|---|
|
|
304
|
+
| `track(event)` | Report a single usage event. Idempotent on `idempotencyKey`. |
|
|
305
|
+
| `resolve(input)` | Close a run with an outcome. Books revenue only when `outcome` is `'resolved'`. |
|
|
306
|
+
| `upsertCustomer(input)` | Create or update a customer and optionally assign a plan. |
|
|
307
|
+
|
|
308
|
+
### InstrumentOptions
|
|
309
|
+
|
|
310
|
+
| Field | Type | Required | Description |
|
|
311
|
+
|---|---|---|---|
|
|
312
|
+
| `customerId` | `string` | Yes | Your end customer's stable identifier. |
|
|
313
|
+
| `agentId` | `string` | No | Agent or workflow identifier. |
|
|
314
|
+
| `runId` | `string \| () => string` | No | Logical run ID. Defaults to the provider response ID. |
|
|
315
|
+
| `provider` | `Provider` | No | Override the reported provider (e.g. `'openai_compatible'`). |
|
|
316
|
+
| `revenueUnitCents` | `number \| (response) => number` | No | Revenue per call in cents. |
|
|
317
|
+
| `providerCostCents` | `number \| (response) => number` | No | Exact cost override — skips rate card. |
|
|
318
|
+
| `onError` | `(err) => void` | No | Error handler for background tracking (default: `console.warn`). |
|
|
319
|
+
|
|
320
|
+
---
|
|
321
|
+
|
|
322
|
+
## How It Works
|
|
323
|
+
|
|
324
|
+
1. **Proxy wrappers** intercept `messages.create` / `chat.completions.create` / `send` without modifying the request or response.
|
|
325
|
+
2. After the provider responds, the wrapper extracts token counts, tool call counts, and metadata from the response's `usage` object and content blocks.
|
|
326
|
+
3. A `POST /api/track` is fired **in the background** — non-blocking, with automatic retries on transient failures.
|
|
327
|
+
4. The server computes cost from tokens via rate cards, joins it with your plan-configured revenue, and updates real-time margin rollups.
|
|
328
|
+
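5. Events are **idempotent** on `idempotencyKey`. For Anthropic and OpenAI it is auto-set to the provider response ID, so retries and stream replays of the same response never double-count. When a response carries no ID — and always for Bedrock — a random per-event key is generated instead, so only retries of that same event are deduplicated.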
|
|
329
|
+
|
|
330
|
+
## Privacy & Security
|
|
331
|
+
|
|
332
|
+
- **No prompt content is ever sent.** Only token counts, model name, and metadata.
|
|
333
|
+
- Events are deduplicated server-side — safe to retry.
|
|
334
|
+
- Background tracking never throws into your application code.
|
|
335
|
+
|
|
336
|
+
---
|
|
337
|
+
|
|
338
|
+
## License
|
|
339
|
+
|
|
340
|
+
Licensed for use with Tollgate.
|
package/dist/index.cjs
CHANGED
|
@@ -23,7 +23,7 @@ function createTollgateClient(opts = {}) {
|
|
|
23
23
|
if (typeof doFetch !== "function") {
|
|
24
24
|
throw new TollgateError("No fetch implementation available \u2014 pass `fetch` in options.");
|
|
25
25
|
}
|
|
26
|
-
async function
|
|
26
|
+
async function postJson(path, body) {
|
|
27
27
|
if (!apiKey) {
|
|
28
28
|
throw new TollgateError("Missing API key \u2014 set opts.apiKey or TOLLGATE_API_KEY.");
|
|
29
29
|
}
|
|
@@ -32,23 +32,23 @@ function createTollgateClient(opts = {}) {
|
|
|
32
32
|
const controller = new AbortController();
|
|
33
33
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
34
34
|
try {
|
|
35
|
-
const res = await doFetch(`${baseUrl}
|
|
35
|
+
const res = await doFetch(`${baseUrl}${path}`, {
|
|
36
36
|
method: "POST",
|
|
37
37
|
headers: {
|
|
38
38
|
"Content-Type": "application/json",
|
|
39
39
|
Authorization: `Bearer ${apiKey}`
|
|
40
40
|
},
|
|
41
|
-
body: JSON.stringify(
|
|
41
|
+
body: JSON.stringify(body),
|
|
42
42
|
signal: controller.signal
|
|
43
43
|
});
|
|
44
44
|
if (res.ok) {
|
|
45
45
|
return await res.json();
|
|
46
46
|
}
|
|
47
47
|
if (res.status >= 500 || res.status === 429) {
|
|
48
|
-
lastErr = new TollgateError(`Tollgate
|
|
48
|
+
lastErr = new TollgateError(`Tollgate request failed (${res.status})`, res.status);
|
|
49
49
|
} else {
|
|
50
|
-
const
|
|
51
|
-
throw new TollgateError(`Tollgate
|
|
50
|
+
const errBody = await res.json().catch(() => ({}));
|
|
51
|
+
throw new TollgateError(`Tollgate request failed (${res.status})`, res.status, errBody);
|
|
52
52
|
}
|
|
53
53
|
} catch (err) {
|
|
54
54
|
if (err instanceof TollgateError && err.status && err.status < 500 && err.status !== 429) {
|
|
@@ -62,7 +62,13 @@ function createTollgateClient(opts = {}) {
|
|
|
62
62
|
await sleep(2 ** attempt * 200);
|
|
63
63
|
}
|
|
64
64
|
}
|
|
65
|
-
throw lastErr instanceof Error ? lastErr : new TollgateError("Tollgate
|
|
65
|
+
throw lastErr instanceof Error ? lastErr : new TollgateError("Tollgate request failed after retries");
|
|
66
|
+
}
|
|
67
|
+
function track(event) {
|
|
68
|
+
return postJson("/api/track", event);
|
|
69
|
+
}
|
|
70
|
+
function upsertCustomer(input) {
|
|
71
|
+
return postJson("/api/sdk/customer", input);
|
|
66
72
|
}
|
|
67
73
|
function resolve(input) {
|
|
68
74
|
return track({
|
|
@@ -80,7 +86,7 @@ function createTollgateClient(opts = {}) {
|
|
|
80
86
|
ts: input.ts
|
|
81
87
|
});
|
|
82
88
|
}
|
|
83
|
-
return { track, resolve };
|
|
89
|
+
return { track, resolve, upsertCustomer };
|
|
84
90
|
}
|
|
85
91
|
|
|
86
92
|
// src/instrument.ts
|
|
@@ -155,6 +161,7 @@ function anthropicEventFrom(msg, opts) {
|
|
|
155
161
|
const fivem = usage.cache_creation?.ephemeral_5m_input_tokens;
|
|
156
162
|
const oneh = usage.cache_creation?.ephemeral_1h_input_tokens;
|
|
157
163
|
const hasSplit = fivem !== void 0 || oneh !== void 0;
|
|
164
|
+
const toolCalls = Array.isArray(msg.content) ? msg.content.filter((b) => b.type === "tool_use").length : 0;
|
|
158
165
|
const event = {
|
|
159
166
|
customerId: opts.customerId,
|
|
160
167
|
agentId: opts.agentId,
|
|
@@ -166,6 +173,7 @@ function anthropicEventFrom(msg, opts) {
|
|
|
166
173
|
cachedTokens: usage.cache_read_input_tokens ?? 0,
|
|
167
174
|
cacheWrite5mTokens: hasSplit ? fivem ?? 0 : usage.cache_creation_input_tokens ?? 0,
|
|
168
175
|
cacheWrite1hTokens: hasSplit ? oneh ?? 0 : 0,
|
|
176
|
+
toolCalls,
|
|
169
177
|
revenueUnitCents: resolveRevenue(opts, msg),
|
|
170
178
|
idempotencyKey: msg.id ?? `${runId}#${randomId()}`
|
|
171
179
|
};
|
|
@@ -178,6 +186,7 @@ function wrapAnthropic(client, tollgate, opts) {
|
|
|
178
186
|
const result = await original(...args);
|
|
179
187
|
if (isAsyncIterable(result)) {
|
|
180
188
|
const msg = {};
|
|
189
|
+
const toolUseBlocks = [];
|
|
181
190
|
return instrumentStream(
|
|
182
191
|
result,
|
|
183
192
|
(ev) => {
|
|
@@ -187,9 +196,12 @@ function wrapAnthropic(client, tollgate, opts) {
|
|
|
187
196
|
msg.usage = { ...ev.message.usage };
|
|
188
197
|
} else if (ev.type === "message_delta" && ev.usage) {
|
|
189
198
|
msg.usage = { ...msg.usage ?? {}, output_tokens: ev.usage.output_tokens };
|
|
199
|
+
} else if (ev.type === "content_block_start" && ev.content_block?.type === "tool_use") {
|
|
200
|
+
toolUseBlocks.push(ev.content_block);
|
|
190
201
|
}
|
|
191
202
|
},
|
|
192
203
|
() => {
|
|
204
|
+
msg.content = toolUseBlocks;
|
|
193
205
|
const event2 = anthropicEventFrom(msg, opts);
|
|
194
206
|
if (event2) fireAndForget(tollgate.track(event2), opts.onError);
|
|
195
207
|
}
|
|
@@ -214,6 +226,7 @@ function openAIEventFrom(completion, opts) {
|
|
|
214
226
|
const usage = completion?.usage;
|
|
215
227
|
if (!usage) return null;
|
|
216
228
|
const runId = resolveRunId(opts, completion.id);
|
|
229
|
+
const toolCalls = completion.choices?.[0]?.message?.tool_calls?.length ?? 0;
|
|
217
230
|
const event = {
|
|
218
231
|
customerId: opts.customerId,
|
|
219
232
|
agentId: opts.agentId,
|
|
@@ -224,6 +237,7 @@ function openAIEventFrom(completion, opts) {
|
|
|
224
237
|
tokensOut: usage.completion_tokens ?? 0,
|
|
225
238
|
reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
|
|
226
239
|
cachedTokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
|
|
240
|
+
toolCalls,
|
|
227
241
|
revenueUnitCents: resolveRevenue(opts, completion),
|
|
228
242
|
idempotencyKey: completion.id ?? `${runId}#${randomId()}`
|
|
229
243
|
};
|
|
@@ -238,16 +252,26 @@ function wrapOpenAI(client, tollgate, opts) {
|
|
|
238
252
|
let id;
|
|
239
253
|
let model;
|
|
240
254
|
let usage;
|
|
255
|
+
const toolCallIndices = /* @__PURE__ */ new Set();
|
|
241
256
|
return instrumentStream(
|
|
242
257
|
result,
|
|
243
258
|
(chunk) => {
|
|
244
259
|
if (chunk.id) id = chunk.id;
|
|
245
260
|
if (chunk.model) model = chunk.model;
|
|
246
261
|
if (chunk.usage) usage = chunk.usage;
|
|
262
|
+
for (const c of chunk.choices ?? []) {
|
|
263
|
+
for (const tc of c.delta?.tool_calls ?? []) {
|
|
264
|
+
if (tc.index !== void 0) toolCallIndices.add(tc.index);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
247
267
|
},
|
|
248
268
|
() => {
|
|
249
269
|
if (!usage) return;
|
|
250
|
-
const
|
|
270
|
+
const synth = { id, model, usage };
|
|
271
|
+
if (toolCallIndices.size > 0) {
|
|
272
|
+
synth.choices = [{ message: { tool_calls: new Array(toolCallIndices.size) } }];
|
|
273
|
+
}
|
|
274
|
+
const event2 = openAIEventFrom(synth, opts);
|
|
251
275
|
if (event2) fireAndForget(tollgate.track(event2), opts.onError);
|
|
252
276
|
}
|
|
253
277
|
);
|
|
@@ -270,7 +294,7 @@ function wrapOpenAI(client, tollgate, opts) {
|
|
|
270
294
|
}
|
|
271
295
|
});
|
|
272
296
|
}
|
|
273
|
-
function bedrockEventFrom(usage, model, opts, response = void 0) {
|
|
297
|
+
function bedrockEventFrom(usage, model, opts, response = void 0, toolCalls = 0) {
|
|
274
298
|
if (!usage) return null;
|
|
275
299
|
const runId = resolveRunId(opts, void 0);
|
|
276
300
|
const event = {
|
|
@@ -283,6 +307,7 @@ function bedrockEventFrom(usage, model, opts, response = void 0) {
|
|
|
283
307
|
tokensOut: usage.outputTokens ?? 0,
|
|
284
308
|
cachedTokens: usage.cacheReadInputTokens ?? 0,
|
|
285
309
|
cacheWrite5mTokens: usage.cacheWriteInputTokens ?? 0,
|
|
310
|
+
toolCalls,
|
|
286
311
|
revenueUnitCents: resolveRevenue(opts, response),
|
|
287
312
|
idempotencyKey: `${runId}#${randomId()}`
|
|
288
313
|
};
|
|
@@ -295,20 +320,23 @@ function wrapBedrock(client, tollgate, opts) {
|
|
|
295
320
|
const model = command?.input?.modelId ?? "unknown";
|
|
296
321
|
if (result?.stream && isAsyncIterable(result.stream)) {
|
|
297
322
|
let usage;
|
|
323
|
+
let streamToolCalls = 0;
|
|
298
324
|
result.stream = instrumentStream(
|
|
299
325
|
result.stream,
|
|
300
326
|
(ev) => {
|
|
301
327
|
if (ev.metadata?.usage) usage = ev.metadata.usage;
|
|
328
|
+
if (ev.contentBlockStart?.start?.toolUse) streamToolCalls++;
|
|
302
329
|
},
|
|
303
330
|
() => {
|
|
304
|
-
const event = bedrockEventFrom(usage, model, opts, result);
|
|
331
|
+
const event = bedrockEventFrom(usage, model, opts, result, streamToolCalls);
|
|
305
332
|
if (event) fireAndForget(tollgate.track(event), opts.onError);
|
|
306
333
|
}
|
|
307
334
|
);
|
|
308
335
|
return result;
|
|
309
336
|
}
|
|
310
337
|
if (result?.usage) {
|
|
311
|
-
const
|
|
338
|
+
const tc = result.output?.message?.content?.filter((b) => b.toolUse != null).length ?? 0;
|
|
339
|
+
const event = bedrockEventFrom(result.usage, model, opts, result, tc);
|
|
312
340
|
if (event) fireAndForget(tollgate.track(event), opts.onError);
|
|
313
341
|
}
|
|
314
342
|
return result;
|