@tollgateai/sdk 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +113 -87
- package/dist/index.cjs +138 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +76 -2
- package/dist/index.d.ts +76 -2
- package/dist/index.js +137 -10
- package/dist/index.js.map +1 -1
- package/package.json +5 -3
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tollgate
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
> Real-time gross-margin observability for AI agents. Track every LLM call's cost, attribute it to a customer, and see whether you're making money — before the invoice goes out.
|
|
4
4
|
|
|
5
|
-
**v0.
|
|
5
|
+
**v0.5.0** · [npm](https://www.npmjs.com/package/@tollgateai/sdk) · [Dashboard](https://tollgateai.vercel.app)
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
## Why Tollgate
|
|
10
10
|
|
|
11
|
-
You sell an AI-powered product. Each customer interaction triggers LLM calls that cost you real money — input tokens, output tokens, reasoning tokens, cached tokens, tool calls. Tollgate captures that cost automatically from provider responses, joins it with the revenue your pricing model defines, and shows you per-customer, per-agent, per-run gross margin in real time.
|
|
11
|
+
You sell an AI-powered product. Each customer interaction triggers LLM calls that cost you real money — input tokens, output tokens, reasoning tokens, audio tokens, cached tokens, web searches, tool calls. Tollgate captures that cost automatically from provider responses, joins it with the revenue your pricing model defines, and shows you per-customer, per-agent, per-run gross margin in real time.
|
|
12
12
|
|
|
13
13
|
## Installation
|
|
14
14
|
|
|
@@ -34,11 +34,11 @@ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
|
34
34
|
runId: 'ticket_8842',
|
|
35
35
|
});
|
|
36
36
|
|
|
37
|
-
// Every call is tracked automatically — tokens, cost, tool calls.
|
|
37
|
+
// Every call is tracked automatically — tokens, cost, latency, tool calls.
|
|
38
38
|
const msg = await anthropic.messages.create({
|
|
39
39
|
model: 'claude-sonnet-4-6',
|
|
40
40
|
max_tokens: 1024,
|
|
41
|
-
messages: [{ role: 'user', content: 'Resolve this billing dispute
|
|
41
|
+
messages: [{ role: 'user', content: 'Resolve this billing dispute...' }],
|
|
42
42
|
});
|
|
43
43
|
|
|
44
44
|
// Close the run and book revenue.
|
|
@@ -52,12 +52,13 @@ await tollgate.resolve({
|
|
|
52
52
|
|
|
53
53
|
## Provider Support
|
|
54
54
|
|
|
55
|
-
| Provider | Wrapper | Streaming |
|
|
55
|
+
| Provider | Wrapper | Streaming | What Gets Extracted |
|
|
56
56
|
|---|---|---|---|
|
|
57
|
-
| Anthropic | `wrapAnthropic` | Automatic |
|
|
58
|
-
| OpenAI | `wrapOpenAI` |
|
|
59
|
-
|
|
|
60
|
-
|
|
|
57
|
+
| **Anthropic** | `wrapAnthropic` | Automatic | Tokens, cache (read + write by TTL), web search requests, tool calls, latency |
|
|
58
|
+
| **OpenAI** | `wrapOpenAI` | `stream_options: { include_usage: true }` | Tokens, reasoning, cached, audio in/out, text in/out, prediction tokens, service tier, tool calls, latency |
|
|
59
|
+
| **Google Gemini** | `wrapGemini` | Automatic | Tokens, thinking, cached, audio/image/video per-modality, web search (grounding), tool calls, latency |
|
|
60
|
+
| **OpenAI-compatible** | `wrapOpenAI` + `provider: 'openai_compatible'` | Same as OpenAI | Same as OpenAI |
|
|
61
|
+
| **AWS Bedrock** | `wrapBedrock` | Automatic | Tokens, cache (read + write), tool calls, latency |
|
|
61
62
|
|
|
62
63
|
## Configuration
|
|
63
64
|
|
|
@@ -81,7 +82,7 @@ const tollgate = createTollgateClient({
|
|
|
81
82
|
|
|
82
83
|
## Auto-Instrumentation
|
|
83
84
|
|
|
84
|
-
Wrap your provider client once. Every `create` call reports usage in the background — non-blocking, fire-and-forget. Failures go to `onError` (default: `console.warn`) and never break your LLM call.
|
|
85
|
+
Wrap your provider client once. Every `create` / `generateContent` call reports usage in the background — non-blocking, fire-and-forget. Failures go to `onError` (default: `console.warn`) and never break your LLM call.
|
|
85
86
|
|
|
86
87
|
### Anthropic
|
|
87
88
|
|
|
@@ -98,7 +99,7 @@ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
|
98
99
|
await anthropic.messages.create({
|
|
99
100
|
model: 'claude-sonnet-4-6',
|
|
100
101
|
max_tokens: 512,
|
|
101
|
-
messages: [{ role: 'user', content: 'Summarize this ticket
|
|
102
|
+
messages: [{ role: 'user', content: 'Summarize this ticket...' }],
|
|
102
103
|
});
|
|
103
104
|
```
|
|
104
105
|
|
|
@@ -117,6 +118,23 @@ await openai.chat.completions.create({
|
|
|
117
118
|
});
|
|
118
119
|
```
|
|
119
120
|
|
|
121
|
+
### Google Gemini
|
|
122
|
+
|
|
123
|
+
```ts
|
|
124
|
+
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
125
|
+
import { createTollgateClient, wrapGemini } from '@tollgateai/sdk';
|
|
126
|
+
|
|
127
|
+
const tollgate = createTollgateClient();
|
|
128
|
+
const genai = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
|
|
129
|
+
const model = wrapGemini(
|
|
130
|
+
genai.getGenerativeModel({ model: 'gemini-2.0-flash' }),
|
|
131
|
+
tollgate,
|
|
132
|
+
{ customerId: 'cust_acme' },
|
|
133
|
+
);
|
|
134
|
+
|
|
135
|
+
const result = await model.generateContent('Explain quantum computing');
|
|
136
|
+
```
|
|
137
|
+
|
|
120
138
|
### OpenAI-Compatible Gateways
|
|
121
139
|
|
|
122
140
|
Point the OpenAI SDK at any compatible endpoint and set `provider: 'openai_compatible'`:
|
|
@@ -159,9 +177,9 @@ await bedrock.send(new ConverseCommand({
|
|
|
159
177
|
|
|
160
178
|
### Streaming
|
|
161
179
|
|
|
162
|
-
Streaming is captured automatically
|
|
180
|
+
Streaming is captured automatically. Iterate the stream as usual — usage and latency are reported when the stream ends.
|
|
163
181
|
|
|
164
|
-
**OpenAI / compatible** requires `stream_options: { include_usage: true }
|
|
182
|
+
**OpenAI / compatible** requires `stream_options: { include_usage: true }`. **Anthropic**, **Gemini**, and **Bedrock** need no extra flags.
|
|
165
183
|
|
|
166
184
|
```ts
|
|
167
185
|
const stream = await openai.chat.completions.create({
|
|
@@ -171,37 +189,45 @@ const stream = await openai.chat.completions.create({
|
|
|
171
189
|
messages: [{ role: 'user', content: 'Hello' }],
|
|
172
190
|
});
|
|
173
191
|
for await (const chunk of stream) { /* render to UI */ }
|
|
174
|
-
// Usage reported automatically when stream ends.
|
|
192
|
+
// Usage + latency reported automatically when stream ends.
|
|
175
193
|
```
|
|
176
194
|
|
|
177
195
|
---
|
|
178
196
|
|
|
179
197
|
## What Gets Tracked
|
|
180
198
|
|
|
181
|
-
Every auto-instrumented call captures
|
|
199
|
+
Every auto-instrumented call captures these fields from the provider response:
|
|
182
200
|
|
|
183
|
-
| Field |
|
|
201
|
+
| Field | Providers | Description |
|
|
184
202
|
|---|---|---|
|
|
185
|
-
| `tokensIn` |
|
|
186
|
-
| `tokensOut` |
|
|
187
|
-
| `reasoningTokens` |
|
|
188
|
-
| `cachedTokens` |
|
|
189
|
-
| `cacheWrite5mTokens` |
|
|
190
|
-
| `cacheWrite1hTokens` |
|
|
191
|
-
| `
|
|
192
|
-
| `
|
|
193
|
-
| `
|
|
194
|
-
|
|
195
|
-
|
|
203
|
+
| `tokensIn` | All | Input tokens consumed |
|
|
204
|
+
| `tokensOut` | All | Output tokens generated |
|
|
205
|
+
| `reasoningTokens` | OpenAI, Gemini | Reasoning/thinking tokens (billed at output rate) |
|
|
206
|
+
| `cachedTokens` | All | Prompt cache read tokens (reduced rate) |
|
|
207
|
+
| `cacheWrite5mTokens` | Anthropic, Bedrock | 5-min TTL cache creation tokens |
|
|
208
|
+
| `cacheWrite1hTokens` | Anthropic | 1-hour TTL cache creation tokens |
|
|
209
|
+
| `audioTokensIn` | OpenAI, Gemini | Audio input tokens (GPT-4o audio / Realtime; Gemini audio prompts) |
|
|
210
|
+
| `audioTokensOut` | OpenAI, Gemini | Audio output tokens |
|
|
211
|
+
| `imageTokensIn` | Gemini | Image/vision input tokens |
|
|
212
|
+
| `imageTokensOut` | Gemini | Image generation output tokens |
|
|
213
|
+
| `videoTokensIn` | Gemini | Video input tokens |
|
|
214
|
+
| `textTokensIn` | OpenAI, Gemini | Text-only input tokens (modality split) |
|
|
215
|
+
| `textTokensOut` | OpenAI, Gemini | Text-only output tokens |
|
|
216
|
+
| `webSearchRequests` | Anthropic, Gemini | Web search requests (server tools / grounding) |
|
|
217
|
+
| `acceptedPredictionTokens` | OpenAI | Predicted Outputs: accepted tokens |
|
|
218
|
+
| `rejectedPredictionTokens` | OpenAI | Predicted Outputs: rejected tokens (waste) |
|
|
219
|
+
| `serviceTier` | OpenAI | Service tier used (`default`, `flex`, `priority`) |
|
|
220
|
+
| `latencyMs` | All | SDK-measured request duration in milliseconds |
|
|
221
|
+
| `toolCalls` | All | Number of tool calls in the response |
|
|
222
|
+
| `model` | All | Model identifier as reported by the provider |
|
|
223
|
+
|
|
224
|
+
Cost is computed **server-side** from token counts and a rate card that auto-syncs daily from the LiteLLM registry (1,500+ models). Rate cards include per-token pricing for text, audio, image, video, cache, reasoning, and web search. Unknown models are priced at $0 and flagged in logs.
|
|
196
225
|
|
|
197
226
|
---
|
|
198
227
|
|
|
199
228
|
## Outcome-Based Pricing
|
|
200
229
|
|
|
201
|
-
Under per-resolution pricing, only a **resolved** run earns revenue. An escalated or failed run earns $0 but its provider cost still counts.
|
|
202
|
-
|
|
203
|
-
1. **Wrap** to meter cost on every LLM call (automatic).
|
|
204
|
-
2. **Resolve** once at the end to book the outcome.
|
|
230
|
+
Under per-resolution pricing, only a **resolved** run earns revenue. An escalated or failed run earns $0 but its provider cost still counts.
|
|
205
231
|
|
|
206
232
|
```ts
|
|
207
233
|
const runId = 'ticket_8842';
|
|
@@ -210,7 +236,7 @@ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
|
210
236
|
runId,
|
|
211
237
|
});
|
|
212
238
|
|
|
213
|
-
//
|
|
239
|
+
// ... multiple LLM calls within this run ...
|
|
214
240
|
|
|
215
241
|
await tollgate.resolve({
|
|
216
242
|
runId,
|
|
@@ -224,54 +250,38 @@ For simple per-call billing, pass `revenueUnitCents` in the wrap options and ski
|
|
|
224
250
|
|
|
225
251
|
---
|
|
226
252
|
|
|
227
|
-
##
|
|
253
|
+
## External Tool Costs
|
|
228
254
|
|
|
229
|
-
|
|
255
|
+
Report costs from external services (image generation, code sandboxes, search APIs) alongside LLM calls:
|
|
230
256
|
|
|
231
257
|
```ts
|
|
232
|
-
await tollgate.
|
|
258
|
+
await tollgate.track({
|
|
233
259
|
customerId: 'cust_acme',
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
},
|
|
260
|
+
runId: 'ticket_8842',
|
|
261
|
+
provider: 'openai',
|
|
262
|
+
model: 'gpt-4o',
|
|
263
|
+
tokensIn: 500,
|
|
264
|
+
tokensOut: 200,
|
|
265
|
+
externalCostCents: 4.0, // $0.04 for the DALL-E call
|
|
266
|
+
idempotencyKey: 'ticket_8842#step_2',
|
|
242
267
|
});
|
|
243
268
|
```
|
|
244
269
|
|
|
245
270
|
---
|
|
246
271
|
|
|
247
|
-
##
|
|
248
|
-
|
|
249
|
-
For full control, unusual providers, or non-LLM cost events:
|
|
250
|
-
|
|
251
|
-
```ts
|
|
252
|
-
await tollgate.track({
|
|
253
|
-
customerId: 'cust_acme',
|
|
254
|
-
runId: 'run_12345',
|
|
255
|
-
provider: 'anthropic',
|
|
256
|
-
model: 'claude-sonnet-4-6',
|
|
257
|
-
tokensIn: 1200,
|
|
258
|
-
tokensOut: 450,
|
|
259
|
-
reasoningTokens: 0,
|
|
260
|
-
cachedTokens: 0,
|
|
261
|
-
toolCalls: 2,
|
|
262
|
-
revenueUnitCents: 50,
|
|
263
|
-
idempotencyKey: 'run_12345#step_1',
|
|
264
|
-
});
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
### Already have an exact cost?
|
|
272
|
+
## Customer & Plan Setup
|
|
268
273
|
|
|
269
|
-
|
|
274
|
+
Create customers and assign plans before sending usage so plan-priced revenue is recognized from the first event. Idempotent.
|
|
270
275
|
|
|
271
276
|
```ts
|
|
272
|
-
|
|
277
|
+
await tollgate.upsertCustomer({
|
|
273
278
|
customerId: 'cust_acme',
|
|
274
|
-
|
|
279
|
+
name: 'Acme Corp',
|
|
280
|
+
plan: {
|
|
281
|
+
name: 'Pro Plan',
|
|
282
|
+
pricingModel: 'usage_based', // per_unit | per_resolution | usage_based | per_seat | flat | hybrid
|
|
283
|
+
unitRevenueCents: 10,
|
|
284
|
+
},
|
|
275
285
|
});
|
|
276
286
|
```
|
|
277
287
|
|
|
@@ -283,18 +293,20 @@ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
|
|
|
283
293
|
|
|
284
294
|
```ts
|
|
285
295
|
// Client
|
|
286
|
-
createTollgateClient(options?) //
|
|
296
|
+
createTollgateClient(options?) // -> TollgateClient
|
|
287
297
|
TollgateError // Custom error with status & body
|
|
288
298
|
|
|
289
299
|
// Auto-instrumentation wrappers
|
|
290
|
-
wrapAnthropic(client, tollgate, options)
|
|
291
|
-
wrapOpenAI(client, tollgate, options)
|
|
292
|
-
wrapBedrock(client, tollgate, options)
|
|
300
|
+
wrapAnthropic(client, tollgate, options) // -> instrumented Anthropic client
|
|
301
|
+
wrapOpenAI(client, tollgate, options) // -> instrumented OpenAI / compatible client
|
|
302
|
+
wrapBedrock(client, tollgate, options) // -> instrumented Bedrock Runtime client
|
|
303
|
+
wrapGemini(model, tollgate, options) // -> instrumented Gemini model
|
|
293
304
|
|
|
294
305
|
// Low-level event builders (for manual track payloads)
|
|
295
|
-
anthropicEventFrom(msg, options)
|
|
296
|
-
openAIEventFrom(completion, options)
|
|
297
|
-
bedrockEventFrom(usage, model, options)
|
|
306
|
+
anthropicEventFrom(msg, options) // -> TrackEventInput | null
|
|
307
|
+
openAIEventFrom(completion, options) // -> TrackEventInput | null
|
|
308
|
+
bedrockEventFrom(usage, model, options) // -> TrackEventInput | null
|
|
309
|
+
geminiEventFrom(response, options) // -> TrackEventInput | null
|
|
298
310
|
```
|
|
299
311
|
|
|
300
312
|
### TollgateClient
|
|
@@ -309,32 +321,46 @@ bedrockEventFrom(usage, model, options) // → TrackEventInput | null
|
|
|
309
321
|
|
|
310
322
|
| Field | Type | Required | Description |
|
|
311
323
|
|---|---|---|---|
|
|
312
|
-
| `customerId` | `string` | Yes | Your end customer's stable identifier
|
|
313
|
-
| `agentId` | `string` | No | Agent or workflow identifier
|
|
314
|
-
| `runId` | `string \| () => string` | No | Logical run ID
|
|
315
|
-
| `provider` | `Provider` | No | Override the reported provider
|
|
316
|
-
| `revenueUnitCents` | `number \| (response) => number` | No | Revenue per call in cents
|
|
317
|
-
| `providerCostCents` | `number \| (response) => number` | No | Exact cost override
|
|
318
|
-
| `onError` | `(err) => void` | No | Error handler for background tracking
|
|
324
|
+
| `customerId` | `string` | Yes | Your end customer's stable identifier |
|
|
325
|
+
| `agentId` | `string` | No | Agent or workflow identifier |
|
|
326
|
+
| `runId` | `string \| () => string` | No | Logical run ID (defaults to provider response ID) |
|
|
327
|
+
| `provider` | `Provider` | No | Override the reported provider |
|
|
328
|
+
| `revenueUnitCents` | `number \| (response) => number` | No | Revenue per call in cents |
|
|
329
|
+
| `providerCostCents` | `number \| (response) => number` | No | Exact cost override (skips rate card) |
|
|
330
|
+
| `onError` | `(err) => void` | No | Error handler for background tracking |
|
|
319
331
|
|
|
320
332
|
---
|
|
321
333
|
|
|
322
334
|
## How It Works
|
|
323
335
|
|
|
324
|
-
1. **Proxy wrappers** intercept
|
|
325
|
-
2. After the provider responds, the wrapper extracts token counts, tool
|
|
326
|
-
3. A `POST /api/track`
|
|
327
|
-
4. The server computes cost from tokens via rate cards, joins it with
|
|
328
|
-
5. Events are **idempotent** on `idempotencyKey` (auto-set to the provider response ID)
|
|
336
|
+
1. **Proxy wrappers** intercept provider calls without modifying the request or response.
|
|
337
|
+
2. After the provider responds, the wrapper extracts token counts (by modality), tool calls, service tier, and latency from the response.
|
|
338
|
+
3. A `POST /api/track` fires **in the background** with automatic retries on transient failures.
|
|
339
|
+
4. The server computes cost from tokens via rate cards (text, audio, image, video, cache, reasoning, web search), joins it with plan-configured revenue, and updates real-time margin rollups.
|
|
340
|
+
5. Events are **idempotent** on `idempotencyKey` (auto-set to the provider response ID).
|
|
329
341
|
|
|
330
342
|
## Privacy & Security
|
|
331
343
|
|
|
332
344
|
- **No prompt content is ever sent.** Only token counts, model name, and metadata.
|
|
333
|
-
- Events are deduplicated server-side
|
|
345
|
+
- Events are deduplicated server-side.
|
|
334
346
|
- Background tracking never throws into your application code.
|
|
335
347
|
|
|
336
348
|
---
|
|
337
349
|
|
|
350
|
+
## What's New in v0.5.0
|
|
351
|
+
|
|
352
|
+
- **Google Gemini / Vertex AI** support (`wrapGemini`) with full multimodal extraction
|
|
353
|
+
- **Audio token tracking** (OpenAI GPT-4o audio / Realtime API)
|
|
354
|
+
- **Image & video token tracking** (Gemini per-modality breakdowns)
|
|
355
|
+
- **Web search request tracking** (Anthropic `server_tool_use`, Gemini grounding)
|
|
356
|
+
- **Latency measurement** on all wrappers (SDK-measured `latencyMs`)
|
|
357
|
+
- **OpenAI Predicted Outputs** (`acceptedPredictionTokens` / `rejectedPredictionTokens`)
|
|
358
|
+
- **Service tier tracking** (OpenAI `flex` / `priority`, Anthropic `priority`)
|
|
359
|
+
- **Text modality split** for accurate cost attribution in mixed-modal requests
|
|
360
|
+
- Expanded rate card sync: audio, image, video, and web search rates from LiteLLM
|
|
361
|
+
|
|
362
|
+
---
|
|
363
|
+
|
|
338
364
|
## License
|
|
339
365
|
|
|
340
366
|
Released under the MIT License — see the `LICENSE` file shipped with the package.
|
package/dist/index.cjs
CHANGED
|
@@ -173,6 +173,7 @@ function anthropicEventFrom(msg, opts) {
|
|
|
173
173
|
cachedTokens: usage.cache_read_input_tokens ?? 0,
|
|
174
174
|
cacheWrite5mTokens: hasSplit ? fivem ?? 0 : usage.cache_creation_input_tokens ?? 0,
|
|
175
175
|
cacheWrite1hTokens: hasSplit ? oneh ?? 0 : 0,
|
|
176
|
+
webSearchRequests: usage.server_tool_use?.web_search_requests ?? 0,
|
|
176
177
|
toolCalls,
|
|
177
178
|
revenueUnitCents: resolveRevenue(opts, msg),
|
|
178
179
|
idempotencyKey: msg.id ?? `${runId}#${randomId()}`
|
|
@@ -183,6 +184,7 @@ function wrapAnthropic(client, tollgate, opts) {
|
|
|
183
184
|
const messages = client.messages;
|
|
184
185
|
const original = messages.create.bind(messages);
|
|
185
186
|
const create = async (...args) => {
|
|
187
|
+
const t0 = Date.now();
|
|
186
188
|
const result = await original(...args);
|
|
187
189
|
if (isAsyncIterable(result)) {
|
|
188
190
|
const msg = {};
|
|
@@ -203,12 +205,18 @@ function wrapAnthropic(client, tollgate, opts) {
|
|
|
203
205
|
() => {
|
|
204
206
|
msg.content = toolUseBlocks;
|
|
205
207
|
const event2 = anthropicEventFrom(msg, opts);
|
|
206
|
-
if (event2)
|
|
208
|
+
if (event2) {
|
|
209
|
+
event2.latencyMs = Date.now() - t0;
|
|
210
|
+
fireAndForget(tollgate.track(event2), opts.onError);
|
|
211
|
+
}
|
|
207
212
|
}
|
|
208
213
|
);
|
|
209
214
|
}
|
|
210
215
|
const event = anthropicEventFrom(result, opts);
|
|
211
|
-
if (event)
|
|
216
|
+
if (event) {
|
|
217
|
+
event.latencyMs = Date.now() - t0;
|
|
218
|
+
fireAndForget(tollgate.track(event), opts.onError);
|
|
219
|
+
}
|
|
212
220
|
return result;
|
|
213
221
|
};
|
|
214
222
|
return new Proxy(client, {
|
|
@@ -227,6 +235,8 @@ function openAIEventFrom(completion, opts) {
|
|
|
227
235
|
if (!usage) return null;
|
|
228
236
|
const runId = resolveRunId(opts, completion.id);
|
|
229
237
|
const toolCalls = completion.choices?.[0]?.message?.tool_calls?.length ?? 0;
|
|
238
|
+
const ptd = usage.prompt_tokens_details;
|
|
239
|
+
const ctd = usage.completion_tokens_details;
|
|
230
240
|
const event = {
|
|
231
241
|
customerId: opts.customerId,
|
|
232
242
|
agentId: opts.agentId,
|
|
@@ -235,8 +245,15 @@ function openAIEventFrom(completion, opts) {
|
|
|
235
245
|
model: completion.model ?? "unknown",
|
|
236
246
|
tokensIn: usage.prompt_tokens ?? 0,
|
|
237
247
|
tokensOut: usage.completion_tokens ?? 0,
|
|
238
|
-
reasoningTokens:
|
|
239
|
-
cachedTokens:
|
|
248
|
+
reasoningTokens: ctd?.reasoning_tokens ?? 0,
|
|
249
|
+
cachedTokens: ptd?.cached_tokens ?? 0,
|
|
250
|
+
audioTokensIn: ptd?.audio_tokens ?? 0,
|
|
251
|
+
audioTokensOut: ctd?.audio_tokens ?? 0,
|
|
252
|
+
textTokensIn: ptd?.text_tokens ?? 0,
|
|
253
|
+
textTokensOut: ctd?.text_tokens ?? 0,
|
|
254
|
+
acceptedPredictionTokens: ctd?.accepted_prediction_tokens ?? 0,
|
|
255
|
+
rejectedPredictionTokens: ctd?.rejected_prediction_tokens ?? 0,
|
|
256
|
+
serviceTier: completion.service_tier,
|
|
240
257
|
toolCalls,
|
|
241
258
|
revenueUnitCents: resolveRevenue(opts, completion),
|
|
242
259
|
idempotencyKey: completion.id ?? `${runId}#${randomId()}`
|
|
@@ -247,11 +264,13 @@ function wrapOpenAI(client, tollgate, opts) {
|
|
|
247
264
|
const completions = client.chat.completions;
|
|
248
265
|
const original = completions.create.bind(completions);
|
|
249
266
|
const create = async (...args) => {
|
|
267
|
+
const t0 = Date.now();
|
|
250
268
|
const result = await original(...args);
|
|
251
269
|
if (isAsyncIterable(result)) {
|
|
252
270
|
let id;
|
|
253
271
|
let model;
|
|
254
272
|
let usage;
|
|
273
|
+
let serviceTier;
|
|
255
274
|
const toolCallIndices = /* @__PURE__ */ new Set();
|
|
256
275
|
return instrumentStream(
|
|
257
276
|
result,
|
|
@@ -259,6 +278,7 @@ function wrapOpenAI(client, tollgate, opts) {
|
|
|
259
278
|
if (chunk.id) id = chunk.id;
|
|
260
279
|
if (chunk.model) model = chunk.model;
|
|
261
280
|
if (chunk.usage) usage = chunk.usage;
|
|
281
|
+
if (chunk.service_tier) serviceTier = chunk.service_tier;
|
|
262
282
|
for (const c of chunk.choices ?? []) {
|
|
263
283
|
for (const tc of c.delta?.tool_calls ?? []) {
|
|
264
284
|
if (tc.index !== void 0) toolCallIndices.add(tc.index);
|
|
@@ -267,17 +287,23 @@ function wrapOpenAI(client, tollgate, opts) {
|
|
|
267
287
|
},
|
|
268
288
|
() => {
|
|
269
289
|
if (!usage) return;
|
|
270
|
-
const synth = { id, model, usage };
|
|
290
|
+
const synth = { id, model, usage, service_tier: serviceTier };
|
|
271
291
|
if (toolCallIndices.size > 0) {
|
|
272
292
|
synth.choices = [{ message: { tool_calls: new Array(toolCallIndices.size) } }];
|
|
273
293
|
}
|
|
274
294
|
const event2 = openAIEventFrom(synth, opts);
|
|
275
|
-
if (event2)
|
|
295
|
+
if (event2) {
|
|
296
|
+
event2.latencyMs = Date.now() - t0;
|
|
297
|
+
fireAndForget(tollgate.track(event2), opts.onError);
|
|
298
|
+
}
|
|
276
299
|
}
|
|
277
300
|
);
|
|
278
301
|
}
|
|
279
302
|
const event = openAIEventFrom(result, opts);
|
|
280
|
-
if (event)
|
|
303
|
+
if (event) {
|
|
304
|
+
event.latencyMs = Date.now() - t0;
|
|
305
|
+
fireAndForget(tollgate.track(event), opts.onError);
|
|
306
|
+
}
|
|
281
307
|
return result;
|
|
282
308
|
};
|
|
283
309
|
return new Proxy(client, {
|
|
@@ -316,6 +342,7 @@ function bedrockEventFrom(usage, model, opts, response = void 0, toolCalls = 0)
|
|
|
316
342
|
function wrapBedrock(client, tollgate, opts) {
|
|
317
343
|
const originalSend = client.send.bind(client);
|
|
318
344
|
const send = async (command, ...rest) => {
|
|
345
|
+
const t0 = Date.now();
|
|
319
346
|
const result = await originalSend(command, ...rest);
|
|
320
347
|
const model = command?.input?.modelId ?? "unknown";
|
|
321
348
|
if (result?.stream && isAsyncIterable(result.stream)) {
|
|
@@ -329,7 +356,10 @@ function wrapBedrock(client, tollgate, opts) {
|
|
|
329
356
|
},
|
|
330
357
|
() => {
|
|
331
358
|
const event = bedrockEventFrom(usage, model, opts, result, streamToolCalls);
|
|
332
|
-
if (event)
|
|
359
|
+
if (event) {
|
|
360
|
+
event.latencyMs = Date.now() - t0;
|
|
361
|
+
fireAndForget(tollgate.track(event), opts.onError);
|
|
362
|
+
}
|
|
333
363
|
}
|
|
334
364
|
);
|
|
335
365
|
return result;
|
|
@@ -337,7 +367,10 @@ function wrapBedrock(client, tollgate, opts) {
|
|
|
337
367
|
if (result?.usage) {
|
|
338
368
|
const tc = result.output?.message?.content?.filter((b) => b.toolUse != null).length ?? 0;
|
|
339
369
|
const event = bedrockEventFrom(result.usage, model, opts, result, tc);
|
|
340
|
-
if (event)
|
|
370
|
+
if (event) {
|
|
371
|
+
event.latencyMs = Date.now() - t0;
|
|
372
|
+
fireAndForget(tollgate.track(event), opts.onError);
|
|
373
|
+
}
|
|
341
374
|
}
|
|
342
375
|
return result;
|
|
343
376
|
};
|
|
@@ -348,14 +381,110 @@ function wrapBedrock(client, tollgate, opts) {
|
|
|
348
381
|
}
|
|
349
382
|
});
|
|
350
383
|
}
|
|
384
|
+
/**
 * Sums `tokenCount` over the entries of a per-modality token breakdown
 * whose `modality` equals the requested one.
 *
 * @param {Array<{modality?: string, tokenCount?: number}>|null|undefined} details
 *   Per-modality breakdown (e.g. `usageMetadata.promptTokensDetails`).
 * @param {string} modality Modality label to match, e.g. "AUDIO" or "TEXT".
 * @returns {number} Total token count for that modality; 0 when `details`
 *   is missing, is not an array, or has no matching entry.
 */
function modalityTokens(details, modality) {
  // This helper runs synchronously inside the wrapped provider call, so a
  // malformed payload must yield 0 instead of throwing into the caller.
  if (!Array.isArray(details)) return 0;
  let total = 0;
  for (const entry of details) {
    if (entry?.modality === modality) total += entry.tokenCount ?? 0;
  }
  return total;
}
|
|
388
|
+
/**
 * Builds a Tollgate track event from a Gemini `generateContent` response.
 *
 * Accepts either the response body itself (an object carrying
 * `usageMetadata`) or an SDK result wrapper that nests the body under
 * `.response`.
 *
 * @param {object} response Gemini response body, or a `{ response }` wrapper.
 * @param {object} opts Wrap options; this function reads `customerId`,
 *   `agentId` and `provider`, and forwards `opts` to `resolveRunId`,
 *   `resolveRevenue` and `withCost`.
 * @returns {object|null} The result of `withCost(event, opts, response)`,
 *   or `null` when no `usageMetadata` is present.
 */
function geminiEventFrom(response, opts) {
  // The Google SDK result nests the body under `.response`; unwrap it so
  // usage is found no matter which of the two shapes the caller passes.
  const body =
    response?.usageMetadata == null && response?.response != null
      ? response.response
      : response;
  const usage = body?.usageMetadata;
  if (!usage) return null;

  // Use the provider-reported response id when there is one, as the
  // Anthropic/OpenAI builders do with `msg.id` / `completion.id`.
  // NOTE(review): assumes resolveRunId treats its second argument as a
  // fallback run id — confirm against its definition.
  const responseId = body.responseId;
  const runId = resolveRunId(opts, responseId);

  let toolCalls = 0;
  let webSearchRequests = 0;
  for (const candidate of body.candidates ?? []) {
    for (const part of candidate.content?.parts ?? []) {
      if (part?.functionCall != null) toolCalls += 1;
    }
    webSearchRequests += candidate.groundingMetadata?.webSearchQueries?.length ?? 0;
  }

  const promptDetails = usage.promptTokensDetails;
  const candidateDetails = usage.candidatesTokensDetails;
  const event = {
    customerId: opts.customerId,
    agentId: opts.agentId,
    runId,
    provider: opts.provider ?? "google",
    // Report the provider's model version when the response carries it.
    model: body.modelVersion ?? "unknown",
    tokensIn: usage.promptTokenCount ?? 0,
    tokensOut: usage.candidatesTokenCount ?? 0,
    reasoningTokens: usage.thoughtsTokenCount ?? 0,
    cachedTokens: usage.cachedContentTokenCount ?? 0,
    audioTokensIn: modalityTokens(promptDetails, "AUDIO"),
    audioTokensOut: modalityTokens(candidateDetails, "AUDIO"),
    imageTokensIn: modalityTokens(promptDetails, "IMAGE"),
    imageTokensOut: modalityTokens(candidateDetails, "IMAGE"),
    videoTokensIn: modalityTokens(promptDetails, "VIDEO"),
    textTokensIn: modalityTokens(promptDetails, "TEXT"),
    textTokensOut: modalityTokens(candidateDetails, "TEXT"),
    webSearchRequests,
    toolCalls,
    // Callbacks keep receiving exactly what the caller passed in.
    revenueUnitCents: resolveRevenue(opts, response),
    idempotencyKey: responseId ?? `${runId}#${randomId()}`
  };
  return withCost(event, opts, response);
}
|
|
426
|
+
/**
 * Wraps a Gemini model object so that `generateContent` and, when the
 * model has one, `generateContentStream` report usage to Tollgate in the
 * background. All other properties are forwarded to the original model.
 *
 * @param {object} model Gemini model instance exposing `generateContent`.
 * @param {object} tollgate Client whose `track(event)` receives the event.
 * @param {object} opts Wrap options, forwarded to `geminiEventFrom`;
 *   `opts.onError` is handed to `fireAndForget`.
 * @returns {object} A Proxy over `model` with the instrumented methods.
 */
function wrapGemini(model, tollgate, opts) {
  const modelName = model.model ?? "unknown";

  // Builds the event, stamps model and latency, and sends it in the background.
  const report = (payload, t0) => {
    const event = geminiEventFrom(payload, opts);
    if (!event) return;
    event.model = modelName;
    event.latencyMs = Date.now() - t0;
    fireAndForget(tollgate.track(event), opts.onError);
  };

  // Wraps an async-iterable chunk stream; the event is sent when it ends.
  const instrument = (iterable, t0) => {
    const accumulated = {};
    let toolCallCount = 0;
    let searchCount = 0;
    return instrumentStream(
      iterable,
      (chunk) => {
        if (chunk.usageMetadata) {
          // Later chunks overwrite earlier usage fields.
          Object.assign(accumulated, chunk.usageMetadata);
        }
        for (const c of chunk.candidates ?? []) {
          for (const p of c.content?.parts ?? []) {
            if (p.functionCall != null) toolCallCount++;
          }
          searchCount += c.groundingMetadata?.webSearchQueries?.length ?? 0;
        }
      },
      () => {
        // Synthesize a response-shaped object so the shared builder can be
        // reused; only the array lengths matter to it.
        const synth = {
          usageMetadata: accumulated,
          candidates: searchCount > 0 || toolCallCount > 0 ? [{ content: { parts: new Array(toolCallCount).fill({ functionCall: {} }) }, groundingMetadata: { webSearchQueries: new Array(searchCount) } }] : []
        };
        report(synth, t0);
      }
    );
  };

  // Produces the instrumented replacement for one model method.
  const intercept = (original) => async (...args) => {
    const t0 = Date.now();
    const result = await original(...args);
    if (isAsyncIterable(result)) {
      return instrument(result, t0);
    }
    if (result?.stream && isAsyncIterable(result.stream)) {
      // `generateContentStream` returns `{ stream, response }`. Swap the
      // stream in place so the rest of the result object is untouched.
      // NOTE(review): usage is only reported if the caller iterates
      // `stream`; awaiting `response` alone reports nothing.
      result.stream = instrument(result.stream, t0);
      return result;
    }
    // Non-streaming results nest the body under `.response`; unwrap it so
    // the usage metadata is actually found.
    const payload =
      result?.usageMetadata == null && result?.response != null
        ? result.response
        : result;
    report(payload, t0);
    return result;
  };

  const generateContent = intercept(model.generateContent.bind(model));
  const generateContentStream =
    typeof model.generateContentStream === "function"
      ? intercept(model.generateContentStream.bind(model))
      : void 0;

  return new Proxy(model, {
    get(target, prop, recv) {
      if (prop === "generateContent") return generateContent;
      if (prop === "generateContentStream" && generateContentStream) {
        return generateContentStream;
      }
      return Reflect.get(target, prop, recv);
    }
  });
}
|
|
351
478
|
|
|
352
479
|
exports.TollgateError = TollgateError;
|
|
353
480
|
exports.anthropicEventFrom = anthropicEventFrom;
|
|
354
481
|
exports.bedrockEventFrom = bedrockEventFrom;
|
|
355
482
|
exports.createTollgateClient = createTollgateClient;
|
|
483
|
+
exports.geminiEventFrom = geminiEventFrom;
|
|
356
484
|
exports.openAIEventFrom = openAIEventFrom;
|
|
357
485
|
exports.wrapAnthropic = wrapAnthropic;
|
|
358
486
|
exports.wrapBedrock = wrapBedrock;
|
|
487
|
+
exports.wrapGemini = wrapGemini;
|
|
359
488
|
exports.wrapOpenAI = wrapOpenAI;
|
|
360
489
|
//# sourceMappingURL=index.cjs.map
|
|
361
490
|
//# sourceMappingURL=index.cjs.map
|