@tollgateai/sdk 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tollgate
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -2,13 +2,13 @@
2
2
 
3
3
  > Real-time gross-margin observability for AI agents. Track every LLM call's cost, attribute it to a customer, and see whether you're making money — before the invoice goes out.
4
4
 
5
- **v0.4.0** · [npm](https://www.npmjs.com/package/@tollgateai/sdk) · [Dashboard](https://tollgateai.vercel.app)
5
+ **v0.5.0** · [npm](https://www.npmjs.com/package/@tollgateai/sdk) · [Dashboard](https://tollgateai.vercel.app)
6
6
 
7
7
  ---
8
8
 
9
9
  ## Why Tollgate
10
10
 
11
- You sell an AI-powered product. Each customer interaction triggers LLM calls that cost you real money — input tokens, output tokens, reasoning tokens, cached tokens, tool calls. Tollgate captures that cost automatically from provider responses, joins it with the revenue your pricing model defines, and shows you per-customer, per-agent, per-run gross margin in real time.
11
+ You sell an AI-powered product. Each customer interaction triggers LLM calls that cost you real money — input tokens, output tokens, reasoning tokens, audio tokens, cached tokens, web searches, tool calls. Tollgate captures that cost automatically from provider responses, joins it with the revenue your pricing model defines, and shows you per-customer, per-agent, per-run gross margin in real time.
12
12
 
13
13
  ## Installation
14
14
 
@@ -34,11 +34,11 @@ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
34
34
  runId: 'ticket_8842',
35
35
  });
36
36
 
37
- // Every call is tracked automatically — tokens, cost, tool calls.
37
+ // Every call is tracked automatically — tokens, cost, latency, tool calls.
38
38
  const msg = await anthropic.messages.create({
39
39
  model: 'claude-sonnet-4-6',
40
40
  max_tokens: 1024,
41
- messages: [{ role: 'user', content: 'Resolve this billing dispute' }],
41
+ messages: [{ role: 'user', content: 'Resolve this billing dispute...' }],
42
42
  });
43
43
 
44
44
  // Close the run and book revenue.
@@ -52,12 +52,13 @@ await tollgate.resolve({
52
52
 
53
53
  ## Provider Support
54
54
 
55
- | Provider | Wrapper | Streaming | Tool-Call Tracking |
55
+ | Provider | Wrapper | Streaming | What Gets Extracted |
56
56
  |---|---|---|---|
57
- | Anthropic | `wrapAnthropic` | Automatic | Counts `tool_use` content blocks |
58
- | OpenAI | `wrapOpenAI` | Needs `stream_options: { include_usage: true }` | Counts `tool_calls` on choices |
59
- | OpenAI-compatible (Groq, OpenRouter, Together, Nebius, vLLM, …) | `wrapOpenAI` with `provider: 'openai_compatible'` | Same as OpenAI | Same as OpenAI |
60
- | AWS Bedrock | `wrapBedrock` | Automatic | Counts `toolUse` content blocks |
57
+ | **Anthropic** | `wrapAnthropic` | Automatic | Tokens, cache (read + write by TTL), web search requests, tool calls, latency |
58
+ | **OpenAI** | `wrapOpenAI` | `stream_options: { include_usage: true }` | Tokens, reasoning, cached, audio in/out, text in/out, prediction tokens, service tier, tool calls, latency |
59
+ | **Google Gemini** | `wrapGemini` | Automatic | Tokens, thinking, cached, audio/image/video per-modality, web search (grounding), tool calls, latency |
60
+ | **OpenAI-compatible** | `wrapOpenAI` + `provider: 'openai_compatible'` | Same as OpenAI | Same as OpenAI |
61
+ | **AWS Bedrock** | `wrapBedrock` | Automatic | Tokens, cache (read + write), tool calls, latency |
61
62
 
62
63
  ## Configuration
63
64
 
@@ -81,7 +82,7 @@ const tollgate = createTollgateClient({
81
82
 
82
83
  ## Auto-Instrumentation
83
84
 
84
- Wrap your provider client once. Every `create` call reports usage in the background — non-blocking, fire-and-forget. Failures go to `onError` (default: `console.warn`) and never break your LLM call.
85
+ Wrap your provider client once. Every `create` / `generateContent` call reports usage in the background — non-blocking, fire-and-forget. Failures go to `onError` (default: `console.warn`) and never break your LLM call.
85
86
 
86
87
  ### Anthropic
87
88
 
@@ -98,7 +99,7 @@ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
98
99
  await anthropic.messages.create({
99
100
  model: 'claude-sonnet-4-6',
100
101
  max_tokens: 512,
101
- messages: [{ role: 'user', content: 'Summarize this ticket' }],
102
+ messages: [{ role: 'user', content: 'Summarize this ticket...' }],
102
103
  });
103
104
  ```
104
105
 
@@ -117,6 +118,23 @@ await openai.chat.completions.create({
117
118
  });
118
119
  ```
119
120
 
121
+ ### Google Gemini
122
+
123
+ ```ts
124
+ import { GoogleGenerativeAI } from '@google/generative-ai';
125
+ import { createTollgateClient, wrapGemini } from '@tollgateai/sdk';
126
+
127
+ const tollgate = createTollgateClient();
128
+ const genai = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
129
+ const model = wrapGemini(
130
+ genai.getGenerativeModel({ model: 'gemini-2.0-flash' }),
131
+ tollgate,
132
+ { customerId: 'cust_acme' },
133
+ );
134
+
135
+ const result = await model.generateContent('Explain quantum computing');
136
+ ```
137
+
120
138
  ### OpenAI-Compatible Gateways
121
139
 
122
140
  Point the OpenAI SDK at any compatible endpoint and set `provider: 'openai_compatible'`:
@@ -159,9 +177,9 @@ await bedrock.send(new ConverseCommand({
159
177
 
160
178
  ### Streaming
161
179
 
162
- Streaming is captured automatically — iterate the stream as usual and usage is reported when the stream ends.
180
+ Streaming is captured automatically. Iterate the stream as usual — usage and latency are reported when the stream ends.
163
181
 
164
- **OpenAI / compatible** requires `stream_options: { include_usage: true }` for the final usage chunk. **Anthropic** and **Bedrock** need no extra flags.
182
+ **OpenAI / compatible** requires `stream_options: { include_usage: true }`. **Anthropic**, **Gemini**, and **Bedrock** need no extra flags.
165
183
 
166
184
  ```ts
167
185
  const stream = await openai.chat.completions.create({
@@ -171,38 +189,45 @@ const stream = await openai.chat.completions.create({
171
189
  messages: [{ role: 'user', content: 'Hello' }],
172
190
  });
173
191
  for await (const chunk of stream) { /* render to UI */ }
174
- // Usage reported automatically when stream ends.
192
+ // Usage + latency reported automatically when stream ends.
175
193
  ```
176
194
 
177
195
  ---
178
196
 
179
197
  ## What Gets Tracked
180
198
 
181
- Every auto-instrumented call captures the following from the provider response:
199
+ Every auto-instrumented call captures these fields from the provider response:
182
200
 
183
- | Field | Source | Description |
201
+ | Field | Providers | Description |
184
202
  |---|---|---|
185
- | `tokensIn` | `usage.input_tokens` / `prompt_tokens` | Input tokens consumed |
186
- | `tokensOut` | `usage.output_tokens` / `completion_tokens` | Output tokens generated |
187
- | `reasoningTokens` | `completion_tokens_details.reasoning_tokens` | Reasoning/chain-of-thought tokens (OpenAI) |
188
- | `cachedTokens` | `cache_read_input_tokens` / `cached_tokens` | Prompt cache read tokens |
189
- | `cacheWrite5mTokens` | `cache_creation_input_tokens` | 5-min TTL cache write tokens |
190
- | `cacheWrite1hTokens` | `cache_creation.ephemeral_1h_input_tokens` | 1-hour TTL cache write tokens |
191
- | `toolCalls` | Content block / choice inspection | Number of tool calls in the response |
192
- | `externalCostCents` | User-provided | Cost of external tools/services (image gen, sandbox, search) |
193
- | `provider` | Wrapper default or override | `anthropic`, `openai`, `openai_compatible`, `bedrock` |
194
- | `model` | Response object | Model identifier as reported by the provider |
195
-
196
- Cost is computed **server-side** from token counts and a rate card that auto-syncs daily from the public LiteLLM registry. Unknown models are priced at $0 and flagged in logs.
203
+ | `tokensIn` | All | Input tokens consumed |
204
+ | `tokensOut` | All | Output tokens generated |
205
+ | `reasoningTokens` | OpenAI, Gemini | Reasoning/thinking tokens (billed at output rate) |
206
+ | `cachedTokens` | All | Prompt cache read tokens (reduced rate) |
207
+ | `cacheWrite5mTokens` | Anthropic, Bedrock | 5-min TTL cache creation tokens |
208
+ | `cacheWrite1hTokens` | Anthropic | 1-hour TTL cache creation tokens |
209
+ | `audioTokensIn` | OpenAI | Audio input tokens (GPT-4o audio / Realtime) |
210
+ | `audioTokensOut` | OpenAI, Gemini | Audio output tokens |
211
+ | `imageTokensIn` | Gemini | Image/vision input tokens |
212
+ | `imageTokensOut` | Gemini | Image generation output tokens |
213
+ | `videoTokensIn` | Gemini | Video input tokens |
214
+ | `textTokensIn` | OpenAI, Gemini | Text-only input tokens (modality split) |
215
+ | `textTokensOut` | OpenAI, Gemini | Text-only output tokens |
216
+ | `webSearchRequests` | Anthropic, Gemini | Web search requests (server tools / grounding) |
217
+ | `acceptedPredictionTokens` | OpenAI | Predicted Outputs: accepted tokens |
218
+ | `rejectedPredictionTokens` | OpenAI | Predicted Outputs: rejected tokens (waste) |
219
+ | `serviceTier` | OpenAI | Service tier used (`default`, `flex`, `priority`) |
220
+ | `latencyMs` | All | SDK-measured request duration in milliseconds |
221
+ | `toolCalls` | All | Number of tool calls in the response |
222
+ | `model` | All | Model identifier as reported by the provider |
223
+
224
+ Cost is computed **server-side** from token counts and a rate card that auto-syncs daily from the LiteLLM registry (1,500+ models). Rate cards include per-token pricing for text, audio, image, video, cache, reasoning, and web search. Unknown models are priced at $0 and flagged in logs.
197
225
 
198
226
  ---
199
227
 
200
228
  ## Outcome-Based Pricing
201
229
 
202
- Under per-resolution pricing, only a **resolved** run earns revenue. An escalated or failed run earns $0 but its provider cost still counts. The pattern:
203
-
204
- 1. **Wrap** to meter cost on every LLM call (automatic).
205
- 2. **Resolve** once at the end to book the outcome.
230
+ Under per-resolution pricing, only a **resolved** run earns revenue. An escalated or failed run earns $0 but its provider cost still counts.
206
231
 
207
232
  ```ts
208
233
  const runId = 'ticket_8842';
@@ -211,7 +236,7 @@ const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
211
236
  runId,
212
237
  });
213
238
 
214
- // multiple LLM calls within this run
239
+ // ... multiple LLM calls within this run ...
215
240
 
216
241
  await tollgate.resolve({
217
242
  runId,
@@ -227,12 +252,9 @@ For simple per-call billing, pass `revenueUnitCents` in the wrap options and ski
227
252
 
228
253
  ## External Tool Costs
229
254
 
230
- AI agents often call external services — image generation, code sandboxes, search APIs, vector databases — that cost real money outside of LLM token pricing. Report these costs alongside the LLM call so Tollgate includes them in the margin calculation.
255
+ Report costs from external services (image generation, code sandboxes, search APIs) alongside LLM calls:
231
256
 
232
257
  ```ts
233
- // Agent generates an image during the run
234
- const image = await dalle.generate({ prompt: '...' }); // costs $0.04
235
-
236
258
  await tollgate.track({
237
259
  customerId: 'cust_acme',
238
260
  runId: 'ticket_8842',
@@ -240,35 +262,21 @@ await tollgate.track({
240
262
  model: 'gpt-4o',
241
263
  tokensIn: 500,
242
264
  tokensOut: 200,
243
- toolCalls: 1, // LLM tool_use count (auto-extracted by wrappers)
244
265
  externalCostCents: 4.0, // $0.04 for the DALL-E call
245
266
  idempotencyKey: 'ticket_8842#step_2',
246
267
  });
247
268
  ```
248
269
 
249
- Common examples:
250
-
251
- | Service | Typical Cost | How to report |
252
- |---|---|---|
253
- | DALL-E image generation | ~$0.04/image | `externalCostCents: 4` |
254
- | E2B code sandbox | ~$0.01/run | `externalCostCents: 1` |
255
- | Tavily search API | ~$0.01/search | `externalCostCents: 1` |
256
- | Pinecone vector query | ~$0.001/query | `externalCostCents: 0.1` |
257
-
258
- External costs flow into the **Tools** bucket in the Customer Drawer's cost-split chart — so you can see exactly what share of each customer's cost comes from external services vs. LLM tokens.
259
-
260
270
  ---
261
271
 
262
272
  ## Customer & Plan Setup
263
273
 
264
- Create customers and assign plans **before** sending usage so plan-priced revenue is recognized from the first event. Idempotent — safe to run on every boot.
274
+ Create customers and assign plans before sending usage so plan-priced revenue is recognized from the first event. Idempotent.
265
275
 
266
276
  ```ts
267
277
  await tollgate.upsertCustomer({
268
278
  customerId: 'cust_acme',
269
279
  name: 'Acme Corp',
270
- company: 'Acme Corp',
271
- seats: 5,
272
280
  plan: {
273
281
  name: 'Pro Plan',
274
282
  pricingModel: 'usage_based', // per_unit | per_resolution | usage_based | per_seat | flat | hybrid
@@ -279,57 +287,26 @@ await tollgate.upsertCustomer({
279
287
 
280
288
  ---
281
289
 
282
- ## Manual Tracking
283
-
284
- For full control, unusual providers, or non-LLM cost events:
285
-
286
- ```ts
287
- await tollgate.track({
288
- customerId: 'cust_acme',
289
- runId: 'run_12345',
290
- provider: 'anthropic',
291
- model: 'claude-sonnet-4-6',
292
- tokensIn: 1200,
293
- tokensOut: 450,
294
- reasoningTokens: 0,
295
- cachedTokens: 0,
296
- toolCalls: 2,
297
- revenueUnitCents: 50,
298
- idempotencyKey: 'run_12345#step_1',
299
- });
300
- ```
301
-
302
- ### Already have an exact cost?
303
-
304
- Pass `providerCostCents` (a number or a function of the response) and the server uses it verbatim, skipping the rate card entirely:
305
-
306
- ```ts
307
- const anthropic = wrapAnthropic(new Anthropic(), tollgate, {
308
- customerId: 'cust_acme',
309
- providerCostCents: 3.5, // or: (response) => computeMyOwnCost(response)
310
- });
311
- ```
312
-
313
- ---
314
-
315
290
  ## API Reference
316
291
 
317
292
  ### Exports
318
293
 
319
294
  ```ts
320
295
  // Client
321
- createTollgateClient(options?) // TollgateClient
296
+ createTollgateClient(options?) // -> TollgateClient
322
297
  TollgateError // Custom error with status & body
323
298
 
324
299
  // Auto-instrumentation wrappers
325
- wrapAnthropic(client, tollgate, options) // instrumented Anthropic client
326
- wrapOpenAI(client, tollgate, options) // instrumented OpenAI / compatible client
327
- wrapBedrock(client, tollgate, options) // instrumented Bedrock Runtime client
300
+ wrapAnthropic(client, tollgate, options) // -> instrumented Anthropic client
301
+ wrapOpenAI(client, tollgate, options) // -> instrumented OpenAI / compatible client
302
+ wrapBedrock(client, tollgate, options) // -> instrumented Bedrock Runtime client
303
+ wrapGemini(model, tollgate, options) // -> instrumented Gemini model
328
304
 
329
305
  // Low-level event builders (for manual track payloads)
330
- anthropicEventFrom(msg, options) // TrackEventInput | null
331
- openAIEventFrom(completion, options) // TrackEventInput | null
332
- bedrockEventFrom(usage, model, options) // TrackEventInput | null
306
+ anthropicEventFrom(msg, options) // -> TrackEventInput | null
307
+ openAIEventFrom(completion, options) // -> TrackEventInput | null
308
+ bedrockEventFrom(usage, model, options) // -> TrackEventInput | null
309
+ geminiEventFrom(response, options) // -> TrackEventInput | null
333
310
  ```
334
311
 
335
312
  ### TollgateClient
@@ -344,32 +321,46 @@ bedrockEventFrom(usage, model, options) // → TrackEventInput | null
344
321
 
345
322
  | Field | Type | Required | Description |
346
323
  |---|---|---|---|
347
- | `customerId` | `string` | Yes | Your end customer's stable identifier. |
348
- | `agentId` | `string` | No | Agent or workflow identifier. |
349
- | `runId` | `string \| () => string` | No | Logical run ID. Defaults to the provider response ID. |
350
- | `provider` | `Provider` | No | Override the reported provider (e.g. `'openai_compatible'`). |
351
- | `revenueUnitCents` | `number \| (response) => number` | No | Revenue per call in cents. |
352
- | `providerCostCents` | `number \| (response) => number` | No | Exact cost override — skips rate card. |
353
- | `onError` | `(err) => void` | No | Error handler for background tracking (default: `console.warn`). |
324
+ | `customerId` | `string` | Yes | Your end customer's stable identifier |
325
+ | `agentId` | `string` | No | Agent or workflow identifier |
326
+ | `runId` | `string \| () => string` | No | Logical run ID (defaults to provider response ID) |
327
+ | `provider` | `Provider` | No | Override the reported provider |
328
+ | `revenueUnitCents` | `number \| (response) => number` | No | Revenue per call in cents |
329
+ | `providerCostCents` | `number \| (response) => number` | No | Exact cost override (skips rate card) |
330
+ | `onError` | `(err) => void` | No | Error handler for background tracking |
354
331
 
355
332
  ---
356
333
 
357
334
  ## How It Works
358
335
 
359
- 1. **Proxy wrappers** intercept `messages.create` / `chat.completions.create` / `send` without modifying the request or response.
360
- 2. After the provider responds, the wrapper extracts token counts, tool call counts, and metadata from the response's `usage` object and content blocks.
361
- 3. A `POST /api/track` is fired **in the background** — non-blocking, with automatic retries on transient failures.
362
- 4. The server computes cost from tokens via rate cards, joins it with your plan-configured revenue, and updates real-time margin rollups.
363
- 5. Events are **idempotent** on `idempotencyKey` (auto-set to the provider response ID), so retries and stream replays never double-count.
336
+ 1. **Proxy wrappers** intercept provider calls without modifying the request or response.
337
+ 2. After the provider responds, the wrapper extracts token counts (by modality), tool calls, service tier, and latency from the response.
338
+ 3. A `POST /api/track` fires **in the background** with automatic retries on transient failures.
339
+ 4. The server computes cost from tokens via rate cards (text, audio, image, video, cache, reasoning, web search), joins it with plan-configured revenue, and updates real-time margin rollups.
340
+ 5. Events are **idempotent** on `idempotencyKey` (auto-set to the provider response ID).
364
341
 
365
342
  ## Privacy & Security
366
343
 
367
344
  - **No prompt content is ever sent.** Only token counts, model name, and metadata.
368
- - Events are deduplicated server-side — safe to retry.
345
+ - Events are deduplicated server-side.
369
346
  - Background tracking never throws into your application code.
370
347
 
371
348
  ---
372
349
 
350
+ ## What's New in v0.5.0
351
+
352
+ - **Google Gemini / Vertex AI** support (`wrapGemini`) with full multimodal extraction
353
+ - **Audio token tracking** (OpenAI GPT-4o audio / Realtime API)
354
+ - **Image & video token tracking** (Gemini per-modality breakdowns)
355
+ - **Web search request tracking** (Anthropic `server_tool_use`, Gemini grounding)
356
+ - **Latency measurement** on all wrappers (SDK-measured `latencyMs`)
357
+ - **OpenAI Predicted Outputs** (`acceptedPredictionTokens` / `rejectedPredictionTokens`)
358
+ - **Service tier tracking** (OpenAI `default` / `flex` / `priority`)
359
+ - **Text modality split** for accurate cost attribution in mixed-modal requests
360
+ - Expanded rate card sync: audio, image, video, and web search rates from LiteLLM
361
+
362
+ ---
363
+
373
364
  ## License
374
365
 
375
366
  Licensed for use with Tollgate.
package/dist/index.cjs CHANGED
@@ -173,6 +173,7 @@ function anthropicEventFrom(msg, opts) {
173
173
  cachedTokens: usage.cache_read_input_tokens ?? 0,
174
174
  cacheWrite5mTokens: hasSplit ? fivem ?? 0 : usage.cache_creation_input_tokens ?? 0,
175
175
  cacheWrite1hTokens: hasSplit ? oneh ?? 0 : 0,
176
+ webSearchRequests: usage.server_tool_use?.web_search_requests ?? 0,
176
177
  toolCalls,
177
178
  revenueUnitCents: resolveRevenue(opts, msg),
178
179
  idempotencyKey: msg.id ?? `${runId}#${randomId()}`
@@ -183,6 +184,7 @@ function wrapAnthropic(client, tollgate, opts) {
183
184
  const messages = client.messages;
184
185
  const original = messages.create.bind(messages);
185
186
  const create = async (...args) => {
187
+ const t0 = Date.now();
186
188
  const result = await original(...args);
187
189
  if (isAsyncIterable(result)) {
188
190
  const msg = {};
@@ -203,12 +205,18 @@ function wrapAnthropic(client, tollgate, opts) {
203
205
  () => {
204
206
  msg.content = toolUseBlocks;
205
207
  const event2 = anthropicEventFrom(msg, opts);
206
- if (event2) fireAndForget(tollgate.track(event2), opts.onError);
208
+ if (event2) {
209
+ event2.latencyMs = Date.now() - t0;
210
+ fireAndForget(tollgate.track(event2), opts.onError);
211
+ }
207
212
  }
208
213
  );
209
214
  }
210
215
  const event = anthropicEventFrom(result, opts);
211
- if (event) fireAndForget(tollgate.track(event), opts.onError);
216
+ if (event) {
217
+ event.latencyMs = Date.now() - t0;
218
+ fireAndForget(tollgate.track(event), opts.onError);
219
+ }
212
220
  return result;
213
221
  };
214
222
  return new Proxy(client, {
@@ -227,6 +235,8 @@ function openAIEventFrom(completion, opts) {
227
235
  if (!usage) return null;
228
236
  const runId = resolveRunId(opts, completion.id);
229
237
  const toolCalls = completion.choices?.[0]?.message?.tool_calls?.length ?? 0;
238
+ const ptd = usage.prompt_tokens_details;
239
+ const ctd = usage.completion_tokens_details;
230
240
  const event = {
231
241
  customerId: opts.customerId,
232
242
  agentId: opts.agentId,
@@ -235,8 +245,15 @@ function openAIEventFrom(completion, opts) {
235
245
  model: completion.model ?? "unknown",
236
246
  tokensIn: usage.prompt_tokens ?? 0,
237
247
  tokensOut: usage.completion_tokens ?? 0,
238
- reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
239
- cachedTokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
248
+ reasoningTokens: ctd?.reasoning_tokens ?? 0,
249
+ cachedTokens: ptd?.cached_tokens ?? 0,
250
+ audioTokensIn: ptd?.audio_tokens ?? 0,
251
+ audioTokensOut: ctd?.audio_tokens ?? 0,
252
+ textTokensIn: ptd?.text_tokens ?? 0,
253
+ textTokensOut: ctd?.text_tokens ?? 0,
254
+ acceptedPredictionTokens: ctd?.accepted_prediction_tokens ?? 0,
255
+ rejectedPredictionTokens: ctd?.rejected_prediction_tokens ?? 0,
256
+ serviceTier: completion.service_tier,
240
257
  toolCalls,
241
258
  revenueUnitCents: resolveRevenue(opts, completion),
242
259
  idempotencyKey: completion.id ?? `${runId}#${randomId()}`
@@ -247,11 +264,13 @@ function wrapOpenAI(client, tollgate, opts) {
247
264
  const completions = client.chat.completions;
248
265
  const original = completions.create.bind(completions);
249
266
  const create = async (...args) => {
267
+ const t0 = Date.now();
250
268
  const result = await original(...args);
251
269
  if (isAsyncIterable(result)) {
252
270
  let id;
253
271
  let model;
254
272
  let usage;
273
+ let serviceTier;
255
274
  const toolCallIndices = /* @__PURE__ */ new Set();
256
275
  return instrumentStream(
257
276
  result,
@@ -259,6 +278,7 @@ function wrapOpenAI(client, tollgate, opts) {
259
278
  if (chunk.id) id = chunk.id;
260
279
  if (chunk.model) model = chunk.model;
261
280
  if (chunk.usage) usage = chunk.usage;
281
+ if (chunk.service_tier) serviceTier = chunk.service_tier;
262
282
  for (const c of chunk.choices ?? []) {
263
283
  for (const tc of c.delta?.tool_calls ?? []) {
264
284
  if (tc.index !== void 0) toolCallIndices.add(tc.index);
@@ -267,17 +287,23 @@ function wrapOpenAI(client, tollgate, opts) {
267
287
  },
268
288
  () => {
269
289
  if (!usage) return;
270
- const synth = { id, model, usage };
290
+ const synth = { id, model, usage, service_tier: serviceTier };
271
291
  if (toolCallIndices.size > 0) {
272
292
  synth.choices = [{ message: { tool_calls: new Array(toolCallIndices.size) } }];
273
293
  }
274
294
  const event2 = openAIEventFrom(synth, opts);
275
- if (event2) fireAndForget(tollgate.track(event2), opts.onError);
295
+ if (event2) {
296
+ event2.latencyMs = Date.now() - t0;
297
+ fireAndForget(tollgate.track(event2), opts.onError);
298
+ }
276
299
  }
277
300
  );
278
301
  }
279
302
  const event = openAIEventFrom(result, opts);
280
- if (event) fireAndForget(tollgate.track(event), opts.onError);
303
+ if (event) {
304
+ event.latencyMs = Date.now() - t0;
305
+ fireAndForget(tollgate.track(event), opts.onError);
306
+ }
281
307
  return result;
282
308
  };
283
309
  return new Proxy(client, {
@@ -316,6 +342,7 @@ function bedrockEventFrom(usage, model, opts, response = void 0, toolCalls = 0)
316
342
  function wrapBedrock(client, tollgate, opts) {
317
343
  const originalSend = client.send.bind(client);
318
344
  const send = async (command, ...rest) => {
345
+ const t0 = Date.now();
319
346
  const result = await originalSend(command, ...rest);
320
347
  const model = command?.input?.modelId ?? "unknown";
321
348
  if (result?.stream && isAsyncIterable(result.stream)) {
@@ -329,7 +356,10 @@ function wrapBedrock(client, tollgate, opts) {
329
356
  },
330
357
  () => {
331
358
  const event = bedrockEventFrom(usage, model, opts, result, streamToolCalls);
332
- if (event) fireAndForget(tollgate.track(event), opts.onError);
359
+ if (event) {
360
+ event.latencyMs = Date.now() - t0;
361
+ fireAndForget(tollgate.track(event), opts.onError);
362
+ }
333
363
  }
334
364
  );
335
365
  return result;
@@ -337,7 +367,10 @@ function wrapBedrock(client, tollgate, opts) {
337
367
  if (result?.usage) {
338
368
  const tc = result.output?.message?.content?.filter((b) => b.toolUse != null).length ?? 0;
339
369
  const event = bedrockEventFrom(result.usage, model, opts, result, tc);
340
- if (event) fireAndForget(tollgate.track(event), opts.onError);
370
+ if (event) {
371
+ event.latencyMs = Date.now() - t0;
372
+ fireAndForget(tollgate.track(event), opts.onError);
373
+ }
341
374
  }
342
375
  return result;
343
376
  };
@@ -348,14 +381,110 @@ function wrapBedrock(client, tollgate, opts) {
348
381
  }
349
382
  });
350
383
  }
384
/**
 * Sums the `tokenCount` of every entry in a per-modality breakdown list whose
 * `modality` equals the requested one.
 *
 * Malformed input never throws: a missing or non-array `details`, null
 * entries, and non-numeric counts all contribute 0, so a surprising provider
 * payload cannot break the caller.
 *
 * @param {Array<{modality?: string, tokenCount?: number}>|null|undefined} details
 *   Breakdown list (e.g. `usageMetadata.promptTokensDetails`).
 * @param {string} modality Modality label to match, compared with `===`.
 * @returns {number} Total token count for that modality, 0 when none match.
 */
function modalityTokens(details, modality) {
  if (!Array.isArray(details)) return 0;
  let total = 0;
  for (const entry of details) {
    if (entry?.modality !== modality) continue;
    // Coerce so a numeric string is added arithmetically, not concatenated.
    const count = Number(entry.tokenCount);
    if (Number.isFinite(count)) total += count;
  }
  return total;
}
388
/**
 * Builds a Tollgate track event from a Gemini response.
 *
 * Accepts either an object that carries `usageMetadata` directly (a response
 * or stream chunk) or a result wrapper that nests it under `.response`.
 * NOTE(review): the nested shape is what `@google/generative-ai`'s
 * `generateContent` is documented to resolve to — confirm against the SDK
 * version you target.
 *
 * @param {object} response Provider response, or a `{ response }` wrapper.
 * @param {object} opts Wrap options (`customerId`, `agentId`, `runId`,
 *   `provider`, revenue / cost overrides).
 * @returns {object|null} The event (after `withCost`), or `null` when the
 *   response has no `usageMetadata`.
 */
function geminiEventFrom(response, opts) {
  // Unwrap `{ response }` results; otherwise use the object as given.
  const payload = response?.usageMetadata ? response : response?.response ?? response;
  const usage = payload?.usageMetadata;
  if (!usage) return null;

  // Prefer the provider-issued response id (when present) for the run id
  // fallback and the idempotency key, mirroring the Anthropic/OpenAI builders.
  const responseId =
    typeof payload.responseId === "string" && payload.responseId !== ""
      ? payload.responseId
      : void 0;
  const runId = resolveRunId(opts, responseId);

  const candidates = payload.candidates ?? [];
  let toolCalls = 0;
  let webSearchRequests = 0;
  for (const candidate of candidates) {
    for (const part of candidate?.content?.parts ?? []) {
      if (part?.functionCall != null) toolCalls += 1;
    }
    webSearchRequests += candidate?.groundingMetadata?.webSearchQueries?.length ?? 0;
  }

  const promptDetails = usage.promptTokensDetails;
  const candidateDetails = usage.candidatesTokensDetails;
  const event = {
    customerId: opts.customerId,
    agentId: opts.agentId,
    runId,
    provider: opts.provider ?? "google",
    // Use the model reported on the response when there is one.
    model: payload.modelVersion ?? "unknown",
    tokensIn: usage.promptTokenCount ?? 0,
    tokensOut: usage.candidatesTokenCount ?? 0,
    reasoningTokens: usage.thoughtsTokenCount ?? 0,
    cachedTokens: usage.cachedContentTokenCount ?? 0,
    audioTokensIn: modalityTokens(promptDetails, "AUDIO"),
    audioTokensOut: modalityTokens(candidateDetails, "AUDIO"),
    imageTokensIn: modalityTokens(promptDetails, "IMAGE"),
    imageTokensOut: modalityTokens(candidateDetails, "IMAGE"),
    videoTokensIn: modalityTokens(promptDetails, "VIDEO"),
    textTokensIn: modalityTokens(promptDetails, "TEXT"),
    textTokensOut: modalityTokens(candidateDetails, "TEXT"),
    webSearchRequests,
    toolCalls,
    revenueUnitCents: resolveRevenue(opts, response),
    // Without a provider id, fall back to a random suffix as before.
    idempotencyKey: responseId ?? `${runId}#${randomId()}`
  };
  return withCost(event, opts, response);
}
426
/**
 * Returns a Proxy around a Gemini model whose `generateContent` (and
 * `generateContentStream`, when the model has one) report usage to Tollgate
 * in the background.
 *
 * Handled result shapes:
 *  - an async iterable: chunks are observed via `instrumentStream`;
 *  - an object with an async-iterable `stream` property: that stream is
 *    instrumented in place and the same object is returned;
 *  - anything else: treated as a finished response; a `{ response }` wrapper
 *    is unwrapped before the event is built.
 * NOTE(review): the `{ stream }` / `{ response }` shapes follow the
 * `@google/generative-ai` docs — confirm for the SDK version in use.
 *
 * Failures while building or sending the event go to `opts.onError`
 * (falling back to `console.warn`) and never reject the provider call.
 *
 * @param {object} model Gemini model exposing `generateContent`.
 * @param {object} tollgate Client exposing `track(event)`.
 * @param {object} opts Wrap options forwarded to `geminiEventFrom`.
 * @returns {object} Proxy of `model` with instrumented generate methods.
 */
function wrapGemini(model, tollgate, opts) {
  const modelName = model.model ?? "unknown";

  // Build and send one event; errors here must not surface to the caller.
  const track = (payload, t0) => {
    try {
      const event = geminiEventFrom(payload, opts);
      if (!event) return;
      // Only override when the wrapper actually knows the model name.
      if (modelName !== "unknown") event.model = modelName;
      event.latencyMs = Date.now() - t0;
      fireAndForget(tollgate.track(event), opts.onError);
    } catch (err) {
      (opts.onError ?? console.warn)(err);
    }
  };

  // Observe a chunk stream and report once when it ends.
  const instrument = (stream, t0) => {
    const usage = {};
    let sawUsage = false;
    let toolCallCount = 0;
    let searchCount = 0;
    let responseId;
    let modelVersion;
    return instrumentStream(
      stream,
      (chunk) => {
        if (chunk?.usageMetadata) {
          Object.assign(usage, chunk.usageMetadata);
          sawUsage = true;
        }
        if (chunk?.responseId) responseId = chunk.responseId;
        if (chunk?.modelVersion) modelVersion = chunk.modelVersion;
        for (const c of chunk?.candidates ?? []) {
          for (const p of c.content?.parts ?? []) {
            if (p.functionCall != null) toolCallCount++;
          }
          searchCount += c.groundingMetadata?.webSearchQueries?.length ?? 0;
        }
      },
      () => {
        // No usage seen: skip rather than emit an all-zero event
        // (same rule as the OpenAI stream path).
        if (!sawUsage) return;
        const hasCounts = searchCount > 0 || toolCallCount > 0;
        track(
          {
            usageMetadata: usage,
            responseId,
            modelVersion,
            // Synthetic candidate carrying only the counts gathered above.
            candidates: hasCounts
              ? [{
                  content: { parts: new Array(toolCallCount).fill({ functionCall: {} }) },
                  groundingMetadata: { webSearchQueries: new Array(searchCount) }
                }]
              : []
          },
          t0
        );
      }
    );
  };

  // Shared wrapper for both generate methods.
  const instrumented = (original) => async (...args) => {
    const t0 = Date.now();
    const result = await original(...args);
    if (isAsyncIterable(result)) return instrument(result, t0);
    if (result?.stream && isAsyncIterable(result.stream)) {
      result.stream = instrument(result.stream, t0);
      return result;
    }
    // Unwrap `{ response }` results; pass plain responses through unchanged.
    const payload = result?.usageMetadata ? result : result?.response ?? result;
    track(payload, t0);
    return result;
  };

  const generateContent = instrumented(model.generateContent.bind(model));
  const generateContentStream =
    typeof model.generateContentStream === "function"
      ? instrumented(model.generateContentStream.bind(model))
      : void 0;

  return new Proxy(model, {
    get(target, prop, recv) {
      if (prop === "generateContent") return generateContent;
      if (prop === "generateContentStream" && generateContentStream) return generateContentStream;
      return Reflect.get(target, prop, recv);
    }
  });
}
351
478
 
352
479
  exports.TollgateError = TollgateError;
353
480
  exports.anthropicEventFrom = anthropicEventFrom;
354
481
  exports.bedrockEventFrom = bedrockEventFrom;
355
482
  exports.createTollgateClient = createTollgateClient;
483
+ exports.geminiEventFrom = geminiEventFrom;
356
484
  exports.openAIEventFrom = openAIEventFrom;
357
485
  exports.wrapAnthropic = wrapAnthropic;
358
486
  exports.wrapBedrock = wrapBedrock;
487
+ exports.wrapGemini = wrapGemini;
359
488
  exports.wrapOpenAI = wrapOpenAI;
360
489
  //# sourceMappingURL=index.cjs.map
361
490
  //# sourceMappingURL=index.cjs.map