@ai-sdk/gateway 3.0.82 → 3.0.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -238,6 +238,86 @@ The `getCredits()` method returns your team's credit information based on the au
238
238
  - **balance** _number_ - Your team's current available credit balance
239
239
  - **total_used** _number_ - Total credits consumed by your team
240
240
 
241
+ ## Generation Lookup
242
+
243
+ Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
244
+
245
+ When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful when a network interruption or mid-stream error prevents you from receiving the final response: because the gateway records the final status server-side, you can later pass the generation ID to `getGenerationInfo()` to retrieve the results (including cost, token usage, and finish reason).
246
+
247
+ ```ts
248
import { gateway, generateText } from 'ai';

// Make a request
const result = await generateText({
  model: gateway('anthropic/claude-sonnet-4'),
  prompt: 'Explain quantum entanglement briefly',
});

// Get the generation ID from provider metadata. Every level of the chain is
// optional, so the value may be undefined.
const generationId = result.providerMetadata?.gateway?.generationId;

// getGenerationInfo() requires a string ID, so only look it up once we know
// we actually received one.
if (typeof generationId === 'string') {
  // Look up detailed generation info
  const generation = await gateway.getGenerationInfo({ id: generationId });

  console.log(`Model: ${generation.model}`);
  console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
  console.log(`Latency: ${generation.latency}ms`);
  console.log(`Prompt tokens: ${generation.promptTokens}`);
  console.log(`Completion tokens: ${generation.completionTokens}`);
}
267
+ ```
268
+
269
+ With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
270
+
271
+ ```ts
272
import { gateway, streamText } from 'ai';

const result = streamText({
  model: gateway('anthropic/claude-sonnet-4'),
  prompt: 'Explain quantum entanglement briefly',
});

let generationId: string | undefined;

// The whole stream is still consumed; the ID is taken from the first part
// that carries a non-empty string generation ID.
for await (const part of result.fullStream) {
  if (generationId !== undefined) {
    continue;
  }

  // Narrow at runtime instead of asserting the type with `as string`.
  const candidate = part.providerMetadata?.gateway?.generationId;
  if (typeof candidate === 'string' && candidate.length > 0) {
    generationId = candidate;
    console.log(`Generation ID (early): ${generationId}`);
  }
}

// Look up cost and usage after the stream completes
if (generationId) {
  const generation = await gateway.getGenerationInfo({ id: generationId });
  console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
  console.log(`Finish reason: ${generation.finishReason}`);
}
294
+ ```
295
+
296
+ The `getGenerationInfo()` method accepts:
297
+
298
+ - **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
299
+
300
+ It returns a `GatewayGenerationInfo` object with the following fields:
301
+
302
+ - **id** _string_ - The generation ID
303
+ - **totalCost** _number_ - Total cost in USD
304
+ - **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant when Bring Your Own Key, BYOK, credentials are used)
305
+ - **usage** _number_ - Usage cost in USD (same as totalCost)
306
+ - **createdAt** _string_ - ISO 8601 timestamp when the generation was created
307
+ - **model** _string_ - Model identifier used
308
+ - **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
309
+ - **providerName** _string_ - The provider that served this generation
310
+ - **streamed** _boolean_ - Whether streaming was used
311
+ - **finishReason** _string_ - Finish reason (e.g. `'stop'`)
312
+ - **latency** _number_ - Time to first token in milliseconds
313
+ - **generationTime** _number_ - Total generation time in milliseconds
314
+ - **promptTokens** _number_ - Number of prompt tokens
315
+ - **completionTokens** _number_ - Number of completion tokens
316
+ - **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
317
+ - **cachedTokens** _number_ - Cached tokens used (if applicable)
318
+ - **cacheCreationTokens** _number_ - Cache creation input tokens
319
+ - **billableWebSearchCalls** _number_ - Number of billable web search calls
320
+
241
321
  ## Examples
242
322
 
243
323
  ### Basic Text Generation
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@ai-sdk/gateway",
3
3
  "private": false,
4
- "version": "3.0.82",
4
+ "version": "3.0.83",
5
5
  "license": "Apache-2.0",
6
6
  "sideEffects": false,
7
7
  "main": "./dist/index.js",
@@ -0,0 +1,147 @@
1
+ import {
2
+ createJsonErrorResponseHandler,
3
+ createJsonResponseHandler,
4
+ getFromApi,
5
+ lazySchema,
6
+ resolve,
7
+ zodSchema,
8
+ } from '@ai-sdk/provider-utils';
9
+ import { z } from 'zod/v4';
10
+ import { asGatewayError } from './errors';
11
+ import type { GatewayConfig } from './gateway-config';
12
+
13
/**
 * Input accepted by `getGenerationInfo()`.
 */
export interface GatewayGenerationInfoParams {
  /**
   * Identifier of the generation to look up, in the form `gen_<ulid>`.
   */
  id: string;
}
17
+
18
/**
 * Details of a single generation as returned by `getGenerationInfo()`.
 */
export interface GatewayGenerationInfo {
  // --- Identity -----------------------------------------------------------

  /** ID of the generation. */
  id: string;
  /** When the generation was created, as an ISO 8601 timestamp. */
  createdAt: string;
  /** Identifier of the model. */
  model: string;
  /** Name of the provider that served the generation. */
  providerName: string;
  /** `true` when BYOK credentials were used. */
  isByok: boolean;
  /** `true` when streaming was used. */
  streamed: boolean;
  /** Why the generation finished, e.g. `'stop'`. */
  finishReason: string;

  // --- Cost (USD) ---------------------------------------------------------

  /** Total cost, in USD. */
  totalCost: number;
  /** Upstream inference cost, in USD (BYOK only). */
  upstreamInferenceCost: number;
  /** Usage cost, in USD; same value as `totalCost`. */
  usage: number;

  // --- Timing (milliseconds) ----------------------------------------------

  /** Time to first token, in milliseconds. */
  latency: number;
  /** Total generation time, in milliseconds. */
  generationTime: number;

  // --- Counters -----------------------------------------------------------

  /** Prompt token count. */
  promptTokens: number;
  /** Completion token count. */
  completionTokens: number;
  /** Reasoning token count. */
  reasoningTokens: number;
  /** Cached token count. */
  cachedTokens: number;
  /** Cache creation input token count. */
  cacheCreationTokens: number;
  /** Count of billable web search calls. */
  billableWebSearchCalls: number;
}
56
+
57
/**
 * Fetches the details of a single generation from the gateway's
 * `/v1/generation` endpoint.
 */
export class GatewayGenerationInfoFetcher {
  constructor(private readonly config: GatewayConfig) {}

  /**
   * Looks up one generation by ID.
   *
   * The request is sent to the origin of `config.baseURL` (any path on the
   * base URL is not used), with the ID passed as the URL-encoded `id` query
   * parameter.
   *
   * @param params - Carries the generation ID to look up.
   * @returns The generation details parsed from the response.
   * @throws Whatever `asGatewayError` produces for any failure raised while
   * building or performing the request.
   */
  async getGenerationInfo(
    params: GatewayGenerationInfoParams,
  ): Promise<GatewayGenerationInfo> {
    try {
      const { origin } = new URL(this.config.baseURL);
      const query = `id=${encodeURIComponent(params.id)}`;
      const requestHeaders = await resolve(this.config.headers());

      const response = await getFromApi({
        url: `${origin}/v1/generation?${query}`,
        headers: requestHeaders,
        successfulResponseHandler: createJsonResponseHandler(
          gatewayGenerationInfoResponseSchema,
        ),
        // The error body is accepted as-is and used directly as the message.
        failedResponseHandler: createJsonErrorResponseHandler({
          errorSchema: z.any(),
          errorToMessage: data => data,
        }),
        fetch: this.config.fetch,
      });

      return response.value;
    } catch (error) {
      throw await asGatewayError(error);
    }
  }
}
85
+
86
/**
 * Response schema for the generation lookup request.
 *
 * Validates a `{ data: { ... } }` envelope whose payload uses snake_case
 * field names, renames those fields to the camelCase names of
 * `GatewayGenerationInfo`, and finally unwraps `data` so the parsed value is
 * the generation info itself.
 */
const gatewayGenerationInfoResponseSchema = lazySchema(() => {
  // Shape of the payload exactly as it appears in the response body.
  const wireGeneration = z.object({
    id: z.string(),
    total_cost: z.number(),
    upstream_inference_cost: z.number(),
    usage: z.number(),
    created_at: z.string(),
    model: z.string(),
    is_byok: z.boolean(),
    provider_name: z.string(),
    streamed: z.boolean(),
    finish_reason: z.string(),
    latency: z.number(),
    generation_time: z.number(),
    native_tokens_prompt: z.number(),
    native_tokens_completion: z.number(),
    native_tokens_reasoning: z.number(),
    native_tokens_cached: z.number(),
    native_tokens_cache_creation: z.number(),
    billable_web_search_calls: z.number(),
  });

  // Fields whose names are already camelCase-compatible come first, followed
  // by the renamed ones.
  const generation = wireGeneration.transform(raw => ({
    id: raw.id,
    usage: raw.usage,
    model: raw.model,
    streamed: raw.streamed,
    latency: raw.latency,
    totalCost: raw.total_cost,
    upstreamInferenceCost: raw.upstream_inference_cost,
    createdAt: raw.created_at,
    isByok: raw.is_byok,
    providerName: raw.provider_name,
    finishReason: raw.finish_reason,
    generationTime: raw.generation_time,
    promptTokens: raw.native_tokens_prompt,
    completionTokens: raw.native_tokens_completion,
    reasoningTokens: raw.native_tokens_reasoning,
    cachedTokens: raw.native_tokens_cached,
    cacheCreationTokens: raw.native_tokens_cache_creation,
    billableWebSearchCalls: raw.billable_web_search_calls,
  }));

  // Unwrap the envelope so callers receive the generation directly.
  const envelope = z
    .object({ data: generation })
    .transform(parsed => parsed.data);

  return zodSchema(envelope);
});
@@ -18,6 +18,11 @@ import {
18
18
  type GatewaySpendReportParams,
19
19
  type GatewaySpendReportResponse,
20
20
  } from './gateway-spend-report';
21
+ import {
22
+ GatewayGenerationInfoFetcher,
23
+ type GatewayGenerationInfoParams,
24
+ type GatewayGenerationInfo,
25
+ } from './gateway-generation-info';
21
26
  import { GatewayLanguageModel } from './gateway-language-model';
22
27
  import { GatewayEmbeddingModel } from './gateway-embedding-model';
23
28
  import { GatewayImageModel } from './gateway-image-model';
@@ -69,6 +74,14 @@ export interface GatewayProvider extends ProviderV3 {
69
74
  params: GatewaySpendReportParams,
70
75
  ): Promise<GatewaySpendReportResponse>;
71
76
 
77
+ /**
78
+ * Returns detailed information about a specific generation by its ID,
79
+ * including cost, token usage, latency, and provider details.
80
+ */
81
+ getGenerationInfo(
82
+ params: GatewayGenerationInfoParams,
83
+ ): Promise<GatewayGenerationInfo>;
84
+
72
85
  /**
73
86
  * Creates a model for generating text embeddings.
74
87
  */
@@ -281,6 +294,21 @@ export function createGatewayProvider(
281
294
  });
282
295
  };
283
296
 
297
/**
 * Looks up a single generation using a fetcher configured with this
 * provider's base URL, header resolver, and custom fetch.
 *
 * Any failure is rethrown via `asGatewayError`, together with the auth
 * method parsed from the current headers.
 */
const getGenerationInfo = async (params: GatewayGenerationInfoParams) => {
  const fetcher = new GatewayGenerationInfoFetcher({
    baseURL,
    headers: getHeaders,
    fetch: options.fetch,
  });

  try {
    return await fetcher.getGenerationInfo(params);
  } catch (error: unknown) {
    const authMethod = await parseAuthMethod(await getHeaders());
    throw await asGatewayError(error, authMethod);
  }
};
311
+
284
312
  const provider = function (modelId: GatewayModelId) {
285
313
  if (new.target) {
286
314
  throw new Error(
@@ -295,6 +323,7 @@ export function createGatewayProvider(
295
323
  provider.getAvailableModels = getAvailableModels;
296
324
  provider.getCredits = getCredits;
297
325
  provider.getSpendReport = getSpendReport;
326
+ provider.getGenerationInfo = getGenerationInfo;
298
327
  provider.imageModel = (modelId: GatewayImageModelId) => {
299
328
  return new GatewayImageModel(modelId, {
300
329
  provider: 'gateway',
package/src/index.ts CHANGED
@@ -10,6 +10,10 @@ export type {
10
10
  GatewaySpendReportRow,
11
11
  GatewaySpendReportResponse,
12
12
  } from './gateway-spend-report';
13
+ export type {
14
+ GatewayGenerationInfoParams,
15
+ GatewayGenerationInfo,
16
+ } from './gateway-generation-info';
13
17
  export type { GatewayLanguageModelEntry as GatewayModelEntry } from './gateway-model-entry';
14
18
  export {
15
19
  createGatewayProvider,