@x12i/ai-gateway 9.0.3 → 9.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,7 +34,27 @@ export declare function sleep(ms: number): Promise<void>;
34
34
  * Invokes router with retry logic for network and server errors
35
35
  * Returns response and retry metadata
36
36
  */
37
- export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer): Promise<{
37
+ export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer, hooks?: {
38
+ /**
39
+ * Called immediately before each provider call attempt (including the first try).
40
+ * `retryIndex` is 0-based within this invokeWithRetry call.
41
+ */
42
+ onTryStart?: (info: {
43
+ retryIndex: number;
44
+ startedAt: number;
45
+ }) => void;
46
+ /**
47
+ * Called immediately after each provider call attempt finishes (success or error).
48
+ * `retryIndex` is 0-based within this invokeWithRetry call.
49
+ */
50
+ onTryEnd?: (info: {
51
+ retryIndex: number;
52
+ endedAt: number;
53
+ ok: boolean;
54
+ response?: any;
55
+ error?: Error;
56
+ }) => void;
57
+ }): Promise<{
38
58
  response: any;
39
59
  retryMetadata?: {
40
60
  retryCount: number;
@@ -93,7 +93,7 @@ export function sleep(ms) {
93
93
  * Invokes router with retry logic for network and server errors
94
94
  * Returns response and retry metadata
95
95
  */
96
- export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger) {
96
+ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
97
97
  const maxRetries = retryConfig.maxRetries ?? 3;
98
98
  const initialDelay = retryConfig.initialDelay ?? 1000;
99
99
  const maxDelay = retryConfig.maxDelay ?? 30000;
@@ -103,8 +103,12 @@ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router,
103
103
  let lastError;
104
104
  const retryAttempts = [];
105
105
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
106
+ const startedAt = Date.now();
107
+ hooks?.onTryStart?.({ retryIndex: attempt, startedAt });
106
108
  try {
107
109
  const response = await router.invoke(routerRequest);
110
+ const endedAt = Date.now();
111
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: true, response });
108
112
  // Log success after retry if this wasn't the first attempt
109
113
  if (attempt > 0) {
110
114
  logger.info('Request succeeded after retry', {
@@ -128,6 +132,8 @@ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router,
128
132
  }
129
133
  catch (error) {
130
134
  lastError = error instanceof Error ? error : new Error(String(error));
135
+ const endedAt = Date.now();
136
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: false, error: lastError });
131
137
  // Check if error is retryable
132
138
  if (!isRetryableError(lastError)) {
133
139
  logger.debug('Non-retryable error, not retrying', {
@@ -19,3 +19,20 @@ export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer):
19
19
  export declare function mergeConfig(request: ChatRequest & {
20
20
  useInternalDefaults?: 'skill' | 'audit';
21
21
  }, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
22
+ /**
23
+ * Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
24
+ * Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
25
+ */
26
+ export declare function normalizeRouterUsageTokens(usage: unknown): {
27
+ prompt: number;
28
+ completion: number;
29
+ total: number;
30
+ } | undefined;
31
+ /**
32
+ * Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
33
+ */
34
+ export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
35
+ prompt: number;
36
+ completion: number;
37
+ total: number;
38
+ };
@@ -179,3 +179,55 @@ export async function mergeConfig(request, config, logger) {
179
179
  });
180
180
  return merged;
181
181
  }
/**
 * Returns the first argument that is a finite number, or `undefined` when none is.
 * Values of any other type, as well as NaN and +/-Infinity, are skipped (never coerced).
 */
function firstFiniteNumber(...vals) {
    for (const v of vals) {
        if (typeof v === 'number' && Number.isFinite(v))
            return v;
    }
    return undefined;
}
/**
 * Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
 *
 * Keys are probed in the order listed; the first finite number wins:
 * - prompt:     promptTokens, inputTokens, prompt, prompt_tokens, input_tokens
 * - completion: completionTokens, outputTokens, completion, completion_tokens, output_tokens
 * - total:      totalTokens, total_tokens, total
 *
 * The previously supported keys keep their priority; `input_tokens` / `output_tokens`
 * (snake_case counterparts of inputTokens/outputTokens) and `total` (counterpart of the
 * already accepted `prompt` / `completion`) are appended so existing results do not change
 * whenever an older key is present.
 *
 * @param usage Usage object of unknown shape; anything that is not a non-null object is rejected.
 * @returns `{ prompt, completion, total }` with missing counts set to 0, or `undefined`
 *          when `usage` is not an object.
 */
export function normalizeRouterUsageTokens(usage) {
    if (usage == null || typeof usage !== 'object')
        return undefined;
    const u = usage;
    const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens, u.input_tokens) ?? 0;
    const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens, u.output_tokens) ?? 0;
    let total = firstFiniteNumber(u.totalTokens, u.total_tokens, u.total) ?? 0;
    // A missing (or zero) total is derived from the parts when at least one part is non-zero.
    if (!total && (prompt || completion))
        total = prompt + completion;
    return { prompt, completion, total };
}
/**
 * Locates token usage on a router response and returns it as gateway token counts
 * (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
 *
 * Usage objects are probed in this order, and the first one that normalizes to at least
 * one non-zero count is returned:
 *   1. `response.usage`
 *   2. `response.metadata.usage`
 *   3. `response.metadata['ai-activities-response'].usage`
 *   4. `(response.rawResponse ?? response.raw).usage`
 *
 * @param routerResponse Router response of unknown shape.
 * @returns `{ prompt, completion, total }`; all zeros when the response is not an object
 *          or no probed location carries a non-zero count.
 */
export function extractTokenUsageFromRouterResponse(routerResponse) {
    const isObjectLike = (value) => value != null && typeof value === 'object';
    // A fresh object per call so callers can never share (and mutate) one zero result.
    const noUsage = () => ({ prompt: 0, completion: 0, total: 0 });
    if (!isObjectLike(routerResponse))
        return noUsage();
    const metadata = isObjectLike(routerResponse.metadata) ? routerResponse.metadata : undefined;
    const usageCandidates = [routerResponse.usage];
    if (metadata !== undefined) {
        usageCandidates.push(metadata.usage);
        const activityResponse = metadata['ai-activities-response'];
        if (isObjectLike(activityResponse))
            usageCandidates.push(activityResponse.usage);
    }
    const rawPayload = routerResponse.rawResponse ?? routerResponse.raw;
    if (isObjectLike(rawPayload))
        usageCandidates.push(rawPayload.usage);
    for (const candidate of usageCandidates) {
        const counts = normalizeRouterUsageTokens(candidate);
        if (!counts)
            continue;
        if (counts.prompt || counts.completion || counts.total)
            return counts;
    }
    return noUsage();
}
package/dist/gateway.js CHANGED
@@ -8,10 +8,11 @@ import { ensureGatewayRequestIdentity } from './activity-manager.js';
8
8
  import { initializeGatewayComponents } from './gateway-config.js';
9
9
  import { buildMessages } from './message-builder.js';
10
10
  import { extractJsonFromFlexMd } from './flex-md-loader.js';
11
- import { mergeConfig } from './gateway-utils.js';
11
+ import { extractTokenUsageFromRouterResponse, mergeConfig } from './gateway-utils.js';
12
12
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
13
13
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
14
14
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
15
+ import { invokeWithRetry } from './gateway-retry.js';
15
16
  /** Error message thrown by the router when no provider is registered or specified */
16
17
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
17
18
  const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
@@ -93,7 +94,7 @@ export class AIGateway {
93
94
  aiRequestId: request.aiRequestId,
94
95
  identity: request.identity,
95
96
  latencyMs: Date.now() - startTime,
96
- tokens: response.usage || { prompt: 0, completion: 0, total: 0 },
97
+ tokens: extractTokenUsageFromRouterResponse(response),
97
98
  taskTypeId,
98
99
  agentType: 'chat'
99
100
  }
@@ -203,6 +204,9 @@ export class AIGateway {
203
204
  request._parsedRequest = parsedSnapshot;
204
205
  // Merge config (modelConfig > request.config > gateway defaults)
205
206
  const mergedConfig = await mergeConfig(request, this.config, this.logger);
207
+ const diagnosticsMode = request.diagnostics?.mode;
208
+ const traceEnabled = diagnosticsMode === 'trace';
209
+ const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
206
210
  // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
207
211
  if (!this._autoRegisterDone) {
208
212
  await autoRegisterProviders(this.router, this.logger);
@@ -223,15 +227,206 @@ export class AIGateway {
223
227
  }
224
228
  }
225
229
  try {
226
- // Call router directly with merged config
227
- const response = await this.router.invoke({
228
- request: {
229
- messages,
230
- config: mergedConfig,
231
- identity: request.identity
232
- },
233
- mode: 'sync'
234
- });
230
+ let response;
231
+ let traceAttempts;
232
+ let traceRetryCount;
233
+ let traceFallbackCount;
234
+ let traceRequestIds;
235
+ let providerCallLatencyMs;
236
+ if (!traceEnabled) {
237
+ // Default minimal behavior (no extra allocations/payload).
238
+ response = await this.router.invoke({
239
+ request: {
240
+ messages,
241
+ config: mergedConfig,
242
+ identity: request.identity
243
+ },
244
+ mode: 'sync'
245
+ });
246
+ }
247
+ else {
248
+ const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
249
+ const capErrorMessage = (s) => capString(s, 500);
250
+ const safeJsonStringify = (value) => {
251
+ try {
252
+ return JSON.stringify(value);
253
+ }
254
+ catch {
255
+ return '[Unserializable]';
256
+ }
257
+ };
258
+ const gatewayAiRequestId = request.aiRequestId;
259
+ const baseRequest = {
260
+ request: {
261
+ messages,
262
+ config: mergedConfig,
263
+ identity: request.identity
264
+ },
265
+ mode: 'sync'
266
+ };
267
+ // Build deterministic provider/model candidate chain.
268
+ const candidates = [];
269
+ const primaryProvider = mergedConfig?.provider;
270
+ const primaryModel = mergedConfig?.model;
271
+ if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
272
+ candidates.push({ provider: primaryProvider, model: primaryModel });
273
+ }
274
+ const defaultTarget = this.config?.defaultTarget;
275
+ if (defaultTarget?.engine && defaultTarget?.model) {
276
+ candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
277
+ }
278
+ const fallbackChain = this.config?.fallbackChain;
279
+ if (Array.isArray(fallbackChain)) {
280
+ for (const item of fallbackChain) {
281
+ if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
282
+ candidates.push({ provider: String(item.engine), model: String(item.model) });
283
+ }
284
+ }
285
+ }
286
+ // De-dup while preserving order.
287
+ const seen = new Set();
288
+ const deduped = candidates.filter(c => {
289
+ const key = `${c.provider}::${c.model}`;
290
+ if (seen.has(key))
291
+ return false;
292
+ seen.add(key);
293
+ return true;
294
+ });
295
+ traceAttempts = [];
296
+ // Attempt execution across fallbacks (authoritative ordering).
297
+ let lastError;
298
+ for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
299
+ const candidate = deduped[fallbackIndex];
300
+ // Track per-retry attempt objects through retry hooks.
301
+ const attemptIndexByRetry = new Map();
302
+ try {
303
+ const result = await invokeWithRetry({
304
+ ...baseRequest,
305
+ request: {
306
+ ...baseRequest.request,
307
+ config: {
308
+ ...mergedConfig,
309
+ provider: candidate.provider,
310
+ model: candidate.model
311
+ }
312
+ }
313
+ }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
314
+ onTryStart: ({ retryIndex, startedAt }) => {
315
+ const idx = traceAttempts.push({
316
+ timing: { startedAt, endedAt: startedAt, durationMs: 0 },
317
+ routing: {
318
+ provider: candidate.provider,
319
+ requestIds: { gatewayAiRequestId },
320
+ retryIndex,
321
+ fallbackIndex
322
+ },
323
+ usage: {
324
+ tokens: { prompt: 0, completion: 0, total: 0 },
325
+ maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
326
+ },
327
+ modelUsed: candidate.model,
328
+ ok: false
329
+ }) - 1;
330
+ attemptIndexByRetry.set(retryIndex, idx);
331
+ },
332
+ onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
333
+ const idx = attemptIndexByRetry.get(retryIndex);
334
+ if (idx === undefined)
335
+ return;
336
+ const a = traceAttempts[idx];
337
+ a.timing.endedAt = endedAt;
338
+ a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
339
+ a.ok = ok;
340
+ const respAny = tryResp;
341
+ if (ok && respAny) {
342
+ const meta = respAny.metadata || {};
343
+ const tokenCounts = extractTokenUsageFromRouterResponse(respAny);
344
+ a.usage = {
345
+ tokens: tokenCounts,
346
+ maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
347
+ ? meta.maxTokensRequested
348
+ : typeof mergedConfig?.maxTokens === 'number'
349
+ ? mergedConfig.maxTokens
350
+ : undefined
351
+ };
352
+ a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
353
+ if (typeof meta?.region === 'string')
354
+ a.routing.region = meta.region;
355
+ const requestIds = {
356
+ gatewayAiRequestId,
357
+ routerRequestId: respAny.requestId || meta?.requestId
358
+ };
359
+ if (typeof meta?.providerRequestId === 'string')
360
+ requestIds.providerRequestId = meta.providerRequestId;
361
+ if (typeof meta?.openrouterRequestId === 'string')
362
+ requestIds.openrouterRequestId = meta.openrouterRequestId;
363
+ if (meta?.requestIds && typeof meta.requestIds === 'object') {
364
+ for (const [k, v] of Object.entries(meta.requestIds)) {
365
+ if (typeof v === 'string')
366
+ requestIds[k] = v;
367
+ }
368
+ }
369
+ a.routing.requestIds = requestIds;
370
+ a.modelUsed =
371
+ meta?.modelUsed || meta?.model || respAny.model || candidate.model;
372
+ const costUsd = typeof meta?.costUsd === 'number'
373
+ ? meta.costUsd
374
+ : typeof meta?.cost === 'number'
375
+ ? meta.cost
376
+ : typeof respAny?.costUsd === 'number'
377
+ ? respAny.costUsd
378
+ : typeof respAny?.cost === 'number'
379
+ ? respAny.cost
380
+ : undefined;
381
+ if (typeof costUsd === 'number')
382
+ a.costUsd = costUsd;
383
+ if (includeRawProviderPayload) {
384
+ // Size-capped preview only.
385
+ const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
386
+ const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
387
+ a.rawProviderPayload = capString(rawStr, 4000);
388
+ }
389
+ }
390
+ else if (tryErr) {
391
+ a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
392
+ }
393
+ }
394
+ });
395
+ response = result.response;
396
+ lastError = undefined;
397
+ break; // success => stop fallback chain
398
+ }
399
+ catch (err) {
400
+ lastError = err instanceof Error ? err : new Error(String(err));
401
+ continue;
402
+ }
403
+ }
404
+ if (!response) {
405
+ throw lastError ?? new Error('All fallback candidates failed');
406
+ }
407
+ // Summary counts + final request ids.
408
+ traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
409
+ const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
410
+ traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
411
+ const finalResp = response;
412
+ const finalMeta = finalResp?.metadata || {};
413
+ traceRequestIds = {
414
+ gatewayAiRequestId,
415
+ routerRequestId: finalResp?.requestId || finalMeta?.requestId
416
+ };
417
+ if (typeof finalMeta?.providerRequestId === 'string')
418
+ traceRequestIds.providerRequestId = finalMeta.providerRequestId;
419
+ if (typeof finalMeta?.openrouterRequestId === 'string')
420
+ traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
421
+ if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
422
+ for (const [k, v] of Object.entries(finalMeta.requestIds)) {
423
+ if (typeof v === 'string')
424
+ traceRequestIds[k] = v;
425
+ }
426
+ }
427
+ const lastOk = [...traceAttempts].reverse().find(a => a.ok);
428
+ providerCallLatencyMs = lastOk?.timing?.durationMs;
429
+ }
235
430
  // Contract output processing removed - expectedSchema no longer supported
236
431
  // Create enhanced response - extract content properly from router response
237
432
  const routerResponse = response;
@@ -293,35 +488,49 @@ export class AIGateway {
293
488
  }
294
489
  contentType = 'structured';
295
490
  parsingMethod = 'flex-md';
296
- // Extract token usage properly
297
- let tokens = { prompt: 0, completion: 0, total: 0 };
298
- if (routerResponse.usage) {
299
- tokens = {
300
- prompt: routerResponse.usage.promptTokens || routerResponse.usage.inputTokens || 0,
301
- completion: routerResponse.usage.completionTokens || routerResponse.usage.outputTokens || 0,
302
- total: routerResponse.usage.totalTokens || 0
303
- };
304
- }
305
- else if (routerResponse.metadata?.['ai-activities-response']?.usage) {
306
- const usage = routerResponse.metadata['ai-activities-response'].usage;
307
- tokens = {
308
- prompt: usage.promptTokens || usage.inputTokens || 0,
309
- completion: usage.completionTokens || usage.outputTokens || 0,
310
- total: usage.totalTokens || 0
311
- };
312
- }
491
+ const tokens = extractTokenUsageFromRouterResponse(routerResponse);
313
492
  const enhancedResponse = {
314
493
  content: content,
315
494
  parsedContent: parsedContent,
316
495
  metadata: {
317
496
  aiRequestId: request.aiRequestId,
318
497
  identity: request.identity,
319
- latencyMs: Date.now() - startTime,
498
+ latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
320
499
  tokens: tokens,
321
500
  taskTypeId,
322
501
  agentType: 'ai',
323
502
  contentType,
324
- parsingMethod
503
+ parsingMethod,
504
+ ...(traceEnabled
505
+ ? (() => {
506
+ const meta = routerResponse?.metadata || {};
507
+ const provider = meta.provider || routerResponse?.provider || mergedConfig?.provider;
508
+ const region = typeof meta.region === 'string' ? meta.region : undefined;
509
+ const modelUsed = meta.modelUsed || meta.model || routerResponse?.model || mergedConfig?.model;
510
+ const maxTokensRequested = typeof meta.maxTokensRequested === 'number'
511
+ ? meta.maxTokensRequested
512
+ : typeof mergedConfig?.maxTokens === 'number'
513
+ ? mergedConfig.maxTokens
514
+ : undefined;
515
+ const costUsd = typeof meta.costUsd === 'number'
516
+ ? meta.costUsd
517
+ : typeof meta.cost === 'number'
518
+ ? meta.cost
519
+ : undefined;
520
+ return {
521
+ provider,
522
+ region,
523
+ modelUsed,
524
+ maxTokensRequested,
525
+ cost: typeof meta.cost === 'number' ? meta.cost : undefined,
526
+ costUsd,
527
+ requestIds: traceRequestIds,
528
+ retryCount: traceRetryCount,
529
+ fallbackCount: traceFallbackCount,
530
+ attempts: traceAttempts
531
+ };
532
+ })()
533
+ : {})
325
534
  }
326
535
  };
327
536
  // Track activity success if activity was started
package/dist/types.d.ts CHANGED
@@ -10,6 +10,65 @@ export type UsageTier = string;
10
10
  import type { Activix } from '@x12i/activix';
11
11
  import type { TemplateRenderOptions } from '@x12i/rendrix';
12
12
  import type { Logxer } from '@x12i/logxer';
13
+ /**
14
+ * Diagnostics options for opt-in authoritative tracing.
15
+ * Default behavior must remain minimal when diagnostics are not enabled.
16
+ */
17
+ export type DiagnosticsMode = 'none' | 'trace';
18
+ export interface DiagnosticsOptions {
19
+ mode?: DiagnosticsMode;
20
+ /**
21
+ * Whether to include raw provider payloads (NEVER on by default).
22
+ * Implementations must size-cap any raw payload included.
23
+ */
24
+ includeRawProviderPayload?: boolean;
25
+ }
26
+ export type GatewayTraceRequestIds = {
27
+ /** Stable alias of gateway aiRequestId (always set when trace enabled). */
28
+ gatewayAiRequestId: string;
29
+ /** Router-level correlation id, when available. */
30
+ routerRequestId?: string;
31
+ /** Provider-level request id (or equivalent), when available. */
32
+ providerRequestId?: string;
33
+ /** OpenRouter request id, when applicable and available. */
34
+ openrouterRequestId?: string;
35
+ /** Allow additional stable ids without breaking contract. */
36
+ [key: string]: string | undefined;
37
+ };
38
+ export type GatewayTraceAttempt = {
39
+ timing: {
40
+ startedAt: number;
41
+ endedAt: number;
42
+ durationMs: number;
43
+ };
44
+ routing: {
45
+ provider: string;
46
+ region?: string;
47
+ requestIds: GatewayTraceRequestIds;
48
+ retryIndex: number;
49
+ fallbackIndex: number;
50
+ };
51
+ usage?: {
52
+ tokens: {
53
+ prompt: number;
54
+ completion: number;
55
+ total: number;
56
+ };
57
+ maxTokensRequested?: number;
58
+ };
59
+ modelUsed?: string;
60
+ costUsd?: number;
61
+ ok: boolean;
62
+ error?: {
63
+ name: string;
64
+ message: string;
65
+ };
66
+ /**
67
+ * Optional raw provider payload (size-capped, gated by request flag).
68
+ * The exact shape is intentionally loose to avoid locking downstream to provider schemas.
69
+ */
70
+ rawProviderPayload?: unknown;
71
+ };
13
72
  /**
14
73
  * Identity object used for activity linkage.
15
74
  * On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
@@ -606,6 +665,11 @@ interface BaseLLMRequest extends Omit<LLMRequest, 'messages' | 'input' | 'reques
606
665
  * Used when inferenceType is provided for parsing inference outputs
607
666
  */
608
667
  parseOptions?: Record<string, unknown>;
668
+ /**
669
+ * Optional diagnostics controls. When omitted or mode != 'trace', the gateway must not
670
+ * attach heavy diagnostic objects or raw provider payloads.
671
+ */
672
+ diagnostics?: DiagnosticsOptions;
609
673
  }
610
674
  /**
611
675
  * Chat request for conversational use cases
@@ -835,6 +899,41 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
835
899
  * Cost in USD (if available)
836
900
  */
837
901
  cost?: number;
902
+ /**
903
+ * Cost in USD (preferred, stable key for trace mode).
904
+ * When both are present, costUsd should mirror cost.
905
+ */
906
+ costUsd?: number;
907
+ /**
908
+ * Final effective max token cap applied (after merges/normalization), if known.
909
+ */
910
+ maxTokensRequested?: number;
911
+ /**
912
+ * Model that actually served the response (after routing/fallback), if known.
913
+ * This is distinct from requested model.
914
+ */
915
+ modelUsed?: string;
916
+ /**
917
+ * Optional region identifier when applicable (provider-specific).
918
+ */
919
+ region?: string;
920
+ /**
921
+ * Stable request/correlation identifiers across gateway/router/provider layers.
922
+ * Only populated when diagnostics trace mode is enabled.
923
+ */
924
+ requestIds?: GatewayTraceRequestIds;
925
+ /**
926
+ * Total number of retries performed across the execution (trace mode).
927
+ */
928
+ retryCount?: number;
929
+ /**
930
+ * Total number of fallback transitions performed across the execution (trace mode).
931
+ */
932
+ fallbackCount?: number;
933
+ /**
934
+ * Ordered, authoritative attempts across retries and fallbacks (trace mode).
935
+ */
936
+ attempts?: GatewayTraceAttempt[];
838
937
  /**
839
938
  * Content type classification
840
939
  * Indicates whether content is 'string', 'object', 'array', or 'null'
@@ -8,6 +8,7 @@
8
8
  *
9
9
  * Note: x-models dependency removed - usage tracking functions are permanently disabled
10
10
  */
11
+ import { extractTokenUsageFromRouterResponse } from './gateway-utils.js';
11
12
  /**
12
13
  * Manages usage tracking for LLM requests
13
14
  */
@@ -26,11 +27,7 @@ export class UsageTracker {
26
27
  * @returns Token usage breakdown
27
28
  */
28
29
  extractTokens(response) {
29
- return {
30
- prompt: response.usage?.promptTokens || 0,
31
- completion: response.usage?.completionTokens || 0,
32
- total: response.usage?.totalTokens || 0
33
- };
30
+ return extractTokenUsageFromRouterResponse(response);
34
31
  }
35
32
  /**
36
33
  * Records usage for a request
@@ -101,7 +101,7 @@ function sleep(ms) {
101
101
  * Invokes router with retry logic for network and server errors
102
102
  * Returns response and retry metadata
103
103
  */
104
- async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger) {
104
+ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
105
105
  const maxRetries = retryConfig.maxRetries ?? 3;
106
106
  const initialDelay = retryConfig.initialDelay ?? 1000;
107
107
  const maxDelay = retryConfig.maxDelay ?? 30000;
@@ -111,8 +111,12 @@ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger
111
111
  let lastError;
112
112
  const retryAttempts = [];
113
113
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
114
+ const startedAt = Date.now();
115
+ hooks?.onTryStart?.({ retryIndex: attempt, startedAt });
114
116
  try {
115
117
  const response = await router.invoke(routerRequest);
118
+ const endedAt = Date.now();
119
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: true, response });
116
120
  // Log success after retry if this wasn't the first attempt
117
121
  if (attempt > 0) {
118
122
  logger.info('Request succeeded after retry', {
@@ -136,6 +140,8 @@ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger
136
140
  }
137
141
  catch (error) {
138
142
  lastError = error instanceof Error ? error : new Error(String(error));
143
+ const endedAt = Date.now();
144
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: false, error: lastError });
139
145
  // Check if error is retryable
140
146
  if (!isRetryableError(lastError)) {
141
147
  logger.debug('Non-retryable error, not retrying', {
@@ -34,7 +34,27 @@ export declare function sleep(ms: number): Promise<void>;
34
34
  * Invokes router with retry logic for network and server errors
35
35
  * Returns response and retry metadata
36
36
  */
37
- export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer): Promise<{
37
+ export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer, hooks?: {
38
+ /**
39
+ * Called immediately before each provider call attempt (including the first try).
40
+ * `retryIndex` is 0-based within this invokeWithRetry call.
41
+ */
42
+ onTryStart?: (info: {
43
+ retryIndex: number;
44
+ startedAt: number;
45
+ }) => void;
46
+ /**
47
+ * Called immediately after each provider call attempt finishes (success or error).
48
+ * `retryIndex` is 0-based within this invokeWithRetry call.
49
+ */
50
+ onTryEnd?: (info: {
51
+ retryIndex: number;
52
+ endedAt: number;
53
+ ok: boolean;
54
+ response?: any;
55
+ error?: Error;
56
+ }) => void;
57
+ }): Promise<{
38
58
  response: any;
39
59
  retryMetadata?: {
40
60
  retryCount: number;
@@ -40,6 +40,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
40
40
  exports.generateMD5Hash = generateMD5Hash;
41
41
  exports.ensureTaskTypeId = ensureTaskTypeId;
42
42
  exports.mergeConfig = mergeConfig;
43
+ exports.normalizeRouterUsageTokens = normalizeRouterUsageTokens;
44
+ exports.extractTokenUsageFromRouterResponse = extractTokenUsageFromRouterResponse;
43
45
  const crypto = __importStar(require("crypto"));
44
46
  const gateway_instructions_js_1 = require("./gateway-instructions.cjs");
45
47
  const flex_md_loader_js_1 = require("./flex-md-loader.cjs");
@@ -217,3 +219,55 @@ async function mergeConfig(request, config, logger) {
217
219
  });
218
220
  return merged;
219
221
  }
/**
 * Returns the first argument that is a finite number, or `undefined` when none is.
 * Values of any other type, as well as NaN and +/-Infinity, are skipped (never coerced).
 */
function firstFiniteNumber(...vals) {
    for (const v of vals) {
        if (typeof v === 'number' && Number.isFinite(v))
            return v;
    }
    return undefined;
}
/**
 * Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
 *
 * Keys are probed in the order listed; the first finite number wins:
 * - prompt:     promptTokens, inputTokens, prompt, prompt_tokens, input_tokens
 * - completion: completionTokens, outputTokens, completion, completion_tokens, output_tokens
 * - total:      totalTokens, total_tokens, total
 *
 * The previously supported keys keep their priority; `input_tokens` / `output_tokens`
 * (snake_case counterparts of inputTokens/outputTokens) and `total` (counterpart of the
 * already accepted `prompt` / `completion`) are appended so existing results do not change
 * whenever an older key is present.
 *
 * @param usage Usage object of unknown shape; anything that is not a non-null object is rejected.
 * @returns `{ prompt, completion, total }` with missing counts set to 0, or `undefined`
 *          when `usage` is not an object.
 */
function normalizeRouterUsageTokens(usage) {
    if (usage == null || typeof usage !== 'object')
        return undefined;
    const u = usage;
    const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens, u.input_tokens) ?? 0;
    const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens, u.output_tokens) ?? 0;
    let total = firstFiniteNumber(u.totalTokens, u.total_tokens, u.total) ?? 0;
    // A missing (or zero) total is derived from the parts when at least one part is non-zero.
    if (!total && (prompt || completion))
        total = prompt + completion;
    return { prompt, completion, total };
}
/**
 * Locates token usage on a router response and returns it as gateway token counts
 * (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
 *
 * Usage objects are probed in this order, and the first one that normalizes to at least
 * one non-zero count is returned:
 *   1. `response.usage`
 *   2. `response.metadata.usage`
 *   3. `response.metadata['ai-activities-response'].usage`
 *   4. `(response.rawResponse ?? response.raw).usage`
 *
 * @param routerResponse Router response of unknown shape.
 * @returns `{ prompt, completion, total }`; all zeros when the response is not an object
 *          or no probed location carries a non-zero count.
 */
function extractTokenUsageFromRouterResponse(routerResponse) {
    const isObjectLike = (value) => value != null && typeof value === 'object';
    // A fresh object per call so callers can never share (and mutate) one zero result.
    const noUsage = () => ({ prompt: 0, completion: 0, total: 0 });
    if (!isObjectLike(routerResponse))
        return noUsage();
    const metadata = isObjectLike(routerResponse.metadata) ? routerResponse.metadata : undefined;
    const usageCandidates = [routerResponse.usage];
    if (metadata !== undefined) {
        usageCandidates.push(metadata.usage);
        const activityResponse = metadata['ai-activities-response'];
        if (isObjectLike(activityResponse))
            usageCandidates.push(activityResponse.usage);
    }
    const rawPayload = routerResponse.rawResponse ?? routerResponse.raw;
    if (isObjectLike(rawPayload))
        usageCandidates.push(rawPayload.usage);
    for (const candidate of usageCandidates) {
        const counts = normalizeRouterUsageTokens(candidate);
        if (!counts)
            continue;
        if (counts.prompt || counts.completion || counts.total)
            return counts;
    }
    return noUsage();
}
@@ -19,3 +19,20 @@ export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer):
19
19
  export declare function mergeConfig(request: ChatRequest & {
20
20
  useInternalDefaults?: 'skill' | 'audit';
21
21
  }, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
22
+ /**
23
+ * Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
24
+ * Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
25
+ */
26
+ export declare function normalizeRouterUsageTokens(usage: unknown): {
27
+ prompt: number;
28
+ completion: number;
29
+ total: number;
30
+ } | undefined;
31
+ /**
32
+ * Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
33
+ */
34
+ export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
35
+ prompt: number;
36
+ completion: number;
37
+ total: number;
38
+ };
@@ -15,6 +15,7 @@ const gateway_utils_js_1 = require("./gateway-utils.cjs");
15
15
  const gateway_provider_auto_register_js_1 = require("./gateway-provider-auto-register.cjs");
16
16
  const runtime_objects_js_1 = require("./runtime-objects.cjs");
17
17
  const gateway_log_meta_js_1 = require("./gateway-log-meta.cjs");
18
+ const gateway_retry_js_1 = require("./gateway-retry.cjs");
18
19
  /** Error message thrown by the router when no provider is registered or specified */
19
20
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
20
21
  const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
@@ -96,7 +97,7 @@ class AIGateway {
96
97
  aiRequestId: request.aiRequestId,
97
98
  identity: request.identity,
98
99
  latencyMs: Date.now() - startTime,
99
- tokens: response.usage || { prompt: 0, completion: 0, total: 0 },
100
+ tokens: (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(response),
100
101
  taskTypeId,
101
102
  agentType: 'chat'
102
103
  }
@@ -206,6 +207,9 @@ class AIGateway {
206
207
  request._parsedRequest = parsedSnapshot;
207
208
  // Merge config (modelConfig > request.config > gateway defaults)
208
209
  const mergedConfig = await (0, gateway_utils_js_1.mergeConfig)(request, this.config, this.logger);
210
+ const diagnosticsMode = request.diagnostics?.mode;
211
+ const traceEnabled = diagnosticsMode === 'trace';
212
+ const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
209
213
  // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
210
214
  if (!this._autoRegisterDone) {
211
215
  await (0, gateway_provider_auto_register_js_1.autoRegisterProviders)(this.router, this.logger);
@@ -226,15 +230,206 @@ class AIGateway {
226
230
  }
227
231
  }
228
232
  try {
229
- // Call router directly with merged config
230
- const response = await this.router.invoke({
231
- request: {
232
- messages,
233
- config: mergedConfig,
234
- identity: request.identity
235
- },
236
- mode: 'sync'
237
- });
233
+ let response;
234
+ let traceAttempts;
235
+ let traceRetryCount;
236
+ let traceFallbackCount;
237
+ let traceRequestIds;
238
+ let providerCallLatencyMs;
239
+ if (!traceEnabled) {
240
+ // Default minimal behavior (no extra allocations/payload).
241
+ response = await this.router.invoke({
242
+ request: {
243
+ messages,
244
+ config: mergedConfig,
245
+ identity: request.identity
246
+ },
247
+ mode: 'sync'
248
+ });
249
+ }
250
+ else {
251
+ const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
252
+ const capErrorMessage = (s) => capString(s, 500);
253
+ const safeJsonStringify = (value) => {
254
+ try {
255
+ return JSON.stringify(value);
256
+ }
257
+ catch {
258
+ return '[Unserializable]';
259
+ }
260
+ };
261
+ const gatewayAiRequestId = request.aiRequestId;
262
+ const baseRequest = {
263
+ request: {
264
+ messages,
265
+ config: mergedConfig,
266
+ identity: request.identity
267
+ },
268
+ mode: 'sync'
269
+ };
270
+ // Build deterministic provider/model candidate chain.
271
+ const candidates = [];
272
+ const primaryProvider = mergedConfig?.provider;
273
+ const primaryModel = mergedConfig?.model;
274
+ if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
275
+ candidates.push({ provider: primaryProvider, model: primaryModel });
276
+ }
277
+ const defaultTarget = this.config?.defaultTarget;
278
+ if (defaultTarget?.engine && defaultTarget?.model) {
279
+ candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
280
+ }
281
+ const fallbackChain = this.config?.fallbackChain;
282
+ if (Array.isArray(fallbackChain)) {
283
+ for (const item of fallbackChain) {
284
+ if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
285
+ candidates.push({ provider: String(item.engine), model: String(item.model) });
286
+ }
287
+ }
288
+ }
289
+ // De-dup while preserving order.
290
+ const seen = new Set();
291
+ const deduped = candidates.filter(c => {
292
+ const key = `${c.provider}::${c.model}`;
293
+ if (seen.has(key))
294
+ return false;
295
+ seen.add(key);
296
+ return true;
297
+ });
298
+ traceAttempts = [];
299
+ // Attempt execution across fallbacks (authoritative ordering).
300
+ let lastError;
301
+ for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
302
+ const candidate = deduped[fallbackIndex];
303
+ // Track per-retry attempt objects through retry hooks.
304
+ const attemptIndexByRetry = new Map();
305
+ try {
306
+ const result = await (0, gateway_retry_js_1.invokeWithRetry)({
307
+ ...baseRequest,
308
+ request: {
309
+ ...baseRequest.request,
310
+ config: {
311
+ ...mergedConfig,
312
+ provider: candidate.provider,
313
+ model: candidate.model
314
+ }
315
+ }
316
+ }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
317
+ onTryStart: ({ retryIndex, startedAt }) => {
318
+ const idx = traceAttempts.push({
319
+ timing: { startedAt, endedAt: startedAt, durationMs: 0 },
320
+ routing: {
321
+ provider: candidate.provider,
322
+ requestIds: { gatewayAiRequestId },
323
+ retryIndex,
324
+ fallbackIndex
325
+ },
326
+ usage: {
327
+ tokens: { prompt: 0, completion: 0, total: 0 },
328
+ maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
329
+ },
330
+ modelUsed: candidate.model,
331
+ ok: false
332
+ }) - 1;
333
+ attemptIndexByRetry.set(retryIndex, idx);
334
+ },
335
+ onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
336
+ const idx = attemptIndexByRetry.get(retryIndex);
337
+ if (idx === undefined)
338
+ return;
339
+ const a = traceAttempts[idx];
340
+ a.timing.endedAt = endedAt;
341
+ a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
342
+ a.ok = ok;
343
+ const respAny = tryResp;
344
+ if (ok && respAny) {
345
+ const meta = respAny.metadata || {};
346
+ const tokenCounts = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(respAny);
347
+ a.usage = {
348
+ tokens: tokenCounts,
349
+ maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
350
+ ? meta.maxTokensRequested
351
+ : typeof mergedConfig?.maxTokens === 'number'
352
+ ? mergedConfig.maxTokens
353
+ : undefined
354
+ };
355
+ a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
356
+ if (typeof meta?.region === 'string')
357
+ a.routing.region = meta.region;
358
+ const requestIds = {
359
+ gatewayAiRequestId,
360
+ routerRequestId: respAny.requestId || meta?.requestId
361
+ };
362
+ if (typeof meta?.providerRequestId === 'string')
363
+ requestIds.providerRequestId = meta.providerRequestId;
364
+ if (typeof meta?.openrouterRequestId === 'string')
365
+ requestIds.openrouterRequestId = meta.openrouterRequestId;
366
+ if (meta?.requestIds && typeof meta.requestIds === 'object') {
367
+ for (const [k, v] of Object.entries(meta.requestIds)) {
368
+ if (typeof v === 'string')
369
+ requestIds[k] = v;
370
+ }
371
+ }
372
+ a.routing.requestIds = requestIds;
373
+ a.modelUsed =
374
+ meta?.modelUsed || meta?.model || respAny.model || candidate.model;
375
+ const costUsd = typeof meta?.costUsd === 'number'
376
+ ? meta.costUsd
377
+ : typeof meta?.cost === 'number'
378
+ ? meta.cost
379
+ : typeof respAny?.costUsd === 'number'
380
+ ? respAny.costUsd
381
+ : typeof respAny?.cost === 'number'
382
+ ? respAny.cost
383
+ : undefined;
384
+ if (typeof costUsd === 'number')
385
+ a.costUsd = costUsd;
386
+ if (includeRawProviderPayload) {
387
+ // Size-capped preview only.
388
+ const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
389
+ const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
390
+ a.rawProviderPayload = capString(rawStr, 4000);
391
+ }
392
+ }
393
+ else if (tryErr) {
394
+ a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
395
+ }
396
+ }
397
+ });
398
+ response = result.response;
399
+ lastError = undefined;
400
+ break; // success => stop fallback chain
401
+ }
402
+ catch (err) {
403
+ lastError = err instanceof Error ? err : new Error(String(err));
404
+ continue;
405
+ }
406
+ }
407
+ if (!response) {
408
+ throw lastError ?? new Error('All fallback candidates failed');
409
+ }
410
+ // Summary counts + final request ids.
411
+ traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
412
+ const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
413
+ traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
414
+ const finalResp = response;
415
+ const finalMeta = finalResp?.metadata || {};
416
+ traceRequestIds = {
417
+ gatewayAiRequestId,
418
+ routerRequestId: finalResp?.requestId || finalMeta?.requestId
419
+ };
420
+ if (typeof finalMeta?.providerRequestId === 'string')
421
+ traceRequestIds.providerRequestId = finalMeta.providerRequestId;
422
+ if (typeof finalMeta?.openrouterRequestId === 'string')
423
+ traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
424
+ if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
425
+ for (const [k, v] of Object.entries(finalMeta.requestIds)) {
426
+ if (typeof v === 'string')
427
+ traceRequestIds[k] = v;
428
+ }
429
+ }
430
+ const lastOk = [...traceAttempts].reverse().find(a => a.ok);
431
+ providerCallLatencyMs = lastOk?.timing?.durationMs;
432
+ }
238
433
  // Contract output processing removed - expectedSchema no longer supported
239
434
  // Create enhanced response - extract content properly from router response
240
435
  const routerResponse = response;
@@ -296,35 +491,49 @@ class AIGateway {
296
491
  }
297
492
  contentType = 'structured';
298
493
  parsingMethod = 'flex-md';
299
- // Extract token usage properly
300
- let tokens = { prompt: 0, completion: 0, total: 0 };
301
- if (routerResponse.usage) {
302
- tokens = {
303
- prompt: routerResponse.usage.promptTokens || routerResponse.usage.inputTokens || 0,
304
- completion: routerResponse.usage.completionTokens || routerResponse.usage.outputTokens || 0,
305
- total: routerResponse.usage.totalTokens || 0
306
- };
307
- }
308
- else if (routerResponse.metadata?.['ai-activities-response']?.usage) {
309
- const usage = routerResponse.metadata['ai-activities-response'].usage;
310
- tokens = {
311
- prompt: usage.promptTokens || usage.inputTokens || 0,
312
- completion: usage.completionTokens || usage.outputTokens || 0,
313
- total: usage.totalTokens || 0
314
- };
315
- }
494
+ const tokens = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(routerResponse);
316
495
  const enhancedResponse = {
317
496
  content: content,
318
497
  parsedContent: parsedContent,
319
498
  metadata: {
320
499
  aiRequestId: request.aiRequestId,
321
500
  identity: request.identity,
322
- latencyMs: Date.now() - startTime,
501
+ latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
323
502
  tokens: tokens,
324
503
  taskTypeId,
325
504
  agentType: 'ai',
326
505
  contentType,
327
- parsingMethod
506
+ parsingMethod,
507
+ ...(traceEnabled
508
+ ? (() => {
509
+ const meta = routerResponse?.metadata || {};
510
+ const provider = meta.provider || routerResponse?.provider || mergedConfig?.provider;
511
+ const region = typeof meta.region === 'string' ? meta.region : undefined;
512
+ const modelUsed = meta.modelUsed || meta.model || routerResponse?.model || mergedConfig?.model;
513
+ const maxTokensRequested = typeof meta.maxTokensRequested === 'number'
514
+ ? meta.maxTokensRequested
515
+ : typeof mergedConfig?.maxTokens === 'number'
516
+ ? mergedConfig.maxTokens
517
+ : undefined;
518
+ const costUsd = typeof meta.costUsd === 'number'
519
+ ? meta.costUsd
520
+ : typeof meta.cost === 'number'
521
+ ? meta.cost
522
+ : undefined;
523
+ return {
524
+ provider,
525
+ region,
526
+ modelUsed,
527
+ maxTokensRequested,
528
+ cost: typeof meta.cost === 'number' ? meta.cost : undefined,
529
+ costUsd,
530
+ requestIds: traceRequestIds,
531
+ retryCount: traceRetryCount,
532
+ fallbackCount: traceFallbackCount,
533
+ attempts: traceAttempts
534
+ };
535
+ })()
536
+ : {})
328
537
  }
329
538
  };
330
539
  // Track activity success if activity was started
@@ -10,6 +10,65 @@ export type UsageTier = string;
10
10
  import type { Activix } from '@x12i/activix';
11
11
  import type { TemplateRenderOptions } from '@x12i/rendrix';
12
12
  import type { Logxer } from '@x12i/logxer';
13
+ /**
14
+ * Diagnostics options for opt-in authoritative tracing.
15
+ * Default behavior must remain minimal when diagnostics are not enabled.
16
+ */
17
+ export type DiagnosticsMode = 'none' | 'trace';
18
+ export interface DiagnosticsOptions {
19
+ mode?: DiagnosticsMode;
20
+ /**
21
+ * Whether to include raw provider payloads (NEVER on by default).
22
+ * Implementations must size-cap any raw payload included.
23
+ */
24
+ includeRawProviderPayload?: boolean;
25
+ }
26
+ export type GatewayTraceRequestIds = {
27
+ /** Stable alias of gateway aiRequestId (always set when trace enabled). */
28
+ gatewayAiRequestId: string;
29
+ /** Router-level correlation id, when available. */
30
+ routerRequestId?: string;
31
+ /** Provider-level request id (or equivalent), when available. */
32
+ providerRequestId?: string;
33
+ /** OpenRouter request id, when applicable and available. */
34
+ openrouterRequestId?: string;
35
+ /** Allow additional stable ids without breaking contract. */
36
+ [key: string]: string | undefined;
37
+ };
38
+ export type GatewayTraceAttempt = {
39
+ timing: {
40
+ startedAt: number;
41
+ endedAt: number;
42
+ durationMs: number;
43
+ };
44
+ routing: {
45
+ provider: string;
46
+ region?: string;
47
+ requestIds: GatewayTraceRequestIds;
48
+ retryIndex: number;
49
+ fallbackIndex: number;
50
+ };
51
+ usage?: {
52
+ tokens: {
53
+ prompt: number;
54
+ completion: number;
55
+ total: number;
56
+ };
57
+ maxTokensRequested?: number;
58
+ };
59
+ modelUsed?: string;
60
+ costUsd?: number;
61
+ ok: boolean;
62
+ error?: {
63
+ name: string;
64
+ message: string;
65
+ };
66
+ /**
67
+ * Optional raw provider payload (size-capped, gated by request flag).
68
+ * The exact shape is intentionally loose to avoid locking downstream to provider schemas.
69
+ */
70
+ rawProviderPayload?: unknown;
71
+ };
13
72
  /**
14
73
  * Identity object used for activity linkage.
15
74
  * On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
@@ -606,6 +665,11 @@ interface BaseLLMRequest extends Omit<LLMRequest, 'messages' | 'input' | 'reques
606
665
  * Used when inferenceType is provided for parsing inference outputs
607
666
  */
608
667
  parseOptions?: Record<string, unknown>;
668
+ /**
669
+ * Optional diagnostics controls. When omitted or mode != 'trace', the gateway must not
670
+ * attach heavy diagnostic objects or raw provider payloads.
671
+ */
672
+ diagnostics?: DiagnosticsOptions;
609
673
  }
610
674
  /**
611
675
  * Chat request for conversational use cases
@@ -835,6 +899,41 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
835
899
  * Cost in USD (if available)
836
900
  */
837
901
  cost?: number;
902
+ /**
903
+ * Cost in USD (preferred, stable key for trace mode).
904
+ * When both are present, costUsd should mirror cost.
905
+ */
906
+ costUsd?: number;
907
+ /**
908
+ * Final effective max token cap applied (after merges/normalization), if known.
909
+ */
910
+ maxTokensRequested?: number;
911
+ /**
912
+ * Model that actually served the response (after routing/fallback), if known.
913
+ * This is distinct from requested model.
914
+ */
915
+ modelUsed?: string;
916
+ /**
917
+ * Optional region identifier when applicable (provider-specific).
918
+ */
919
+ region?: string;
920
+ /**
921
+ * Stable request/correlation identifiers across gateway/router/provider layers.
922
+ * Only populated when diagnostics trace mode is enabled.
923
+ */
924
+ requestIds?: GatewayTraceRequestIds;
925
+ /**
926
+ * Total number of retries performed across the execution (trace mode).
927
+ */
928
+ retryCount?: number;
929
+ /**
930
+ * Total number of fallback transitions performed across the execution (trace mode).
931
+ */
932
+ fallbackCount?: number;
933
+ /**
934
+ * Ordered, authoritative attempts across retries and fallbacks (trace mode).
935
+ */
936
+ attempts?: GatewayTraceAttempt[];
838
937
  /**
839
938
  * Content type classification
840
939
  * Indicates whether content is 'string', 'object', 'array', or 'null'
@@ -11,6 +11,7 @@
11
11
  */
12
12
  Object.defineProperty(exports, "__esModule", { value: true });
13
13
  exports.UsageTracker = void 0;
14
+ const gateway_utils_js_1 = require("./gateway-utils.cjs");
14
15
  /**
15
16
  * Manages usage tracking for LLM requests
16
17
  */
@@ -29,11 +30,7 @@ class UsageTracker {
29
30
  * @returns Token usage breakdown
30
31
  */
31
32
  extractTokens(response) {
32
- return {
33
- prompt: response.usage?.promptTokens || 0,
34
- completion: response.usage?.completionTokens || 0,
35
- total: response.usage?.totalTokens || 0
36
- };
33
+ return (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(response);
37
34
  }
38
35
  /**
39
36
  * Records usage for a request
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@x12i/ai-gateway",
3
- "version": "9.0.3",
3
+ "version": "9.0.8",
4
4
  "description": "AI Gateway - Unified interface for LLM provider routing and management",
5
5
  "type": "module",
6
6
  "exports": {
@@ -60,7 +60,7 @@
60
60
  "author": "x12i",
61
61
  "license": "mit",
62
62
  "dependencies": {
63
- "@x12i/ai-providers-router": "^4.7.1",
63
+ "@x12i/ai-providers-router": "^4.7.7",
64
64
  "@x12i/rendrix": "^4.2.0",
65
65
  "@aws-sdk/s3-request-presigner": "^3.953.0",
66
66
  "@x12i/env": "^4.0.1",