@x12i/ai-gateway 9.0.3 → 9.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,7 +34,27 @@ export declare function sleep(ms: number): Promise<void>;
34
34
  * Invokes router with retry logic for network and server errors
35
35
  * Returns response and retry metadata
36
36
  */
37
- export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer): Promise<{
37
+ export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer, hooks?: {
38
+ /**
39
+ * Called immediately before each provider call attempt (including the first try).
40
+ * `retryIndex` is 0-based within this invokeWithRetry call.
41
+ */
42
+ onTryStart?: (info: {
43
+ retryIndex: number;
44
+ startedAt: number;
45
+ }) => void;
46
+ /**
47
+ * Called immediately after each provider call attempt finishes (success or error).
48
+ * `retryIndex` is 0-based within this invokeWithRetry call.
49
+ */
50
+ onTryEnd?: (info: {
51
+ retryIndex: number;
52
+ endedAt: number;
53
+ ok: boolean;
54
+ response?: any;
55
+ error?: Error;
56
+ }) => void;
57
+ }): Promise<{
38
58
  response: any;
39
59
  retryMetadata?: {
40
60
  retryCount: number;
@@ -93,7 +93,7 @@ export function sleep(ms) {
93
93
  * Invokes router with retry logic for network and server errors
94
94
  * Returns response and retry metadata
95
95
  */
96
- export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger) {
96
+ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
97
97
  const maxRetries = retryConfig.maxRetries ?? 3;
98
98
  const initialDelay = retryConfig.initialDelay ?? 1000;
99
99
  const maxDelay = retryConfig.maxDelay ?? 30000;
@@ -103,8 +103,12 @@ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router,
103
103
  let lastError;
104
104
  const retryAttempts = [];
105
105
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
106
+ const startedAt = Date.now();
107
+ hooks?.onTryStart?.({ retryIndex: attempt, startedAt });
106
108
  try {
107
109
  const response = await router.invoke(routerRequest);
110
+ const endedAt = Date.now();
111
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: true, response });
108
112
  // Log success after retry if this wasn't the first attempt
109
113
  if (attempt > 0) {
110
114
  logger.info('Request succeeded after retry', {
@@ -128,6 +132,8 @@ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router,
128
132
  }
129
133
  catch (error) {
130
134
  lastError = error instanceof Error ? error : new Error(String(error));
135
+ const endedAt = Date.now();
136
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: false, error: lastError });
131
137
  // Check if error is retryable
132
138
  if (!isRetryableError(lastError)) {
133
139
  logger.debug('Non-retryable error, not retrying', {
package/dist/gateway.js CHANGED
@@ -12,6 +12,7 @@ import { mergeConfig } from './gateway-utils.js';
12
12
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
13
13
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
14
14
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
15
+ import { invokeWithRetry } from './gateway-retry.js';
15
16
  /** Error message thrown by the router when no provider is registered or specified */
16
17
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
17
18
  const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
@@ -203,6 +204,9 @@ export class AIGateway {
203
204
  request._parsedRequest = parsedSnapshot;
204
205
  // Merge config (modelConfig > request.config > gateway defaults)
205
206
  const mergedConfig = await mergeConfig(request, this.config, this.logger);
207
+ const diagnosticsMode = request.diagnostics?.mode;
208
+ const traceEnabled = diagnosticsMode === 'trace';
209
+ const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
206
210
  // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
207
211
  if (!this._autoRegisterDone) {
208
212
  await autoRegisterProviders(this.router, this.logger);
@@ -223,15 +227,209 @@ export class AIGateway {
223
227
  }
224
228
  }
225
229
  try {
226
- // Call router directly with merged config
227
- const response = await this.router.invoke({
228
- request: {
229
- messages,
230
- config: mergedConfig,
231
- identity: request.identity
232
- },
233
- mode: 'sync'
234
- });
230
+ let response;
231
+ let traceAttempts;
232
+ let traceRetryCount;
233
+ let traceFallbackCount;
234
+ let traceRequestIds;
235
+ let providerCallLatencyMs;
236
+ if (!traceEnabled) {
237
+ // Default minimal behavior (no extra allocations/payload).
238
+ response = await this.router.invoke({
239
+ request: {
240
+ messages,
241
+ config: mergedConfig,
242
+ identity: request.identity
243
+ },
244
+ mode: 'sync'
245
+ });
246
+ }
247
+ else {
248
+ const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
249
+ const capErrorMessage = (s) => capString(s, 500);
250
+ const safeJsonStringify = (value) => {
251
+ try {
252
+ return JSON.stringify(value);
253
+ }
254
+ catch {
255
+ return '[Unserializable]';
256
+ }
257
+ };
258
+ const gatewayAiRequestId = request.aiRequestId;
259
+ const baseRequest = {
260
+ request: {
261
+ messages,
262
+ config: mergedConfig,
263
+ identity: request.identity
264
+ },
265
+ mode: 'sync'
266
+ };
267
+ // Build deterministic provider/model candidate chain.
268
+ const candidates = [];
269
+ const primaryProvider = mergedConfig?.provider;
270
+ const primaryModel = mergedConfig?.model;
271
+ if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
272
+ candidates.push({ provider: primaryProvider, model: primaryModel });
273
+ }
274
+ const defaultTarget = this.config?.defaultTarget;
275
+ if (defaultTarget?.engine && defaultTarget?.model) {
276
+ candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
277
+ }
278
+ const fallbackChain = this.config?.fallbackChain;
279
+ if (Array.isArray(fallbackChain)) {
280
+ for (const item of fallbackChain) {
281
+ if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
282
+ candidates.push({ provider: String(item.engine), model: String(item.model) });
283
+ }
284
+ }
285
+ }
286
+ // De-dup while preserving order.
287
+ const seen = new Set();
288
+ const deduped = candidates.filter(c => {
289
+ const key = `${c.provider}::${c.model}`;
290
+ if (seen.has(key))
291
+ return false;
292
+ seen.add(key);
293
+ return true;
294
+ });
295
+ traceAttempts = [];
296
+ // Attempt execution across fallbacks (authoritative ordering).
297
+ let lastError;
298
+ for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
299
+ const candidate = deduped[fallbackIndex];
300
+ // Track per-retry attempt objects through retry hooks.
301
+ const attemptIndexByRetry = new Map();
302
+ try {
303
+ const result = await invokeWithRetry({
304
+ ...baseRequest,
305
+ request: {
306
+ ...baseRequest.request,
307
+ config: {
308
+ ...mergedConfig,
309
+ provider: candidate.provider,
310
+ model: candidate.model
311
+ }
312
+ }
313
+ }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
314
+ onTryStart: ({ retryIndex, startedAt }) => {
315
+ const idx = traceAttempts.push({
316
+ timing: { startedAt, endedAt: startedAt, durationMs: 0 },
317
+ routing: {
318
+ provider: candidate.provider,
319
+ requestIds: { gatewayAiRequestId },
320
+ retryIndex,
321
+ fallbackIndex
322
+ },
323
+ usage: {
324
+ tokens: { prompt: 0, completion: 0, total: 0 },
325
+ maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
326
+ },
327
+ modelUsed: candidate.model,
328
+ ok: false
329
+ }) - 1;
330
+ attemptIndexByRetry.set(retryIndex, idx);
331
+ },
332
+ onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
333
+ const idx = attemptIndexByRetry.get(retryIndex);
334
+ if (idx === undefined)
335
+ return;
336
+ const a = traceAttempts[idx];
337
+ a.timing.endedAt = endedAt;
338
+ a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
339
+ a.ok = ok;
340
+ const respAny = tryResp;
341
+ if (ok && respAny) {
342
+ const meta = respAny.metadata || {};
343
+ const usage = respAny.usage || meta?.['ai-activities-response']?.usage;
344
+ const prompt = usage?.promptTokens ?? usage?.inputTokens ?? 0;
345
+ const completion = usage?.completionTokens ?? usage?.outputTokens ?? 0;
346
+ const total = usage?.totalTokens ?? 0;
347
+ a.usage = {
348
+ tokens: { prompt, completion, total },
349
+ maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
350
+ ? meta.maxTokensRequested
351
+ : typeof mergedConfig?.maxTokens === 'number'
352
+ ? mergedConfig.maxTokens
353
+ : undefined
354
+ };
355
+ a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
356
+ if (typeof meta?.region === 'string')
357
+ a.routing.region = meta.region;
358
+ const requestIds = {
359
+ gatewayAiRequestId,
360
+ routerRequestId: respAny.requestId || meta?.requestId
361
+ };
362
+ if (typeof meta?.providerRequestId === 'string')
363
+ requestIds.providerRequestId = meta.providerRequestId;
364
+ if (typeof meta?.openrouterRequestId === 'string')
365
+ requestIds.openrouterRequestId = meta.openrouterRequestId;
366
+ if (meta?.requestIds && typeof meta.requestIds === 'object') {
367
+ for (const [k, v] of Object.entries(meta.requestIds)) {
368
+ if (typeof v === 'string')
369
+ requestIds[k] = v;
370
+ }
371
+ }
372
+ a.routing.requestIds = requestIds;
373
+ a.modelUsed =
374
+ meta?.modelUsed || meta?.model || respAny.model || candidate.model;
375
+ const costUsd = typeof meta?.costUsd === 'number'
376
+ ? meta.costUsd
377
+ : typeof meta?.cost === 'number'
378
+ ? meta.cost
379
+ : typeof respAny?.costUsd === 'number'
380
+ ? respAny.costUsd
381
+ : typeof respAny?.cost === 'number'
382
+ ? respAny.cost
383
+ : undefined;
384
+ if (typeof costUsd === 'number')
385
+ a.costUsd = costUsd;
386
+ if (includeRawProviderPayload) {
387
+ // Size-capped preview only.
388
+ const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
389
+ const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
390
+ a.rawProviderPayload = capString(rawStr, 4000);
391
+ }
392
+ }
393
+ else if (tryErr) {
394
+ a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
395
+ }
396
+ }
397
+ });
398
+ response = result.response;
399
+ lastError = undefined;
400
+ break; // success => stop fallback chain
401
+ }
402
+ catch (err) {
403
+ lastError = err instanceof Error ? err : new Error(String(err));
404
+ continue;
405
+ }
406
+ }
407
+ if (!response) {
408
+ throw lastError ?? new Error('All fallback candidates failed');
409
+ }
410
+ // Summary counts + final request ids.
411
+ traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
412
+ const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
413
+ traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
414
+ const finalResp = response;
415
+ const finalMeta = finalResp?.metadata || {};
416
+ traceRequestIds = {
417
+ gatewayAiRequestId,
418
+ routerRequestId: finalResp?.requestId || finalMeta?.requestId
419
+ };
420
+ if (typeof finalMeta?.providerRequestId === 'string')
421
+ traceRequestIds.providerRequestId = finalMeta.providerRequestId;
422
+ if (typeof finalMeta?.openrouterRequestId === 'string')
423
+ traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
424
+ if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
425
+ for (const [k, v] of Object.entries(finalMeta.requestIds)) {
426
+ if (typeof v === 'string')
427
+ traceRequestIds[k] = v;
428
+ }
429
+ }
430
+ const lastOk = [...traceAttempts].reverse().find(a => a.ok);
431
+ providerCallLatencyMs = lastOk?.timing?.durationMs;
432
+ }
235
433
  // Contract output processing removed - expectedSchema no longer supported
236
434
  // Create enhanced response - extract content properly from router response
237
435
  const routerResponse = response;
@@ -316,12 +514,42 @@ export class AIGateway {
316
514
  metadata: {
317
515
  aiRequestId: request.aiRequestId,
318
516
  identity: request.identity,
319
- latencyMs: Date.now() - startTime,
517
+ latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
320
518
  tokens: tokens,
321
519
  taskTypeId,
322
520
  agentType: 'ai',
323
521
  contentType,
324
- parsingMethod
522
+ parsingMethod,
523
+ ...(traceEnabled
524
+ ? (() => {
525
+ const meta = routerResponse?.metadata || {};
526
+ const provider = meta.provider || routerResponse?.provider || mergedConfig?.provider;
527
+ const region = typeof meta.region === 'string' ? meta.region : undefined;
528
+ const modelUsed = meta.modelUsed || meta.model || routerResponse?.model || mergedConfig?.model;
529
+ const maxTokensRequested = typeof meta.maxTokensRequested === 'number'
530
+ ? meta.maxTokensRequested
531
+ : typeof mergedConfig?.maxTokens === 'number'
532
+ ? mergedConfig.maxTokens
533
+ : undefined;
534
+ const costUsd = typeof meta.costUsd === 'number'
535
+ ? meta.costUsd
536
+ : typeof meta.cost === 'number'
537
+ ? meta.cost
538
+ : undefined;
539
+ return {
540
+ provider,
541
+ region,
542
+ modelUsed,
543
+ maxTokensRequested,
544
+ cost: typeof meta.cost === 'number' ? meta.cost : undefined,
545
+ costUsd,
546
+ requestIds: traceRequestIds,
547
+ retryCount: traceRetryCount,
548
+ fallbackCount: traceFallbackCount,
549
+ attempts: traceAttempts
550
+ };
551
+ })()
552
+ : {})
325
553
  }
326
554
  };
327
555
  // Track activity success if activity was started
package/dist/types.d.ts CHANGED
@@ -10,6 +10,65 @@ export type UsageTier = string;
10
10
  import type { Activix } from '@x12i/activix';
11
11
  import type { TemplateRenderOptions } from '@x12i/rendrix';
12
12
  import type { Logxer } from '@x12i/logxer';
13
+ /**
14
+ * Diagnostics options for opt-in authoritative tracing.
15
+ * Default behavior must remain minimal when diagnostics are not enabled.
16
+ */
17
+ export type DiagnosticsMode = 'none' | 'trace';
18
+ export interface DiagnosticsOptions {
19
+ mode?: DiagnosticsMode;
20
+ /**
21
+ * Whether to include raw provider payloads (NEVER on by default).
22
+ * Implementations must size-cap any raw payload included.
23
+ */
24
+ includeRawProviderPayload?: boolean;
25
+ }
26
+ export type GatewayTraceRequestIds = {
27
+ /** Stable alias of gateway aiRequestId (always set when trace enabled). */
28
+ gatewayAiRequestId: string;
29
+ /** Router-level correlation id, when available. */
30
+ routerRequestId?: string;
31
+ /** Provider-level request id (or equivalent), when available. */
32
+ providerRequestId?: string;
33
+ /** OpenRouter request id, when applicable and available. */
34
+ openrouterRequestId?: string;
35
+ /** Allow additional stable ids without breaking contract. */
36
+ [key: string]: string | undefined;
37
+ };
38
+ export type GatewayTraceAttempt = {
39
+ timing: {
40
+ startedAt: number;
41
+ endedAt: number;
42
+ durationMs: number;
43
+ };
44
+ routing: {
45
+ provider: string;
46
+ region?: string;
47
+ requestIds: GatewayTraceRequestIds;
48
+ retryIndex: number;
49
+ fallbackIndex: number;
50
+ };
51
+ usage?: {
52
+ tokens: {
53
+ prompt: number;
54
+ completion: number;
55
+ total: number;
56
+ };
57
+ maxTokensRequested?: number;
58
+ };
59
+ modelUsed?: string;
60
+ costUsd?: number;
61
+ ok: boolean;
62
+ error?: {
63
+ name: string;
64
+ message: string;
65
+ };
66
+ /**
67
+ * Optional raw provider payload (size-capped, gated by request flag).
68
+ * The exact shape is intentionally loose to avoid locking downstream to provider schemas.
69
+ */
70
+ rawProviderPayload?: unknown;
71
+ };
13
72
  /**
14
73
  * Identity object used for activity linkage.
15
74
  * On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
@@ -606,6 +665,11 @@ interface BaseLLMRequest extends Omit<LLMRequest, 'messages' | 'input' | 'reques
606
665
  * Used when inferenceType is provided for parsing inference outputs
607
666
  */
608
667
  parseOptions?: Record<string, unknown>;
668
+ /**
669
+ * Optional diagnostics controls. When omitted or mode != 'trace', the gateway must not
670
+ * attach heavy diagnostic objects or raw provider payloads.
671
+ */
672
+ diagnostics?: DiagnosticsOptions;
609
673
  }
610
674
  /**
611
675
  * Chat request for conversational use cases
@@ -835,6 +899,41 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
835
899
  * Cost in USD (if available)
836
900
  */
837
901
  cost?: number;
902
+ /**
903
+ * Cost in USD (preferred, stable key for trace mode).
904
+ * When both are present, costUsd should mirror cost.
905
+ */
906
+ costUsd?: number;
907
+ /**
908
+ * Final effective max token cap applied (after merges/normalization), if known.
909
+ */
910
+ maxTokensRequested?: number;
911
+ /**
912
+ * Model that actually served the response (after routing/fallback), if known.
913
+ * This is distinct from requested model.
914
+ */
915
+ modelUsed?: string;
916
+ /**
917
+ * Optional region identifier when applicable (provider-specific).
918
+ */
919
+ region?: string;
920
+ /**
921
+ * Stable request/correlation identifiers across gateway/router/provider layers.
922
+ * Only populated when diagnostics trace mode is enabled.
923
+ */
924
+ requestIds?: GatewayTraceRequestIds;
925
+ /**
926
+ * Total number of retries performed across the execution (trace mode).
927
+ */
928
+ retryCount?: number;
929
+ /**
930
+ * Total number of fallback transitions performed across the execution (trace mode).
931
+ */
932
+ fallbackCount?: number;
933
+ /**
934
+ * Ordered, authoritative attempts across retries and fallbacks (trace mode).
935
+ */
936
+ attempts?: GatewayTraceAttempt[];
838
937
  /**
839
938
  * Content type classification
840
939
  * Indicates whether content is 'string', 'object', 'array', or 'null'
@@ -101,7 +101,7 @@ function sleep(ms) {
101
101
  * Invokes router with retry logic for network and server errors
102
102
  * Returns response and retry metadata
103
103
  */
104
- async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger) {
104
+ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
105
105
  const maxRetries = retryConfig.maxRetries ?? 3;
106
106
  const initialDelay = retryConfig.initialDelay ?? 1000;
107
107
  const maxDelay = retryConfig.maxDelay ?? 30000;
@@ -111,8 +111,12 @@ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger
111
111
  let lastError;
112
112
  const retryAttempts = [];
113
113
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
114
+ const startedAt = Date.now();
115
+ hooks?.onTryStart?.({ retryIndex: attempt, startedAt });
114
116
  try {
115
117
  const response = await router.invoke(routerRequest);
118
+ const endedAt = Date.now();
119
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: true, response });
116
120
  // Log success after retry if this wasn't the first attempt
117
121
  if (attempt > 0) {
118
122
  logger.info('Request succeeded after retry', {
@@ -136,6 +140,8 @@ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger
136
140
  }
137
141
  catch (error) {
138
142
  lastError = error instanceof Error ? error : new Error(String(error));
143
+ const endedAt = Date.now();
144
+ hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: false, error: lastError });
139
145
  // Check if error is retryable
140
146
  if (!isRetryableError(lastError)) {
141
147
  logger.debug('Non-retryable error, not retrying', {
@@ -34,7 +34,27 @@ export declare function sleep(ms: number): Promise<void>;
34
34
  * Invokes router with retry logic for network and server errors
35
35
  * Returns response and retry metadata
36
36
  */
37
- export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer): Promise<{
37
+ export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer, hooks?: {
38
+ /**
39
+ * Called immediately before each provider call attempt (including the first try).
40
+ * `retryIndex` is 0-based within this invokeWithRetry call.
41
+ */
42
+ onTryStart?: (info: {
43
+ retryIndex: number;
44
+ startedAt: number;
45
+ }) => void;
46
+ /**
47
+ * Called immediately after each provider call attempt finishes (success or error).
48
+ * `retryIndex` is 0-based within this invokeWithRetry call.
49
+ */
50
+ onTryEnd?: (info: {
51
+ retryIndex: number;
52
+ endedAt: number;
53
+ ok: boolean;
54
+ response?: any;
55
+ error?: Error;
56
+ }) => void;
57
+ }): Promise<{
38
58
  response: any;
39
59
  retryMetadata?: {
40
60
  retryCount: number;
@@ -15,6 +15,7 @@ const gateway_utils_js_1 = require("./gateway-utils.cjs");
15
15
  const gateway_provider_auto_register_js_1 = require("./gateway-provider-auto-register.cjs");
16
16
  const runtime_objects_js_1 = require("./runtime-objects.cjs");
17
17
  const gateway_log_meta_js_1 = require("./gateway-log-meta.cjs");
18
+ const gateway_retry_js_1 = require("./gateway-retry.cjs");
18
19
  /** Error message thrown by the router when no provider is registered or specified */
19
20
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
20
21
  const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
@@ -206,6 +207,9 @@ class AIGateway {
206
207
  request._parsedRequest = parsedSnapshot;
207
208
  // Merge config (modelConfig > request.config > gateway defaults)
208
209
  const mergedConfig = await (0, gateway_utils_js_1.mergeConfig)(request, this.config, this.logger);
210
+ const diagnosticsMode = request.diagnostics?.mode;
211
+ const traceEnabled = diagnosticsMode === 'trace';
212
+ const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
209
213
  // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
210
214
  if (!this._autoRegisterDone) {
211
215
  await (0, gateway_provider_auto_register_js_1.autoRegisterProviders)(this.router, this.logger);
@@ -226,15 +230,209 @@ class AIGateway {
226
230
  }
227
231
  }
228
232
  try {
229
- // Call router directly with merged config
230
- const response = await this.router.invoke({
231
- request: {
232
- messages,
233
- config: mergedConfig,
234
- identity: request.identity
235
- },
236
- mode: 'sync'
237
- });
233
+ let response;
234
+ let traceAttempts;
235
+ let traceRetryCount;
236
+ let traceFallbackCount;
237
+ let traceRequestIds;
238
+ let providerCallLatencyMs;
239
+ if (!traceEnabled) {
240
+ // Default minimal behavior (no extra allocations/payload).
241
+ response = await this.router.invoke({
242
+ request: {
243
+ messages,
244
+ config: mergedConfig,
245
+ identity: request.identity
246
+ },
247
+ mode: 'sync'
248
+ });
249
+ }
250
+ else {
251
+ const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
252
+ const capErrorMessage = (s) => capString(s, 500);
253
+ const safeJsonStringify = (value) => {
254
+ try {
255
+ return JSON.stringify(value);
256
+ }
257
+ catch {
258
+ return '[Unserializable]';
259
+ }
260
+ };
261
+ const gatewayAiRequestId = request.aiRequestId;
262
+ const baseRequest = {
263
+ request: {
264
+ messages,
265
+ config: mergedConfig,
266
+ identity: request.identity
267
+ },
268
+ mode: 'sync'
269
+ };
270
+ // Build deterministic provider/model candidate chain.
271
+ const candidates = [];
272
+ const primaryProvider = mergedConfig?.provider;
273
+ const primaryModel = mergedConfig?.model;
274
+ if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
275
+ candidates.push({ provider: primaryProvider, model: primaryModel });
276
+ }
277
+ const defaultTarget = this.config?.defaultTarget;
278
+ if (defaultTarget?.engine && defaultTarget?.model) {
279
+ candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
280
+ }
281
+ const fallbackChain = this.config?.fallbackChain;
282
+ if (Array.isArray(fallbackChain)) {
283
+ for (const item of fallbackChain) {
284
+ if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
285
+ candidates.push({ provider: String(item.engine), model: String(item.model) });
286
+ }
287
+ }
288
+ }
289
+ // De-dup while preserving order.
290
+ const seen = new Set();
291
+ const deduped = candidates.filter(c => {
292
+ const key = `${c.provider}::${c.model}`;
293
+ if (seen.has(key))
294
+ return false;
295
+ seen.add(key);
296
+ return true;
297
+ });
298
+ traceAttempts = [];
299
+ // Attempt execution across fallbacks (authoritative ordering).
300
+ let lastError;
301
+ for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
302
+ const candidate = deduped[fallbackIndex];
303
+ // Track per-retry attempt objects through retry hooks.
304
+ const attemptIndexByRetry = new Map();
305
+ try {
306
+ const result = await (0, gateway_retry_js_1.invokeWithRetry)({
307
+ ...baseRequest,
308
+ request: {
309
+ ...baseRequest.request,
310
+ config: {
311
+ ...mergedConfig,
312
+ provider: candidate.provider,
313
+ model: candidate.model
314
+ }
315
+ }
316
+ }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
317
+ onTryStart: ({ retryIndex, startedAt }) => {
318
+ const idx = traceAttempts.push({
319
+ timing: { startedAt, endedAt: startedAt, durationMs: 0 },
320
+ routing: {
321
+ provider: candidate.provider,
322
+ requestIds: { gatewayAiRequestId },
323
+ retryIndex,
324
+ fallbackIndex
325
+ },
326
+ usage: {
327
+ tokens: { prompt: 0, completion: 0, total: 0 },
328
+ maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
329
+ },
330
+ modelUsed: candidate.model,
331
+ ok: false
332
+ }) - 1;
333
+ attemptIndexByRetry.set(retryIndex, idx);
334
+ },
335
+ onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
336
+ const idx = attemptIndexByRetry.get(retryIndex);
337
+ if (idx === undefined)
338
+ return;
339
+ const a = traceAttempts[idx];
340
+ a.timing.endedAt = endedAt;
341
+ a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
342
+ a.ok = ok;
343
+ const respAny = tryResp;
344
+ if (ok && respAny) {
345
+ const meta = respAny.metadata || {};
346
+ const usage = respAny.usage || meta?.['ai-activities-response']?.usage;
347
+ const prompt = usage?.promptTokens ?? usage?.inputTokens ?? 0;
348
+ const completion = usage?.completionTokens ?? usage?.outputTokens ?? 0;
349
+ const total = usage?.totalTokens ?? 0;
350
+ a.usage = {
351
+ tokens: { prompt, completion, total },
352
+ maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
353
+ ? meta.maxTokensRequested
354
+ : typeof mergedConfig?.maxTokens === 'number'
355
+ ? mergedConfig.maxTokens
356
+ : undefined
357
+ };
358
+ a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
359
+ if (typeof meta?.region === 'string')
360
+ a.routing.region = meta.region;
361
+ const requestIds = {
362
+ gatewayAiRequestId,
363
+ routerRequestId: respAny.requestId || meta?.requestId
364
+ };
365
+ if (typeof meta?.providerRequestId === 'string')
366
+ requestIds.providerRequestId = meta.providerRequestId;
367
+ if (typeof meta?.openrouterRequestId === 'string')
368
+ requestIds.openrouterRequestId = meta.openrouterRequestId;
369
+ if (meta?.requestIds && typeof meta.requestIds === 'object') {
370
+ for (const [k, v] of Object.entries(meta.requestIds)) {
371
+ if (typeof v === 'string')
372
+ requestIds[k] = v;
373
+ }
374
+ }
375
+ a.routing.requestIds = requestIds;
376
+ a.modelUsed =
377
+ meta?.modelUsed || meta?.model || respAny.model || candidate.model;
378
+ const costUsd = typeof meta?.costUsd === 'number'
379
+ ? meta.costUsd
380
+ : typeof meta?.cost === 'number'
381
+ ? meta.cost
382
+ : typeof respAny?.costUsd === 'number'
383
+ ? respAny.costUsd
384
+ : typeof respAny?.cost === 'number'
385
+ ? respAny.cost
386
+ : undefined;
387
+ if (typeof costUsd === 'number')
388
+ a.costUsd = costUsd;
389
+ if (includeRawProviderPayload) {
390
+ // Size-capped preview only.
391
+ const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
392
+ const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
393
+ a.rawProviderPayload = capString(rawStr, 4000);
394
+ }
395
+ }
396
+ else if (tryErr) {
397
+ a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
398
+ }
399
+ }
400
+ });
401
+ response = result.response;
402
+ lastError = undefined;
403
+ break; // success => stop fallback chain
404
+ }
405
+ catch (err) {
406
+ lastError = err instanceof Error ? err : new Error(String(err));
407
+ continue;
408
+ }
409
+ }
410
+ if (!response) {
411
+ throw lastError ?? new Error('All fallback candidates failed');
412
+ }
413
+ // Summary counts + final request ids.
414
+ traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
415
+ const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
416
+ traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
417
+ const finalResp = response;
418
+ const finalMeta = finalResp?.metadata || {};
419
+ traceRequestIds = {
420
+ gatewayAiRequestId,
421
+ routerRequestId: finalResp?.requestId || finalMeta?.requestId
422
+ };
423
+ if (typeof finalMeta?.providerRequestId === 'string')
424
+ traceRequestIds.providerRequestId = finalMeta.providerRequestId;
425
+ if (typeof finalMeta?.openrouterRequestId === 'string')
426
+ traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
427
+ if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
428
+ for (const [k, v] of Object.entries(finalMeta.requestIds)) {
429
+ if (typeof v === 'string')
430
+ traceRequestIds[k] = v;
431
+ }
432
+ }
433
+ const lastOk = [...traceAttempts].reverse().find(a => a.ok);
434
+ providerCallLatencyMs = lastOk?.timing?.durationMs;
435
+ }
238
436
  // Contract output processing removed - expectedSchema no longer supported
239
437
  // Create enhanced response - extract content properly from router response
240
438
  const routerResponse = response;
@@ -319,12 +517,42 @@ class AIGateway {
319
517
  metadata: {
320
518
  aiRequestId: request.aiRequestId,
321
519
  identity: request.identity,
322
- latencyMs: Date.now() - startTime,
520
+ latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
323
521
  tokens: tokens,
324
522
  taskTypeId,
325
523
  agentType: 'ai',
326
524
  contentType,
327
- parsingMethod
525
+ parsingMethod,
526
+ ...(traceEnabled
527
+ ? (() => {
528
+ const meta = routerResponse?.metadata || {};
529
+ const provider = meta.provider || routerResponse?.provider || mergedConfig?.provider;
530
+ const region = typeof meta.region === 'string' ? meta.region : undefined;
531
+ const modelUsed = meta.modelUsed || meta.model || routerResponse?.model || mergedConfig?.model;
532
+ const maxTokensRequested = typeof meta.maxTokensRequested === 'number'
533
+ ? meta.maxTokensRequested
534
+ : typeof mergedConfig?.maxTokens === 'number'
535
+ ? mergedConfig.maxTokens
536
+ : undefined;
537
+ const costUsd = typeof meta.costUsd === 'number'
538
+ ? meta.costUsd
539
+ : typeof meta.cost === 'number'
540
+ ? meta.cost
541
+ : undefined;
542
+ return {
543
+ provider,
544
+ region,
545
+ modelUsed,
546
+ maxTokensRequested,
547
+ cost: typeof meta.cost === 'number' ? meta.cost : undefined,
548
+ costUsd,
549
+ requestIds: traceRequestIds,
550
+ retryCount: traceRetryCount,
551
+ fallbackCount: traceFallbackCount,
552
+ attempts: traceAttempts
553
+ };
554
+ })()
555
+ : {})
328
556
  }
329
557
  };
330
558
  // Track activity success if activity was started
@@ -10,6 +10,65 @@ export type UsageTier = string;
10
10
  import type { Activix } from '@x12i/activix';
11
11
  import type { TemplateRenderOptions } from '@x12i/rendrix';
12
12
  import type { Logxer } from '@x12i/logxer';
13
+ /**
14
+ * Diagnostics options for opt-in authoritative tracing.
15
+ * Default behavior must remain minimal when diagnostics are not enabled.
16
+ */
17
+ export type DiagnosticsMode = 'none' | 'trace';
18
+ export interface DiagnosticsOptions {
19
+ mode?: DiagnosticsMode;
20
+ /**
21
+ * Whether to include raw provider payloads (NEVER on by default).
22
+ * Implementations must size-cap any raw payload included.
23
+ */
24
+ includeRawProviderPayload?: boolean;
25
+ }
26
+ export type GatewayTraceRequestIds = {
27
+ /** Stable alias of gateway aiRequestId (always set when trace enabled). */
28
+ gatewayAiRequestId: string;
29
+ /** Router-level correlation id, when available. */
30
+ routerRequestId?: string;
31
+ /** Provider-level request id (or equivalent), when available. */
32
+ providerRequestId?: string;
33
+ /** OpenRouter request id, when applicable and available. */
34
+ openrouterRequestId?: string;
35
+ /** Allow additional stable ids without breaking contract. */
36
+ [key: string]: string | undefined;
37
+ };
38
+ export type GatewayTraceAttempt = {
39
+ timing: {
40
+ startedAt: number;
41
+ endedAt: number;
42
+ durationMs: number;
43
+ };
44
+ routing: {
45
+ provider: string;
46
+ region?: string;
47
+ requestIds: GatewayTraceRequestIds;
48
+ retryIndex: number;
49
+ fallbackIndex: number;
50
+ };
51
+ usage?: {
52
+ tokens: {
53
+ prompt: number;
54
+ completion: number;
55
+ total: number;
56
+ };
57
+ maxTokensRequested?: number;
58
+ };
59
+ modelUsed?: string;
60
+ costUsd?: number;
61
+ ok: boolean;
62
+ error?: {
63
+ name: string;
64
+ message: string;
65
+ };
66
+ /**
67
+ * Optional raw provider payload (size-capped, gated by request flag).
68
+ * The exact shape is intentionally loose to avoid locking downstream to provider schemas.
69
+ */
70
+ rawProviderPayload?: unknown;
71
+ };
13
72
  /**
14
73
  * Identity object used for activity linkage.
15
74
  * On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
@@ -606,6 +665,11 @@ interface BaseLLMRequest extends Omit<LLMRequest, 'messages' | 'input' | 'reques
606
665
  * Used when inferenceType is provided for parsing inference outputs
607
666
  */
608
667
  parseOptions?: Record<string, unknown>;
668
+ /**
669
+ * Optional diagnostics controls. When omitted or mode != 'trace', the gateway must not
670
+ * attach heavy diagnostic objects or raw provider payloads.
671
+ */
672
+ diagnostics?: DiagnosticsOptions;
609
673
  }
610
674
  /**
611
675
  * Chat request for conversational use cases
@@ -835,6 +899,41 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
835
899
  * Cost in USD (if available)
836
900
  */
837
901
  cost?: number;
902
+ /**
903
+ * Cost in USD (preferred, stable key for trace mode).
904
+ * When both are present, costUsd should mirror cost.
905
+ */
906
+ costUsd?: number;
907
+ /**
908
+ * Final effective max token cap applied (after merges/normalization), if known.
909
+ */
910
+ maxTokensRequested?: number;
911
+ /**
912
+ * Model that actually served the response (after routing/fallback), if known.
913
+ * This is distinct from requested model.
914
+ */
915
+ modelUsed?: string;
916
+ /**
917
+ * Optional region identifier when applicable (provider-specific).
918
+ */
919
+ region?: string;
920
+ /**
921
+ * Stable request/correlation identifiers across gateway/router/provider layers.
922
+ * Only populated when diagnostics trace mode is enabled.
923
+ */
924
+ requestIds?: GatewayTraceRequestIds;
925
+ /**
926
+ * Total number of retries performed across the execution (trace mode).
927
+ */
928
+ retryCount?: number;
929
+ /**
930
+ * Total number of fallback transitions performed across the execution (trace mode).
931
+ */
932
+ fallbackCount?: number;
933
+ /**
934
+ * Ordered, authoritative attempts across retries and fallbacks (trace mode).
935
+ */
936
+ attempts?: GatewayTraceAttempt[];
838
937
  /**
839
938
  * Content type classification
840
939
  * Indicates whether content is 'string', 'object', 'array', or 'null'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@x12i/ai-gateway",
3
- "version": "9.0.3",
3
+ "version": "9.0.7",
4
4
  "description": "AI Gateway - Unified interface for LLM provider routing and management",
5
5
  "type": "module",
6
6
  "exports": {
@@ -60,7 +60,7 @@
60
60
  "author": "x12i",
61
61
  "license": "mit",
62
62
  "dependencies": {
63
- "@x12i/ai-providers-router": "^4.7.1",
63
+ "@x12i/ai-providers-router": "^4.7.7",
64
64
  "@x12i/rendrix": "^4.2.0",
65
65
  "@aws-sdk/s3-request-presigner": "^3.953.0",
66
66
  "@x12i/env": "^4.0.1",