@x12i/ai-gateway 9.0.3 → 9.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/gateway-retry.d.ts +21 -1
- package/dist/gateway-retry.js +7 -1
- package/dist/gateway-utils.d.ts +17 -0
- package/dist/gateway-utils.js +52 -0
- package/dist/gateway.js +239 -30
- package/dist/types.d.ts +99 -0
- package/dist/usage-tracker.js +2 -5
- package/dist-cjs/gateway-retry.cjs +7 -1
- package/dist-cjs/gateway-retry.d.ts +21 -1
- package/dist-cjs/gateway-utils.cjs +54 -0
- package/dist-cjs/gateway-utils.d.ts +17 -0
- package/dist-cjs/gateway.cjs +238 -29
- package/dist-cjs/types.d.ts +99 -0
- package/dist-cjs/usage-tracker.cjs +2 -5
- package/package.json +2 -2
package/dist/gateway-retry.d.ts
CHANGED
|
@@ -34,7 +34,27 @@ export declare function sleep(ms: number): Promise<void>;
|
|
|
34
34
|
* Invokes router with retry logic for network and server errors
|
|
35
35
|
* Returns response and retry metadata
|
|
36
36
|
*/
|
|
37
|
-
export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer
|
|
37
|
+
export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer, hooks?: {
|
|
38
|
+
/**
|
|
39
|
+
* Called immediately before each provider call attempt (including the first try).
|
|
40
|
+
* `retryIndex` is 0-based within this invokeWithRetry call.
|
|
41
|
+
*/
|
|
42
|
+
onTryStart?: (info: {
|
|
43
|
+
retryIndex: number;
|
|
44
|
+
startedAt: number;
|
|
45
|
+
}) => void;
|
|
46
|
+
/**
|
|
47
|
+
* Called immediately after each provider call attempt finishes (success or error).
|
|
48
|
+
* `retryIndex` is 0-based within this invokeWithRetry call.
|
|
49
|
+
*/
|
|
50
|
+
onTryEnd?: (info: {
|
|
51
|
+
retryIndex: number;
|
|
52
|
+
endedAt: number;
|
|
53
|
+
ok: boolean;
|
|
54
|
+
response?: any;
|
|
55
|
+
error?: Error;
|
|
56
|
+
}) => void;
|
|
57
|
+
}): Promise<{
|
|
38
58
|
response: any;
|
|
39
59
|
retryMetadata?: {
|
|
40
60
|
retryCount: number;
|
package/dist/gateway-retry.js
CHANGED
|
@@ -93,7 +93,7 @@ export function sleep(ms) {
|
|
|
93
93
|
* Invokes router with retry logic for network and server errors
|
|
94
94
|
* Returns response and retry metadata
|
|
95
95
|
*/
|
|
96
|
-
export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger) {
|
|
96
|
+
export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
|
|
97
97
|
const maxRetries = retryConfig.maxRetries ?? 3;
|
|
98
98
|
const initialDelay = retryConfig.initialDelay ?? 1000;
|
|
99
99
|
const maxDelay = retryConfig.maxDelay ?? 30000;
|
|
@@ -103,8 +103,12 @@ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router,
|
|
|
103
103
|
let lastError;
|
|
104
104
|
const retryAttempts = [];
|
|
105
105
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
106
|
+
const startedAt = Date.now();
|
|
107
|
+
hooks?.onTryStart?.({ retryIndex: attempt, startedAt });
|
|
106
108
|
try {
|
|
107
109
|
const response = await router.invoke(routerRequest);
|
|
110
|
+
const endedAt = Date.now();
|
|
111
|
+
hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: true, response });
|
|
108
112
|
// Log success after retry if this wasn't the first attempt
|
|
109
113
|
if (attempt > 0) {
|
|
110
114
|
logger.info('Request succeeded after retry', {
|
|
@@ -128,6 +132,8 @@ export async function invokeWithRetry(routerRequest, retryConfig, jobId, router,
|
|
|
128
132
|
}
|
|
129
133
|
catch (error) {
|
|
130
134
|
lastError = error instanceof Error ? error : new Error(String(error));
|
|
135
|
+
const endedAt = Date.now();
|
|
136
|
+
hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: false, error: lastError });
|
|
131
137
|
// Check if error is retryable
|
|
132
138
|
if (!isRetryableError(lastError)) {
|
|
133
139
|
logger.debug('Non-retryable error, not retrying', {
|
package/dist/gateway-utils.d.ts
CHANGED
|
@@ -19,3 +19,20 @@ export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer):
|
|
|
19
19
|
export declare function mergeConfig(request: ChatRequest & {
|
|
20
20
|
useInternalDefaults?: 'skill' | 'audit';
|
|
21
21
|
}, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
|
|
22
|
+
/**
|
|
23
|
+
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
24
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
25
|
+
*/
|
|
26
|
+
export declare function normalizeRouterUsageTokens(usage: unknown): {
|
|
27
|
+
prompt: number;
|
|
28
|
+
completion: number;
|
|
29
|
+
total: number;
|
|
30
|
+
} | undefined;
|
|
31
|
+
/**
|
|
32
|
+
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
33
|
+
*/
|
|
34
|
+
export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
|
|
35
|
+
prompt: number;
|
|
36
|
+
completion: number;
|
|
37
|
+
total: number;
|
|
38
|
+
};
|
package/dist/gateway-utils.js
CHANGED
|
@@ -179,3 +179,55 @@ export async function mergeConfig(request, config, logger) {
|
|
|
179
179
|
});
|
|
180
180
|
return merged;
|
|
181
181
|
}
|
|
182
|
+
/**
 * Picks the most informative finite number from `vals`, scanning left to right.
 *
 * A non-zero finite number wins immediately. A finite `0` is only remembered as a
 * fallback, so a placeholder zero under one alias (e.g. `promptTokens: 0`) does not
 * hide a real count reported under a later alias (e.g. `inputTokens: 120`).
 *
 * @param {...unknown} vals Candidate values in priority order; non-numbers, NaN and ±Infinity are skipped.
 * @returns {number | undefined} First non-zero finite number, else the first finite number, else `undefined`.
 */
function firstFiniteNumber(...vals) {
    let zeroFallback;
    for (const v of vals) {
        if (typeof v !== 'number' || !Number.isFinite(v))
            continue;
        if (v !== 0)
            return v;
        if (zeroFallback === undefined)
            zeroFallback = v;
    }
    return zeroFallback;
}
/**
 * Maps a provider/router usage object to gateway token counts (`{ prompt, completion, total }`).
 *
 * Recognised aliases, in priority order:
 *  - prompt:     `promptTokens`, `inputTokens`, `prompt`, `prompt_tokens`, `input_tokens`
 *  - completion: `completionTokens`, `outputTokens`, `completion`, `completion_tokens`, `output_tokens`
 *  - total:      `totalTokens`, `total_tokens`, `total`
 *
 * When no non-zero total is reported, it is derived as `prompt + completion`.
 *
 * @param {unknown} usage Usage bucket taken from a router response; may be anything.
 * @returns {{ prompt: number, completion: number, total: number } | undefined}
 *   Normalised counts, or `undefined` when `usage` is null/undefined or not an object.
 */
export function normalizeRouterUsageTokens(usage) {
    if (usage == null || typeof usage !== 'object')
        return undefined;
    const u = usage;
    const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens, u.input_tokens) ?? 0;
    const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens, u.output_tokens) ?? 0;
    // Accept the bare `total` key too, mirroring the bare `prompt`/`completion` keys above,
    // so a bucket already in gateway shape keeps its reported total.
    let total = firstFiniteNumber(u.totalTokens, u.total_tokens, u.total) ?? 0;
    if (!total && (prompt || completion))
        total = prompt + completion;
    return { prompt, completion, total };
}
/**
 * Reads token usage from the locations on a router response that this function inspects,
 * returning the first bucket that carries any non-zero count.
 *
 * Lookup order: `response.usage`, `response.metadata.usage`,
 * `response.metadata['ai-activities-response'].usage`, then `(response.rawResponse ?? response.raw).usage`.
 *
 * @param {unknown} routerResponse Router response; may be anything.
 * @returns {{ prompt: number, completion: number, total: number }}
 *   Normalised counts; all zeros when nothing usable is found (never `undefined`).
 */
export function extractTokenUsageFromRouterResponse(routerResponse) {
    if (routerResponse == null || typeof routerResponse !== 'object') {
        return { prompt: 0, completion: 0, total: 0 };
    }
    const r = routerResponse;
    const meta = r.metadata != null && typeof r.metadata === 'object'
        ? r.metadata
        : undefined;
    const buckets = [r.usage];
    if (meta) {
        buckets.push(meta.usage);
        const nested = meta['ai-activities-response'];
        if (nested != null && typeof nested === 'object') {
            buckets.push(nested.usage);
        }
    }
    const raw = r.rawResponse ?? r.raw;
    if (raw != null && typeof raw === 'object') {
        buckets.push(raw.usage);
    }
    for (const b of buckets) {
        const n = normalizeRouterUsageTokens(b);
        // An all-zero bucket is treated as "not populated" so a later location can still supply counts.
        if (n && (n.prompt || n.completion || n.total))
            return n;
    }
    return { prompt: 0, completion: 0, total: 0 };
}
|
package/dist/gateway.js
CHANGED
|
@@ -8,10 +8,11 @@ import { ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
|
8
8
|
import { initializeGatewayComponents } from './gateway-config.js';
|
|
9
9
|
import { buildMessages } from './message-builder.js';
|
|
10
10
|
import { extractJsonFromFlexMd } from './flex-md-loader.js';
|
|
11
|
-
import { mergeConfig } from './gateway-utils.js';
|
|
11
|
+
import { extractTokenUsageFromRouterResponse, mergeConfig } from './gateway-utils.js';
|
|
12
12
|
import { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
13
13
|
import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
|
|
14
14
|
import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
|
|
15
|
+
import { invokeWithRetry } from './gateway-retry.js';
|
|
15
16
|
/** Error message thrown by the router when no provider is registered or specified */
|
|
16
17
|
const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
|
|
17
18
|
const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
|
|
@@ -93,7 +94,7 @@ export class AIGateway {
|
|
|
93
94
|
aiRequestId: request.aiRequestId,
|
|
94
95
|
identity: request.identity,
|
|
95
96
|
latencyMs: Date.now() - startTime,
|
|
96
|
-
tokens: response
|
|
97
|
+
tokens: extractTokenUsageFromRouterResponse(response),
|
|
97
98
|
taskTypeId,
|
|
98
99
|
agentType: 'chat'
|
|
99
100
|
}
|
|
@@ -203,6 +204,9 @@ export class AIGateway {
|
|
|
203
204
|
request._parsedRequest = parsedSnapshot;
|
|
204
205
|
// Merge config (modelConfig > request.config > gateway defaults)
|
|
205
206
|
const mergedConfig = await mergeConfig(request, this.config, this.logger);
|
|
207
|
+
const diagnosticsMode = request.diagnostics?.mode;
|
|
208
|
+
const traceEnabled = diagnosticsMode === 'trace';
|
|
209
|
+
const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
|
|
206
210
|
// Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
|
|
207
211
|
if (!this._autoRegisterDone) {
|
|
208
212
|
await autoRegisterProviders(this.router, this.logger);
|
|
@@ -223,15 +227,206 @@ export class AIGateway {
|
|
|
223
227
|
}
|
|
224
228
|
}
|
|
225
229
|
try {
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
230
|
+
let response;
|
|
231
|
+
let traceAttempts;
|
|
232
|
+
let traceRetryCount;
|
|
233
|
+
let traceFallbackCount;
|
|
234
|
+
let traceRequestIds;
|
|
235
|
+
let providerCallLatencyMs;
|
|
236
|
+
if (!traceEnabled) {
|
|
237
|
+
// Default minimal behavior (no extra allocations/payload).
|
|
238
|
+
response = await this.router.invoke({
|
|
239
|
+
request: {
|
|
240
|
+
messages,
|
|
241
|
+
config: mergedConfig,
|
|
242
|
+
identity: request.identity
|
|
243
|
+
},
|
|
244
|
+
mode: 'sync'
|
|
245
|
+
});
|
|
246
|
+
}
|
|
247
|
+
else {
|
|
248
|
+
const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
|
|
249
|
+
const capErrorMessage = (s) => capString(s, 500);
|
|
250
|
+
const safeJsonStringify = (value) => {
|
|
251
|
+
try {
|
|
252
|
+
return JSON.stringify(value);
|
|
253
|
+
}
|
|
254
|
+
catch {
|
|
255
|
+
return '[Unserializable]';
|
|
256
|
+
}
|
|
257
|
+
};
|
|
258
|
+
const gatewayAiRequestId = request.aiRequestId;
|
|
259
|
+
const baseRequest = {
|
|
260
|
+
request: {
|
|
261
|
+
messages,
|
|
262
|
+
config: mergedConfig,
|
|
263
|
+
identity: request.identity
|
|
264
|
+
},
|
|
265
|
+
mode: 'sync'
|
|
266
|
+
};
|
|
267
|
+
// Build deterministic provider/model candidate chain.
|
|
268
|
+
const candidates = [];
|
|
269
|
+
const primaryProvider = mergedConfig?.provider;
|
|
270
|
+
const primaryModel = mergedConfig?.model;
|
|
271
|
+
if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
|
|
272
|
+
candidates.push({ provider: primaryProvider, model: primaryModel });
|
|
273
|
+
}
|
|
274
|
+
const defaultTarget = this.config?.defaultTarget;
|
|
275
|
+
if (defaultTarget?.engine && defaultTarget?.model) {
|
|
276
|
+
candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
|
|
277
|
+
}
|
|
278
|
+
const fallbackChain = this.config?.fallbackChain;
|
|
279
|
+
if (Array.isArray(fallbackChain)) {
|
|
280
|
+
for (const item of fallbackChain) {
|
|
281
|
+
if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
|
|
282
|
+
candidates.push({ provider: String(item.engine), model: String(item.model) });
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
// De-dup while preserving order.
|
|
287
|
+
const seen = new Set();
|
|
288
|
+
const deduped = candidates.filter(c => {
|
|
289
|
+
const key = `${c.provider}::${c.model}`;
|
|
290
|
+
if (seen.has(key))
|
|
291
|
+
return false;
|
|
292
|
+
seen.add(key);
|
|
293
|
+
return true;
|
|
294
|
+
});
|
|
295
|
+
traceAttempts = [];
|
|
296
|
+
// Attempt execution across fallbacks (authoritative ordering).
|
|
297
|
+
let lastError;
|
|
298
|
+
for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
|
|
299
|
+
const candidate = deduped[fallbackIndex];
|
|
300
|
+
// Track per-retry attempt objects through retry hooks.
|
|
301
|
+
const attemptIndexByRetry = new Map();
|
|
302
|
+
try {
|
|
303
|
+
const result = await invokeWithRetry({
|
|
304
|
+
...baseRequest,
|
|
305
|
+
request: {
|
|
306
|
+
...baseRequest.request,
|
|
307
|
+
config: {
|
|
308
|
+
...mergedConfig,
|
|
309
|
+
provider: candidate.provider,
|
|
310
|
+
model: candidate.model
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
}, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
|
|
314
|
+
onTryStart: ({ retryIndex, startedAt }) => {
|
|
315
|
+
const idx = traceAttempts.push({
|
|
316
|
+
timing: { startedAt, endedAt: startedAt, durationMs: 0 },
|
|
317
|
+
routing: {
|
|
318
|
+
provider: candidate.provider,
|
|
319
|
+
requestIds: { gatewayAiRequestId },
|
|
320
|
+
retryIndex,
|
|
321
|
+
fallbackIndex
|
|
322
|
+
},
|
|
323
|
+
usage: {
|
|
324
|
+
tokens: { prompt: 0, completion: 0, total: 0 },
|
|
325
|
+
maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
|
|
326
|
+
},
|
|
327
|
+
modelUsed: candidate.model,
|
|
328
|
+
ok: false
|
|
329
|
+
}) - 1;
|
|
330
|
+
attemptIndexByRetry.set(retryIndex, idx);
|
|
331
|
+
},
|
|
332
|
+
onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
|
|
333
|
+
const idx = attemptIndexByRetry.get(retryIndex);
|
|
334
|
+
if (idx === undefined)
|
|
335
|
+
return;
|
|
336
|
+
const a = traceAttempts[idx];
|
|
337
|
+
a.timing.endedAt = endedAt;
|
|
338
|
+
a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
|
|
339
|
+
a.ok = ok;
|
|
340
|
+
const respAny = tryResp;
|
|
341
|
+
if (ok && respAny) {
|
|
342
|
+
const meta = respAny.metadata || {};
|
|
343
|
+
const tokenCounts = extractTokenUsageFromRouterResponse(respAny);
|
|
344
|
+
a.usage = {
|
|
345
|
+
tokens: tokenCounts,
|
|
346
|
+
maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
|
|
347
|
+
? meta.maxTokensRequested
|
|
348
|
+
: typeof mergedConfig?.maxTokens === 'number'
|
|
349
|
+
? mergedConfig.maxTokens
|
|
350
|
+
: undefined
|
|
351
|
+
};
|
|
352
|
+
a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
|
|
353
|
+
if (typeof meta?.region === 'string')
|
|
354
|
+
a.routing.region = meta.region;
|
|
355
|
+
const requestIds = {
|
|
356
|
+
gatewayAiRequestId,
|
|
357
|
+
routerRequestId: respAny.requestId || meta?.requestId
|
|
358
|
+
};
|
|
359
|
+
if (typeof meta?.providerRequestId === 'string')
|
|
360
|
+
requestIds.providerRequestId = meta.providerRequestId;
|
|
361
|
+
if (typeof meta?.openrouterRequestId === 'string')
|
|
362
|
+
requestIds.openrouterRequestId = meta.openrouterRequestId;
|
|
363
|
+
if (meta?.requestIds && typeof meta.requestIds === 'object') {
|
|
364
|
+
for (const [k, v] of Object.entries(meta.requestIds)) {
|
|
365
|
+
if (typeof v === 'string')
|
|
366
|
+
requestIds[k] = v;
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
a.routing.requestIds = requestIds;
|
|
370
|
+
a.modelUsed =
|
|
371
|
+
meta?.modelUsed || meta?.model || respAny.model || candidate.model;
|
|
372
|
+
const costUsd = typeof meta?.costUsd === 'number'
|
|
373
|
+
? meta.costUsd
|
|
374
|
+
: typeof meta?.cost === 'number'
|
|
375
|
+
? meta.cost
|
|
376
|
+
: typeof respAny?.costUsd === 'number'
|
|
377
|
+
? respAny.costUsd
|
|
378
|
+
: typeof respAny?.cost === 'number'
|
|
379
|
+
? respAny.cost
|
|
380
|
+
: undefined;
|
|
381
|
+
if (typeof costUsd === 'number')
|
|
382
|
+
a.costUsd = costUsd;
|
|
383
|
+
if (includeRawProviderPayload) {
|
|
384
|
+
// Size-capped preview only.
|
|
385
|
+
const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
|
|
386
|
+
const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
|
|
387
|
+
a.rawProviderPayload = capString(rawStr, 4000);
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
else if (tryErr) {
|
|
391
|
+
a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
});
|
|
395
|
+
response = result.response;
|
|
396
|
+
lastError = undefined;
|
|
397
|
+
break; // success => stop fallback chain
|
|
398
|
+
}
|
|
399
|
+
catch (err) {
|
|
400
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
401
|
+
continue;
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
if (!response) {
|
|
405
|
+
throw lastError ?? new Error('All fallback candidates failed');
|
|
406
|
+
}
|
|
407
|
+
// Summary counts + final request ids.
|
|
408
|
+
traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
|
|
409
|
+
const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
|
|
410
|
+
traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
|
|
411
|
+
const finalResp = response;
|
|
412
|
+
const finalMeta = finalResp?.metadata || {};
|
|
413
|
+
traceRequestIds = {
|
|
414
|
+
gatewayAiRequestId,
|
|
415
|
+
routerRequestId: finalResp?.requestId || finalMeta?.requestId
|
|
416
|
+
};
|
|
417
|
+
if (typeof finalMeta?.providerRequestId === 'string')
|
|
418
|
+
traceRequestIds.providerRequestId = finalMeta.providerRequestId;
|
|
419
|
+
if (typeof finalMeta?.openrouterRequestId === 'string')
|
|
420
|
+
traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
|
|
421
|
+
if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
|
|
422
|
+
for (const [k, v] of Object.entries(finalMeta.requestIds)) {
|
|
423
|
+
if (typeof v === 'string')
|
|
424
|
+
traceRequestIds[k] = v;
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
const lastOk = [...traceAttempts].reverse().find(a => a.ok);
|
|
428
|
+
providerCallLatencyMs = lastOk?.timing?.durationMs;
|
|
429
|
+
}
|
|
235
430
|
// Contract output processing removed - expectedSchema no longer supported
|
|
236
431
|
// Create enhanced response - extract content properly from router response
|
|
237
432
|
const routerResponse = response;
|
|
@@ -293,35 +488,49 @@ export class AIGateway {
|
|
|
293
488
|
}
|
|
294
489
|
contentType = 'structured';
|
|
295
490
|
parsingMethod = 'flex-md';
|
|
296
|
-
|
|
297
|
-
let tokens = { prompt: 0, completion: 0, total: 0 };
|
|
298
|
-
if (routerResponse.usage) {
|
|
299
|
-
tokens = {
|
|
300
|
-
prompt: routerResponse.usage.promptTokens || routerResponse.usage.inputTokens || 0,
|
|
301
|
-
completion: routerResponse.usage.completionTokens || routerResponse.usage.outputTokens || 0,
|
|
302
|
-
total: routerResponse.usage.totalTokens || 0
|
|
303
|
-
};
|
|
304
|
-
}
|
|
305
|
-
else if (routerResponse.metadata?.['ai-activities-response']?.usage) {
|
|
306
|
-
const usage = routerResponse.metadata['ai-activities-response'].usage;
|
|
307
|
-
tokens = {
|
|
308
|
-
prompt: usage.promptTokens || usage.inputTokens || 0,
|
|
309
|
-
completion: usage.completionTokens || usage.outputTokens || 0,
|
|
310
|
-
total: usage.totalTokens || 0
|
|
311
|
-
};
|
|
312
|
-
}
|
|
491
|
+
const tokens = extractTokenUsageFromRouterResponse(routerResponse);
|
|
313
492
|
const enhancedResponse = {
|
|
314
493
|
content: content,
|
|
315
494
|
parsedContent: parsedContent,
|
|
316
495
|
metadata: {
|
|
317
496
|
aiRequestId: request.aiRequestId,
|
|
318
497
|
identity: request.identity,
|
|
319
|
-
latencyMs: Date.now() - startTime,
|
|
498
|
+
latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
|
|
320
499
|
tokens: tokens,
|
|
321
500
|
taskTypeId,
|
|
322
501
|
agentType: 'ai',
|
|
323
502
|
contentType,
|
|
324
|
-
parsingMethod
|
|
503
|
+
parsingMethod,
|
|
504
|
+
...(traceEnabled
|
|
505
|
+
? (() => {
|
|
506
|
+
const meta = routerResponse?.metadata || {};
|
|
507
|
+
const provider = meta.provider || routerResponse?.provider || mergedConfig?.provider;
|
|
508
|
+
const region = typeof meta.region === 'string' ? meta.region : undefined;
|
|
509
|
+
const modelUsed = meta.modelUsed || meta.model || routerResponse?.model || mergedConfig?.model;
|
|
510
|
+
const maxTokensRequested = typeof meta.maxTokensRequested === 'number'
|
|
511
|
+
? meta.maxTokensRequested
|
|
512
|
+
: typeof mergedConfig?.maxTokens === 'number'
|
|
513
|
+
? mergedConfig.maxTokens
|
|
514
|
+
: undefined;
|
|
515
|
+
const costUsd = typeof meta.costUsd === 'number'
|
|
516
|
+
? meta.costUsd
|
|
517
|
+
: typeof meta.cost === 'number'
|
|
518
|
+
? meta.cost
|
|
519
|
+
: undefined;
|
|
520
|
+
return {
|
|
521
|
+
provider,
|
|
522
|
+
region,
|
|
523
|
+
modelUsed,
|
|
524
|
+
maxTokensRequested,
|
|
525
|
+
cost: typeof meta.cost === 'number' ? meta.cost : undefined,
|
|
526
|
+
costUsd,
|
|
527
|
+
requestIds: traceRequestIds,
|
|
528
|
+
retryCount: traceRetryCount,
|
|
529
|
+
fallbackCount: traceFallbackCount,
|
|
530
|
+
attempts: traceAttempts
|
|
531
|
+
};
|
|
532
|
+
})()
|
|
533
|
+
: {})
|
|
325
534
|
}
|
|
326
535
|
};
|
|
327
536
|
// Track activity success if activity was started
|
package/dist/types.d.ts
CHANGED
|
@@ -10,6 +10,65 @@ export type UsageTier = string;
|
|
|
10
10
|
import type { Activix } from '@x12i/activix';
|
|
11
11
|
import type { TemplateRenderOptions } from '@x12i/rendrix';
|
|
12
12
|
import type { Logxer } from '@x12i/logxer';
|
|
13
|
+
/**
|
|
14
|
+
* Diagnostics options for opt-in authoritative tracing.
|
|
15
|
+
* Default behavior must remain minimal when diagnostics are not enabled.
|
|
16
|
+
*/
|
|
17
|
+
export type DiagnosticsMode = 'none' | 'trace';
|
|
18
|
+
export interface DiagnosticsOptions {
|
|
19
|
+
mode?: DiagnosticsMode;
|
|
20
|
+
/**
|
|
21
|
+
* Whether to include raw provider payloads (NEVER on by default).
|
|
22
|
+
* Implementations must size-cap any raw payload included.
|
|
23
|
+
*/
|
|
24
|
+
includeRawProviderPayload?: boolean;
|
|
25
|
+
}
|
|
26
|
+
export type GatewayTraceRequestIds = {
|
|
27
|
+
/** Stable alias of gateway aiRequestId (always set when trace enabled). */
|
|
28
|
+
gatewayAiRequestId: string;
|
|
29
|
+
/** Router-level correlation id, when available. */
|
|
30
|
+
routerRequestId?: string;
|
|
31
|
+
/** Provider-level request id (or equivalent), when available. */
|
|
32
|
+
providerRequestId?: string;
|
|
33
|
+
/** OpenRouter request id, when applicable and available. */
|
|
34
|
+
openrouterRequestId?: string;
|
|
35
|
+
/** Allow additional stable ids without breaking contract. */
|
|
36
|
+
[key: string]: string | undefined;
|
|
37
|
+
};
|
|
38
|
+
export type GatewayTraceAttempt = {
|
|
39
|
+
timing: {
|
|
40
|
+
startedAt: number;
|
|
41
|
+
endedAt: number;
|
|
42
|
+
durationMs: number;
|
|
43
|
+
};
|
|
44
|
+
routing: {
|
|
45
|
+
provider: string;
|
|
46
|
+
region?: string;
|
|
47
|
+
requestIds: GatewayTraceRequestIds;
|
|
48
|
+
retryIndex: number;
|
|
49
|
+
fallbackIndex: number;
|
|
50
|
+
};
|
|
51
|
+
usage?: {
|
|
52
|
+
tokens: {
|
|
53
|
+
prompt: number;
|
|
54
|
+
completion: number;
|
|
55
|
+
total: number;
|
|
56
|
+
};
|
|
57
|
+
maxTokensRequested?: number;
|
|
58
|
+
};
|
|
59
|
+
modelUsed?: string;
|
|
60
|
+
costUsd?: number;
|
|
61
|
+
ok: boolean;
|
|
62
|
+
error?: {
|
|
63
|
+
name: string;
|
|
64
|
+
message: string;
|
|
65
|
+
};
|
|
66
|
+
/**
|
|
67
|
+
* Optional raw provider payload (size-capped, gated by request flag).
|
|
68
|
+
* The exact shape is intentionally loose to avoid locking downstream to provider schemas.
|
|
69
|
+
*/
|
|
70
|
+
rawProviderPayload?: unknown;
|
|
71
|
+
};
|
|
13
72
|
/**
|
|
14
73
|
* Identity object used for activity linkage.
|
|
15
74
|
* On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
|
|
@@ -606,6 +665,11 @@ interface BaseLLMRequest extends Omit<LLMRequest, 'messages' | 'input' | 'reques
|
|
|
606
665
|
* Used when inferenceType is provided for parsing inference outputs
|
|
607
666
|
*/
|
|
608
667
|
parseOptions?: Record<string, unknown>;
|
|
668
|
+
/**
|
|
669
|
+
* Optional diagnostics controls. When omitted or mode != 'trace', the gateway must not
|
|
670
|
+
* attach heavy diagnostic objects or raw provider payloads.
|
|
671
|
+
*/
|
|
672
|
+
diagnostics?: DiagnosticsOptions;
|
|
609
673
|
}
|
|
610
674
|
/**
|
|
611
675
|
* Chat request for conversational use cases
|
|
@@ -835,6 +899,41 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
|
|
|
835
899
|
* Cost in USD (if available)
|
|
836
900
|
*/
|
|
837
901
|
cost?: number;
|
|
902
|
+
/**
|
|
903
|
+
* Cost in USD (preferred, stable key for trace mode).
|
|
904
|
+
* When both are present, costUsd should mirror cost.
|
|
905
|
+
*/
|
|
906
|
+
costUsd?: number;
|
|
907
|
+
/**
|
|
908
|
+
* Final effective max token cap applied (after merges/normalization), if known.
|
|
909
|
+
*/
|
|
910
|
+
maxTokensRequested?: number;
|
|
911
|
+
/**
|
|
912
|
+
* Model that actually served the response (after routing/fallback), if known.
|
|
913
|
+
* This is distinct from requested model.
|
|
914
|
+
*/
|
|
915
|
+
modelUsed?: string;
|
|
916
|
+
/**
|
|
917
|
+
* Optional region identifier when applicable (provider-specific).
|
|
918
|
+
*/
|
|
919
|
+
region?: string;
|
|
920
|
+
/**
|
|
921
|
+
* Stable request/correlation identifiers across gateway/router/provider layers.
|
|
922
|
+
* Only populated when diagnostics trace mode is enabled.
|
|
923
|
+
*/
|
|
924
|
+
requestIds?: GatewayTraceRequestIds;
|
|
925
|
+
/**
|
|
926
|
+
* Total number of retries performed across the execution (trace mode).
|
|
927
|
+
*/
|
|
928
|
+
retryCount?: number;
|
|
929
|
+
/**
|
|
930
|
+
* Total number of fallback transitions performed across the execution (trace mode).
|
|
931
|
+
*/
|
|
932
|
+
fallbackCount?: number;
|
|
933
|
+
/**
|
|
934
|
+
* Ordered, authoritative attempts across retries and fallbacks (trace mode).
|
|
935
|
+
*/
|
|
936
|
+
attempts?: GatewayTraceAttempt[];
|
|
838
937
|
/**
|
|
839
938
|
* Content type classification
|
|
840
939
|
* Indicates whether content is 'string', 'object', 'array', or 'null'
|
package/dist/usage-tracker.js
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*
|
|
9
9
|
* Note: x-models dependency removed - usage tracking functions are permanently disabled
|
|
10
10
|
*/
|
|
11
|
+
import { extractTokenUsageFromRouterResponse } from './gateway-utils.js';
|
|
11
12
|
/**
|
|
12
13
|
* Manages usage tracking for LLM requests
|
|
13
14
|
*/
|
|
@@ -26,11 +27,7 @@ export class UsageTracker {
|
|
|
26
27
|
* @returns Token usage breakdown
|
|
27
28
|
*/
|
|
28
29
|
extractTokens(response) {
|
|
29
|
-
return
|
|
30
|
-
prompt: response.usage?.promptTokens || 0,
|
|
31
|
-
completion: response.usage?.completionTokens || 0,
|
|
32
|
-
total: response.usage?.totalTokens || 0
|
|
33
|
-
};
|
|
30
|
+
return extractTokenUsageFromRouterResponse(response);
|
|
34
31
|
}
|
|
35
32
|
/**
|
|
36
33
|
* Records usage for a request
|
|
@@ -101,7 +101,7 @@ function sleep(ms) {
|
|
|
101
101
|
* Invokes router with retry logic for network and server errors
|
|
102
102
|
* Returns response and retry metadata
|
|
103
103
|
*/
|
|
104
|
-
async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger) {
|
|
104
|
+
async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
|
|
105
105
|
const maxRetries = retryConfig.maxRetries ?? 3;
|
|
106
106
|
const initialDelay = retryConfig.initialDelay ?? 1000;
|
|
107
107
|
const maxDelay = retryConfig.maxDelay ?? 30000;
|
|
@@ -111,8 +111,12 @@ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger
|
|
|
111
111
|
let lastError;
|
|
112
112
|
const retryAttempts = [];
|
|
113
113
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
114
|
+
const startedAt = Date.now();
|
|
115
|
+
hooks?.onTryStart?.({ retryIndex: attempt, startedAt });
|
|
114
116
|
try {
|
|
115
117
|
const response = await router.invoke(routerRequest);
|
|
118
|
+
const endedAt = Date.now();
|
|
119
|
+
hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: true, response });
|
|
116
120
|
// Log success after retry if this wasn't the first attempt
|
|
117
121
|
if (attempt > 0) {
|
|
118
122
|
logger.info('Request succeeded after retry', {
|
|
@@ -136,6 +140,8 @@ async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger
|
|
|
136
140
|
}
|
|
137
141
|
catch (error) {
|
|
138
142
|
lastError = error instanceof Error ? error : new Error(String(error));
|
|
143
|
+
const endedAt = Date.now();
|
|
144
|
+
hooks?.onTryEnd?.({ retryIndex: attempt, endedAt, ok: false, error: lastError });
|
|
139
145
|
// Check if error is retryable
|
|
140
146
|
if (!isRetryableError(lastError)) {
|
|
141
147
|
logger.debug('Non-retryable error, not retrying', {
|
|
@@ -34,7 +34,27 @@ export declare function sleep(ms: number): Promise<void>;
|
|
|
34
34
|
* Invokes router with retry logic for network and server errors
|
|
35
35
|
* Returns response and retry metadata
|
|
36
36
|
*/
|
|
37
|
-
export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer
|
|
37
|
+
export declare function invokeWithRetry(routerRequest: any, retryConfig: RetryConfig, jobId: string, router: LLMProviderRouter, logger: Logxer, hooks?: {
|
|
38
|
+
/**
|
|
39
|
+
* Called immediately before each provider call attempt (including the first try).
|
|
40
|
+
* `retryIndex` is 0-based within this invokeWithRetry call.
|
|
41
|
+
*/
|
|
42
|
+
onTryStart?: (info: {
|
|
43
|
+
retryIndex: number;
|
|
44
|
+
startedAt: number;
|
|
45
|
+
}) => void;
|
|
46
|
+
/**
|
|
47
|
+
* Called immediately after each provider call attempt finishes (success or error).
|
|
48
|
+
* `retryIndex` is 0-based within this invokeWithRetry call.
|
|
49
|
+
*/
|
|
50
|
+
onTryEnd?: (info: {
|
|
51
|
+
retryIndex: number;
|
|
52
|
+
endedAt: number;
|
|
53
|
+
ok: boolean;
|
|
54
|
+
response?: any;
|
|
55
|
+
error?: Error;
|
|
56
|
+
}) => void;
|
|
57
|
+
}): Promise<{
|
|
38
58
|
response: any;
|
|
39
59
|
retryMetadata?: {
|
|
40
60
|
retryCount: number;
|
|
@@ -40,6 +40,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
40
40
|
exports.generateMD5Hash = generateMD5Hash;
|
|
41
41
|
exports.ensureTaskTypeId = ensureTaskTypeId;
|
|
42
42
|
exports.mergeConfig = mergeConfig;
|
|
43
|
+
exports.normalizeRouterUsageTokens = normalizeRouterUsageTokens;
|
|
44
|
+
exports.extractTokenUsageFromRouterResponse = extractTokenUsageFromRouterResponse;
|
|
43
45
|
const crypto = __importStar(require("crypto"));
|
|
44
46
|
const gateway_instructions_js_1 = require("./gateway-instructions.cjs");
|
|
45
47
|
const flex_md_loader_js_1 = require("./flex-md-loader.cjs");
|
|
@@ -217,3 +219,55 @@ async function mergeConfig(request, config, logger) {
|
|
|
217
219
|
});
|
|
218
220
|
return merged;
|
|
219
221
|
}
|
|
222
|
+
/**
 * Returns the first argument that is a finite number, or `undefined` when none is.
 * Non-number values, NaN and ±Infinity are skipped.
 */
function firstFiniteNumber(...vals) {
    for (const v of vals) {
        if (typeof v === 'number' && Number.isFinite(v))
            return v;
    }
    return undefined;
}
/**
 * Chooses one token count from a list of alias fields.
 *
 * The first finite value greater than zero wins, so a `0` placeholder under one alias
 * cannot hide a real count reported under another alias. When no positive value exists
 * the first finite value is used, and `0` when there is none at all.
 */
function pickTokenCount(...vals) {
    const positive = vals.find(v => typeof v === 'number' && Number.isFinite(v) && v > 0);
    return positive ?? firstFiniteNumber(...vals) ?? 0;
}
/**
 * Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
 *
 * Recognised aliases:
 * - prompt:     promptTokens, inputTokens, prompt, prompt_tokens, input_tokens
 * - completion: completionTokens, outputTokens, completion, completion_tokens, output_tokens
 * - total:      totalTokens, total, total_tokens
 *
 * When no total is reported (or it is 0) and at least one of prompt/completion is non-zero,
 * total is computed as prompt + completion.
 *
 * @param usage Usage object of unknown shape.
 * @returns `{ prompt, completion, total }`, or `undefined` when `usage` is null/undefined or not an object.
 */
function normalizeRouterUsageTokens(usage) {
    if (usage == null || typeof usage !== 'object')
        return undefined;
    const u = usage;
    const prompt = pickTokenCount(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens, u.input_tokens);
    const completion = pickTokenCount(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens, u.output_tokens);
    let total = pickTokenCount(u.totalTokens, u.total, u.total_tokens);
    // Providers that omit the total still get a usable figure.
    if (!total && (prompt || completion))
        total = prompt + completion;
    return { prompt, completion, total };
}
|
|
244
|
+
/**
 * Finds token usage on a router response by probing, in order:
 * `usage`, `metadata.usage`, `metadata['ai-activities-response'].usage`,
 * then `(rawResponse ?? raw).usage`
 * (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
 *
 * The first location whose normalized counts contain any non-zero value is returned.
 *
 * @param routerResponse Router response of unknown shape.
 * @returns Normalized `{ prompt, completion, total }`; all zeros when the response is not
 *          an object or no location carries non-zero usage.
 */
function extractTokenUsageFromRouterResponse(routerResponse) {
    const isObject = (value) => value != null && typeof value === 'object';
    if (!isObject(routerResponse)) {
        return { prompt: 0, completion: 0, total: 0 };
    }
    const metadata = isObject(routerResponse.metadata) ? routerResponse.metadata : undefined;
    const activities = metadata ? metadata['ai-activities-response'] : undefined;
    const rawPayload = routerResponse.rawResponse ?? routerResponse.raw;
    // Probe order matters: earlier locations take precedence.
    const candidates = [
        routerResponse.usage,
        ...(metadata ? [metadata.usage] : []),
        ...(isObject(activities) ? [activities.usage] : []),
        ...(isObject(rawPayload) ? [rawPayload.usage] : [])
    ];
    for (const candidate of candidates) {
        const counts = normalizeRouterUsageTokens(candidate);
        if (!counts)
            continue;
        if (counts.prompt || counts.completion || counts.total)
            return counts;
    }
    return { prompt: 0, completion: 0, total: 0 };
}
|
|
@@ -19,3 +19,20 @@ export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer):
|
|
|
19
19
|
export declare function mergeConfig(request: ChatRequest & {
|
|
20
20
|
useInternalDefaults?: 'skill' | 'audit';
|
|
21
21
|
}, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
|
|
22
|
+
/**
|
|
23
|
+
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
24
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
25
|
+
*/
|
|
26
|
+
export declare function normalizeRouterUsageTokens(usage: unknown): {
|
|
27
|
+
prompt: number;
|
|
28
|
+
completion: number;
|
|
29
|
+
total: number;
|
|
30
|
+
} | undefined;
|
|
31
|
+
/**
|
|
32
|
+
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
33
|
+
*/
|
|
34
|
+
export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
|
|
35
|
+
prompt: number;
|
|
36
|
+
completion: number;
|
|
37
|
+
total: number;
|
|
38
|
+
};
|
package/dist-cjs/gateway.cjs
CHANGED
|
@@ -15,6 +15,7 @@ const gateway_utils_js_1 = require("./gateway-utils.cjs");
|
|
|
15
15
|
const gateway_provider_auto_register_js_1 = require("./gateway-provider-auto-register.cjs");
|
|
16
16
|
const runtime_objects_js_1 = require("./runtime-objects.cjs");
|
|
17
17
|
const gateway_log_meta_js_1 = require("./gateway-log-meta.cjs");
|
|
18
|
+
const gateway_retry_js_1 = require("./gateway-retry.cjs");
|
|
18
19
|
/** Error message thrown by the router when no provider is registered or specified */
|
|
19
20
|
const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
|
|
20
21
|
const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
|
|
@@ -96,7 +97,7 @@ class AIGateway {
|
|
|
96
97
|
aiRequestId: request.aiRequestId,
|
|
97
98
|
identity: request.identity,
|
|
98
99
|
latencyMs: Date.now() - startTime,
|
|
99
|
-
tokens:
|
|
100
|
+
tokens: (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(response),
|
|
100
101
|
taskTypeId,
|
|
101
102
|
agentType: 'chat'
|
|
102
103
|
}
|
|
@@ -206,6 +207,9 @@ class AIGateway {
|
|
|
206
207
|
request._parsedRequest = parsedSnapshot;
|
|
207
208
|
// Merge config (modelConfig > request.config > gateway defaults)
|
|
208
209
|
const mergedConfig = await (0, gateway_utils_js_1.mergeConfig)(request, this.config, this.logger);
|
|
210
|
+
const diagnosticsMode = request.diagnostics?.mode;
|
|
211
|
+
const traceEnabled = diagnosticsMode === 'trace';
|
|
212
|
+
const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
|
|
209
213
|
// Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
|
|
210
214
|
if (!this._autoRegisterDone) {
|
|
211
215
|
await (0, gateway_provider_auto_register_js_1.autoRegisterProviders)(this.router, this.logger);
|
|
@@ -226,15 +230,206 @@ class AIGateway {
|
|
|
226
230
|
}
|
|
227
231
|
}
|
|
228
232
|
try {
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
233
|
+
let response;
|
|
234
|
+
let traceAttempts;
|
|
235
|
+
let traceRetryCount;
|
|
236
|
+
let traceFallbackCount;
|
|
237
|
+
let traceRequestIds;
|
|
238
|
+
let providerCallLatencyMs;
|
|
239
|
+
if (!traceEnabled) {
|
|
240
|
+
// Default minimal behavior (no extra allocations/payload).
|
|
241
|
+
response = await this.router.invoke({
|
|
242
|
+
request: {
|
|
243
|
+
messages,
|
|
244
|
+
config: mergedConfig,
|
|
245
|
+
identity: request.identity
|
|
246
|
+
},
|
|
247
|
+
mode: 'sync'
|
|
248
|
+
});
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
|
|
252
|
+
const capErrorMessage = (s) => capString(s, 500);
|
|
253
|
+
const safeJsonStringify = (value) => {
|
|
254
|
+
try {
|
|
255
|
+
return JSON.stringify(value);
|
|
256
|
+
}
|
|
257
|
+
catch {
|
|
258
|
+
return '[Unserializable]';
|
|
259
|
+
}
|
|
260
|
+
};
|
|
261
|
+
const gatewayAiRequestId = request.aiRequestId;
|
|
262
|
+
const baseRequest = {
|
|
263
|
+
request: {
|
|
264
|
+
messages,
|
|
265
|
+
config: mergedConfig,
|
|
266
|
+
identity: request.identity
|
|
267
|
+
},
|
|
268
|
+
mode: 'sync'
|
|
269
|
+
};
|
|
270
|
+
// Build deterministic provider/model candidate chain.
|
|
271
|
+
const candidates = [];
|
|
272
|
+
const primaryProvider = mergedConfig?.provider;
|
|
273
|
+
const primaryModel = mergedConfig?.model;
|
|
274
|
+
if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
|
|
275
|
+
candidates.push({ provider: primaryProvider, model: primaryModel });
|
|
276
|
+
}
|
|
277
|
+
const defaultTarget = this.config?.defaultTarget;
|
|
278
|
+
if (defaultTarget?.engine && defaultTarget?.model) {
|
|
279
|
+
candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
|
|
280
|
+
}
|
|
281
|
+
const fallbackChain = this.config?.fallbackChain;
|
|
282
|
+
if (Array.isArray(fallbackChain)) {
|
|
283
|
+
for (const item of fallbackChain) {
|
|
284
|
+
if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
|
|
285
|
+
candidates.push({ provider: String(item.engine), model: String(item.model) });
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
// De-dup while preserving order.
|
|
290
|
+
const seen = new Set();
|
|
291
|
+
const deduped = candidates.filter(c => {
|
|
292
|
+
const key = `${c.provider}::${c.model}`;
|
|
293
|
+
if (seen.has(key))
|
|
294
|
+
return false;
|
|
295
|
+
seen.add(key);
|
|
296
|
+
return true;
|
|
297
|
+
});
|
|
298
|
+
traceAttempts = [];
|
|
299
|
+
// Attempt execution across fallbacks (authoritative ordering).
|
|
300
|
+
let lastError;
|
|
301
|
+
for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
|
|
302
|
+
const candidate = deduped[fallbackIndex];
|
|
303
|
+
// Track per-retry attempt objects through retry hooks.
|
|
304
|
+
const attemptIndexByRetry = new Map();
|
|
305
|
+
try {
|
|
306
|
+
const result = await (0, gateway_retry_js_1.invokeWithRetry)({
|
|
307
|
+
...baseRequest,
|
|
308
|
+
request: {
|
|
309
|
+
...baseRequest.request,
|
|
310
|
+
config: {
|
|
311
|
+
...mergedConfig,
|
|
312
|
+
provider: candidate.provider,
|
|
313
|
+
model: candidate.model
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
}, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
|
|
317
|
+
onTryStart: ({ retryIndex, startedAt }) => {
|
|
318
|
+
const idx = traceAttempts.push({
|
|
319
|
+
timing: { startedAt, endedAt: startedAt, durationMs: 0 },
|
|
320
|
+
routing: {
|
|
321
|
+
provider: candidate.provider,
|
|
322
|
+
requestIds: { gatewayAiRequestId },
|
|
323
|
+
retryIndex,
|
|
324
|
+
fallbackIndex
|
|
325
|
+
},
|
|
326
|
+
usage: {
|
|
327
|
+
tokens: { prompt: 0, completion: 0, total: 0 },
|
|
328
|
+
maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
|
|
329
|
+
},
|
|
330
|
+
modelUsed: candidate.model,
|
|
331
|
+
ok: false
|
|
332
|
+
}) - 1;
|
|
333
|
+
attemptIndexByRetry.set(retryIndex, idx);
|
|
334
|
+
},
|
|
335
|
+
onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
|
|
336
|
+
const idx = attemptIndexByRetry.get(retryIndex);
|
|
337
|
+
if (idx === undefined)
|
|
338
|
+
return;
|
|
339
|
+
const a = traceAttempts[idx];
|
|
340
|
+
a.timing.endedAt = endedAt;
|
|
341
|
+
a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
|
|
342
|
+
a.ok = ok;
|
|
343
|
+
const respAny = tryResp;
|
|
344
|
+
if (ok && respAny) {
|
|
345
|
+
const meta = respAny.metadata || {};
|
|
346
|
+
const tokenCounts = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(respAny);
|
|
347
|
+
a.usage = {
|
|
348
|
+
tokens: tokenCounts,
|
|
349
|
+
maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
|
|
350
|
+
? meta.maxTokensRequested
|
|
351
|
+
: typeof mergedConfig?.maxTokens === 'number'
|
|
352
|
+
? mergedConfig.maxTokens
|
|
353
|
+
: undefined
|
|
354
|
+
};
|
|
355
|
+
a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
|
|
356
|
+
if (typeof meta?.region === 'string')
|
|
357
|
+
a.routing.region = meta.region;
|
|
358
|
+
const requestIds = {
|
|
359
|
+
gatewayAiRequestId,
|
|
360
|
+
routerRequestId: respAny.requestId || meta?.requestId
|
|
361
|
+
};
|
|
362
|
+
if (typeof meta?.providerRequestId === 'string')
|
|
363
|
+
requestIds.providerRequestId = meta.providerRequestId;
|
|
364
|
+
if (typeof meta?.openrouterRequestId === 'string')
|
|
365
|
+
requestIds.openrouterRequestId = meta.openrouterRequestId;
|
|
366
|
+
if (meta?.requestIds && typeof meta.requestIds === 'object') {
|
|
367
|
+
for (const [k, v] of Object.entries(meta.requestIds)) {
|
|
368
|
+
if (typeof v === 'string')
|
|
369
|
+
requestIds[k] = v;
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
a.routing.requestIds = requestIds;
|
|
373
|
+
a.modelUsed =
|
|
374
|
+
meta?.modelUsed || meta?.model || respAny.model || candidate.model;
|
|
375
|
+
const costUsd = typeof meta?.costUsd === 'number'
|
|
376
|
+
? meta.costUsd
|
|
377
|
+
: typeof meta?.cost === 'number'
|
|
378
|
+
? meta.cost
|
|
379
|
+
: typeof respAny?.costUsd === 'number'
|
|
380
|
+
? respAny.costUsd
|
|
381
|
+
: typeof respAny?.cost === 'number'
|
|
382
|
+
? respAny.cost
|
|
383
|
+
: undefined;
|
|
384
|
+
if (typeof costUsd === 'number')
|
|
385
|
+
a.costUsd = costUsd;
|
|
386
|
+
if (includeRawProviderPayload) {
|
|
387
|
+
// Size-capped preview only.
|
|
388
|
+
const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
|
|
389
|
+
const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
|
|
390
|
+
a.rawProviderPayload = capString(rawStr, 4000);
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
else if (tryErr) {
|
|
394
|
+
a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
});
|
|
398
|
+
response = result.response;
|
|
399
|
+
lastError = undefined;
|
|
400
|
+
break; // success => stop fallback chain
|
|
401
|
+
}
|
|
402
|
+
catch (err) {
|
|
403
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
404
|
+
continue;
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
if (!response) {
|
|
408
|
+
throw lastError ?? new Error('All fallback candidates failed');
|
|
409
|
+
}
|
|
410
|
+
// Summary counts + final request ids.
|
|
411
|
+
traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
|
|
412
|
+
const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
|
|
413
|
+
traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
|
|
414
|
+
const finalResp = response;
|
|
415
|
+
const finalMeta = finalResp?.metadata || {};
|
|
416
|
+
traceRequestIds = {
|
|
417
|
+
gatewayAiRequestId,
|
|
418
|
+
routerRequestId: finalResp?.requestId || finalMeta?.requestId
|
|
419
|
+
};
|
|
420
|
+
if (typeof finalMeta?.providerRequestId === 'string')
|
|
421
|
+
traceRequestIds.providerRequestId = finalMeta.providerRequestId;
|
|
422
|
+
if (typeof finalMeta?.openrouterRequestId === 'string')
|
|
423
|
+
traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
|
|
424
|
+
if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
|
|
425
|
+
for (const [k, v] of Object.entries(finalMeta.requestIds)) {
|
|
426
|
+
if (typeof v === 'string')
|
|
427
|
+
traceRequestIds[k] = v;
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
const lastOk = [...traceAttempts].reverse().find(a => a.ok);
|
|
431
|
+
providerCallLatencyMs = lastOk?.timing?.durationMs;
|
|
432
|
+
}
|
|
238
433
|
// Contract output processing removed - expectedSchema no longer supported
|
|
239
434
|
// Create enhanced response - extract content properly from router response
|
|
240
435
|
const routerResponse = response;
|
|
@@ -296,35 +491,49 @@ class AIGateway {
|
|
|
296
491
|
}
|
|
297
492
|
contentType = 'structured';
|
|
298
493
|
parsingMethod = 'flex-md';
|
|
299
|
-
|
|
300
|
-
let tokens = { prompt: 0, completion: 0, total: 0 };
|
|
301
|
-
if (routerResponse.usage) {
|
|
302
|
-
tokens = {
|
|
303
|
-
prompt: routerResponse.usage.promptTokens || routerResponse.usage.inputTokens || 0,
|
|
304
|
-
completion: routerResponse.usage.completionTokens || routerResponse.usage.outputTokens || 0,
|
|
305
|
-
total: routerResponse.usage.totalTokens || 0
|
|
306
|
-
};
|
|
307
|
-
}
|
|
308
|
-
else if (routerResponse.metadata?.['ai-activities-response']?.usage) {
|
|
309
|
-
const usage = routerResponse.metadata['ai-activities-response'].usage;
|
|
310
|
-
tokens = {
|
|
311
|
-
prompt: usage.promptTokens || usage.inputTokens || 0,
|
|
312
|
-
completion: usage.completionTokens || usage.outputTokens || 0,
|
|
313
|
-
total: usage.totalTokens || 0
|
|
314
|
-
};
|
|
315
|
-
}
|
|
494
|
+
const tokens = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(routerResponse);
|
|
316
495
|
const enhancedResponse = {
|
|
317
496
|
content: content,
|
|
318
497
|
parsedContent: parsedContent,
|
|
319
498
|
metadata: {
|
|
320
499
|
aiRequestId: request.aiRequestId,
|
|
321
500
|
identity: request.identity,
|
|
322
|
-
latencyMs: Date.now() - startTime,
|
|
501
|
+
latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
|
|
323
502
|
tokens: tokens,
|
|
324
503
|
taskTypeId,
|
|
325
504
|
agentType: 'ai',
|
|
326
505
|
contentType,
|
|
327
|
-
parsingMethod
|
|
506
|
+
parsingMethod,
|
|
507
|
+
...(traceEnabled
|
|
508
|
+
? (() => {
|
|
509
|
+
const meta = routerResponse?.metadata || {};
|
|
510
|
+
const provider = meta.provider || routerResponse?.provider || mergedConfig?.provider;
|
|
511
|
+
const region = typeof meta.region === 'string' ? meta.region : undefined;
|
|
512
|
+
const modelUsed = meta.modelUsed || meta.model || routerResponse?.model || mergedConfig?.model;
|
|
513
|
+
const maxTokensRequested = typeof meta.maxTokensRequested === 'number'
|
|
514
|
+
? meta.maxTokensRequested
|
|
515
|
+
: typeof mergedConfig?.maxTokens === 'number'
|
|
516
|
+
? mergedConfig.maxTokens
|
|
517
|
+
: undefined;
|
|
518
|
+
const costUsd = typeof meta.costUsd === 'number'
|
|
519
|
+
? meta.costUsd
|
|
520
|
+
: typeof meta.cost === 'number'
|
|
521
|
+
? meta.cost
|
|
522
|
+
: undefined;
|
|
523
|
+
return {
|
|
524
|
+
provider,
|
|
525
|
+
region,
|
|
526
|
+
modelUsed,
|
|
527
|
+
maxTokensRequested,
|
|
528
|
+
cost: typeof meta.cost === 'number' ? meta.cost : undefined,
|
|
529
|
+
costUsd,
|
|
530
|
+
requestIds: traceRequestIds,
|
|
531
|
+
retryCount: traceRetryCount,
|
|
532
|
+
fallbackCount: traceFallbackCount,
|
|
533
|
+
attempts: traceAttempts
|
|
534
|
+
};
|
|
535
|
+
})()
|
|
536
|
+
: {})
|
|
328
537
|
}
|
|
329
538
|
};
|
|
330
539
|
// Track activity success if activity was started
|
package/dist-cjs/types.d.ts
CHANGED
|
@@ -10,6 +10,65 @@ export type UsageTier = string;
|
|
|
10
10
|
import type { Activix } from '@x12i/activix';
|
|
11
11
|
import type { TemplateRenderOptions } from '@x12i/rendrix';
|
|
12
12
|
import type { Logxer } from '@x12i/logxer';
|
|
13
|
+
/**
|
|
14
|
+
* Diagnostics options for opt-in authoritative tracing.
|
|
15
|
+
* Default behavior must remain minimal when diagnostics are not enabled.
|
|
16
|
+
*/
|
|
17
|
+
export type DiagnosticsMode = 'none' | 'trace';
|
|
18
|
+
export interface DiagnosticsOptions {
|
|
19
|
+
mode?: DiagnosticsMode;
|
|
20
|
+
/**
|
|
21
|
+
* Whether to include raw provider payloads (NEVER on by default).
|
|
22
|
+
* Implementations must size-cap any raw payload included.
|
|
23
|
+
*/
|
|
24
|
+
includeRawProviderPayload?: boolean;
|
|
25
|
+
}
|
|
26
|
+
export type GatewayTraceRequestIds = {
|
|
27
|
+
/** Stable alias of gateway aiRequestId (always set when trace enabled). */
|
|
28
|
+
gatewayAiRequestId: string;
|
|
29
|
+
/** Router-level correlation id, when available. */
|
|
30
|
+
routerRequestId?: string;
|
|
31
|
+
/** Provider-level request id (or equivalent), when available. */
|
|
32
|
+
providerRequestId?: string;
|
|
33
|
+
/** OpenRouter request id, when applicable and available. */
|
|
34
|
+
openrouterRequestId?: string;
|
|
35
|
+
/** Allow additional stable ids without breaking contract. */
|
|
36
|
+
[key: string]: string | undefined;
|
|
37
|
+
};
|
|
38
|
+
export type GatewayTraceAttempt = {
|
|
39
|
+
timing: {
|
|
40
|
+
startedAt: number;
|
|
41
|
+
endedAt: number;
|
|
42
|
+
durationMs: number;
|
|
43
|
+
};
|
|
44
|
+
routing: {
|
|
45
|
+
provider: string;
|
|
46
|
+
region?: string;
|
|
47
|
+
requestIds: GatewayTraceRequestIds;
|
|
48
|
+
retryIndex: number;
|
|
49
|
+
fallbackIndex: number;
|
|
50
|
+
};
|
|
51
|
+
usage?: {
|
|
52
|
+
tokens: {
|
|
53
|
+
prompt: number;
|
|
54
|
+
completion: number;
|
|
55
|
+
total: number;
|
|
56
|
+
};
|
|
57
|
+
maxTokensRequested?: number;
|
|
58
|
+
};
|
|
59
|
+
modelUsed?: string;
|
|
60
|
+
costUsd?: number;
|
|
61
|
+
ok: boolean;
|
|
62
|
+
error?: {
|
|
63
|
+
name: string;
|
|
64
|
+
message: string;
|
|
65
|
+
};
|
|
66
|
+
/**
|
|
67
|
+
* Optional raw provider payload (size-capped, gated by request flag).
|
|
68
|
+
* The exact shape is intentionally loose to avoid locking downstream to provider schemas.
|
|
69
|
+
*/
|
|
70
|
+
rawProviderPayload?: unknown;
|
|
71
|
+
};
|
|
13
72
|
/**
|
|
14
73
|
* Identity object used for activity linkage.
|
|
15
74
|
* On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
|
|
@@ -606,6 +665,11 @@ interface BaseLLMRequest extends Omit<LLMRequest, 'messages' | 'input' | 'reques
|
|
|
606
665
|
* Used when inferenceType is provided for parsing inference outputs
|
|
607
666
|
*/
|
|
608
667
|
parseOptions?: Record<string, unknown>;
|
|
668
|
+
/**
|
|
669
|
+
* Optional diagnostics controls. When omitted or mode != 'trace', the gateway must not
|
|
670
|
+
* attach heavy diagnostic objects or raw provider payloads.
|
|
671
|
+
*/
|
|
672
|
+
diagnostics?: DiagnosticsOptions;
|
|
609
673
|
}
|
|
610
674
|
/**
|
|
611
675
|
* Chat request for conversational use cases
|
|
@@ -835,6 +899,41 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
|
|
|
835
899
|
* Cost in USD (if available)
|
|
836
900
|
*/
|
|
837
901
|
cost?: number;
|
|
902
|
+
/**
|
|
903
|
+
* Cost in USD (preferred, stable key for trace mode).
|
|
904
|
+
* When both are present, costUsd should mirror cost.
|
|
905
|
+
*/
|
|
906
|
+
costUsd?: number;
|
|
907
|
+
/**
|
|
908
|
+
* Final effective max token cap applied (after merges/normalization), if known.
|
|
909
|
+
*/
|
|
910
|
+
maxTokensRequested?: number;
|
|
911
|
+
/**
|
|
912
|
+
* Model that actually served the response (after routing/fallback), if known.
|
|
913
|
+
* This is distinct from requested model.
|
|
914
|
+
*/
|
|
915
|
+
modelUsed?: string;
|
|
916
|
+
/**
|
|
917
|
+
* Optional region identifier when applicable (provider-specific).
|
|
918
|
+
*/
|
|
919
|
+
region?: string;
|
|
920
|
+
/**
|
|
921
|
+
* Stable request/correlation identifiers across gateway/router/provider layers.
|
|
922
|
+
* Only populated when diagnostics trace mode is enabled.
|
|
923
|
+
*/
|
|
924
|
+
requestIds?: GatewayTraceRequestIds;
|
|
925
|
+
/**
|
|
926
|
+
* Total number of retries performed across the execution (trace mode).
|
|
927
|
+
*/
|
|
928
|
+
retryCount?: number;
|
|
929
|
+
/**
|
|
930
|
+
* Total number of fallback transitions performed across the execution (trace mode).
|
|
931
|
+
*/
|
|
932
|
+
fallbackCount?: number;
|
|
933
|
+
/**
|
|
934
|
+
* Ordered, authoritative attempts across retries and fallbacks (trace mode).
|
|
935
|
+
*/
|
|
936
|
+
attempts?: GatewayTraceAttempt[];
|
|
838
937
|
/**
|
|
839
938
|
* Content type classification
|
|
840
939
|
* Indicates whether content is 'string', 'object', 'array', or 'null'
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
13
|
exports.UsageTracker = void 0;
|
|
14
|
+
const gateway_utils_js_1 = require("./gateway-utils.cjs");
|
|
14
15
|
/**
|
|
15
16
|
* Manages usage tracking for LLM requests
|
|
16
17
|
*/
|
|
@@ -29,11 +30,7 @@ class UsageTracker {
|
|
|
29
30
|
* @returns Token usage breakdown
|
|
30
31
|
*/
|
|
31
32
|
extractTokens(response) {
|
|
32
|
-
return {
|
|
33
|
-
prompt: response.usage?.promptTokens || 0,
|
|
34
|
-
completion: response.usage?.completionTokens || 0,
|
|
35
|
-
total: response.usage?.totalTokens || 0
|
|
36
|
-
};
|
|
33
|
+
return (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(response);
|
|
37
34
|
}
|
|
38
35
|
/**
|
|
39
36
|
* Records usage for a request
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@x12i/ai-gateway",
|
|
3
|
-
"version": "9.0.3",
|
|
3
|
+
"version": "9.0.8",
|
|
4
4
|
"description": "AI Gateway - Unified interface for LLM provider routing and management",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -60,7 +60,7 @@
|
|
|
60
60
|
"author": "x12i",
|
|
61
61
|
"license": "mit",
|
|
62
62
|
"dependencies": {
|
|
63
|
-
"@x12i/ai-providers-router": "^4.7.
|
|
63
|
+
"@x12i/ai-providers-router": "^4.7.7",
|
|
64
64
|
"@x12i/rendrix": "^4.2.0",
|
|
65
65
|
"@aws-sdk/s3-request-presigner": "^3.953.0",
|
|
66
66
|
"@x12i/env": "^4.0.1",
|