@x12i/ai-gateway 9.0.7 → 9.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/gateway-utils.d.ts +23 -0
- package/dist/gateway-utils.js +99 -0
- package/dist/gateway.js +58 -43
- package/dist/usage-tracker.js +2 -5
- package/dist-cjs/gateway-utils.cjs +102 -0
- package/dist-cjs/gateway-utils.d.ts +23 -0
- package/dist-cjs/gateway.cjs +57 -42
- package/dist-cjs/usage-tracker.cjs +2 -5
- package/package.json +1 -1
package/dist/gateway-utils.d.ts
CHANGED
|
@@ -19,3 +19,26 @@ export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer):
|
|
|
19
19
|
export declare function mergeConfig(request: ChatRequest & {
|
|
20
20
|
useInternalDefaults?: 'skill' | 'audit';
|
|
21
21
|
}, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
|
|
22
|
+
/**
|
|
23
|
+
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
24
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
25
|
+
*/
|
|
26
|
+
export declare function normalizeRouterUsageTokens(usage: unknown): {
|
|
27
|
+
prompt: number;
|
|
28
|
+
completion: number;
|
|
29
|
+
total: number;
|
|
30
|
+
} | undefined;
|
|
31
|
+
/**
|
|
32
|
+
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
33
|
+
*/
|
|
34
|
+
export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
|
|
35
|
+
prompt: number;
|
|
36
|
+
completion: number;
|
|
37
|
+
total: number;
|
|
38
|
+
};
|
|
39
|
+
/**
|
|
40
|
+
* Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd (preferred),
|
|
41
|
+
* response root, metadata.attempts[].costUsd (most recent attempt first), then common raw payload locations.
|
|
42
|
+
* Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
|
|
43
|
+
*/
|
|
44
|
+
export declare function extractCostUsdFromRouterResponse(routerResponse: unknown): number | undefined;
|
package/dist/gateway-utils.js
CHANGED
|
@@ -179,3 +179,102 @@ export async function mergeConfig(request, config, logger) {
|
|
|
179
179
|
});
|
|
180
180
|
return merged;
|
|
181
181
|
}
|
|
182
|
+
function firstFiniteNumber(...vals) {
|
|
183
|
+
for (const v of vals) {
|
|
184
|
+
if (typeof v === 'number' && Number.isFinite(v))
|
|
185
|
+
return v;
|
|
186
|
+
}
|
|
187
|
+
return undefined;
|
|
188
|
+
}
|
|
189
|
+
/**
|
|
190
|
+
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
191
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
192
|
+
*/
|
|
193
|
+
export function normalizeRouterUsageTokens(usage) {
|
|
194
|
+
if (usage == null || typeof usage !== 'object')
|
|
195
|
+
return undefined;
|
|
196
|
+
const u = usage;
|
|
197
|
+
const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens) ?? 0;
|
|
198
|
+
const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens) ?? 0;
|
|
199
|
+
let total = firstFiniteNumber(u.totalTokens, u.total_tokens) ?? 0;
|
|
200
|
+
if (!total && (prompt || completion))
|
|
201
|
+
total = prompt + completion;
|
|
202
|
+
return { prompt, completion, total };
|
|
203
|
+
}
|
|
204
|
+
/**
|
|
205
|
+
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
206
|
+
*/
|
|
207
|
+
export function extractTokenUsageFromRouterResponse(routerResponse) {
|
|
208
|
+
if (routerResponse == null || typeof routerResponse !== 'object') {
|
|
209
|
+
return { prompt: 0, completion: 0, total: 0 };
|
|
210
|
+
}
|
|
211
|
+
const r = routerResponse;
|
|
212
|
+
const meta = r.metadata != null && typeof r.metadata === 'object'
|
|
213
|
+
? r.metadata
|
|
214
|
+
: undefined;
|
|
215
|
+
const buckets = [r.usage];
|
|
216
|
+
if (meta) {
|
|
217
|
+
buckets.push(meta.usage);
|
|
218
|
+
const nested = meta['ai-activities-response'];
|
|
219
|
+
if (nested != null && typeof nested === 'object') {
|
|
220
|
+
buckets.push(nested.usage);
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
const raw = r.rawResponse ?? r.raw;
|
|
224
|
+
if (raw != null && typeof raw === 'object') {
|
|
225
|
+
buckets.push(raw.usage);
|
|
226
|
+
}
|
|
227
|
+
for (const b of buckets) {
|
|
228
|
+
const n = normalizeRouterUsageTokens(b);
|
|
229
|
+
if (n && (n.prompt || n.completion || n.total))
|
|
230
|
+
return n;
|
|
231
|
+
}
|
|
232
|
+
return { prompt: 0, completion: 0, total: 0 };
|
|
233
|
+
}
|
|
234
|
+
/**
|
|
235
|
+
* Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd (preferred),
|
|
236
|
+
* response root, metadata.attempts[].costUsd (most recent attempt first), then common raw payload locations.
|
|
237
|
+
* Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
|
|
238
|
+
*/
|
|
239
|
+
export function extractCostUsdFromRouterResponse(routerResponse) {
|
|
240
|
+
if (routerResponse == null || typeof routerResponse !== 'object')
|
|
241
|
+
return undefined;
|
|
242
|
+
const r = routerResponse;
|
|
243
|
+
const meta = r.metadata != null && typeof r.metadata === 'object'
|
|
244
|
+
? r.metadata
|
|
245
|
+
: undefined;
|
|
246
|
+
const pick = (...vals) => firstFiniteNumber(...vals);
|
|
247
|
+
const fromMeta = pick(meta?.costUsd, meta?.cost);
|
|
248
|
+
if (fromMeta !== undefined)
|
|
249
|
+
return fromMeta;
|
|
250
|
+
const fromRoot = pick(r.costUsd, r.cost);
|
|
251
|
+
if (fromRoot !== undefined)
|
|
252
|
+
return fromRoot;
|
|
253
|
+
const attempts = meta?.attempts;
|
|
254
|
+
if (Array.isArray(attempts)) {
|
|
255
|
+
for (let i = attempts.length - 1; i >= 0; i--) {
|
|
256
|
+
const a = attempts[i];
|
|
257
|
+
if (a != null && typeof a === 'object') {
|
|
258
|
+
const o = a;
|
|
259
|
+
const c = pick(o.costUsd, o.cost);
|
|
260
|
+
if (c !== undefined)
|
|
261
|
+
return c;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
const raw = r.rawResponse ?? r.raw;
|
|
266
|
+
if (raw != null && typeof raw === 'object') {
|
|
267
|
+
const rawObj = raw;
|
|
268
|
+
const usage = rawObj.usage;
|
|
269
|
+
if (usage != null && typeof usage === 'object') {
|
|
270
|
+
const u = usage;
|
|
271
|
+
const fromUsage = pick(u.cost, u.costUsd, u.total_cost, u.totalCost);
|
|
272
|
+
if (fromUsage !== undefined)
|
|
273
|
+
return fromUsage;
|
|
274
|
+
}
|
|
275
|
+
const fromRawTop = pick(rawObj.cost, rawObj.costUsd);
|
|
276
|
+
if (fromRawTop !== undefined)
|
|
277
|
+
return fromRawTop;
|
|
278
|
+
}
|
|
279
|
+
return undefined;
|
|
280
|
+
}
|
package/dist/gateway.js
CHANGED
|
@@ -8,7 +8,7 @@ import { ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
|
8
8
|
import { initializeGatewayComponents } from './gateway-config.js';
|
|
9
9
|
import { buildMessages } from './message-builder.js';
|
|
10
10
|
import { extractJsonFromFlexMd } from './flex-md-loader.js';
|
|
11
|
-
import { mergeConfig } from './gateway-utils.js';
|
|
11
|
+
import { extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig } from './gateway-utils.js';
|
|
12
12
|
import { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
13
13
|
import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
|
|
14
14
|
import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
|
|
@@ -16,6 +16,25 @@ import { invokeWithRetry } from './gateway-retry.js';
|
|
|
16
16
|
/** Error message thrown by the router when no provider is registered or specified */
|
|
17
17
|
const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
|
|
18
18
|
const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
|
|
19
|
+
/** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
|
|
20
|
+
function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
|
|
21
|
+
const { tokens, costUsd, cost } = meta;
|
|
22
|
+
const zeroTokens = tokens.prompt === 0 && tokens.completion === 0 && tokens.total === 0;
|
|
23
|
+
const zeroCostUsd = typeof costUsd === 'number' && costUsd === 0;
|
|
24
|
+
const zeroCost = typeof cost === 'number' && cost === 0;
|
|
25
|
+
if (!zeroTokens && !zeroCostUsd && !zeroCost)
|
|
26
|
+
return;
|
|
27
|
+
logger.warn('Successful provider response reported zero token usage and/or zero cost; verify router adapter usage and billing metadata', withActivityIdentity(identity, {
|
|
28
|
+
invokeKind,
|
|
29
|
+
zeroTokens,
|
|
30
|
+
zeroCostUsd,
|
|
31
|
+
zeroCostField: zeroCost,
|
|
32
|
+
tokens,
|
|
33
|
+
costUsd,
|
|
34
|
+
cost,
|
|
35
|
+
debugKind: gatewayLogDebug.anomaly
|
|
36
|
+
}));
|
|
37
|
+
}
|
|
19
38
|
/**
|
|
20
39
|
* Simplified AI Gateway - Clean proxy implementation
|
|
21
40
|
*/
|
|
@@ -87,6 +106,8 @@ export class AIGateway {
|
|
|
87
106
|
},
|
|
88
107
|
mode: 'sync'
|
|
89
108
|
});
|
|
109
|
+
const costUsdChat = extractCostUsdFromRouterResponse(response);
|
|
110
|
+
const metaChat = response?.metadata || {};
|
|
90
111
|
// Create enhanced response
|
|
91
112
|
const enhancedResponse = {
|
|
92
113
|
content: response.content || '',
|
|
@@ -94,15 +115,22 @@ export class AIGateway {
|
|
|
94
115
|
aiRequestId: request.aiRequestId,
|
|
95
116
|
identity: request.identity,
|
|
96
117
|
latencyMs: Date.now() - startTime,
|
|
97
|
-
tokens: response
|
|
118
|
+
tokens: extractTokenUsageFromRouterResponse(response),
|
|
98
119
|
taskTypeId,
|
|
99
|
-
agentType: 'chat'
|
|
120
|
+
agentType: 'chat',
|
|
121
|
+
...(typeof costUsdChat === 'number'
|
|
122
|
+
? {
|
|
123
|
+
costUsd: costUsdChat,
|
|
124
|
+
...(typeof metaChat.cost === 'number' ? { cost: metaChat.cost } : { cost: costUsdChat })
|
|
125
|
+
}
|
|
126
|
+
: {})
|
|
100
127
|
}
|
|
101
128
|
};
|
|
102
129
|
// Track activity success if activity was started
|
|
103
130
|
if (activity) {
|
|
104
131
|
try {
|
|
105
132
|
await this.activityManager.logSuccess(activity, {
|
|
133
|
+
...(typeof costUsdChat === 'number' ? { cost: costUsdChat } : {}),
|
|
106
134
|
response: enhancedResponse,
|
|
107
135
|
endTime: Date.now(),
|
|
108
136
|
duration: Date.now() - startTime
|
|
@@ -116,6 +144,11 @@ export class AIGateway {
|
|
|
116
144
|
});
|
|
117
145
|
}
|
|
118
146
|
}
|
|
147
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
148
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
149
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
150
|
+
cost: enhancedResponse.metadata.cost
|
|
151
|
+
}, 'invokeChat');
|
|
119
152
|
return enhancedResponse;
|
|
120
153
|
}
|
|
121
154
|
catch (error) {
|
|
@@ -340,12 +373,9 @@ export class AIGateway {
|
|
|
340
373
|
const respAny = tryResp;
|
|
341
374
|
if (ok && respAny) {
|
|
342
375
|
const meta = respAny.metadata || {};
|
|
343
|
-
const
|
|
344
|
-
const prompt = usage?.promptTokens ?? usage?.inputTokens ?? 0;
|
|
345
|
-
const completion = usage?.completionTokens ?? usage?.outputTokens ?? 0;
|
|
346
|
-
const total = usage?.totalTokens ?? 0;
|
|
376
|
+
const tokenCounts = extractTokenUsageFromRouterResponse(respAny);
|
|
347
377
|
a.usage = {
|
|
348
|
-
tokens:
|
|
378
|
+
tokens: tokenCounts,
|
|
349
379
|
maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
|
|
350
380
|
? meta.maxTokensRequested
|
|
351
381
|
: typeof mergedConfig?.maxTokens === 'number'
|
|
@@ -372,17 +402,9 @@ export class AIGateway {
|
|
|
372
402
|
a.routing.requestIds = requestIds;
|
|
373
403
|
a.modelUsed =
|
|
374
404
|
meta?.modelUsed || meta?.model || respAny.model || candidate.model;
|
|
375
|
-
const
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
? meta.cost
|
|
379
|
-
: typeof respAny?.costUsd === 'number'
|
|
380
|
-
? respAny.costUsd
|
|
381
|
-
: typeof respAny?.cost === 'number'
|
|
382
|
-
? respAny.cost
|
|
383
|
-
: undefined;
|
|
384
|
-
if (typeof costUsd === 'number')
|
|
385
|
-
a.costUsd = costUsd;
|
|
405
|
+
const attemptCostUsd = extractCostUsdFromRouterResponse(respAny);
|
|
406
|
+
if (typeof attemptCostUsd === 'number')
|
|
407
|
+
a.costUsd = attemptCostUsd;
|
|
386
408
|
if (includeRawProviderPayload) {
|
|
387
409
|
// Size-capped preview only.
|
|
388
410
|
const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
|
|
@@ -491,23 +513,9 @@ export class AIGateway {
|
|
|
491
513
|
}
|
|
492
514
|
contentType = 'structured';
|
|
493
515
|
parsingMethod = 'flex-md';
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
tokens = {
|
|
498
|
-
prompt: routerResponse.usage.promptTokens || routerResponse.usage.inputTokens || 0,
|
|
499
|
-
completion: routerResponse.usage.completionTokens || routerResponse.usage.outputTokens || 0,
|
|
500
|
-
total: routerResponse.usage.totalTokens || 0
|
|
501
|
-
};
|
|
502
|
-
}
|
|
503
|
-
else if (routerResponse.metadata?.['ai-activities-response']?.usage) {
|
|
504
|
-
const usage = routerResponse.metadata['ai-activities-response'].usage;
|
|
505
|
-
tokens = {
|
|
506
|
-
prompt: usage.promptTokens || usage.inputTokens || 0,
|
|
507
|
-
completion: usage.completionTokens || usage.outputTokens || 0,
|
|
508
|
-
total: usage.totalTokens || 0
|
|
509
|
-
};
|
|
510
|
-
}
|
|
516
|
+
const tokens = extractTokenUsageFromRouterResponse(routerResponse);
|
|
517
|
+
const resolvedCostUsd = extractCostUsdFromRouterResponse(routerResponse);
|
|
518
|
+
const routerMetaForCost = routerResponse?.metadata || {};
|
|
511
519
|
const enhancedResponse = {
|
|
512
520
|
content: content,
|
|
513
521
|
parsedContent: parsedContent,
|
|
@@ -520,6 +528,14 @@ export class AIGateway {
|
|
|
520
528
|
agentType: 'ai',
|
|
521
529
|
contentType,
|
|
522
530
|
parsingMethod,
|
|
531
|
+
...(typeof resolvedCostUsd === 'number'
|
|
532
|
+
? {
|
|
533
|
+
costUsd: resolvedCostUsd,
|
|
534
|
+
...(typeof routerMetaForCost.cost === 'number'
|
|
535
|
+
? { cost: routerMetaForCost.cost }
|
|
536
|
+
: { cost: resolvedCostUsd })
|
|
537
|
+
}
|
|
538
|
+
: {}),
|
|
523
539
|
...(traceEnabled
|
|
524
540
|
? (() => {
|
|
525
541
|
const meta = routerResponse?.metadata || {};
|
|
@@ -531,18 +547,11 @@ export class AIGateway {
|
|
|
531
547
|
: typeof mergedConfig?.maxTokens === 'number'
|
|
532
548
|
? mergedConfig.maxTokens
|
|
533
549
|
: undefined;
|
|
534
|
-
const costUsd = typeof meta.costUsd === 'number'
|
|
535
|
-
? meta.costUsd
|
|
536
|
-
: typeof meta.cost === 'number'
|
|
537
|
-
? meta.cost
|
|
538
|
-
: undefined;
|
|
539
550
|
return {
|
|
540
551
|
provider,
|
|
541
552
|
region,
|
|
542
553
|
modelUsed,
|
|
543
554
|
maxTokensRequested,
|
|
544
|
-
cost: typeof meta.cost === 'number' ? meta.cost : undefined,
|
|
545
|
-
costUsd,
|
|
546
555
|
requestIds: traceRequestIds,
|
|
547
556
|
retryCount: traceRetryCount,
|
|
548
557
|
fallbackCount: traceFallbackCount,
|
|
@@ -568,6 +577,7 @@ export class AIGateway {
|
|
|
568
577
|
usage: tokens
|
|
569
578
|
};
|
|
570
579
|
await this.activityManager.logSuccess(activity, {
|
|
580
|
+
...(typeof resolvedCostUsd === 'number' ? { cost: resolvedCostUsd } : {}),
|
|
571
581
|
response: activityResponse,
|
|
572
582
|
endTime: Date.now(),
|
|
573
583
|
duration: Date.now() - startTime
|
|
@@ -581,6 +591,11 @@ export class AIGateway {
|
|
|
581
591
|
});
|
|
582
592
|
}
|
|
583
593
|
}
|
|
594
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
595
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
596
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
597
|
+
cost: enhancedResponse.metadata.cost
|
|
598
|
+
}, 'invoke');
|
|
584
599
|
this.logger.debug('gateway: enhancedResponse', withActivityIdentity(request.identity, {
|
|
585
600
|
latencyMs: enhancedResponse.metadata?.latencyMs,
|
|
586
601
|
contentType: enhancedResponse.metadata?.contentType,
|
package/dist/usage-tracker.js
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*
|
|
9
9
|
* Note: x-models dependency removed - usage tracking functions are permanently disabled
|
|
10
10
|
*/
|
|
11
|
+
import { extractTokenUsageFromRouterResponse } from './gateway-utils.js';
|
|
11
12
|
/**
|
|
12
13
|
* Manages usage tracking for LLM requests
|
|
13
14
|
*/
|
|
@@ -26,11 +27,7 @@ export class UsageTracker {
|
|
|
26
27
|
* @returns Token usage breakdown
|
|
27
28
|
*/
|
|
28
29
|
extractTokens(response) {
|
|
29
|
-
return
|
|
30
|
-
prompt: response.usage?.promptTokens || 0,
|
|
31
|
-
completion: response.usage?.completionTokens || 0,
|
|
32
|
-
total: response.usage?.totalTokens || 0
|
|
33
|
-
};
|
|
30
|
+
return extractTokenUsageFromRouterResponse(response);
|
|
34
31
|
}
|
|
35
32
|
/**
|
|
36
33
|
* Records usage for a request
|
|
@@ -40,6 +40,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
40
40
|
exports.generateMD5Hash = generateMD5Hash;
|
|
41
41
|
exports.ensureTaskTypeId = ensureTaskTypeId;
|
|
42
42
|
exports.mergeConfig = mergeConfig;
|
|
43
|
+
exports.normalizeRouterUsageTokens = normalizeRouterUsageTokens;
|
|
44
|
+
exports.extractTokenUsageFromRouterResponse = extractTokenUsageFromRouterResponse;
|
|
45
|
+
exports.extractCostUsdFromRouterResponse = extractCostUsdFromRouterResponse;
|
|
43
46
|
const crypto = __importStar(require("crypto"));
|
|
44
47
|
const gateway_instructions_js_1 = require("./gateway-instructions.cjs");
|
|
45
48
|
const flex_md_loader_js_1 = require("./flex-md-loader.cjs");
|
|
@@ -217,3 +220,102 @@ async function mergeConfig(request, config, logger) {
|
|
|
217
220
|
});
|
|
218
221
|
return merged;
|
|
219
222
|
}
|
|
223
|
+
function firstFiniteNumber(...vals) {
|
|
224
|
+
for (const v of vals) {
|
|
225
|
+
if (typeof v === 'number' && Number.isFinite(v))
|
|
226
|
+
return v;
|
|
227
|
+
}
|
|
228
|
+
return undefined;
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
232
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
233
|
+
*/
|
|
234
|
+
function normalizeRouterUsageTokens(usage) {
|
|
235
|
+
if (usage == null || typeof usage !== 'object')
|
|
236
|
+
return undefined;
|
|
237
|
+
const u = usage;
|
|
238
|
+
const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens) ?? 0;
|
|
239
|
+
const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens) ?? 0;
|
|
240
|
+
let total = firstFiniteNumber(u.totalTokens, u.total_tokens) ?? 0;
|
|
241
|
+
if (!total && (prompt || completion))
|
|
242
|
+
total = prompt + completion;
|
|
243
|
+
return { prompt, completion, total };
|
|
244
|
+
}
|
|
245
|
+
/**
|
|
246
|
+
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
247
|
+
*/
|
|
248
|
+
function extractTokenUsageFromRouterResponse(routerResponse) {
|
|
249
|
+
if (routerResponse == null || typeof routerResponse !== 'object') {
|
|
250
|
+
return { prompt: 0, completion: 0, total: 0 };
|
|
251
|
+
}
|
|
252
|
+
const r = routerResponse;
|
|
253
|
+
const meta = r.metadata != null && typeof r.metadata === 'object'
|
|
254
|
+
? r.metadata
|
|
255
|
+
: undefined;
|
|
256
|
+
const buckets = [r.usage];
|
|
257
|
+
if (meta) {
|
|
258
|
+
buckets.push(meta.usage);
|
|
259
|
+
const nested = meta['ai-activities-response'];
|
|
260
|
+
if (nested != null && typeof nested === 'object') {
|
|
261
|
+
buckets.push(nested.usage);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
const raw = r.rawResponse ?? r.raw;
|
|
265
|
+
if (raw != null && typeof raw === 'object') {
|
|
266
|
+
buckets.push(raw.usage);
|
|
267
|
+
}
|
|
268
|
+
for (const b of buckets) {
|
|
269
|
+
const n = normalizeRouterUsageTokens(b);
|
|
270
|
+
if (n && (n.prompt || n.completion || n.total))
|
|
271
|
+
return n;
|
|
272
|
+
}
|
|
273
|
+
return { prompt: 0, completion: 0, total: 0 };
|
|
274
|
+
}
|
|
275
|
+
/**
|
|
276
|
+
* Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd (preferred),
|
|
277
|
+
* response root, metadata.attempts[].costUsd (most recent attempt first), then common raw payload locations.
|
|
278
|
+
* Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
|
|
279
|
+
*/
|
|
280
|
+
function extractCostUsdFromRouterResponse(routerResponse) {
|
|
281
|
+
if (routerResponse == null || typeof routerResponse !== 'object')
|
|
282
|
+
return undefined;
|
|
283
|
+
const r = routerResponse;
|
|
284
|
+
const meta = r.metadata != null && typeof r.metadata === 'object'
|
|
285
|
+
? r.metadata
|
|
286
|
+
: undefined;
|
|
287
|
+
const pick = (...vals) => firstFiniteNumber(...vals);
|
|
288
|
+
const fromMeta = pick(meta?.costUsd, meta?.cost);
|
|
289
|
+
if (fromMeta !== undefined)
|
|
290
|
+
return fromMeta;
|
|
291
|
+
const fromRoot = pick(r.costUsd, r.cost);
|
|
292
|
+
if (fromRoot !== undefined)
|
|
293
|
+
return fromRoot;
|
|
294
|
+
const attempts = meta?.attempts;
|
|
295
|
+
if (Array.isArray(attempts)) {
|
|
296
|
+
for (let i = attempts.length - 1; i >= 0; i--) {
|
|
297
|
+
const a = attempts[i];
|
|
298
|
+
if (a != null && typeof a === 'object') {
|
|
299
|
+
const o = a;
|
|
300
|
+
const c = pick(o.costUsd, o.cost);
|
|
301
|
+
if (c !== undefined)
|
|
302
|
+
return c;
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
const raw = r.rawResponse ?? r.raw;
|
|
307
|
+
if (raw != null && typeof raw === 'object') {
|
|
308
|
+
const rawObj = raw;
|
|
309
|
+
const usage = rawObj.usage;
|
|
310
|
+
if (usage != null && typeof usage === 'object') {
|
|
311
|
+
const u = usage;
|
|
312
|
+
const fromUsage = pick(u.cost, u.costUsd, u.total_cost, u.totalCost);
|
|
313
|
+
if (fromUsage !== undefined)
|
|
314
|
+
return fromUsage;
|
|
315
|
+
}
|
|
316
|
+
const fromRawTop = pick(rawObj.cost, rawObj.costUsd);
|
|
317
|
+
if (fromRawTop !== undefined)
|
|
318
|
+
return fromRawTop;
|
|
319
|
+
}
|
|
320
|
+
return undefined;
|
|
321
|
+
}
|
|
@@ -19,3 +19,26 @@ export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer):
|
|
|
19
19
|
export declare function mergeConfig(request: ChatRequest & {
|
|
20
20
|
useInternalDefaults?: 'skill' | 'audit';
|
|
21
21
|
}, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
|
|
22
|
+
/**
|
|
23
|
+
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
24
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
25
|
+
*/
|
|
26
|
+
export declare function normalizeRouterUsageTokens(usage: unknown): {
|
|
27
|
+
prompt: number;
|
|
28
|
+
completion: number;
|
|
29
|
+
total: number;
|
|
30
|
+
} | undefined;
|
|
31
|
+
/**
|
|
32
|
+
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
33
|
+
*/
|
|
34
|
+
export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
|
|
35
|
+
prompt: number;
|
|
36
|
+
completion: number;
|
|
37
|
+
total: number;
|
|
38
|
+
};
|
|
39
|
+
/**
|
|
40
|
+
* Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd (preferred),
|
|
41
|
+
* response root, metadata.attempts[].costUsd (most recent attempt first), then common raw payload locations.
|
|
42
|
+
* Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
|
|
43
|
+
*/
|
|
44
|
+
export declare function extractCostUsdFromRouterResponse(routerResponse: unknown): number | undefined;
|
package/dist-cjs/gateway.cjs
CHANGED
|
@@ -19,6 +19,25 @@ const gateway_retry_js_1 = require("./gateway-retry.cjs");
|
|
|
19
19
|
/** Error message thrown by the router when no provider is registered or specified */
|
|
20
20
|
const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
|
|
21
21
|
const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
|
|
22
|
+
/** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
|
|
23
|
+
function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
|
|
24
|
+
const { tokens, costUsd, cost } = meta;
|
|
25
|
+
const zeroTokens = tokens.prompt === 0 && tokens.completion === 0 && tokens.total === 0;
|
|
26
|
+
const zeroCostUsd = typeof costUsd === 'number' && costUsd === 0;
|
|
27
|
+
const zeroCost = typeof cost === 'number' && cost === 0;
|
|
28
|
+
if (!zeroTokens && !zeroCostUsd && !zeroCost)
|
|
29
|
+
return;
|
|
30
|
+
logger.warn('Successful provider response reported zero token usage and/or zero cost; verify router adapter usage and billing metadata', (0, gateway_log_meta_js_1.withActivityIdentity)(identity, {
|
|
31
|
+
invokeKind,
|
|
32
|
+
zeroTokens,
|
|
33
|
+
zeroCostUsd,
|
|
34
|
+
zeroCostField: zeroCost,
|
|
35
|
+
tokens,
|
|
36
|
+
costUsd,
|
|
37
|
+
cost,
|
|
38
|
+
debugKind: gateway_log_meta_js_1.gatewayLogDebug.anomaly
|
|
39
|
+
}));
|
|
40
|
+
}
|
|
22
41
|
/**
|
|
23
42
|
* Simplified AI Gateway - Clean proxy implementation
|
|
24
43
|
*/
|
|
@@ -90,6 +109,8 @@ class AIGateway {
|
|
|
90
109
|
},
|
|
91
110
|
mode: 'sync'
|
|
92
111
|
});
|
|
112
|
+
const costUsdChat = (0, gateway_utils_js_1.extractCostUsdFromRouterResponse)(response);
|
|
113
|
+
const metaChat = response?.metadata || {};
|
|
93
114
|
// Create enhanced response
|
|
94
115
|
const enhancedResponse = {
|
|
95
116
|
content: response.content || '',
|
|
@@ -97,15 +118,22 @@ class AIGateway {
|
|
|
97
118
|
aiRequestId: request.aiRequestId,
|
|
98
119
|
identity: request.identity,
|
|
99
120
|
latencyMs: Date.now() - startTime,
|
|
100
|
-
tokens:
|
|
121
|
+
tokens: (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(response),
|
|
101
122
|
taskTypeId,
|
|
102
|
-
agentType: 'chat'
|
|
123
|
+
agentType: 'chat',
|
|
124
|
+
...(typeof costUsdChat === 'number'
|
|
125
|
+
? {
|
|
126
|
+
costUsd: costUsdChat,
|
|
127
|
+
...(typeof metaChat.cost === 'number' ? { cost: metaChat.cost } : { cost: costUsdChat })
|
|
128
|
+
}
|
|
129
|
+
: {})
|
|
103
130
|
}
|
|
104
131
|
};
|
|
105
132
|
// Track activity success if activity was started
|
|
106
133
|
if (activity) {
|
|
107
134
|
try {
|
|
108
135
|
await this.activityManager.logSuccess(activity, {
|
|
136
|
+
...(typeof costUsdChat === 'number' ? { cost: costUsdChat } : {}),
|
|
109
137
|
response: enhancedResponse,
|
|
110
138
|
endTime: Date.now(),
|
|
111
139
|
duration: Date.now() - startTime
|
|
@@ -119,6 +147,11 @@ class AIGateway {
|
|
|
119
147
|
});
|
|
120
148
|
}
|
|
121
149
|
}
|
|
150
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
151
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
152
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
153
|
+
cost: enhancedResponse.metadata.cost
|
|
154
|
+
}, 'invokeChat');
|
|
122
155
|
return enhancedResponse;
|
|
123
156
|
}
|
|
124
157
|
catch (error) {
|
|
@@ -343,12 +376,9 @@ class AIGateway {
|
|
|
343
376
|
const respAny = tryResp;
|
|
344
377
|
if (ok && respAny) {
|
|
345
378
|
const meta = respAny.metadata || {};
|
|
346
|
-
const
|
|
347
|
-
const prompt = usage?.promptTokens ?? usage?.inputTokens ?? 0;
|
|
348
|
-
const completion = usage?.completionTokens ?? usage?.outputTokens ?? 0;
|
|
349
|
-
const total = usage?.totalTokens ?? 0;
|
|
379
|
+
const tokenCounts = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(respAny);
|
|
350
380
|
a.usage = {
|
|
351
|
-
tokens:
|
|
381
|
+
tokens: tokenCounts,
|
|
352
382
|
maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
|
|
353
383
|
? meta.maxTokensRequested
|
|
354
384
|
: typeof mergedConfig?.maxTokens === 'number'
|
|
@@ -375,17 +405,9 @@ class AIGateway {
|
|
|
375
405
|
a.routing.requestIds = requestIds;
|
|
376
406
|
a.modelUsed =
|
|
377
407
|
meta?.modelUsed || meta?.model || respAny.model || candidate.model;
|
|
378
|
-
const
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
? meta.cost
|
|
382
|
-
: typeof respAny?.costUsd === 'number'
|
|
383
|
-
? respAny.costUsd
|
|
384
|
-
: typeof respAny?.cost === 'number'
|
|
385
|
-
? respAny.cost
|
|
386
|
-
: undefined;
|
|
387
|
-
if (typeof costUsd === 'number')
|
|
388
|
-
a.costUsd = costUsd;
|
|
408
|
+
const attemptCostUsd = (0, gateway_utils_js_1.extractCostUsdFromRouterResponse)(respAny);
|
|
409
|
+
if (typeof attemptCostUsd === 'number')
|
|
410
|
+
a.costUsd = attemptCostUsd;
|
|
389
411
|
if (includeRawProviderPayload) {
|
|
390
412
|
// Size-capped preview only.
|
|
391
413
|
const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
|
|
@@ -494,23 +516,9 @@ class AIGateway {
|
|
|
494
516
|
}
|
|
495
517
|
contentType = 'structured';
|
|
496
518
|
parsingMethod = 'flex-md';
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
tokens = {
|
|
501
|
-
prompt: routerResponse.usage.promptTokens || routerResponse.usage.inputTokens || 0,
|
|
502
|
-
completion: routerResponse.usage.completionTokens || routerResponse.usage.outputTokens || 0,
|
|
503
|
-
total: routerResponse.usage.totalTokens || 0
|
|
504
|
-
};
|
|
505
|
-
}
|
|
506
|
-
else if (routerResponse.metadata?.['ai-activities-response']?.usage) {
|
|
507
|
-
const usage = routerResponse.metadata['ai-activities-response'].usage;
|
|
508
|
-
tokens = {
|
|
509
|
-
prompt: usage.promptTokens || usage.inputTokens || 0,
|
|
510
|
-
completion: usage.completionTokens || usage.outputTokens || 0,
|
|
511
|
-
total: usage.totalTokens || 0
|
|
512
|
-
};
|
|
513
|
-
}
|
|
519
|
+
const tokens = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(routerResponse);
|
|
520
|
+
const resolvedCostUsd = (0, gateway_utils_js_1.extractCostUsdFromRouterResponse)(routerResponse);
|
|
521
|
+
const routerMetaForCost = routerResponse?.metadata || {};
|
|
514
522
|
const enhancedResponse = {
|
|
515
523
|
content: content,
|
|
516
524
|
parsedContent: parsedContent,
|
|
@@ -523,6 +531,14 @@ class AIGateway {
|
|
|
523
531
|
agentType: 'ai',
|
|
524
532
|
contentType,
|
|
525
533
|
parsingMethod,
|
|
534
|
+
...(typeof resolvedCostUsd === 'number'
|
|
535
|
+
? {
|
|
536
|
+
costUsd: resolvedCostUsd,
|
|
537
|
+
...(typeof routerMetaForCost.cost === 'number'
|
|
538
|
+
? { cost: routerMetaForCost.cost }
|
|
539
|
+
: { cost: resolvedCostUsd })
|
|
540
|
+
}
|
|
541
|
+
: {}),
|
|
526
542
|
...(traceEnabled
|
|
527
543
|
? (() => {
|
|
528
544
|
const meta = routerResponse?.metadata || {};
|
|
@@ -534,18 +550,11 @@ class AIGateway {
|
|
|
534
550
|
: typeof mergedConfig?.maxTokens === 'number'
|
|
535
551
|
? mergedConfig.maxTokens
|
|
536
552
|
: undefined;
|
|
537
|
-
const costUsd = typeof meta.costUsd === 'number'
|
|
538
|
-
? meta.costUsd
|
|
539
|
-
: typeof meta.cost === 'number'
|
|
540
|
-
? meta.cost
|
|
541
|
-
: undefined;
|
|
542
553
|
return {
|
|
543
554
|
provider,
|
|
544
555
|
region,
|
|
545
556
|
modelUsed,
|
|
546
557
|
maxTokensRequested,
|
|
547
|
-
cost: typeof meta.cost === 'number' ? meta.cost : undefined,
|
|
548
|
-
costUsd,
|
|
549
558
|
requestIds: traceRequestIds,
|
|
550
559
|
retryCount: traceRetryCount,
|
|
551
560
|
fallbackCount: traceFallbackCount,
|
|
@@ -571,6 +580,7 @@ class AIGateway {
|
|
|
571
580
|
usage: tokens
|
|
572
581
|
};
|
|
573
582
|
await this.activityManager.logSuccess(activity, {
|
|
583
|
+
...(typeof resolvedCostUsd === 'number' ? { cost: resolvedCostUsd } : {}),
|
|
574
584
|
response: activityResponse,
|
|
575
585
|
endTime: Date.now(),
|
|
576
586
|
duration: Date.now() - startTime
|
|
@@ -584,6 +594,11 @@ class AIGateway {
|
|
|
584
594
|
});
|
|
585
595
|
}
|
|
586
596
|
}
|
|
597
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
598
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
599
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
600
|
+
cost: enhancedResponse.metadata.cost
|
|
601
|
+
}, 'invoke');
|
|
587
602
|
this.logger.debug('gateway: enhancedResponse', (0, gateway_log_meta_js_1.withActivityIdentity)(request.identity, {
|
|
588
603
|
latencyMs: enhancedResponse.metadata?.latencyMs,
|
|
589
604
|
contentType: enhancedResponse.metadata?.contentType,
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
13
13
|
exports.UsageTracker = void 0;
|
|
14
|
+
const gateway_utils_js_1 = require("./gateway-utils.cjs");
|
|
14
15
|
/**
|
|
15
16
|
* Manages usage tracking for LLM requests
|
|
16
17
|
*/
|
|
@@ -29,11 +30,7 @@ class UsageTracker {
|
|
|
29
30
|
* @returns Token usage breakdown
|
|
30
31
|
*/
|
|
31
32
|
extractTokens(response) {
|
|
32
|
-
return
|
|
33
|
-
prompt: response.usage?.promptTokens || 0,
|
|
34
|
-
completion: response.usage?.completionTokens || 0,
|
|
35
|
-
total: response.usage?.totalTokens || 0
|
|
36
|
-
};
|
|
33
|
+
return (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(response);
|
|
37
34
|
}
|
|
38
35
|
/**
|
|
39
36
|
* Records usage for a request
|