@x12i/ai-gateway 9.0.8 → 9.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +897 -998
- package/dist/activity-manager.js +46 -6
- package/dist/config/activity-tracking-config.d.ts +2 -1
- package/dist/config/activity-tracking-config.js +3 -2
- package/dist/gateway-memory.d.ts +1 -2
- package/dist/gateway-memory.js +1 -15
- package/dist/gateway-meta.js +3 -0
- package/dist/gateway-utils.d.ts +15 -1
- package/dist/gateway-utils.js +125 -17
- package/dist/gateway-validation.d.ts +3 -3
- package/dist/gateway-validation.js +10 -1
- package/dist/gateway.d.ts +2 -2
- package/dist/gateway.js +73 -22
- package/dist/index.d.ts +2 -2
- package/dist/instruction-optimizer.js +3 -0
- package/dist/runtime-objects.d.ts +2 -13
- package/dist/troubleshooting-helper.d.ts +0 -3
- package/dist/troubleshooting-helper.js +99 -20
- package/dist/types.d.ts +39 -89
- package/dist-cjs/activity-manager.cjs +45 -5
- package/dist-cjs/config/activity-tracking-config.cjs +3 -2
- package/dist-cjs/config/activity-tracking-config.d.ts +2 -1
- package/dist-cjs/gateway-memory.cjs +1 -15
- package/dist-cjs/gateway-memory.d.ts +1 -2
- package/dist-cjs/gateway-meta.cjs +3 -0
- package/dist-cjs/gateway-utils.cjs +128 -17
- package/dist-cjs/gateway-utils.d.ts +15 -1
- package/dist-cjs/gateway-validation.cjs +10 -1
- package/dist-cjs/gateway-validation.d.ts +3 -3
- package/dist-cjs/gateway.cjs +72 -21
- package/dist-cjs/gateway.d.ts +2 -2
- package/dist-cjs/index.d.ts +2 -2
- package/dist-cjs/instruction-optimizer.cjs +3 -0
- package/dist-cjs/runtime-objects.d.ts +2 -13
- package/dist-cjs/troubleshooting-helper.cjs +99 -20
- package/dist-cjs/troubleshooting-helper.d.ts +0 -3
- package/dist-cjs/types.d.ts +39 -89
- package/package.json +2 -2
|
@@ -37,11 +37,14 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
37
37
|
};
|
|
38
38
|
})();
|
|
39
39
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = void 0;
|
|
40
41
|
exports.generateMD5Hash = generateMD5Hash;
|
|
41
42
|
exports.ensureTaskTypeId = ensureTaskTypeId;
|
|
42
43
|
exports.mergeConfig = mergeConfig;
|
|
43
44
|
exports.normalizeRouterUsageTokens = normalizeRouterUsageTokens;
|
|
44
45
|
exports.extractTokenUsageFromRouterResponse = extractTokenUsageFromRouterResponse;
|
|
46
|
+
exports.extractCostUsdFromRouterResponse = extractCostUsdFromRouterResponse;
|
|
47
|
+
exports.capActivityFullResponsePayload = capActivityFullResponsePayload;
|
|
45
48
|
const crypto = __importStar(require("crypto"));
|
|
46
49
|
const gateway_instructions_js_1 = require("./gateway-instructions.cjs");
|
|
47
50
|
const flex_md_loader_js_1 = require("./flex-md-loader.cjs");
|
|
@@ -223,51 +226,159 @@ function firstFiniteNumber(...vals) {
|
|
|
223
226
|
for (const v of vals) {
|
|
224
227
|
if (typeof v === 'number' && Number.isFinite(v))
|
|
225
228
|
return v;
|
|
229
|
+
if (typeof v === 'string' && v.trim() !== '') {
|
|
230
|
+
const n = Number(v);
|
|
231
|
+
if (Number.isFinite(n))
|
|
232
|
+
return n;
|
|
233
|
+
}
|
|
226
234
|
}
|
|
227
235
|
return undefined;
|
|
228
236
|
}
|
|
237
|
+
function isNonZeroTokenCount(n) {
|
|
238
|
+
return !!(n.prompt || n.completion || n.total);
|
|
239
|
+
}
|
|
229
240
|
/**
|
|
230
241
|
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
231
|
-
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
242
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, Responses-style input/output tokens, and missing total (sum prompt+completion).
|
|
232
243
|
*/
|
|
233
244
|
function normalizeRouterUsageTokens(usage) {
|
|
234
245
|
if (usage == null || typeof usage !== 'object')
|
|
235
246
|
return undefined;
|
|
236
247
|
const u = usage;
|
|
237
|
-
const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.prompt, u.prompt_tokens) ?? 0;
|
|
238
|
-
const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.completion, u.completion_tokens) ?? 0;
|
|
239
|
-
let total = firstFiniteNumber(u.totalTokens, u.total_tokens) ?? 0;
|
|
248
|
+
const prompt = firstFiniteNumber(u.promptTokens, u.inputTokens, u.input_tokens, u.prompt, u.prompt_tokens) ?? 0;
|
|
249
|
+
const completion = firstFiniteNumber(u.completionTokens, u.outputTokens, u.output_tokens, u.completion, u.completion_tokens) ?? 0;
|
|
250
|
+
let total = firstFiniteNumber(u.totalTokens, u.total_tokens, u.total) ?? 0;
|
|
240
251
|
if (!total && (prompt || completion))
|
|
241
252
|
total = prompt + completion;
|
|
242
253
|
return { prompt, completion, total };
|
|
243
254
|
}
|
|
255
|
+
/**
|
|
256
|
+
* Collect usage from one router/provider envelope (single object).
|
|
257
|
+
* When followRaw is true, also reads `(rawResponse ?? raw).usage` on that envelope.
|
|
258
|
+
*/
|
|
259
|
+
function collectUsageBucketsFromRoot(root, followRaw) {
|
|
260
|
+
const meta = root.metadata != null && typeof root.metadata === 'object'
|
|
261
|
+
? root.metadata
|
|
262
|
+
: undefined;
|
|
263
|
+
const buckets = [root.usage];
|
|
264
|
+
if (meta) {
|
|
265
|
+
buckets.push(meta.usage);
|
|
266
|
+
buckets.push(meta.tokens);
|
|
267
|
+
const nested = meta['ai-activities-response'];
|
|
268
|
+
if (nested != null && typeof nested === 'object') {
|
|
269
|
+
buckets.push(nested.usage);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
if (followRaw) {
|
|
273
|
+
const raw = root.rawResponse ?? root.raw;
|
|
274
|
+
if (raw != null && typeof raw === 'object') {
|
|
275
|
+
buckets.push(raw.usage);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
return buckets;
|
|
279
|
+
}
|
|
280
|
+
function firstNonZeroUsageFromBuckets(buckets) {
|
|
281
|
+
for (const b of buckets) {
|
|
282
|
+
const n = normalizeRouterUsageTokens(b);
|
|
283
|
+
if (n && isNonZeroTokenCount(n))
|
|
284
|
+
return n;
|
|
285
|
+
}
|
|
286
|
+
return undefined;
|
|
287
|
+
}
|
|
244
288
|
/**
|
|
245
289
|
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
290
|
+
* Prefers the raw/provider body (`rawResponse` / `raw`) when it carries non-zero usage before re-reading the outer envelope.
|
|
246
291
|
*/
|
|
247
292
|
function extractTokenUsageFromRouterResponse(routerResponse) {
|
|
293
|
+
const zeros = { prompt: 0, completion: 0, total: 0 };
|
|
248
294
|
if (routerResponse == null || typeof routerResponse !== 'object') {
|
|
249
|
-
return
|
|
295
|
+
return zeros;
|
|
296
|
+
}
|
|
297
|
+
const r = routerResponse;
|
|
298
|
+
const raw = r.rawResponse ?? r.raw;
|
|
299
|
+
const inner = raw != null && typeof raw === 'object' ? raw : undefined;
|
|
300
|
+
const roots = inner != null && inner !== r
|
|
301
|
+
? [
|
|
302
|
+
{ root: inner, followRaw: false },
|
|
303
|
+
{ root: r, followRaw: true }
|
|
304
|
+
]
|
|
305
|
+
: [{ root: r, followRaw: true }];
|
|
306
|
+
for (const { root, followRaw } of roots) {
|
|
307
|
+
const buckets = collectUsageBucketsFromRoot(root, followRaw);
|
|
308
|
+
const found = firstNonZeroUsageFromBuckets(buckets);
|
|
309
|
+
if (found)
|
|
310
|
+
return found;
|
|
250
311
|
}
|
|
312
|
+
return zeros;
|
|
313
|
+
}
|
|
314
|
+
/**
|
|
315
|
+
* Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd (preferred),
|
|
316
|
+
* metadata.attempts[].costUsd, response root, then common raw payload locations.
|
|
317
|
+
* Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
|
|
318
|
+
*/
|
|
319
|
+
function extractCostUsdFromRouterResponse(routerResponse) {
|
|
320
|
+
if (routerResponse == null || typeof routerResponse !== 'object')
|
|
321
|
+
return undefined;
|
|
251
322
|
const r = routerResponse;
|
|
252
323
|
const meta = r.metadata != null && typeof r.metadata === 'object'
|
|
253
324
|
? r.metadata
|
|
254
325
|
: undefined;
|
|
255
|
-
const
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
326
|
+
const pick = (...vals) => firstFiniteNumber(...vals);
|
|
327
|
+
const fromMeta = pick(meta?.costUsd, meta?.cost);
|
|
328
|
+
if (fromMeta !== undefined)
|
|
329
|
+
return fromMeta;
|
|
330
|
+
const fromRoot = pick(r.costUsd, r.cost);
|
|
331
|
+
if (fromRoot !== undefined)
|
|
332
|
+
return fromRoot;
|
|
333
|
+
const attempts = meta?.attempts;
|
|
334
|
+
if (Array.isArray(attempts)) {
|
|
335
|
+
for (let i = attempts.length - 1; i >= 0; i--) {
|
|
336
|
+
const a = attempts[i];
|
|
337
|
+
if (a != null && typeof a === 'object') {
|
|
338
|
+
const o = a;
|
|
339
|
+
const c = pick(o.costUsd, o.cost);
|
|
340
|
+
if (c !== undefined)
|
|
341
|
+
return c;
|
|
342
|
+
}
|
|
261
343
|
}
|
|
262
344
|
}
|
|
263
345
|
const raw = r.rawResponse ?? r.raw;
|
|
264
346
|
if (raw != null && typeof raw === 'object') {
|
|
265
|
-
|
|
347
|
+
const rawObj = raw;
|
|
348
|
+
const usage = rawObj.usage;
|
|
349
|
+
if (usage != null && typeof usage === 'object') {
|
|
350
|
+
const u = usage;
|
|
351
|
+
const fromUsage = pick(u.cost, u.costUsd, u.total_cost, u.totalCost);
|
|
352
|
+
if (fromUsage !== undefined)
|
|
353
|
+
return fromUsage;
|
|
354
|
+
}
|
|
355
|
+
const fromRawTop = pick(rawObj.cost, rawObj.costUsd);
|
|
356
|
+
if (fromRawTop !== undefined)
|
|
357
|
+
return fromRawTop;
|
|
266
358
|
}
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
359
|
+
return undefined;
|
|
360
|
+
}
|
|
361
|
+
/** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
|
|
362
|
+
exports.DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512_000;
|
|
363
|
+
/**
|
|
364
|
+
* Size-cap a provider/router payload before storing on an activity record.
|
|
365
|
+
* Non-serializable values become a small marker object instead of throwing.
|
|
366
|
+
*/
|
|
367
|
+
function capActivityFullResponsePayload(payload, maxChars = exports.DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS) {
|
|
368
|
+
if (payload == null)
|
|
369
|
+
return payload;
|
|
370
|
+
let serialized;
|
|
371
|
+
try {
|
|
372
|
+
serialized = typeof payload === 'string' ? payload : JSON.stringify(payload);
|
|
271
373
|
}
|
|
272
|
-
|
|
374
|
+
catch {
|
|
375
|
+
return { _truncated: true, _reason: 'not_serializable' };
|
|
376
|
+
}
|
|
377
|
+
if (serialized.length <= maxChars)
|
|
378
|
+
return payload;
|
|
379
|
+
return {
|
|
380
|
+
_truncated: true,
|
|
381
|
+
_originalCharLength: serialized.length,
|
|
382
|
+
_preview: serialized.slice(0, maxChars)
|
|
383
|
+
};
|
|
273
384
|
}
|
|
@@ -21,7 +21,7 @@ export declare function mergeConfig(request: ChatRequest & {
|
|
|
21
21
|
}, config: GatewayConfig, logger: Logxer): Promise<ChatRequest['config']>;
|
|
22
22
|
/**
|
|
23
23
|
* Maps provider/router usage objects to gateway token counts (`metadata.tokens`, Activix, trace attempts).
|
|
24
|
-
* Handles promptTokens/inputTokens, OpenAI-style snake_case, and missing total (sum prompt+completion).
|
|
24
|
+
* Handles promptTokens/inputTokens, OpenAI-style snake_case, Responses-style input/output tokens, and missing total (sum prompt+completion).
|
|
25
25
|
*/
|
|
26
26
|
export declare function normalizeRouterUsageTokens(usage: unknown): {
|
|
27
27
|
prompt: number;
|
|
@@ -30,9 +30,23 @@ export declare function normalizeRouterUsageTokens(usage: unknown): {
|
|
|
30
30
|
} | undefined;
|
|
31
31
|
/**
|
|
32
32
|
* Reads token usage from every stable location the router may populate (see docs/PROVIDERS_ROUTER_DIAGNOSTICS_TRACE_REQUIREMENTS.md).
|
|
33
|
+
* Prefers the raw/provider body (`rawResponse` / `raw`) when it carries non-zero usage before re-reading the outer envelope.
|
|
33
34
|
*/
|
|
34
35
|
export declare function extractTokenUsageFromRouterResponse(routerResponse: unknown): {
|
|
35
36
|
prompt: number;
|
|
36
37
|
completion: number;
|
|
37
38
|
total: number;
|
|
38
39
|
};
|
|
40
|
+
/**
|
|
41
|
+
* Best-effort USD cost from router/sync AIResponse shape: metadata.costUsd (preferred),
|
|
42
|
+
* metadata.attempts[].costUsd, response root, then common raw payload locations.
|
|
43
|
+
* Does not compute cost from tokens — adapters must populate normalized fields or raw usage.cost-style keys.
|
|
44
|
+
*/
|
|
45
|
+
export declare function extractCostUsdFromRouterResponse(routerResponse: unknown): number | undefined;
|
|
46
|
+
/** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
|
|
47
|
+
export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
|
|
48
|
+
/**
|
|
49
|
+
* Size-cap a provider/router payload before storing on an activity record.
|
|
50
|
+
* Non-serializable values become a small marker object instead of throwing.
|
|
51
|
+
*/
|
|
52
|
+
export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
|
|
@@ -36,8 +36,9 @@ function validateChatRequest(request) {
|
|
|
36
36
|
throw err;
|
|
37
37
|
}
|
|
38
38
|
}
|
|
39
|
+
const GATEWAY_ACTION_TYPES = ['skill', 'preSkill', 'postSkill'];
|
|
39
40
|
/**
|
|
40
|
-
* Validates
|
|
41
|
+
* Validates AIInvokeRequest has required fields
|
|
41
42
|
*/
|
|
42
43
|
function validateAIRequest(request) {
|
|
43
44
|
if (!request.aiRequestId) {
|
|
@@ -47,6 +48,14 @@ function validateAIRequest(request) {
|
|
|
47
48
|
throw new Error('agentId is required for AI requests');
|
|
48
49
|
}
|
|
49
50
|
validateMandatoryRuntimeIdentity(request);
|
|
51
|
+
if (!request.actionType ||
|
|
52
|
+
!GATEWAY_ACTION_TYPES.includes(request.actionType)) {
|
|
53
|
+
throw new Error(`actionType is required and must be one of: ${GATEWAY_ACTION_TYPES.join(', ')}`);
|
|
54
|
+
}
|
|
55
|
+
const ref = typeof request.actionRef === 'string' ? request.actionRef.trim() : '';
|
|
56
|
+
if (!ref) {
|
|
57
|
+
throw new Error('actionRef is required and must be a non-empty string');
|
|
58
|
+
}
|
|
50
59
|
// Reject input field - it has been removed
|
|
51
60
|
if ('input' in request && request.input !== undefined) {
|
|
52
61
|
const err = new Error(`The 'input' field has been removed. Use workingMemory.input instead for template rendering. Prompt templates should contain {{input}} which will be resolved from workingMemory.input.`);
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
* Gateway Validation Module
|
|
3
3
|
* Basic validation for clean proxy implementation
|
|
4
4
|
*/
|
|
5
|
-
import type { ChatRequest,
|
|
5
|
+
import type { ChatRequest, AIInvokeRequest } from './types.js';
|
|
6
6
|
/**
|
|
7
7
|
* Validates ChatRequest has required fields
|
|
8
8
|
*/
|
|
9
9
|
export declare function validateChatRequest(request: ChatRequest): void;
|
|
10
10
|
/**
|
|
11
|
-
* Validates
|
|
11
|
+
* Validates AIInvokeRequest has required fields
|
|
12
12
|
*/
|
|
13
|
-
export declare function validateAIRequest(request:
|
|
13
|
+
export declare function validateAIRequest(request: AIInvokeRequest): void;
|
package/dist-cjs/gateway.cjs
CHANGED
|
@@ -19,6 +19,25 @@ const gateway_retry_js_1 = require("./gateway-retry.cjs");
|
|
|
19
19
|
/** Error message thrown by the router when no provider is registered or specified */
|
|
20
20
|
const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
|
|
21
21
|
const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
|
|
22
|
+
/** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
|
|
23
|
+
function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
|
|
24
|
+
const { tokens, costUsd, cost } = meta;
|
|
25
|
+
const zeroTokens = tokens.prompt === 0 && tokens.completion === 0 && tokens.total === 0;
|
|
26
|
+
const zeroCostUsd = typeof costUsd === 'number' && costUsd === 0;
|
|
27
|
+
const zeroCost = typeof cost === 'number' && cost === 0;
|
|
28
|
+
if (!zeroTokens && !zeroCostUsd && !zeroCost)
|
|
29
|
+
return;
|
|
30
|
+
logger.warn('Successful provider response reported zero token usage and/or zero cost; verify router adapter usage and billing metadata', (0, gateway_log_meta_js_1.withActivityIdentity)(identity, {
|
|
31
|
+
invokeKind,
|
|
32
|
+
zeroTokens,
|
|
33
|
+
zeroCostUsd,
|
|
34
|
+
zeroCostField: zeroCost,
|
|
35
|
+
tokens,
|
|
36
|
+
costUsd,
|
|
37
|
+
cost,
|
|
38
|
+
debugKind: gateway_log_meta_js_1.gatewayLogDebug.anomaly
|
|
39
|
+
}));
|
|
40
|
+
}
|
|
22
41
|
/**
|
|
23
42
|
* Simplified AI Gateway - Clean proxy implementation
|
|
24
43
|
*/
|
|
@@ -90,6 +109,8 @@ class AIGateway {
|
|
|
90
109
|
},
|
|
91
110
|
mode: 'sync'
|
|
92
111
|
});
|
|
112
|
+
const costUsdChat = (0, gateway_utils_js_1.extractCostUsdFromRouterResponse)(response);
|
|
113
|
+
const metaChat = response?.metadata || {};
|
|
93
114
|
// Create enhanced response
|
|
94
115
|
const enhancedResponse = {
|
|
95
116
|
content: response.content || '',
|
|
@@ -99,13 +120,20 @@ class AIGateway {
|
|
|
99
120
|
latencyMs: Date.now() - startTime,
|
|
100
121
|
tokens: (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(response),
|
|
101
122
|
taskTypeId,
|
|
102
|
-
agentType: 'chat'
|
|
123
|
+
agentType: 'chat',
|
|
124
|
+
...(typeof costUsdChat === 'number'
|
|
125
|
+
? {
|
|
126
|
+
costUsd: costUsdChat,
|
|
127
|
+
...(typeof metaChat.cost === 'number' ? { cost: metaChat.cost } : { cost: costUsdChat })
|
|
128
|
+
}
|
|
129
|
+
: {})
|
|
103
130
|
}
|
|
104
131
|
};
|
|
105
132
|
// Track activity success if activity was started
|
|
106
133
|
if (activity) {
|
|
107
134
|
try {
|
|
108
135
|
await this.activityManager.logSuccess(activity, {
|
|
136
|
+
...(typeof costUsdChat === 'number' ? { cost: costUsdChat } : {}),
|
|
109
137
|
response: enhancedResponse,
|
|
110
138
|
endTime: Date.now(),
|
|
111
139
|
duration: Date.now() - startTime
|
|
@@ -119,6 +147,11 @@ class AIGateway {
|
|
|
119
147
|
});
|
|
120
148
|
}
|
|
121
149
|
}
|
|
150
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
151
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
152
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
153
|
+
cost: enhancedResponse.metadata.cost
|
|
154
|
+
}, 'invokeChat');
|
|
122
155
|
return enhancedResponse;
|
|
123
156
|
}
|
|
124
157
|
catch (error) {
|
|
@@ -372,17 +405,9 @@ class AIGateway {
|
|
|
372
405
|
a.routing.requestIds = requestIds;
|
|
373
406
|
a.modelUsed =
|
|
374
407
|
meta?.modelUsed || meta?.model || respAny.model || candidate.model;
|
|
375
|
-
const
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
? meta.cost
|
|
379
|
-
: typeof respAny?.costUsd === 'number'
|
|
380
|
-
? respAny.costUsd
|
|
381
|
-
: typeof respAny?.cost === 'number'
|
|
382
|
-
? respAny.cost
|
|
383
|
-
: undefined;
|
|
384
|
-
if (typeof costUsd === 'number')
|
|
385
|
-
a.costUsd = costUsd;
|
|
408
|
+
const attemptCostUsd = (0, gateway_utils_js_1.extractCostUsdFromRouterResponse)(respAny);
|
|
409
|
+
if (typeof attemptCostUsd === 'number')
|
|
410
|
+
a.costUsd = attemptCostUsd;
|
|
386
411
|
if (includeRawProviderPayload) {
|
|
387
412
|
// Size-capped preview only.
|
|
388
413
|
const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
|
|
@@ -491,7 +516,17 @@ class AIGateway {
|
|
|
491
516
|
}
|
|
492
517
|
contentType = 'structured';
|
|
493
518
|
parsingMethod = 'flex-md';
|
|
494
|
-
|
|
519
|
+
let tokens = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(routerResponse);
|
|
520
|
+
if (!(tokens.prompt || tokens.completion || tokens.total)) {
|
|
521
|
+
const alt = routerResponse?.rawResponse ?? routerResponse?.raw;
|
|
522
|
+
if (alt != null && typeof alt === 'object' && alt !== routerResponse) {
|
|
523
|
+
const second = (0, gateway_utils_js_1.extractTokenUsageFromRouterResponse)(alt);
|
|
524
|
+
if (second.prompt || second.completion || second.total)
|
|
525
|
+
tokens = second;
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
const resolvedCostUsd = (0, gateway_utils_js_1.extractCostUsdFromRouterResponse)(routerResponse);
|
|
529
|
+
const routerMetaForCost = routerResponse?.metadata || {};
|
|
495
530
|
const enhancedResponse = {
|
|
496
531
|
content: content,
|
|
497
532
|
parsedContent: parsedContent,
|
|
@@ -504,6 +539,14 @@ class AIGateway {
|
|
|
504
539
|
agentType: 'ai',
|
|
505
540
|
contentType,
|
|
506
541
|
parsingMethod,
|
|
542
|
+
...(typeof resolvedCostUsd === 'number'
|
|
543
|
+
? {
|
|
544
|
+
costUsd: resolvedCostUsd,
|
|
545
|
+
...(typeof routerMetaForCost.cost === 'number'
|
|
546
|
+
? { cost: routerMetaForCost.cost }
|
|
547
|
+
: { cost: resolvedCostUsd })
|
|
548
|
+
}
|
|
549
|
+
: {}),
|
|
507
550
|
...(traceEnabled
|
|
508
551
|
? (() => {
|
|
509
552
|
const meta = routerResponse?.metadata || {};
|
|
@@ -515,18 +558,11 @@ class AIGateway {
|
|
|
515
558
|
: typeof mergedConfig?.maxTokens === 'number'
|
|
516
559
|
? mergedConfig.maxTokens
|
|
517
560
|
: undefined;
|
|
518
|
-
const costUsd = typeof meta.costUsd === 'number'
|
|
519
|
-
? meta.costUsd
|
|
520
|
-
: typeof meta.cost === 'number'
|
|
521
|
-
? meta.cost
|
|
522
|
-
: undefined;
|
|
523
561
|
return {
|
|
524
562
|
provider,
|
|
525
563
|
region,
|
|
526
564
|
modelUsed,
|
|
527
565
|
maxTokensRequested,
|
|
528
|
-
cost: typeof meta.cost === 'number' ? meta.cost : undefined,
|
|
529
|
-
costUsd,
|
|
530
566
|
requestIds: traceRequestIds,
|
|
531
567
|
retryCount: traceRetryCount,
|
|
532
568
|
fallbackCount: traceFallbackCount,
|
|
@@ -539,11 +575,20 @@ class AIGateway {
|
|
|
539
575
|
// Track activity success if activity was started
|
|
540
576
|
if (activity) {
|
|
541
577
|
try {
|
|
578
|
+
const diag = request.diagnostics;
|
|
579
|
+
const includeFullProviderBlob = diag?.includeFullProviderResponseInActivity !== false;
|
|
580
|
+
const maxFullChars = typeof diag?.activityFullResponseMaxChars === 'number' && diag.activityFullResponseMaxChars > 0
|
|
581
|
+
? diag.activityFullResponseMaxChars
|
|
582
|
+
: gateway_utils_js_1.DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS;
|
|
583
|
+
const rawFull = routerResponse.rawResponse || routerResponse;
|
|
584
|
+
const fullResponseForActivity = includeFullProviderBlob
|
|
585
|
+
? (0, gateway_utils_js_1.capActivityFullResponsePayload)(rawFull, maxFullChars)
|
|
586
|
+
: undefined;
|
|
542
587
|
// Create activity response with proper structure for ActivityTracker
|
|
543
588
|
const activityResponse = {
|
|
544
589
|
content: {
|
|
545
590
|
rawContent: content, // Store the actual response content as rawContent
|
|
546
|
-
|
|
591
|
+
...(fullResponseForActivity !== undefined ? { fullResponse: fullResponseForActivity } : {})
|
|
547
592
|
},
|
|
548
593
|
parsed: parsedContent, // Include parsed content in activity record
|
|
549
594
|
metadata: enhancedResponse.metadata,
|
|
@@ -552,6 +597,7 @@ class AIGateway {
|
|
|
552
597
|
usage: tokens
|
|
553
598
|
};
|
|
554
599
|
await this.activityManager.logSuccess(activity, {
|
|
600
|
+
...(typeof resolvedCostUsd === 'number' ? { cost: resolvedCostUsd } : {}),
|
|
555
601
|
response: activityResponse,
|
|
556
602
|
endTime: Date.now(),
|
|
557
603
|
duration: Date.now() - startTime
|
|
@@ -565,6 +611,11 @@ class AIGateway {
|
|
|
565
611
|
});
|
|
566
612
|
}
|
|
567
613
|
}
|
|
614
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
615
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
616
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
617
|
+
cost: enhancedResponse.metadata.cost
|
|
618
|
+
}, 'invoke');
|
|
568
619
|
this.logger.debug('gateway: enhancedResponse', (0, gateway_log_meta_js_1.withActivityIdentity)(request.identity, {
|
|
569
620
|
latencyMs: enhancedResponse.metadata?.latencyMs,
|
|
570
621
|
contentType: enhancedResponse.metadata?.contentType,
|
package/dist-cjs/gateway.d.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Simplified AI Gateway - Clean proxy implementation
|
|
5
5
|
*/
|
|
6
6
|
import { LLMProviderRouter } from '@x12i/ai-providers-router';
|
|
7
|
-
import type { GatewayConfig, ChatRequest,
|
|
7
|
+
import type { GatewayConfig, ChatRequest, AIInvokeRequest, EnhancedLLMResponse } from './types.js';
|
|
8
8
|
import type { Logxer } from '@x12i/logxer';
|
|
9
9
|
import { ActivityManager } from './activity-manager.js';
|
|
10
10
|
/**
|
|
@@ -25,7 +25,7 @@ export declare class AIGateway {
|
|
|
25
25
|
/**
|
|
26
26
|
* Invoke AI request (with structured output support)
|
|
27
27
|
*/
|
|
28
|
-
invoke<TContent = unknown>(request:
|
|
28
|
+
invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
|
|
29
29
|
/**
|
|
30
30
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
31
31
|
*/
|
package/dist-cjs/index.d.ts
CHANGED
|
@@ -16,11 +16,11 @@ export * from '@x12i/ai-providers-router';
|
|
|
16
16
|
export { AIGateway } from './gateway.js';
|
|
17
17
|
export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
|
|
18
18
|
export { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
19
|
-
export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIRequest, EnhancedLLMResponse, InstructionMetadata, ValidationRule,
|
|
19
|
+
export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions } from './types.js';
|
|
20
20
|
export { mergeTemplateRenderOptions } from './template-render-merge.js';
|
|
21
21
|
export type { UsageTier } from './types.js';
|
|
22
22
|
export { Activix } from '@x12i/activix';
|
|
23
|
-
export type { ActivixRunContext, FindByRunContextCriteria } from '@x12i/activix';
|
|
23
|
+
export type { ActivixRunContext, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
|
|
24
24
|
export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
25
25
|
export type { ActivityIdentity } from './types.js';
|
|
26
26
|
export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
|
|
@@ -145,8 +145,11 @@ async function optimizeInstructions(gateway, originalInstructions, options) {
|
|
|
145
145
|
const optimizationRequest = {
|
|
146
146
|
aiRequestId,
|
|
147
147
|
agentId,
|
|
148
|
+
actionType: 'skill',
|
|
149
|
+
actionRef: 'internal/instruction-optimizer',
|
|
148
150
|
instructions: INSTRUCTION_OPTIMIZER_INSTRUCTIONS + additionalContext,
|
|
149
151
|
identity,
|
|
152
|
+
prompt: '{{input}}',
|
|
150
153
|
workingMemory: { input: originalInstructions },
|
|
151
154
|
config: {
|
|
152
155
|
model,
|
|
@@ -1,17 +1,6 @@
|
|
|
1
1
|
import type { Logxer } from '@x12i/logxer';
|
|
2
|
-
import type { Activix } from '@x12i/activix';
|
|
3
|
-
export type ActivixQueryableClient
|
|
4
|
-
getJobActivities(input: {
|
|
5
|
-
jobId: string;
|
|
6
|
-
graphId?: string;
|
|
7
|
-
nodeId?: string;
|
|
8
|
-
limit?: number;
|
|
9
|
-
}): Promise<{
|
|
10
|
-
jobId: string;
|
|
11
|
-
graphRun?: unknown;
|
|
12
|
-
activities: unknown[];
|
|
13
|
-
}>;
|
|
14
|
-
};
|
|
2
|
+
import type { Activix, ActivixQueryableClient } from '@x12i/activix';
|
|
3
|
+
export type { ActivixQueryableClient } from '@x12i/activix';
|
|
15
4
|
export type LogxerQueryableClient = {
|
|
16
5
|
getJobLogs(input: {
|
|
17
6
|
jobId: string;
|