@x12i/ai-gateway 9.3.5 → 9.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +151 -4147
- package/dist/activity-manager.d.ts +9 -1
- package/dist/activity-manager.js +85 -81
- package/dist/ai-tools-client.js +4 -12
- package/dist/gateway-config.d.ts +3 -0
- package/dist/gateway-config.js +19 -1
- package/dist/gateway-utils.d.ts +34 -2
- package/dist/gateway-utils.js +204 -35
- package/dist/gateway.d.ts +2 -0
- package/dist/gateway.js +69 -2
- package/dist/index.d.ts +5 -3
- package/dist/index.js +4 -19
- package/dist/optimixer-manager.d.ts +33 -0
- package/dist/optimixer-manager.js +128 -0
- package/dist/token-estimate.d.ts +12 -0
- package/dist/token-estimate.js +30 -0
- package/dist/types.d.ts +50 -2
- package/dist-cjs/activity-manager.cjs +85 -81
- package/dist-cjs/activity-manager.d.ts +9 -1
- package/dist-cjs/ai-tools-client.cjs +4 -12
- package/dist-cjs/gateway-config.cjs +19 -1
- package/dist-cjs/gateway-config.d.ts +3 -0
- package/dist-cjs/gateway-utils.cjs +204 -35
- package/dist-cjs/gateway-utils.d.ts +34 -2
- package/dist-cjs/gateway.cjs +69 -2
- package/dist-cjs/gateway.d.ts +2 -0
- package/dist-cjs/index.cjs +4 -19
- package/dist-cjs/index.d.ts +5 -3
- package/dist-cjs/optimixer-manager.cjs +128 -0
- package/dist-cjs/optimixer-manager.d.ts +33 -0
- package/dist-cjs/token-estimate.cjs +30 -0
- package/dist-cjs/token-estimate.d.ts +12 -0
- package/dist-cjs/types.d.ts +50 -2
- package/package.json +10 -36
package/dist/gateway-utils.js
CHANGED
|
@@ -55,6 +55,21 @@ async function substituteGatewayDefaultModel(merged, request, config, logger, me
|
|
|
55
55
|
});
|
|
56
56
|
applyGatewayDefaultToMerged(merged, defaults, config);
|
|
57
57
|
}
|
|
58
|
+
/**
|
|
59
|
+
* True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
|
|
60
|
+
*/
|
|
61
|
+
export function isMaxTokensExplicitlySet(request, config) {
|
|
62
|
+
const useInternalDefaults = request.useInternalDefaults;
|
|
63
|
+
const internalDefaults = useInternalDefaults === 'skill'
|
|
64
|
+
? config.internalSystemActions?.internalSkill
|
|
65
|
+
: useInternalDefaults === 'audit'
|
|
66
|
+
? config.internalSystemActions?.skillAudit
|
|
67
|
+
: undefined;
|
|
68
|
+
return (request.config?.maxTokens !== undefined ||
|
|
69
|
+
request.modelConfig?.maxTokens !== undefined ||
|
|
70
|
+
internalDefaults?.maxTokens !== undefined ||
|
|
71
|
+
config.maxTokens !== undefined);
|
|
72
|
+
}
|
|
58
73
|
/**
|
|
59
74
|
* Merges config with defaults
|
|
60
75
|
* Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
|
|
@@ -175,12 +190,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
175
190
|
if (!merged.model) {
|
|
176
191
|
await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
|
|
177
192
|
}
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
config.maxTokens !== undefined;
|
|
183
|
-
if (!maxTokensExplicitlySet && merged.model && merged.provider) {
|
|
193
|
+
const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
|
|
194
|
+
const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
|
|
195
|
+
// Auto-get maxTokens from flex-md when Optimixer is not handling adaptive max_tokens.
|
|
196
|
+
if (!optimixerWillPredict && !maxTokensExplicitlySet && merged.model && merged.provider) {
|
|
184
197
|
// Try to get maxTokens from flex-md
|
|
185
198
|
try {
|
|
186
199
|
const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(merged.provider, merged.model);
|
|
@@ -219,7 +232,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
219
232
|
});
|
|
220
233
|
}
|
|
221
234
|
}
|
|
222
|
-
else if (!merged.maxTokens) {
|
|
235
|
+
else if (!merged.maxTokens && !optimixerWillPredict) {
|
|
223
236
|
// If maxTokens wasn't set and wasn't auto-detected, use fallback
|
|
224
237
|
// This should rarely happen, but handle edge cases
|
|
225
238
|
merged.maxTokens = 2000;
|
|
@@ -228,7 +241,15 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
228
241
|
model: merged.model,
|
|
229
242
|
provider: merged.provider,
|
|
230
243
|
maxTokens: merged.maxTokens,
|
|
231
|
-
maxTokensExplicitlySet
|
|
244
|
+
maxTokensExplicitlySet,
|
|
245
|
+
optimixerWillPredict
|
|
246
|
+
});
|
|
247
|
+
}
|
|
248
|
+
else if (optimixerWillPredict) {
|
|
249
|
+
logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
|
|
250
|
+
jobId: request.identity.jobId,
|
|
251
|
+
model: merged.model,
|
|
252
|
+
provider: merged.provider
|
|
232
253
|
});
|
|
233
254
|
}
|
|
234
255
|
else {
|
|
@@ -434,6 +455,50 @@ export function resolveCostCompletionForActivity(routerResponse, tokens) {
|
|
|
434
455
|
}
|
|
435
456
|
return resolveActivityCostCompletion(tokens, costUsd);
|
|
436
457
|
}
|
|
458
|
+
/** Record shape for {@link CostCalculator.calculateFromRecord} (router + merged config + usage). */
|
|
459
|
+
export function buildGatewayPricingRecord(routerResponse, tokens, mergedConfig) {
|
|
460
|
+
const base = routerResponse != null && typeof routerResponse === 'object'
|
|
461
|
+
? { ...routerResponse }
|
|
462
|
+
: {};
|
|
463
|
+
const meta = base.metadata != null && typeof base.metadata === 'object'
|
|
464
|
+
? { ...base.metadata }
|
|
465
|
+
: {};
|
|
466
|
+
const routing = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
|
|
467
|
+
return {
|
|
468
|
+
...base,
|
|
469
|
+
usage: {
|
|
470
|
+
promptTokens: tokens.prompt,
|
|
471
|
+
completionTokens: tokens.completion,
|
|
472
|
+
totalTokens: tokens.total
|
|
473
|
+
},
|
|
474
|
+
tokens,
|
|
475
|
+
metadata: {
|
|
476
|
+
...meta,
|
|
477
|
+
tokens,
|
|
478
|
+
...(routing.provider ? { provider: routing.provider } : {}),
|
|
479
|
+
...(routing.modelUsed
|
|
480
|
+
? { modelUsed: routing.modelUsed, model: routing.modelUsed }
|
|
481
|
+
: {})
|
|
482
|
+
},
|
|
483
|
+
...(mergedConfig != null ? { config: mergedConfig } : {})
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
export function mapAiCostResultToResolvedActivityCost(base, result) {
|
|
487
|
+
if (result.unknownModel) {
|
|
488
|
+
return base.costStatus ? base : { ...base, costStatus: 'unpriced' };
|
|
489
|
+
}
|
|
490
|
+
if (typeof result.cost !== 'number' || !Number.isFinite(result.cost)) {
|
|
491
|
+
return base;
|
|
492
|
+
}
|
|
493
|
+
if (!result.isAuthoritative && result.source === 'estimate-fallback') {
|
|
494
|
+
return base.costStatus ? base : { ...base, costStatus: 'unpriced' };
|
|
495
|
+
}
|
|
496
|
+
return {
|
|
497
|
+
cost: result.cost,
|
|
498
|
+
costStatus: 'priced',
|
|
499
|
+
...(result.breakdown ? { costBreakdown: result.breakdown } : {})
|
|
500
|
+
};
|
|
501
|
+
}
|
|
437
502
|
/**
|
|
438
503
|
* Router cost passthrough, then optional @x12i/ai-tools catalog pricing when still unpriced.
|
|
439
504
|
*/
|
|
@@ -452,37 +517,114 @@ export async function resolveCostCompletionWithAiTools(routerResponse, tokens, o
|
|
|
452
517
|
if (!hasNonZeroTokenUsage(tokens)) {
|
|
453
518
|
return base;
|
|
454
519
|
}
|
|
455
|
-
const routing = pickInvokeRoutingMetadataSlice(routerResponse, options.mergedConfig);
|
|
456
|
-
const cfg = options.mergedConfig != null && typeof options.mergedConfig === 'object'
|
|
457
|
-
? options.mergedConfig
|
|
458
|
-
: {};
|
|
459
|
-
const provider = routing.provider ?? cfg.provider;
|
|
460
|
-
const modelUsed = routing.modelUsed ?? cfg.model;
|
|
461
|
-
if (!provider || !modelUsed) {
|
|
462
|
-
return base;
|
|
463
|
-
}
|
|
464
520
|
try {
|
|
465
|
-
const
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
completion: tokens.completion,
|
|
469
|
-
total: tokens.total
|
|
470
|
-
},
|
|
471
|
-
provider,
|
|
472
|
-
modelUsed
|
|
473
|
-
});
|
|
474
|
-
if (typeof result.cost === 'number' && Number.isFinite(result.cost)) {
|
|
475
|
-
return {
|
|
476
|
-
cost: result.cost,
|
|
477
|
-
costStatus: 'priced',
|
|
478
|
-
...(result.breakdown ? { costBreakdown: result.breakdown } : {})
|
|
479
|
-
};
|
|
480
|
-
}
|
|
521
|
+
const record = buildGatewayPricingRecord(routerResponse, tokens, options.mergedConfig);
|
|
522
|
+
const result = await options.calculator.calculateFromRecord(record);
|
|
523
|
+
return mapAiCostResultToResolvedActivityCost(base, result);
|
|
481
524
|
}
|
|
482
525
|
catch {
|
|
483
|
-
|
|
526
|
+
const routing = pickInvokeRoutingMetadataSlice(routerResponse, options.mergedConfig);
|
|
527
|
+
const cfg = options.mergedConfig != null && typeof options.mergedConfig === 'object'
|
|
528
|
+
? options.mergedConfig
|
|
529
|
+
: {};
|
|
530
|
+
const provider = routing.provider ?? cfg.provider;
|
|
531
|
+
const modelUsed = routing.modelUsed ?? cfg.model;
|
|
532
|
+
if (!provider || !modelUsed) {
|
|
533
|
+
return base;
|
|
534
|
+
}
|
|
535
|
+
try {
|
|
536
|
+
const result = await options.calculator.calculate({
|
|
537
|
+
tokens: {
|
|
538
|
+
prompt: tokens.prompt,
|
|
539
|
+
completion: tokens.completion,
|
|
540
|
+
total: tokens.total
|
|
541
|
+
},
|
|
542
|
+
provider,
|
|
543
|
+
usedModel: modelUsed
|
|
544
|
+
});
|
|
545
|
+
return mapAiCostResultToResolvedActivityCost(base, result);
|
|
546
|
+
}
|
|
547
|
+
catch {
|
|
548
|
+
return base;
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
function applyBillingToTraceAttempt(attempt, billing) {
|
|
553
|
+
if (billing.costStatus === 'priced' || billing.costStatus === 'unpriced') {
|
|
554
|
+
attempt.costStatus = billing.costStatus;
|
|
555
|
+
}
|
|
556
|
+
if (typeof billing.cost === 'number' && Number.isFinite(billing.cost)) {
|
|
557
|
+
attempt.costUsd = billing.cost;
|
|
558
|
+
}
|
|
559
|
+
if (billing.costBreakdown) {
|
|
560
|
+
attempt.costBreakdown = billing.costBreakdown;
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
function buildTraceAttemptPricingRecord(attempt, mergedConfig) {
|
|
564
|
+
const tokens = attempt.usage?.tokens ?? { prompt: 0, completion: 0, total: 0 };
|
|
565
|
+
return buildGatewayPricingRecord({
|
|
566
|
+
metadata: {
|
|
567
|
+
provider: attempt.routing.provider,
|
|
568
|
+
modelUsed: attempt.modelUsed,
|
|
569
|
+
region: attempt.routing.region,
|
|
570
|
+
tokens
|
|
571
|
+
}
|
|
572
|
+
}, tokens, mergedConfig);
|
|
573
|
+
}
|
|
574
|
+
/**
|
|
575
|
+
* Trace-mode summary: final token usage + resolved billing (after catalog pricing when applicable).
|
|
576
|
+
*/
|
|
577
|
+
export function buildTraceUsageSummary(tokens, billing, maxTokensRequested) {
|
|
578
|
+
if (!hasNonZeroTokenUsage(tokens) && !billing.costStatus) {
|
|
579
|
+
return undefined;
|
|
580
|
+
}
|
|
581
|
+
const summary = { tokens };
|
|
582
|
+
if (maxTokensRequested !== undefined) {
|
|
583
|
+
summary.maxTokensRequested = maxTokensRequested;
|
|
584
|
+
}
|
|
585
|
+
if (billing.costStatus === 'priced' && typeof billing.cost === 'number') {
|
|
586
|
+
summary.costUsd = billing.cost;
|
|
587
|
+
summary.cost = billing.cost;
|
|
588
|
+
}
|
|
589
|
+
if (billing.costStatus) {
|
|
590
|
+
summary.costStatus = billing.costStatus;
|
|
591
|
+
}
|
|
592
|
+
if (billing.costBreakdown) {
|
|
593
|
+
summary.costBreakdown = billing.costBreakdown;
|
|
484
594
|
}
|
|
485
|
-
return
|
|
595
|
+
return summary;
|
|
596
|
+
}
|
|
597
|
+
/**
|
|
598
|
+
* Apply resolved billing to trace attempts: final successful attempt gets aggregate billing;
|
|
599
|
+
* other successful attempts without router cost get per-attempt catalog pricing when enabled.
|
|
600
|
+
*/
|
|
601
|
+
export async function enrichTraceAttemptsWithBilling(attempts, finalBilling, options) {
|
|
602
|
+
if (!attempts.length)
|
|
603
|
+
return;
|
|
604
|
+
let lastOkIdx = -1;
|
|
605
|
+
for (let i = attempts.length - 1; i >= 0; i--) {
|
|
606
|
+
if (attempts[i].ok) {
|
|
607
|
+
lastOkIdx = i;
|
|
608
|
+
break;
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
if (lastOkIdx >= 0) {
|
|
612
|
+
applyBillingToTraceAttempt(attempts[lastOkIdx], finalBilling);
|
|
613
|
+
}
|
|
614
|
+
if (options?.calculateCost === false || !options?.calculator) {
|
|
615
|
+
return;
|
|
616
|
+
}
|
|
617
|
+
await Promise.all(attempts.map(async (attempt, idx) => {
|
|
618
|
+
if (!attempt.ok || idx === lastOkIdx)
|
|
619
|
+
return;
|
|
620
|
+
const tokens = attempt.usage?.tokens;
|
|
621
|
+
if (!tokens || !hasNonZeroTokenUsage(tokens))
|
|
622
|
+
return;
|
|
623
|
+
if (attempt.costStatus === 'priced' && typeof attempt.costUsd === 'number')
|
|
624
|
+
return;
|
|
625
|
+
const slice = await resolveCostCompletionWithAiTools(buildTraceAttemptPricingRecord(attempt, options.mergedConfig), tokens, options);
|
|
626
|
+
applyBillingToTraceAttempt(attempt, slice);
|
|
627
|
+
}));
|
|
486
628
|
}
|
|
487
629
|
/**
|
|
488
630
|
* Stable routing facts for gateway response metadata (router metadata + merged config fallbacks).
|
|
@@ -693,3 +835,30 @@ export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIV
|
|
|
693
835
|
_preview: serialized.slice(0, maxChars)
|
|
694
836
|
};
|
|
695
837
|
}
|
|
838
|
+
export function resolveFinishReasonFromRouterResponse(response) {
|
|
839
|
+
if (response == null || typeof response !== 'object')
|
|
840
|
+
return undefined;
|
|
841
|
+
const r = response;
|
|
842
|
+
const meta = r.metadata != null && typeof r.metadata === 'object' ? r.metadata : undefined;
|
|
843
|
+
const candidates = [
|
|
844
|
+
r.finishReason,
|
|
845
|
+
r.finish_reason,
|
|
846
|
+
meta?.finishReason,
|
|
847
|
+
meta?.finish_reason
|
|
848
|
+
];
|
|
849
|
+
for (const c of candidates) {
|
|
850
|
+
if (typeof c === 'string' && c.trim())
|
|
851
|
+
return c.trim();
|
|
852
|
+
}
|
|
853
|
+
return undefined;
|
|
854
|
+
}
|
|
855
|
+
export function buildOptimixerActualUsage(tokens, response, latencyMs) {
|
|
856
|
+
const finishReason = resolveFinishReasonFromRouterResponse(response);
|
|
857
|
+
return {
|
|
858
|
+
promptTokens: tokens.prompt,
|
|
859
|
+
completionTokens: tokens.completion,
|
|
860
|
+
totalTokens: tokens.total,
|
|
861
|
+
...(finishReason ? { finishReason } : {}),
|
|
862
|
+
latencyMs
|
|
863
|
+
};
|
|
864
|
+
}
|
package/dist/gateway.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare class AIGateway {
|
|
|
15
15
|
private config;
|
|
16
16
|
private logger;
|
|
17
17
|
private activityManager?;
|
|
18
|
+
private optimixerManager?;
|
|
18
19
|
private messageBuilderConfig?;
|
|
19
20
|
private defaultModelConfig;
|
|
20
21
|
private _autoRegisterDone;
|
|
@@ -28,6 +29,7 @@ export declare class AIGateway {
|
|
|
28
29
|
* Invoke AI request (with structured output support)
|
|
29
30
|
*/
|
|
30
31
|
invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
|
|
32
|
+
private applyAdaptiveMaxTokensIfEnabled;
|
|
31
33
|
/**
|
|
32
34
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
33
35
|
*/
|
package/dist/gateway.js
CHANGED
|
@@ -7,9 +7,9 @@ import { validateChatRequest, validateAIRequest } from './gateway-validation.js'
|
|
|
7
7
|
import { ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
8
8
|
import { initializeGatewayComponents } from './gateway-config.js';
|
|
9
9
|
import { buildMessages } from './message-builder.js';
|
|
10
|
-
import { extractJsonFromFlexMd } from './flex-md-loader.js';
|
|
10
|
+
import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
|
|
11
11
|
import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
|
|
12
|
-
import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
|
|
12
|
+
import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
|
|
13
13
|
import { getAiToolsClient } from './ai-tools-client.js';
|
|
14
14
|
import { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
15
15
|
import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
|
|
@@ -45,6 +45,7 @@ export class AIGateway {
|
|
|
45
45
|
config;
|
|
46
46
|
logger;
|
|
47
47
|
activityManager;
|
|
48
|
+
optimixerManager;
|
|
48
49
|
messageBuilderConfig;
|
|
49
50
|
defaultModelConfig = {};
|
|
50
51
|
_autoRegisterDone = false;
|
|
@@ -56,6 +57,7 @@ export class AIGateway {
|
|
|
56
57
|
this.logger = components.logger;
|
|
57
58
|
this.router = components.router;
|
|
58
59
|
this.activityManager = components.activityManager;
|
|
60
|
+
this.optimixerManager = components.optimixerManager;
|
|
59
61
|
this.messageBuilderConfig = components.messageBuilderConfig;
|
|
60
62
|
this.defaultModelConfig = components.defaultModelConfig ?? {};
|
|
61
63
|
setGatewayRuntimeClients({
|
|
@@ -93,6 +95,7 @@ export class AIGateway {
|
|
|
93
95
|
await autoRegisterProviders(this.router, this.logger);
|
|
94
96
|
this._autoRegisterDone = true;
|
|
95
97
|
}
|
|
98
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
96
99
|
// Start activity tracking if available
|
|
97
100
|
let activity = undefined;
|
|
98
101
|
if (this.activityManager) {
|
|
@@ -166,6 +169,9 @@ export class AIGateway {
|
|
|
166
169
|
});
|
|
167
170
|
}
|
|
168
171
|
}
|
|
172
|
+
if (optimixerPrediction) {
|
|
173
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
|
|
174
|
+
}
|
|
169
175
|
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
170
176
|
tokens: enhancedResponse.metadata.tokens,
|
|
171
177
|
costUsd: enhancedResponse.metadata.costUsd,
|
|
@@ -279,6 +285,7 @@ export class AIGateway {
|
|
|
279
285
|
await autoRegisterProviders(this.router, this.logger);
|
|
280
286
|
this._autoRegisterDone = true;
|
|
281
287
|
}
|
|
288
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
282
289
|
// Start activity tracking if available
|
|
283
290
|
let activity = undefined;
|
|
284
291
|
if (this.activityManager) {
|
|
@@ -567,6 +574,16 @@ export class AIGateway {
|
|
|
567
574
|
const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
|
|
568
575
|
const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
|
|
569
576
|
const traceMergedRouterSnapshot = traceEnabled ? pickTraceMergedRouterConfig(mergedConfig) : undefined;
|
|
577
|
+
if (traceEnabled && traceAttempts) {
|
|
578
|
+
await enrichTraceAttemptsWithBilling(traceAttempts, costCompletion, {
|
|
579
|
+
mergedConfig,
|
|
580
|
+
calculator: aiTools?.calculator ?? null,
|
|
581
|
+
calculateCost: this.config.aiTools?.calculateCost
|
|
582
|
+
});
|
|
583
|
+
}
|
|
584
|
+
const traceUsageSummary = traceEnabled
|
|
585
|
+
? buildTraceUsageSummary(tokens, costCompletion, routingMetadataSlice.maxTokensRequested)
|
|
586
|
+
: undefined;
|
|
570
587
|
const enhancedResponse = {
|
|
571
588
|
content: content,
|
|
572
589
|
parsedContent: parsedContent,
|
|
@@ -597,6 +614,7 @@ export class AIGateway {
|
|
|
597
614
|
retryCount: traceRetryCount,
|
|
598
615
|
fallbackCount: traceFallbackCount,
|
|
599
616
|
attempts: traceAttempts,
|
|
617
|
+
...(traceUsageSummary !== undefined ? { usage: traceUsageSummary } : {}),
|
|
600
618
|
...(traceMergedRouterSnapshot !== undefined
|
|
601
619
|
? { mergedRouterConfig: traceMergedRouterSnapshot }
|
|
602
620
|
: {})
|
|
@@ -643,6 +661,9 @@ export class AIGateway {
|
|
|
643
661
|
});
|
|
644
662
|
}
|
|
645
663
|
}
|
|
664
|
+
if (optimixerPrediction) {
|
|
665
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
|
|
666
|
+
}
|
|
646
667
|
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
647
668
|
tokens: enhancedResponse.metadata.tokens,
|
|
648
669
|
costUsd: enhancedResponse.metadata.costUsd,
|
|
@@ -676,6 +697,52 @@ export class AIGateway {
|
|
|
676
697
|
throw err;
|
|
677
698
|
}
|
|
678
699
|
}
|
|
700
|
+
async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
|
|
701
|
+
if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
|
|
702
|
+
return undefined;
|
|
703
|
+
}
|
|
704
|
+
const prediction = await this.optimixerManager.predictMaxTokens({
|
|
705
|
+
request,
|
|
706
|
+
mergedConfig,
|
|
707
|
+
messages
|
|
708
|
+
});
|
|
709
|
+
if (prediction) {
|
|
710
|
+
let maxTokens = prediction.recommendedMaxTokens;
|
|
711
|
+
const useCeiling = this.config.optimixer?.useFlexMdCeiling !== false;
|
|
712
|
+
if (useCeiling && mergedConfig?.model && mergedConfig?.provider) {
|
|
713
|
+
try {
|
|
714
|
+
const ceiling = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
|
|
715
|
+
if (typeof ceiling === 'number' && ceiling > 0 && maxTokens > ceiling) {
|
|
716
|
+
maxTokens = ceiling;
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
catch {
|
|
720
|
+
// Non-blocking: use uncapped prediction
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
mergedConfig.maxTokens = maxTokens;
|
|
724
|
+
request._mergedRouterConfig = mergedConfig;
|
|
725
|
+
this.logger.debug('Applied Optimixer recommended max_tokens', {
|
|
726
|
+
aiRequestId: request.aiRequestId,
|
|
727
|
+
recommendedMaxTokens: prediction.recommendedMaxTokens,
|
|
728
|
+
maxTokens,
|
|
729
|
+
confidence: prediction.confidence,
|
|
730
|
+
requestId: prediction.requestId
|
|
731
|
+
});
|
|
732
|
+
return prediction;
|
|
733
|
+
}
|
|
734
|
+
if (mergedConfig?.maxTokens === undefined && mergedConfig?.model && mergedConfig?.provider) {
|
|
735
|
+
try {
|
|
736
|
+
const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
|
|
737
|
+
mergedConfig.maxTokens = flexMdMaxTokens && flexMdMaxTokens > 0 ? flexMdMaxTokens : 2000;
|
|
738
|
+
}
|
|
739
|
+
catch {
|
|
740
|
+
mergedConfig.maxTokens = 2000;
|
|
741
|
+
}
|
|
742
|
+
request._mergedRouterConfig = mergedConfig;
|
|
743
|
+
}
|
|
744
|
+
return undefined;
|
|
745
|
+
}
|
|
679
746
|
/**
|
|
680
747
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
681
748
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -16,8 +16,8 @@ export * from '@x12i/ai-providers-router';
|
|
|
16
16
|
export { AIGateway } from './gateway.js';
|
|
17
17
|
export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
|
|
18
18
|
export { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
19
|
-
export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
|
|
20
|
-
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
19
|
+
export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
|
|
20
|
+
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
21
21
|
export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
|
|
22
22
|
export type { GatewayOperationalMode, GatewayDefaultModelSource, DefaultModelSubstitutionReason, ResolvedGatewayDefault } from './gateway-mode.js';
|
|
23
23
|
export type { ActivityCostStatus, ResolvedActivityCost } from './gateway-utils.js';
|
|
@@ -29,8 +29,10 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
|
|
|
29
29
|
export type { GatewayDualMemoryRoot } from './memory-path-resolution.js';
|
|
30
30
|
export type { UsageTier } from './types.js';
|
|
31
31
|
export { Activix } from '@x12i/activix';
|
|
32
|
-
export type { ActivixRunContext, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
|
|
32
|
+
export type { ActivixRunContext, ActivixAutoCostOptions, ActivixCostShape, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
|
|
33
|
+
export { normalizeToActivixCostShape } from '@x12i/activix';
|
|
33
34
|
export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
35
|
+
export { OptimixerManager } from './optimixer-manager.js';
|
|
34
36
|
export type { ActivityIdentity } from './types.js';
|
|
35
37
|
export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
|
|
36
38
|
export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
|
package/dist/index.js
CHANGED
|
@@ -17,7 +17,7 @@ export * from '@x12i/ai-providers-router';
|
|
|
17
17
|
export { AIGateway } from './gateway.js';
|
|
18
18
|
export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
|
|
19
19
|
export { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
20
|
-
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
20
|
+
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
21
21
|
export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
|
|
22
22
|
export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
|
|
23
23
|
export { mergeGatewayAndRequestTemplateRenderOptions, mergeTemplateRenderOptions } from './template-render-merge.js';
|
|
@@ -26,7 +26,9 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
|
|
|
26
26
|
// (x-models was previously used for RPM/TPM tracking but is no longer integrated)
|
|
27
27
|
// Re-export activity tracking primitives (Activix)
|
|
28
28
|
export { Activix } from '@x12i/activix';
|
|
29
|
+
export { normalizeToActivixCostShape } from '@x12i/activix';
|
|
29
30
|
export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
31
|
+
export { OptimixerManager } from './optimixer-manager.js';
|
|
30
32
|
export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
|
|
31
33
|
// Re-export logging (@x12i/logxer)
|
|
32
34
|
export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
|
|
@@ -39,22 +41,5 @@ export { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from '
|
|
|
39
41
|
export { validateAIRequest, validateJSON, extractJSON, validateResponse, diagnoseRequest, diagnoseResponse, supportsJSONMode, createTestAIRequest, createValidationTestCases, runValidationTests, formatDiagnostic, assertValidAIRequest } from './troubleshooting-helper.js';
|
|
40
42
|
// Export object types library
|
|
41
43
|
export { OBJECT_TYPES_LIBRARY, getObjectType, getObjectTypesForAgent } from './object-types-library.js';
|
|
42
|
-
//
|
|
44
|
+
// Object-types library stubs (optional @x12i/outputs-library integration; see object-types-library-integration.ts)
|
|
43
45
|
export { initializeObjectTypesLibrary, getObjectTypesLibrary, resetObjectTypesLibrary } from './object-types-library-integration.js';
|
|
44
|
-
// Re-export outputs library types and utilities for convenience
|
|
45
|
-
// Note: Since we use dynamic imports for the outputs library, these types may not be available
|
|
46
|
-
// at compile time if the package isn't installed. Users can import directly from
|
|
47
|
-
// @x12i/outputs-library if they need these types or utilities.
|
|
48
|
-
//
|
|
49
|
-
// Recommended: Import types and utilities directly from @x12i/outputs-library:
|
|
50
|
-
// import type { ClassificationOutput } from '@x12i/outputs-library/types';
|
|
51
|
-
// import { ResponseParser } from '@x12i/outputs-library/parsers';
|
|
52
|
-
// import type { ObjectTypesLibrary, FlexMdSupport } from '@x12i/outputs-library';
|
|
53
|
-
//
|
|
54
|
-
// The gateway integrates with the outputs library internally via dynamic imports,
|
|
55
|
-
// so these re-exports are optional and mainly for convenience.
|
|
56
|
-
//
|
|
57
|
-
// For outputs-library v3.3.1+ with flex-md support:
|
|
58
|
-
// - ObjectTypesLibrary class with flex-md methods (getFlexMdTemplate, getFlexMdFormatSpec, etc.)
|
|
59
|
-
// - FlexMdSupport type for object type definitions
|
|
60
|
-
// - All flex-md methods are available on the library instance returned by getObjectTypesLibrary()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { AiMaxTokensActualUsage, AiMaxTokensPredictionResult } from '@x12i/optimixer';
|
|
2
|
+
import type { Activix } from '@x12i/activix';
|
|
3
|
+
import type { Logxer } from '@x12i/logxer';
|
|
4
|
+
import type { ChatRequest, GatewayConfig } from './types.js';
|
|
5
|
+
export type OptimixerGatewayConfig = NonNullable<GatewayConfig['optimixer']>;
|
|
6
|
+
export interface OptimixerManagerConfig {
|
|
7
|
+
optimixer?: OptimixerGatewayConfig;
|
|
8
|
+
logger: Logxer;
|
|
9
|
+
getActivix: () => Promise<Activix | undefined>;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Argument accepted by OptimixerManager.predictMaxTokens.
 */
export type OptimixerMaxTokensContext = {
|
|
12
|
+
/** The chat request the prediction is made for. */
request: ChatRequest;
|
|
13
|
+
/** Config typed as the request's `config`; presumably the merged/effective one (name-based, confirm with caller). */
mergedConfig: ChatRequest['config'];
|
|
14
|
+
/** Messages of the request; both `role` and `content` are optional here. */
messages: Array<{
|
|
15
|
+
role?: string;
|
|
16
|
+
content?: unknown;
|
|
17
|
+
}>;
|
|
18
|
+
};
|
|
19
|
+
/**
 * Type declaration for the manager that predicts `max_tokens` through Optimixer
 * and reports actual usage back to it.
 */
export declare class OptimixerManager {
|
|
20
|
+
private readonly config;
|
|
21
|
+
private readonly logger;
|
|
22
|
+
private readonly getActivix;
|
|
23
|
+
private optimixer?;
|
|
24
|
+
private initPromise?;
|
|
25
|
+
private readonly activixCollection;
|
|
26
|
+
/** @param config Optimixer settings, logger and Activix accessor. */
constructor(config: OptimixerManagerConfig);
|
|
27
|
+
/** Whether the Optimixer integration is switched on. */
isEnabled(): boolean;
|
|
28
|
+
private ensureReady;
|
|
29
|
+
private initialize;
|
|
30
|
+
/** Resolves to a prediction result, or `undefined` when none is available. */
predictMaxTokens(ctx: OptimixerMaxTokensContext): Promise<AiMaxTokensPredictionResult | undefined>;
|
|
31
|
+
/** Reports the actual usage observed for the given request id. */
completePrediction(requestId: string, actual: AiMaxTokensActualUsage): Promise<void>;
|
|
32
|
+
/** Asynchronous teardown of the manager. */
shutdown(): Promise<void>;
|
|
33
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { Optimixer } from '@x12i/optimixer';
|
|
2
|
+
import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
|
|
3
|
+
import { estimateMessagesTokenSizes } from './token-estimate.js';
|
|
4
|
+
/**
 * Picks the action type id reported to Optimixer for a request.
 *
 * Preference order: `request.identity.actionType`, then `request.taskTypeId`,
 * then the literal `'gateway.invoke'`. A candidate is used only when it is
 * truthy and still non-empty after being stringified and trimmed.
 *
 * @param {object} request - Chat request being processed.
 * @returns {string} The trimmed action type id, or `'gateway.invoke'`.
 */
function resolveActionTypeId(request) {
    // Falsy candidates and whitespace-only strings both collapse to '' so the
    // `||` chain below falls through to the next source.
    const normalize = (value) => (value ? String(value).trim() : '');
    return normalize(request.identity?.actionType)
        || normalize(request.taskTypeId)
        || 'gateway.invoke';
}
|
|
14
|
+
/**
 * Passes a request identity through as the run context.
 *
 * @param {*} identity - Identity taken from the request; may be absent.
 * @returns {*} `identity` unchanged when truthy, otherwise `undefined`.
 */
function toActivixRunContext(identity) {
    return identity ? identity : undefined;
}
|
|
19
|
+
/**
 * Lazily creates and owns an Optimixer instance used to predict `max_tokens`
 * for gateway requests and to report the actual usage afterwards.
 *
 * Every public method is best-effort: failures are logged as warnings and the
 * caller receives `undefined` (or nothing) rather than an exception.
 */
export class OptimixerManager {
    config;
    logger;
    getActivix;
    optimixer;
    initPromise;
    activixCollection;
    /**
     * @param {object} config
     * @param {object} [config.optimixer] - Optimixer section of the gateway config.
     * @param {object} config.logger - Logger exposing `warn` and `info`.
     * @param {() => Promise<object|undefined>} config.getActivix - Async accessor for the Activix client.
     */
    constructor(config) {
        this.config = config.optimixer;
        this.logger = config.logger;
        this.getActivix = config.getActivix;
        this.activixCollection = resolveActivityTrackingConfig().collectionName;
    }
    /**
     * @returns {boolean} `true` only when the config sets `enabled` to exactly `true`.
     */
    isEnabled() {
        return this.config?.enabled === true;
    }
    /**
     * Returns the Optimixer instance, starting initialization on first use.
     *
     * The initialization promise is cached, so concurrent callers share one
     * attempt and a failed attempt is not retried until `shutdown()` resets it.
     *
     * @returns {Promise<object|undefined>} The instance, or `undefined` when
     *   disabled or when initialization did not produce one.
     */
    async ensureReady() {
        if (!this.isEnabled()) {
            return undefined;
        }
        if (this.optimixer) {
            return this.optimixer;
        }
        if (!this.initPromise) {
            this.initPromise = this.initialize();
        }
        await this.initPromise;
        return this.optimixer;
    }
    /**
     * Resolves the Activix client and creates the Optimixer instance.
     *
     * Never rejects: `getActivix()` is awaited inside the `try` so that a
     * rejected accessor is logged like any other initialization failure
     * instead of being cached in `initPromise` and rethrown from every later
     * `predictMaxTokens` / `completePrediction` call.
     *
     * @returns {Promise<void>}
     */
    async initialize() {
        try {
            const activix = await this.getActivix();
            if (!activix) {
                this.logger.warn('Optimixer enabled but Activix is unavailable; adaptive max_tokens disabled', {
                    activixCollection: this.activixCollection
                });
                return;
            }
            this.optimixer = await Optimixer.create({
                activixClient: activix,
                activixCollection: this.activixCollection,
                pipelines: { aiMaxTokens: { enabled: true } },
                ...(typeof this.config?.warmupLimit === 'number' ? { warmupLimit: this.config.warmupLimit } : {})
            });
            this.logger.info('Optimixer initialized for adaptive max_tokens', {
                activixCollection: this.activixCollection,
                acceptableRisk: this.config?.acceptableRisk ?? 'medium'
            });
        }
        catch (error) {
            this.logger.warn('Optimixer initialization failed; adaptive max_tokens disabled', {
                error: error instanceof Error ? error.message : String(error)
            });
            this.optimixer = undefined;
        }
    }
    /**
     * Asks Optimixer for a `max_tokens` prediction for the given request.
     *
     * @param {object} ctx
     * @param {object} ctx.request - Chat request; `identity`, `taskTypeId` and `aiRequestId` are read.
     * @param {object} [ctx.mergedConfig] - Config whose string `provider` / `model` are forwarded.
     * @param {Array<object>} ctx.messages - Messages used for the token size estimate.
     * @returns {Promise<object|undefined>} The prediction, or `undefined` when
     *   Optimixer is unavailable or the prediction call throws.
     */
    async predictMaxTokens(ctx) {
        const optimixer = await this.ensureReady();
        if (!optimixer) {
            return undefined;
        }
        const { request, mergedConfig, messages } = ctx;
        const { inputSize, contextSize } = estimateMessagesTokenSizes(messages);
        const acceptableRisk = this.config?.acceptableRisk ?? 'medium';
        try {
            return await optimixer.predictAiMaxTokens({
                actionTypeId: resolveActionTypeId(request),
                inputSize,
                contextSize,
                acceptableRisk,
                runContext: toActivixRunContext(request.identity),
                provider: typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined,
                model: typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined
            });
        }
        catch (error) {
            this.logger.warn('Optimixer predictAiMaxTokens failed; caller should use fallback max_tokens', {
                error: error instanceof Error ? error.message : String(error),
                aiRequestId: request.aiRequestId
            });
            return undefined;
        }
    }
    /**
     * Reports the actual usage for an earlier prediction. Failures are logged
     * and swallowed.
     *
     * @param {string} requestId - Id of the request the prediction was made for.
     * @param {object} actual - Actual usage forwarded to Optimixer unchanged.
     * @returns {Promise<void>}
     */
    async completePrediction(requestId, actual) {
        const optimixer = await this.ensureReady();
        if (!optimixer) {
            return;
        }
        try {
            await optimixer.completeAiMaxTokensPrediction({ requestId, actual });
        }
        catch (error) {
            this.logger.warn('Optimixer completeAiMaxTokensPrediction failed (non-blocking)', {
                requestId,
                error: error instanceof Error ? error.message : String(error)
            });
        }
    }
    /**
     * Closes the Optimixer instance (if any) and resets the manager so a later
     * call can initialize again. Close failures are logged and swallowed.
     *
     * @returns {Promise<void>}
     */
    async shutdown() {
        // Wait for an in-flight initialization first: otherwise it would assign
        // `this.optimixer` after we had already looked, leaving an instance
        // that is never closed and still attached to the manager.
        const pendingInit = this.initPromise;
        this.initPromise = undefined;
        if (pendingInit) {
            try {
                await pendingInit;
            }
            catch {
                // Nothing to clean up when initialization itself failed.
            }
        }
        const optimixer = this.optimixer;
        this.optimixer = undefined;
        if (!optimixer) {
            return;
        }
        try {
            await optimixer.close();
        }
        catch (error) {
            this.logger.warn('OptimixerManager shutdown: close failed (non-blocking)', {
                error: error instanceof Error ? error.message : String(error)
            });
        }
    }
}
|