@x12i/ai-gateway 9.3.5 → 9.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +151 -4147
- package/dist/activity-manager.d.ts +9 -1
- package/dist/activity-manager.js +85 -81
- package/dist/ai-tools-client.js +4 -12
- package/dist/gateway-config.d.ts +3 -0
- package/dist/gateway-config.js +19 -1
- package/dist/gateway-utils.d.ts +34 -2
- package/dist/gateway-utils.js +204 -35
- package/dist/gateway.d.ts +2 -0
- package/dist/gateway.js +69 -2
- package/dist/index.d.ts +5 -3
- package/dist/index.js +4 -19
- package/dist/optimixer-manager.d.ts +33 -0
- package/dist/optimixer-manager.js +128 -0
- package/dist/token-estimate.d.ts +12 -0
- package/dist/token-estimate.js +30 -0
- package/dist/types.d.ts +50 -2
- package/dist-cjs/activity-manager.cjs +85 -81
- package/dist-cjs/activity-manager.d.ts +9 -1
- package/dist-cjs/ai-tools-client.cjs +4 -12
- package/dist-cjs/gateway-config.cjs +19 -1
- package/dist-cjs/gateway-config.d.ts +3 -0
- package/dist-cjs/gateway-utils.cjs +204 -35
- package/dist-cjs/gateway-utils.d.ts +34 -2
- package/dist-cjs/gateway.cjs +69 -2
- package/dist-cjs/gateway.d.ts +2 -0
- package/dist-cjs/index.cjs +4 -19
- package/dist-cjs/index.d.ts +5 -3
- package/dist-cjs/optimixer-manager.cjs +128 -0
- package/dist-cjs/optimixer-manager.d.ts +33 -0
- package/dist-cjs/token-estimate.cjs +30 -0
- package/dist-cjs/token-estimate.d.ts +12 -0
- package/dist-cjs/types.d.ts +50 -2
- package/package.json +10 -36
|
@@ -55,6 +55,21 @@ async function substituteGatewayDefaultModel(merged, request, config, logger, me
|
|
|
55
55
|
});
|
|
56
56
|
applyGatewayDefaultToMerged(merged, defaults, config);
|
|
57
57
|
}
|
|
58
|
+
/**
|
|
59
|
+
* True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
|
|
60
|
+
*/
|
|
61
|
+
export function isMaxTokensExplicitlySet(request, config) {
|
|
62
|
+
const useInternalDefaults = request.useInternalDefaults;
|
|
63
|
+
const internalDefaults = useInternalDefaults === 'skill'
|
|
64
|
+
? config.internalSystemActions?.internalSkill
|
|
65
|
+
: useInternalDefaults === 'audit'
|
|
66
|
+
? config.internalSystemActions?.skillAudit
|
|
67
|
+
: undefined;
|
|
68
|
+
return (request.config?.maxTokens !== undefined ||
|
|
69
|
+
request.modelConfig?.maxTokens !== undefined ||
|
|
70
|
+
internalDefaults?.maxTokens !== undefined ||
|
|
71
|
+
config.maxTokens !== undefined);
|
|
72
|
+
}
|
|
58
73
|
/**
|
|
59
74
|
* Merges config with defaults
|
|
60
75
|
* Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
|
|
@@ -175,12 +190,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
175
190
|
if (!merged.model) {
|
|
176
191
|
await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
|
|
177
192
|
}
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
config.maxTokens !== undefined;
|
|
183
|
-
if (!maxTokensExplicitlySet && merged.model && merged.provider) {
|
|
193
|
+
const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
|
|
194
|
+
const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
|
|
195
|
+
// Auto-get maxTokens from flex-md when Optimixer is not handling adaptive max_tokens.
|
|
196
|
+
if (!optimixerWillPredict && !maxTokensExplicitlySet && merged.model && merged.provider) {
|
|
184
197
|
// Try to get maxTokens from flex-md
|
|
185
198
|
try {
|
|
186
199
|
const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(merged.provider, merged.model);
|
|
@@ -219,7 +232,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
219
232
|
});
|
|
220
233
|
}
|
|
221
234
|
}
|
|
222
|
-
else if (!merged.maxTokens) {
|
|
235
|
+
else if (!merged.maxTokens && !optimixerWillPredict) {
|
|
223
236
|
// If maxTokens wasn't set and wasn't auto-detected, use fallback
|
|
224
237
|
// This should rarely happen, but handle edge cases
|
|
225
238
|
merged.maxTokens = 2000;
|
|
@@ -228,7 +241,15 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
228
241
|
model: merged.model,
|
|
229
242
|
provider: merged.provider,
|
|
230
243
|
maxTokens: merged.maxTokens,
|
|
231
|
-
maxTokensExplicitlySet
|
|
244
|
+
maxTokensExplicitlySet,
|
|
245
|
+
optimixerWillPredict
|
|
246
|
+
});
|
|
247
|
+
}
|
|
248
|
+
else if (optimixerWillPredict) {
|
|
249
|
+
logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
|
|
250
|
+
jobId: request.identity.jobId,
|
|
251
|
+
model: merged.model,
|
|
252
|
+
provider: merged.provider
|
|
232
253
|
});
|
|
233
254
|
}
|
|
234
255
|
else {
|
|
@@ -434,6 +455,50 @@ export function resolveCostCompletionForActivity(routerResponse, tokens) {
|
|
|
434
455
|
}
|
|
435
456
|
return resolveActivityCostCompletion(tokens, costUsd);
|
|
436
457
|
}
|
|
458
|
+
/** Record shape for {@link CostCalculator.calculateFromRecord} (router + merged config + usage). */
|
|
459
|
+
export function buildGatewayPricingRecord(routerResponse, tokens, mergedConfig) {
|
|
460
|
+
const base = routerResponse != null && typeof routerResponse === 'object'
|
|
461
|
+
? { ...routerResponse }
|
|
462
|
+
: {};
|
|
463
|
+
const meta = base.metadata != null && typeof base.metadata === 'object'
|
|
464
|
+
? { ...base.metadata }
|
|
465
|
+
: {};
|
|
466
|
+
const routing = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
|
|
467
|
+
return {
|
|
468
|
+
...base,
|
|
469
|
+
usage: {
|
|
470
|
+
promptTokens: tokens.prompt,
|
|
471
|
+
completionTokens: tokens.completion,
|
|
472
|
+
totalTokens: tokens.total
|
|
473
|
+
},
|
|
474
|
+
tokens,
|
|
475
|
+
metadata: {
|
|
476
|
+
...meta,
|
|
477
|
+
tokens,
|
|
478
|
+
...(routing.provider ? { provider: routing.provider } : {}),
|
|
479
|
+
...(routing.modelUsed
|
|
480
|
+
? { modelUsed: routing.modelUsed, model: routing.modelUsed }
|
|
481
|
+
: {})
|
|
482
|
+
},
|
|
483
|
+
...(mergedConfig != null ? { config: mergedConfig } : {})
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
export function mapAiCostResultToResolvedActivityCost(base, result) {
|
|
487
|
+
if (result.unknownModel) {
|
|
488
|
+
return base.costStatus ? base : { ...base, costStatus: 'unpriced' };
|
|
489
|
+
}
|
|
490
|
+
if (typeof result.cost !== 'number' || !Number.isFinite(result.cost)) {
|
|
491
|
+
return base;
|
|
492
|
+
}
|
|
493
|
+
if (!result.isAuthoritative && result.source === 'estimate-fallback') {
|
|
494
|
+
return base.costStatus ? base : { ...base, costStatus: 'unpriced' };
|
|
495
|
+
}
|
|
496
|
+
return {
|
|
497
|
+
cost: result.cost,
|
|
498
|
+
costStatus: 'priced',
|
|
499
|
+
...(result.breakdown ? { costBreakdown: result.breakdown } : {})
|
|
500
|
+
};
|
|
501
|
+
}
|
|
437
502
|
/**
|
|
438
503
|
* Router cost passthrough, then optional @x12i/ai-tools catalog pricing when still unpriced.
|
|
439
504
|
*/
|
|
@@ -452,37 +517,114 @@ export async function resolveCostCompletionWithAiTools(routerResponse, tokens, o
|
|
|
452
517
|
if (!hasNonZeroTokenUsage(tokens)) {
|
|
453
518
|
return base;
|
|
454
519
|
}
|
|
455
|
-
const routing = pickInvokeRoutingMetadataSlice(routerResponse, options.mergedConfig);
|
|
456
|
-
const cfg = options.mergedConfig != null && typeof options.mergedConfig === 'object'
|
|
457
|
-
? options.mergedConfig
|
|
458
|
-
: {};
|
|
459
|
-
const provider = routing.provider ?? cfg.provider;
|
|
460
|
-
const modelUsed = routing.modelUsed ?? cfg.model;
|
|
461
|
-
if (!provider || !modelUsed) {
|
|
462
|
-
return base;
|
|
463
|
-
}
|
|
464
520
|
try {
|
|
465
|
-
const
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
completion: tokens.completion,
|
|
469
|
-
total: tokens.total
|
|
470
|
-
},
|
|
471
|
-
provider,
|
|
472
|
-
modelUsed
|
|
473
|
-
});
|
|
474
|
-
if (typeof result.cost === 'number' && Number.isFinite(result.cost)) {
|
|
475
|
-
return {
|
|
476
|
-
cost: result.cost,
|
|
477
|
-
costStatus: 'priced',
|
|
478
|
-
...(result.breakdown ? { costBreakdown: result.breakdown } : {})
|
|
479
|
-
};
|
|
480
|
-
}
|
|
521
|
+
const record = buildGatewayPricingRecord(routerResponse, tokens, options.mergedConfig);
|
|
522
|
+
const result = await options.calculator.calculateFromRecord(record);
|
|
523
|
+
return mapAiCostResultToResolvedActivityCost(base, result);
|
|
481
524
|
}
|
|
482
525
|
catch {
|
|
483
|
-
|
|
526
|
+
const routing = pickInvokeRoutingMetadataSlice(routerResponse, options.mergedConfig);
|
|
527
|
+
const cfg = options.mergedConfig != null && typeof options.mergedConfig === 'object'
|
|
528
|
+
? options.mergedConfig
|
|
529
|
+
: {};
|
|
530
|
+
const provider = routing.provider ?? cfg.provider;
|
|
531
|
+
const modelUsed = routing.modelUsed ?? cfg.model;
|
|
532
|
+
if (!provider || !modelUsed) {
|
|
533
|
+
return base;
|
|
534
|
+
}
|
|
535
|
+
try {
|
|
536
|
+
const result = await options.calculator.calculate({
|
|
537
|
+
tokens: {
|
|
538
|
+
prompt: tokens.prompt,
|
|
539
|
+
completion: tokens.completion,
|
|
540
|
+
total: tokens.total
|
|
541
|
+
},
|
|
542
|
+
provider,
|
|
543
|
+
usedModel: modelUsed
|
|
544
|
+
});
|
|
545
|
+
return mapAiCostResultToResolvedActivityCost(base, result);
|
|
546
|
+
}
|
|
547
|
+
catch {
|
|
548
|
+
return base;
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
function applyBillingToTraceAttempt(attempt, billing) {
|
|
553
|
+
if (billing.costStatus === 'priced' || billing.costStatus === 'unpriced') {
|
|
554
|
+
attempt.costStatus = billing.costStatus;
|
|
555
|
+
}
|
|
556
|
+
if (typeof billing.cost === 'number' && Number.isFinite(billing.cost)) {
|
|
557
|
+
attempt.costUsd = billing.cost;
|
|
558
|
+
}
|
|
559
|
+
if (billing.costBreakdown) {
|
|
560
|
+
attempt.costBreakdown = billing.costBreakdown;
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
function buildTraceAttemptPricingRecord(attempt, mergedConfig) {
|
|
564
|
+
const tokens = attempt.usage?.tokens ?? { prompt: 0, completion: 0, total: 0 };
|
|
565
|
+
return buildGatewayPricingRecord({
|
|
566
|
+
metadata: {
|
|
567
|
+
provider: attempt.routing.provider,
|
|
568
|
+
modelUsed: attempt.modelUsed,
|
|
569
|
+
region: attempt.routing.region,
|
|
570
|
+
tokens
|
|
571
|
+
}
|
|
572
|
+
}, tokens, mergedConfig);
|
|
573
|
+
}
|
|
574
|
+
/**
|
|
575
|
+
* Trace-mode summary: final token usage + resolved billing (after catalog pricing when applicable).
|
|
576
|
+
*/
|
|
577
|
+
export function buildTraceUsageSummary(tokens, billing, maxTokensRequested) {
|
|
578
|
+
if (!hasNonZeroTokenUsage(tokens) && !billing.costStatus) {
|
|
579
|
+
return undefined;
|
|
580
|
+
}
|
|
581
|
+
const summary = { tokens };
|
|
582
|
+
if (maxTokensRequested !== undefined) {
|
|
583
|
+
summary.maxTokensRequested = maxTokensRequested;
|
|
584
|
+
}
|
|
585
|
+
if (billing.costStatus === 'priced' && typeof billing.cost === 'number') {
|
|
586
|
+
summary.costUsd = billing.cost;
|
|
587
|
+
summary.cost = billing.cost;
|
|
588
|
+
}
|
|
589
|
+
if (billing.costStatus) {
|
|
590
|
+
summary.costStatus = billing.costStatus;
|
|
591
|
+
}
|
|
592
|
+
if (billing.costBreakdown) {
|
|
593
|
+
summary.costBreakdown = billing.costBreakdown;
|
|
484
594
|
}
|
|
485
|
-
return
|
|
595
|
+
return summary;
|
|
596
|
+
}
|
|
597
|
+
/**
|
|
598
|
+
* Apply resolved billing to trace attempts: final successful attempt gets aggregate billing;
|
|
599
|
+
* other successful attempts without router cost get per-attempt catalog pricing when enabled.
|
|
600
|
+
*/
|
|
601
|
+
export async function enrichTraceAttemptsWithBilling(attempts, finalBilling, options) {
|
|
602
|
+
if (!attempts.length)
|
|
603
|
+
return;
|
|
604
|
+
let lastOkIdx = -1;
|
|
605
|
+
for (let i = attempts.length - 1; i >= 0; i--) {
|
|
606
|
+
if (attempts[i].ok) {
|
|
607
|
+
lastOkIdx = i;
|
|
608
|
+
break;
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
if (lastOkIdx >= 0) {
|
|
612
|
+
applyBillingToTraceAttempt(attempts[lastOkIdx], finalBilling);
|
|
613
|
+
}
|
|
614
|
+
if (options?.calculateCost === false || !options?.calculator) {
|
|
615
|
+
return;
|
|
616
|
+
}
|
|
617
|
+
await Promise.all(attempts.map(async (attempt, idx) => {
|
|
618
|
+
if (!attempt.ok || idx === lastOkIdx)
|
|
619
|
+
return;
|
|
620
|
+
const tokens = attempt.usage?.tokens;
|
|
621
|
+
if (!tokens || !hasNonZeroTokenUsage(tokens))
|
|
622
|
+
return;
|
|
623
|
+
if (attempt.costStatus === 'priced' && typeof attempt.costUsd === 'number')
|
|
624
|
+
return;
|
|
625
|
+
const slice = await resolveCostCompletionWithAiTools(buildTraceAttemptPricingRecord(attempt, options.mergedConfig), tokens, options);
|
|
626
|
+
applyBillingToTraceAttempt(attempt, slice);
|
|
627
|
+
}));
|
|
486
628
|
}
|
|
487
629
|
/**
|
|
488
630
|
* Stable routing facts for gateway response metadata (router metadata + merged config fallbacks).
|
|
@@ -693,3 +835,30 @@ export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIV
|
|
|
693
835
|
_preview: serialized.slice(0, maxChars)
|
|
694
836
|
};
|
|
695
837
|
}
|
|
838
|
+
export function resolveFinishReasonFromRouterResponse(response) {
|
|
839
|
+
if (response == null || typeof response !== 'object')
|
|
840
|
+
return undefined;
|
|
841
|
+
const r = response;
|
|
842
|
+
const meta = r.metadata != null && typeof r.metadata === 'object' ? r.metadata : undefined;
|
|
843
|
+
const candidates = [
|
|
844
|
+
r.finishReason,
|
|
845
|
+
r.finish_reason,
|
|
846
|
+
meta?.finishReason,
|
|
847
|
+
meta?.finish_reason
|
|
848
|
+
];
|
|
849
|
+
for (const c of candidates) {
|
|
850
|
+
if (typeof c === 'string' && c.trim())
|
|
851
|
+
return c.trim();
|
|
852
|
+
}
|
|
853
|
+
return undefined;
|
|
854
|
+
}
|
|
855
|
+
export function buildOptimixerActualUsage(tokens, response, latencyMs) {
|
|
856
|
+
const finishReason = resolveFinishReasonFromRouterResponse(response);
|
|
857
|
+
return {
|
|
858
|
+
promptTokens: tokens.prompt,
|
|
859
|
+
completionTokens: tokens.completion,
|
|
860
|
+
totalTokens: tokens.total,
|
|
861
|
+
...(finishReason ? { finishReason } : {}),
|
|
862
|
+
latencyMs
|
|
863
|
+
};
|
|
864
|
+
}
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* Gateway Utilities Module
|
|
3
3
|
* Handles utility functions
|
|
4
4
|
*/
|
|
5
|
-
import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayInvokeRejectionMetadata, GatewayTraceMergedConfig, GatewayTraceRequestIds, ModelConfig } from './types.js';
|
|
5
|
+
import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayInvokeRejectionMetadata, GatewayTraceAttempt, GatewayTraceMergedConfig, GatewayTraceRequestIds, GatewayTraceUsageSummary, ModelConfig } from './types.js';
|
|
6
6
|
import type { Logxer } from '@x12i/logxer';
|
|
7
|
-
import { type AiModelsCatalogClient, type CostCalculator } from '@x12i/ai-tools';
|
|
7
|
+
import { type AiCostResult, type AiModelsCatalogClient, type CostCalculator } from '@x12i/ai-tools';
|
|
8
8
|
/**
|
|
9
9
|
* Generates MD5 hash of a string
|
|
10
10
|
*/
|
|
@@ -17,6 +17,12 @@ export type MergeConfigOptions = {
|
|
|
17
17
|
defaultModelConfig?: Record<string, unknown>;
|
|
18
18
|
catalog?: AiModelsCatalogClient | null;
|
|
19
19
|
};
|
|
20
|
+
/**
|
|
21
|
+
* True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
|
|
22
|
+
*/
|
|
23
|
+
export declare function isMaxTokensExplicitlySet(request: ChatRequest & {
|
|
24
|
+
useInternalDefaults?: 'skill' | 'audit';
|
|
25
|
+
}, config: GatewayConfig): boolean;
|
|
20
26
|
/**
|
|
21
27
|
* Merges config with defaults
|
|
22
28
|
* Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
|
|
@@ -91,6 +97,13 @@ export type ResolveCostCompletionOptions = {
|
|
|
91
97
|
calculator?: CostCalculator | null;
|
|
92
98
|
calculateCost?: boolean;
|
|
93
99
|
};
|
|
100
|
+
/** Record shape for {@link CostCalculator.calculateFromRecord} (router + merged config + usage). */
|
|
101
|
+
export declare function buildGatewayPricingRecord(routerResponse: unknown, tokens: {
|
|
102
|
+
prompt: number;
|
|
103
|
+
completion: number;
|
|
104
|
+
total: number;
|
|
105
|
+
}, mergedConfig?: unknown): Record<string, unknown>;
|
|
106
|
+
export declare function mapAiCostResultToResolvedActivityCost(base: ResolvedActivityCost, result: AiCostResult): ResolvedActivityCost;
|
|
94
107
|
/**
|
|
95
108
|
* Router cost passthrough, then optional @x12i/ai-tools catalog pricing when still unpriced.
|
|
96
109
|
*/
|
|
@@ -99,6 +112,19 @@ export declare function resolveCostCompletionWithAiTools(routerResponse: unknown
|
|
|
99
112
|
completion: number;
|
|
100
113
|
total: number;
|
|
101
114
|
}, options?: ResolveCostCompletionOptions): Promise<ResolvedActivityCost>;
|
|
115
|
+
/**
|
|
116
|
+
* Trace-mode summary: final token usage + resolved billing (after catalog pricing when applicable).
|
|
117
|
+
*/
|
|
118
|
+
export declare function buildTraceUsageSummary(tokens: {
|
|
119
|
+
prompt: number;
|
|
120
|
+
completion: number;
|
|
121
|
+
total: number;
|
|
122
|
+
}, billing: ResolvedActivityCost, maxTokensRequested?: number): GatewayTraceUsageSummary | undefined;
|
|
123
|
+
/**
|
|
124
|
+
* Apply resolved billing to trace attempts: final successful attempt gets aggregate billing;
|
|
125
|
+
* other successful attempts without router cost get per-attempt catalog pricing when enabled.
|
|
126
|
+
*/
|
|
127
|
+
export declare function enrichTraceAttemptsWithBilling(attempts: GatewayTraceAttempt[], finalBilling: ResolvedActivityCost, options?: ResolveCostCompletionOptions): Promise<void>;
|
|
102
128
|
/**
|
|
103
129
|
* Stable routing facts for gateway response metadata (router metadata + merged config fallbacks).
|
|
104
130
|
* Matches trace-mode resolution; intended for every successful invoke(), not only diagnostics.trace.
|
|
@@ -145,4 +171,10 @@ export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
|
|
|
145
171
|
* Non-serializable values become a small marker object instead of throwing.
|
|
146
172
|
*/
|
|
147
173
|
export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
|
|
174
|
+
export declare function resolveFinishReasonFromRouterResponse(response: unknown): string | undefined;
|
|
175
|
+
export declare function buildOptimixerActualUsage(tokens: {
|
|
176
|
+
prompt: number;
|
|
177
|
+
completion: number;
|
|
178
|
+
total: number;
|
|
179
|
+
}, response: unknown, latencyMs: number): import('@x12i/optimixer').AiMaxTokensActualUsage;
|
|
148
180
|
export {};
|
package/dist-cjs/gateway.cjs
CHANGED
|
@@ -7,9 +7,9 @@ import { validateChatRequest, validateAIRequest } from './gateway-validation.js'
|
|
|
7
7
|
import { ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
8
8
|
import { initializeGatewayComponents } from './gateway-config.js';
|
|
9
9
|
import { buildMessages } from './message-builder.js';
|
|
10
|
-
import { extractJsonFromFlexMd } from './flex-md-loader.js';
|
|
10
|
+
import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
|
|
11
11
|
import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
|
|
12
|
-
import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
|
|
12
|
+
import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
|
|
13
13
|
import { getAiToolsClient } from './ai-tools-client.js';
|
|
14
14
|
import { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
15
15
|
import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
|
|
@@ -45,6 +45,7 @@ export class AIGateway {
|
|
|
45
45
|
config;
|
|
46
46
|
logger;
|
|
47
47
|
activityManager;
|
|
48
|
+
optimixerManager;
|
|
48
49
|
messageBuilderConfig;
|
|
49
50
|
defaultModelConfig = {};
|
|
50
51
|
_autoRegisterDone = false;
|
|
@@ -56,6 +57,7 @@ export class AIGateway {
|
|
|
56
57
|
this.logger = components.logger;
|
|
57
58
|
this.router = components.router;
|
|
58
59
|
this.activityManager = components.activityManager;
|
|
60
|
+
this.optimixerManager = components.optimixerManager;
|
|
59
61
|
this.messageBuilderConfig = components.messageBuilderConfig;
|
|
60
62
|
this.defaultModelConfig = components.defaultModelConfig ?? {};
|
|
61
63
|
setGatewayRuntimeClients({
|
|
@@ -93,6 +95,7 @@ export class AIGateway {
|
|
|
93
95
|
await autoRegisterProviders(this.router, this.logger);
|
|
94
96
|
this._autoRegisterDone = true;
|
|
95
97
|
}
|
|
98
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
96
99
|
// Start activity tracking if available
|
|
97
100
|
let activity = undefined;
|
|
98
101
|
if (this.activityManager) {
|
|
@@ -166,6 +169,9 @@ export class AIGateway {
|
|
|
166
169
|
});
|
|
167
170
|
}
|
|
168
171
|
}
|
|
172
|
+
if (optimixerPrediction) {
|
|
173
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
|
|
174
|
+
}
|
|
169
175
|
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
170
176
|
tokens: enhancedResponse.metadata.tokens,
|
|
171
177
|
costUsd: enhancedResponse.metadata.costUsd,
|
|
@@ -279,6 +285,7 @@ export class AIGateway {
|
|
|
279
285
|
await autoRegisterProviders(this.router, this.logger);
|
|
280
286
|
this._autoRegisterDone = true;
|
|
281
287
|
}
|
|
288
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
282
289
|
// Start activity tracking if available
|
|
283
290
|
let activity = undefined;
|
|
284
291
|
if (this.activityManager) {
|
|
@@ -567,6 +574,16 @@ export class AIGateway {
|
|
|
567
574
|
const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
|
|
568
575
|
const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
|
|
569
576
|
const traceMergedRouterSnapshot = traceEnabled ? pickTraceMergedRouterConfig(mergedConfig) : undefined;
|
|
577
|
+
if (traceEnabled && traceAttempts) {
|
|
578
|
+
await enrichTraceAttemptsWithBilling(traceAttempts, costCompletion, {
|
|
579
|
+
mergedConfig,
|
|
580
|
+
calculator: aiTools?.calculator ?? null,
|
|
581
|
+
calculateCost: this.config.aiTools?.calculateCost
|
|
582
|
+
});
|
|
583
|
+
}
|
|
584
|
+
const traceUsageSummary = traceEnabled
|
|
585
|
+
? buildTraceUsageSummary(tokens, costCompletion, routingMetadataSlice.maxTokensRequested)
|
|
586
|
+
: undefined;
|
|
570
587
|
const enhancedResponse = {
|
|
571
588
|
content: content,
|
|
572
589
|
parsedContent: parsedContent,
|
|
@@ -597,6 +614,7 @@ export class AIGateway {
|
|
|
597
614
|
retryCount: traceRetryCount,
|
|
598
615
|
fallbackCount: traceFallbackCount,
|
|
599
616
|
attempts: traceAttempts,
|
|
617
|
+
...(traceUsageSummary !== undefined ? { usage: traceUsageSummary } : {}),
|
|
600
618
|
...(traceMergedRouterSnapshot !== undefined
|
|
601
619
|
? { mergedRouterConfig: traceMergedRouterSnapshot }
|
|
602
620
|
: {})
|
|
@@ -643,6 +661,9 @@ export class AIGateway {
|
|
|
643
661
|
});
|
|
644
662
|
}
|
|
645
663
|
}
|
|
664
|
+
if (optimixerPrediction) {
|
|
665
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
|
|
666
|
+
}
|
|
646
667
|
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
647
668
|
tokens: enhancedResponse.metadata.tokens,
|
|
648
669
|
costUsd: enhancedResponse.metadata.costUsd,
|
|
@@ -676,6 +697,52 @@ export class AIGateway {
|
|
|
676
697
|
throw err;
|
|
677
698
|
}
|
|
678
699
|
}
|
|
700
|
+
async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
|
|
701
|
+
if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
|
|
702
|
+
return undefined;
|
|
703
|
+
}
|
|
704
|
+
const prediction = await this.optimixerManager.predictMaxTokens({
|
|
705
|
+
request,
|
|
706
|
+
mergedConfig,
|
|
707
|
+
messages
|
|
708
|
+
});
|
|
709
|
+
if (prediction) {
|
|
710
|
+
let maxTokens = prediction.recommendedMaxTokens;
|
|
711
|
+
const useCeiling = this.config.optimixer?.useFlexMdCeiling !== false;
|
|
712
|
+
if (useCeiling && mergedConfig?.model && mergedConfig?.provider) {
|
|
713
|
+
try {
|
|
714
|
+
const ceiling = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
|
|
715
|
+
if (typeof ceiling === 'number' && ceiling > 0 && maxTokens > ceiling) {
|
|
716
|
+
maxTokens = ceiling;
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
catch {
|
|
720
|
+
// Non-blocking: use uncapped prediction
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
mergedConfig.maxTokens = maxTokens;
|
|
724
|
+
request._mergedRouterConfig = mergedConfig;
|
|
725
|
+
this.logger.debug('Applied Optimixer recommended max_tokens', {
|
|
726
|
+
aiRequestId: request.aiRequestId,
|
|
727
|
+
recommendedMaxTokens: prediction.recommendedMaxTokens,
|
|
728
|
+
maxTokens,
|
|
729
|
+
confidence: prediction.confidence,
|
|
730
|
+
requestId: prediction.requestId
|
|
731
|
+
});
|
|
732
|
+
return prediction;
|
|
733
|
+
}
|
|
734
|
+
if (mergedConfig?.maxTokens === undefined && mergedConfig?.model && mergedConfig?.provider) {
|
|
735
|
+
try {
|
|
736
|
+
const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
|
|
737
|
+
mergedConfig.maxTokens = flexMdMaxTokens && flexMdMaxTokens > 0 ? flexMdMaxTokens : 2000;
|
|
738
|
+
}
|
|
739
|
+
catch {
|
|
740
|
+
mergedConfig.maxTokens = 2000;
|
|
741
|
+
}
|
|
742
|
+
request._mergedRouterConfig = mergedConfig;
|
|
743
|
+
}
|
|
744
|
+
return undefined;
|
|
745
|
+
}
|
|
679
746
|
/**
|
|
680
747
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
681
748
|
*/
|
package/dist-cjs/gateway.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare class AIGateway {
|
|
|
15
15
|
private config;
|
|
16
16
|
private logger;
|
|
17
17
|
private activityManager?;
|
|
18
|
+
private optimixerManager?;
|
|
18
19
|
private messageBuilderConfig?;
|
|
19
20
|
private defaultModelConfig;
|
|
20
21
|
private _autoRegisterDone;
|
|
@@ -28,6 +29,7 @@ export declare class AIGateway {
|
|
|
28
29
|
* Invoke AI request (with structured output support)
|
|
29
30
|
*/
|
|
30
31
|
invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
|
|
32
|
+
private applyAdaptiveMaxTokensIfEnabled;
|
|
31
33
|
/**
|
|
32
34
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
33
35
|
*/
|
package/dist-cjs/index.cjs
CHANGED
|
@@ -17,7 +17,7 @@ export * from '@x12i/ai-providers-router';
|
|
|
17
17
|
export { AIGateway } from './gateway.js';
|
|
18
18
|
export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
|
|
19
19
|
export { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
20
|
-
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
20
|
+
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
21
21
|
export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
|
|
22
22
|
export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
|
|
23
23
|
export { mergeGatewayAndRequestTemplateRenderOptions, mergeTemplateRenderOptions } from './template-render-merge.js';
|
|
@@ -26,7 +26,9 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
|
|
|
26
26
|
// (x-models was previously used for RPM/TPM tracking but is no longer integrated)
|
|
27
27
|
// Re-export activity tracking primitives (Activix)
|
|
28
28
|
export { Activix } from '@x12i/activix';
|
|
29
|
+
export { normalizeToActivixCostShape } from '@x12i/activix';
|
|
29
30
|
export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
31
|
+
export { OptimixerManager } from './optimixer-manager.js';
|
|
30
32
|
export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
|
|
31
33
|
// Re-export logging (@x12i/logxer)
|
|
32
34
|
export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
|
|
@@ -39,22 +41,5 @@ export { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from '
|
|
|
39
41
|
export { validateAIRequest, validateJSON, extractJSON, validateResponse, diagnoseRequest, diagnoseResponse, supportsJSONMode, createTestAIRequest, createValidationTestCases, runValidationTests, formatDiagnostic, assertValidAIRequest } from './troubleshooting-helper.js';
|
|
40
42
|
// Export object types library
|
|
41
43
|
export { OBJECT_TYPES_LIBRARY, getObjectType, getObjectTypesForAgent } from './object-types-library.js';
|
|
42
|
-
//
|
|
44
|
+
// Object-types library stubs (optional @x12i/outputs-library integration; see object-types-library-integration.ts)
|
|
43
45
|
export { initializeObjectTypesLibrary, getObjectTypesLibrary, resetObjectTypesLibrary } from './object-types-library-integration.js';
|
|
44
|
-
// Re-export outputs library types and utilities for convenience
|
|
45
|
-
// Note: Since we use dynamic imports for the outputs library, these types may not be available
|
|
46
|
-
// at compile time if the package isn't installed. Users can import directly from
|
|
47
|
-
// @x12i/outputs-library if they need these types or utilities.
|
|
48
|
-
//
|
|
49
|
-
// Recommended: Import types and utilities directly from @x12i/outputs-library:
|
|
50
|
-
// import type { ClassificationOutput } from '@x12i/outputs-library/types';
|
|
51
|
-
// import { ResponseParser } from '@x12i/outputs-library/parsers';
|
|
52
|
-
// import type { ObjectTypesLibrary, FlexMdSupport } from '@x12i/outputs-library';
|
|
53
|
-
//
|
|
54
|
-
// The gateway integrates with the outputs library internally via dynamic imports,
|
|
55
|
-
// so these re-exports are optional and mainly for convenience.
|
|
56
|
-
//
|
|
57
|
-
// For outputs-library v3.3.1+ with flex-md support:
|
|
58
|
-
// - ObjectTypesLibrary class with flex-md methods (getFlexMdTemplate, getFlexMdFormatSpec, etc.)
|
|
59
|
-
// - FlexMdSupport type for object type definitions
|
|
60
|
-
// - All flex-md methods are available on the library instance returned by getObjectTypesLibrary()
|
package/dist-cjs/index.d.ts
CHANGED
|
@@ -16,8 +16,8 @@ export * from '@x12i/ai-providers-router';
|
|
|
16
16
|
export { AIGateway } from './gateway.js';
|
|
17
17
|
export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
|
|
18
18
|
export { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
19
|
-
export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
|
|
20
|
-
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
19
|
+
export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
|
|
20
|
+
export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
|
|
21
21
|
export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
|
|
22
22
|
export type { GatewayOperationalMode, GatewayDefaultModelSource, DefaultModelSubstitutionReason, ResolvedGatewayDefault } from './gateway-mode.js';
|
|
23
23
|
export type { ActivityCostStatus, ResolvedActivityCost } from './gateway-utils.js';
|
|
@@ -29,8 +29,10 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
|
|
|
29
29
|
export type { GatewayDualMemoryRoot } from './memory-path-resolution.js';
|
|
30
30
|
export type { UsageTier } from './types.js';
|
|
31
31
|
export { Activix } from '@x12i/activix';
|
|
32
|
-
export type { ActivixRunContext, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
|
|
32
|
+
export type { ActivixRunContext, ActivixAutoCostOptions, ActivixCostShape, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
|
|
33
|
+
export { normalizeToActivixCostShape } from '@x12i/activix';
|
|
33
34
|
export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
35
|
+
export { OptimixerManager } from './optimixer-manager.js';
|
|
34
36
|
export type { ActivityIdentity } from './types.js';
|
|
35
37
|
export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
|
|
36
38
|
export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
|