@x12i/ai-gateway 9.3.5 → 9.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,6 +55,21 @@ async function substituteGatewayDefaultModel(merged, request, config, logger, me
55
55
  });
56
56
  applyGatewayDefaultToMerged(merged, defaults, config);
57
57
  }
58
+ /**
59
+ * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
60
+ */
61
+ export function isMaxTokensExplicitlySet(request, config) {
62
+ const useInternalDefaults = request.useInternalDefaults;
63
+ const internalDefaults = useInternalDefaults === 'skill'
64
+ ? config.internalSystemActions?.internalSkill
65
+ : useInternalDefaults === 'audit'
66
+ ? config.internalSystemActions?.skillAudit
67
+ : undefined;
68
+ return (request.config?.maxTokens !== undefined ||
69
+ request.modelConfig?.maxTokens !== undefined ||
70
+ internalDefaults?.maxTokens !== undefined ||
71
+ config.maxTokens !== undefined);
72
+ }
58
73
  /**
59
74
  * Merges config with defaults
60
75
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -175,12 +190,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
175
190
  if (!merged.model) {
176
191
  await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
177
192
  }
178
- // Auto-get maxTokens from flex-md if not explicitly set in ANY config source
179
- // Check all possible sources: request.config, internalDefaults, gateway config
180
- const maxTokensExplicitlySet = request.config?.maxTokens !== undefined ||
181
- internalDefaults?.maxTokens !== undefined ||
182
- config.maxTokens !== undefined;
183
- if (!maxTokensExplicitlySet && merged.model && merged.provider) {
193
+ const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
194
+ const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
195
+ // Auto-get maxTokens from flex-md when Optimixer is not handling adaptive max_tokens.
196
+ if (!optimixerWillPredict && !maxTokensExplicitlySet && merged.model && merged.provider) {
184
197
  // Try to get maxTokens from flex-md
185
198
  try {
186
199
  const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(merged.provider, merged.model);
@@ -219,7 +232,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
219
232
  });
220
233
  }
221
234
  }
222
- else if (!merged.maxTokens) {
235
+ else if (!merged.maxTokens && !optimixerWillPredict) {
223
236
  // If maxTokens wasn't set and wasn't auto-detected, use fallback
224
237
  // This should rarely happen, but handle edge cases
225
238
  merged.maxTokens = 2000;
@@ -228,7 +241,15 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
228
241
  model: merged.model,
229
242
  provider: merged.provider,
230
243
  maxTokens: merged.maxTokens,
231
- maxTokensExplicitlySet
244
+ maxTokensExplicitlySet,
245
+ optimixerWillPredict
246
+ });
247
+ }
248
+ else if (optimixerWillPredict) {
249
+ logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
250
+ jobId: request.identity.jobId,
251
+ model: merged.model,
252
+ provider: merged.provider
232
253
  });
233
254
  }
234
255
  else {
@@ -434,6 +455,50 @@ export function resolveCostCompletionForActivity(routerResponse, tokens) {
434
455
  }
435
456
  return resolveActivityCostCompletion(tokens, costUsd);
436
457
  }
458
+ /** Record shape for {@link CostCalculator.calculateFromRecord} (router + merged config + usage). */
459
+ export function buildGatewayPricingRecord(routerResponse, tokens, mergedConfig) {
460
+ const base = routerResponse != null && typeof routerResponse === 'object'
461
+ ? { ...routerResponse }
462
+ : {};
463
+ const meta = base.metadata != null && typeof base.metadata === 'object'
464
+ ? { ...base.metadata }
465
+ : {};
466
+ const routing = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
467
+ return {
468
+ ...base,
469
+ usage: {
470
+ promptTokens: tokens.prompt,
471
+ completionTokens: tokens.completion,
472
+ totalTokens: tokens.total
473
+ },
474
+ tokens,
475
+ metadata: {
476
+ ...meta,
477
+ tokens,
478
+ ...(routing.provider ? { provider: routing.provider } : {}),
479
+ ...(routing.modelUsed
480
+ ? { modelUsed: routing.modelUsed, model: routing.modelUsed }
481
+ : {})
482
+ },
483
+ ...(mergedConfig != null ? { config: mergedConfig } : {})
484
+ };
485
+ }
486
+ export function mapAiCostResultToResolvedActivityCost(base, result) {
487
+ if (result.unknownModel) {
488
+ return base.costStatus ? base : { ...base, costStatus: 'unpriced' };
489
+ }
490
+ if (typeof result.cost !== 'number' || !Number.isFinite(result.cost)) {
491
+ return base;
492
+ }
493
+ if (!result.isAuthoritative && result.source === 'estimate-fallback') {
494
+ return base.costStatus ? base : { ...base, costStatus: 'unpriced' };
495
+ }
496
+ return {
497
+ cost: result.cost,
498
+ costStatus: 'priced',
499
+ ...(result.breakdown ? { costBreakdown: result.breakdown } : {})
500
+ };
501
+ }
437
502
  /**
438
503
  * Router cost passthrough, then optional @x12i/ai-tools catalog pricing when still unpriced.
439
504
  */
@@ -452,37 +517,114 @@ export async function resolveCostCompletionWithAiTools(routerResponse, tokens, o
452
517
  if (!hasNonZeroTokenUsage(tokens)) {
453
518
  return base;
454
519
  }
455
- const routing = pickInvokeRoutingMetadataSlice(routerResponse, options.mergedConfig);
456
- const cfg = options.mergedConfig != null && typeof options.mergedConfig === 'object'
457
- ? options.mergedConfig
458
- : {};
459
- const provider = routing.provider ?? cfg.provider;
460
- const modelUsed = routing.modelUsed ?? cfg.model;
461
- if (!provider || !modelUsed) {
462
- return base;
463
- }
464
520
  try {
465
- const result = await options.calculator.calculate({
466
- tokens: {
467
- prompt: tokens.prompt,
468
- completion: tokens.completion,
469
- total: tokens.total
470
- },
471
- provider,
472
- modelUsed
473
- });
474
- if (typeof result.cost === 'number' && Number.isFinite(result.cost)) {
475
- return {
476
- cost: result.cost,
477
- costStatus: 'priced',
478
- ...(result.breakdown ? { costBreakdown: result.breakdown } : {})
479
- };
480
- }
521
+ const record = buildGatewayPricingRecord(routerResponse, tokens, options.mergedConfig);
522
+ const result = await options.calculator.calculateFromRecord(record);
523
+ return mapAiCostResultToResolvedActivityCost(base, result);
481
524
  }
482
525
  catch {
483
- // Keep router/gateway unpriced fallback
526
+ const routing = pickInvokeRoutingMetadataSlice(routerResponse, options.mergedConfig);
527
+ const cfg = options.mergedConfig != null && typeof options.mergedConfig === 'object'
528
+ ? options.mergedConfig
529
+ : {};
530
+ const provider = routing.provider ?? cfg.provider;
531
+ const modelUsed = routing.modelUsed ?? cfg.model;
532
+ if (!provider || !modelUsed) {
533
+ return base;
534
+ }
535
+ try {
536
+ const result = await options.calculator.calculate({
537
+ tokens: {
538
+ prompt: tokens.prompt,
539
+ completion: tokens.completion,
540
+ total: tokens.total
541
+ },
542
+ provider,
543
+ usedModel: modelUsed
544
+ });
545
+ return mapAiCostResultToResolvedActivityCost(base, result);
546
+ }
547
+ catch {
548
+ return base;
549
+ }
550
+ }
551
+ }
552
+ function applyBillingToTraceAttempt(attempt, billing) {
553
+ if (billing.costStatus === 'priced' || billing.costStatus === 'unpriced') {
554
+ attempt.costStatus = billing.costStatus;
555
+ }
556
+ if (typeof billing.cost === 'number' && Number.isFinite(billing.cost)) {
557
+ attempt.costUsd = billing.cost;
558
+ }
559
+ if (billing.costBreakdown) {
560
+ attempt.costBreakdown = billing.costBreakdown;
561
+ }
562
+ }
563
+ function buildTraceAttemptPricingRecord(attempt, mergedConfig) {
564
+ const tokens = attempt.usage?.tokens ?? { prompt: 0, completion: 0, total: 0 };
565
+ return buildGatewayPricingRecord({
566
+ metadata: {
567
+ provider: attempt.routing.provider,
568
+ modelUsed: attempt.modelUsed,
569
+ region: attempt.routing.region,
570
+ tokens
571
+ }
572
+ }, tokens, mergedConfig);
573
+ }
574
+ /**
575
+ * Trace-mode summary: final token usage + resolved billing (after catalog pricing when applicable).
576
+ */
577
+ export function buildTraceUsageSummary(tokens, billing, maxTokensRequested) {
578
+ if (!hasNonZeroTokenUsage(tokens) && !billing.costStatus) {
579
+ return undefined;
580
+ }
581
+ const summary = { tokens };
582
+ if (maxTokensRequested !== undefined) {
583
+ summary.maxTokensRequested = maxTokensRequested;
584
+ }
585
+ if (billing.costStatus === 'priced' && typeof billing.cost === 'number') {
586
+ summary.costUsd = billing.cost;
587
+ summary.cost = billing.cost;
588
+ }
589
+ if (billing.costStatus) {
590
+ summary.costStatus = billing.costStatus;
591
+ }
592
+ if (billing.costBreakdown) {
593
+ summary.costBreakdown = billing.costBreakdown;
484
594
  }
485
- return base;
595
+ return summary;
596
+ }
597
+ /**
598
+ * Apply resolved billing to trace attempts: final successful attempt gets aggregate billing;
599
+ * other successful attempts without router cost get per-attempt catalog pricing when enabled.
600
+ */
601
+ export async function enrichTraceAttemptsWithBilling(attempts, finalBilling, options) {
602
+ if (!attempts.length)
603
+ return;
604
+ let lastOkIdx = -1;
605
+ for (let i = attempts.length - 1; i >= 0; i--) {
606
+ if (attempts[i].ok) {
607
+ lastOkIdx = i;
608
+ break;
609
+ }
610
+ }
611
+ if (lastOkIdx >= 0) {
612
+ applyBillingToTraceAttempt(attempts[lastOkIdx], finalBilling);
613
+ }
614
+ if (options?.calculateCost === false || !options?.calculator) {
615
+ return;
616
+ }
617
+ await Promise.all(attempts.map(async (attempt, idx) => {
618
+ if (!attempt.ok || idx === lastOkIdx)
619
+ return;
620
+ const tokens = attempt.usage?.tokens;
621
+ if (!tokens || !hasNonZeroTokenUsage(tokens))
622
+ return;
623
+ if (attempt.costStatus === 'priced' && typeof attempt.costUsd === 'number')
624
+ return;
625
+ const slice = await resolveCostCompletionWithAiTools(buildTraceAttemptPricingRecord(attempt, options.mergedConfig), tokens, options);
626
+ applyBillingToTraceAttempt(attempt, slice);
627
+ }));
486
628
  }
487
629
  /**
488
630
  * Stable routing facts for gateway response metadata (router metadata + merged config fallbacks).
@@ -693,3 +835,30 @@ export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIV
693
835
  _preview: serialized.slice(0, maxChars)
694
836
  };
695
837
  }
838
+ export function resolveFinishReasonFromRouterResponse(response) {
839
+ if (response == null || typeof response !== 'object')
840
+ return undefined;
841
+ const r = response;
842
+ const meta = r.metadata != null && typeof r.metadata === 'object' ? r.metadata : undefined;
843
+ const candidates = [
844
+ r.finishReason,
845
+ r.finish_reason,
846
+ meta?.finishReason,
847
+ meta?.finish_reason
848
+ ];
849
+ for (const c of candidates) {
850
+ if (typeof c === 'string' && c.trim())
851
+ return c.trim();
852
+ }
853
+ return undefined;
854
+ }
855
+ export function buildOptimixerActualUsage(tokens, response, latencyMs) {
856
+ const finishReason = resolveFinishReasonFromRouterResponse(response);
857
+ return {
858
+ promptTokens: tokens.prompt,
859
+ completionTokens: tokens.completion,
860
+ totalTokens: tokens.total,
861
+ ...(finishReason ? { finishReason } : {}),
862
+ latencyMs
863
+ };
864
+ }
@@ -2,9 +2,9 @@
2
2
  * Gateway Utilities Module
3
3
  * Handles utility functions
4
4
  */
5
- import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayInvokeRejectionMetadata, GatewayTraceMergedConfig, GatewayTraceRequestIds, ModelConfig } from './types.js';
5
+ import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayInvokeRejectionMetadata, GatewayTraceAttempt, GatewayTraceMergedConfig, GatewayTraceRequestIds, GatewayTraceUsageSummary, ModelConfig } from './types.js';
6
6
  import type { Logxer } from '@x12i/logxer';
7
- import { type AiModelsCatalogClient, type CostCalculator } from '@x12i/ai-tools';
7
+ import { type AiCostResult, type AiModelsCatalogClient, type CostCalculator } from '@x12i/ai-tools';
8
8
  /**
9
9
  * Generates MD5 hash of a string
10
10
  */
@@ -17,6 +17,12 @@ export type MergeConfigOptions = {
17
17
  defaultModelConfig?: Record<string, unknown>;
18
18
  catalog?: AiModelsCatalogClient | null;
19
19
  };
20
+ /**
21
+ * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
22
+ */
23
+ export declare function isMaxTokensExplicitlySet(request: ChatRequest & {
24
+ useInternalDefaults?: 'skill' | 'audit';
25
+ }, config: GatewayConfig): boolean;
20
26
  /**
21
27
  * Merges config with defaults
22
28
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -91,6 +97,13 @@ export type ResolveCostCompletionOptions = {
91
97
  calculator?: CostCalculator | null;
92
98
  calculateCost?: boolean;
93
99
  };
100
+ /** Record shape for {@link CostCalculator.calculateFromRecord} (router + merged config + usage). */
101
+ export declare function buildGatewayPricingRecord(routerResponse: unknown, tokens: {
102
+ prompt: number;
103
+ completion: number;
104
+ total: number;
105
+ }, mergedConfig?: unknown): Record<string, unknown>;
106
+ export declare function mapAiCostResultToResolvedActivityCost(base: ResolvedActivityCost, result: AiCostResult): ResolvedActivityCost;
94
107
  /**
95
108
  * Router cost passthrough, then optional @x12i/ai-tools catalog pricing when still unpriced.
96
109
  */
@@ -99,6 +112,19 @@ export declare function resolveCostCompletionWithAiTools(routerResponse: unknown
99
112
  completion: number;
100
113
  total: number;
101
114
  }, options?: ResolveCostCompletionOptions): Promise<ResolvedActivityCost>;
115
+ /**
116
+ * Trace-mode summary: final token usage + resolved billing (after catalog pricing when applicable).
117
+ */
118
+ export declare function buildTraceUsageSummary(tokens: {
119
+ prompt: number;
120
+ completion: number;
121
+ total: number;
122
+ }, billing: ResolvedActivityCost, maxTokensRequested?: number): GatewayTraceUsageSummary | undefined;
123
+ /**
124
+ * Apply resolved billing to trace attempts: final successful attempt gets aggregate billing;
125
+ * other successful attempts without router cost get per-attempt catalog pricing when enabled.
126
+ */
127
+ export declare function enrichTraceAttemptsWithBilling(attempts: GatewayTraceAttempt[], finalBilling: ResolvedActivityCost, options?: ResolveCostCompletionOptions): Promise<void>;
102
128
  /**
103
129
  * Stable routing facts for gateway response metadata (router metadata + merged config fallbacks).
104
130
  * Matches trace-mode resolution; intended for every successful invoke(), not only diagnostics.trace.
@@ -145,4 +171,10 @@ export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
145
171
  * Non-serializable values become a small marker object instead of throwing.
146
172
  */
147
173
  export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
174
+ export declare function resolveFinishReasonFromRouterResponse(response: unknown): string | undefined;
175
+ export declare function buildOptimixerActualUsage(tokens: {
176
+ prompt: number;
177
+ completion: number;
178
+ total: number;
179
+ }, response: unknown, latencyMs: number): import('@x12i/optimixer').AiMaxTokensActualUsage;
148
180
  export {};
@@ -7,9 +7,9 @@ import { validateChatRequest, validateAIRequest } from './gateway-validation.js'
7
7
  import { ensureGatewayRequestIdentity } from './activity-manager.js';
8
8
  import { initializeGatewayComponents } from './gateway-config.js';
9
9
  import { buildMessages } from './message-builder.js';
10
- import { extractJsonFromFlexMd } from './flex-md-loader.js';
10
+ import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
11
11
  import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
12
- import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
12
+ import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
13
13
  import { getAiToolsClient } from './ai-tools-client.js';
14
14
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
15
15
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
@@ -45,6 +45,7 @@ export class AIGateway {
45
45
  config;
46
46
  logger;
47
47
  activityManager;
48
+ optimixerManager;
48
49
  messageBuilderConfig;
49
50
  defaultModelConfig = {};
50
51
  _autoRegisterDone = false;
@@ -56,6 +57,7 @@ export class AIGateway {
56
57
  this.logger = components.logger;
57
58
  this.router = components.router;
58
59
  this.activityManager = components.activityManager;
60
+ this.optimixerManager = components.optimixerManager;
59
61
  this.messageBuilderConfig = components.messageBuilderConfig;
60
62
  this.defaultModelConfig = components.defaultModelConfig ?? {};
61
63
  setGatewayRuntimeClients({
@@ -93,6 +95,7 @@ export class AIGateway {
93
95
  await autoRegisterProviders(this.router, this.logger);
94
96
  this._autoRegisterDone = true;
95
97
  }
98
+ const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
96
99
  // Start activity tracking if available
97
100
  let activity = undefined;
98
101
  if (this.activityManager) {
@@ -166,6 +169,9 @@ export class AIGateway {
166
169
  });
167
170
  }
168
171
  }
172
+ if (optimixerPrediction) {
173
+ await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
174
+ }
169
175
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
170
176
  tokens: enhancedResponse.metadata.tokens,
171
177
  costUsd: enhancedResponse.metadata.costUsd,
@@ -279,6 +285,7 @@ export class AIGateway {
279
285
  await autoRegisterProviders(this.router, this.logger);
280
286
  this._autoRegisterDone = true;
281
287
  }
288
+ const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
282
289
  // Start activity tracking if available
283
290
  let activity = undefined;
284
291
  if (this.activityManager) {
@@ -567,6 +574,16 @@ export class AIGateway {
567
574
  const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
568
575
  const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
569
576
  const traceMergedRouterSnapshot = traceEnabled ? pickTraceMergedRouterConfig(mergedConfig) : undefined;
577
+ if (traceEnabled && traceAttempts) {
578
+ await enrichTraceAttemptsWithBilling(traceAttempts, costCompletion, {
579
+ mergedConfig,
580
+ calculator: aiTools?.calculator ?? null,
581
+ calculateCost: this.config.aiTools?.calculateCost
582
+ });
583
+ }
584
+ const traceUsageSummary = traceEnabled
585
+ ? buildTraceUsageSummary(tokens, costCompletion, routingMetadataSlice.maxTokensRequested)
586
+ : undefined;
570
587
  const enhancedResponse = {
571
588
  content: content,
572
589
  parsedContent: parsedContent,
@@ -597,6 +614,7 @@ export class AIGateway {
597
614
  retryCount: traceRetryCount,
598
615
  fallbackCount: traceFallbackCount,
599
616
  attempts: traceAttempts,
617
+ ...(traceUsageSummary !== undefined ? { usage: traceUsageSummary } : {}),
600
618
  ...(traceMergedRouterSnapshot !== undefined
601
619
  ? { mergedRouterConfig: traceMergedRouterSnapshot }
602
620
  : {})
@@ -643,6 +661,9 @@ export class AIGateway {
643
661
  });
644
662
  }
645
663
  }
664
+ if (optimixerPrediction) {
665
+ await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
666
+ }
646
667
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
647
668
  tokens: enhancedResponse.metadata.tokens,
648
669
  costUsd: enhancedResponse.metadata.costUsd,
@@ -676,6 +697,52 @@ export class AIGateway {
676
697
  throw err;
677
698
  }
678
699
  }
700
+ async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
701
+ if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
702
+ return undefined;
703
+ }
704
+ const prediction = await this.optimixerManager.predictMaxTokens({
705
+ request,
706
+ mergedConfig,
707
+ messages
708
+ });
709
+ if (prediction) {
710
+ let maxTokens = prediction.recommendedMaxTokens;
711
+ const useCeiling = this.config.optimixer?.useFlexMdCeiling !== false;
712
+ if (useCeiling && mergedConfig?.model && mergedConfig?.provider) {
713
+ try {
714
+ const ceiling = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
715
+ if (typeof ceiling === 'number' && ceiling > 0 && maxTokens > ceiling) {
716
+ maxTokens = ceiling;
717
+ }
718
+ }
719
+ catch {
720
+ // Non-blocking: use uncapped prediction
721
+ }
722
+ }
723
+ mergedConfig.maxTokens = maxTokens;
724
+ request._mergedRouterConfig = mergedConfig;
725
+ this.logger.debug('Applied Optimixer recommended max_tokens', {
726
+ aiRequestId: request.aiRequestId,
727
+ recommendedMaxTokens: prediction.recommendedMaxTokens,
728
+ maxTokens,
729
+ confidence: prediction.confidence,
730
+ requestId: prediction.requestId
731
+ });
732
+ return prediction;
733
+ }
734
+ if (mergedConfig?.maxTokens === undefined && mergedConfig?.model && mergedConfig?.provider) {
735
+ try {
736
+ const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
737
+ mergedConfig.maxTokens = flexMdMaxTokens && flexMdMaxTokens > 0 ? flexMdMaxTokens : 2000;
738
+ }
739
+ catch {
740
+ mergedConfig.maxTokens = 2000;
741
+ }
742
+ request._mergedRouterConfig = mergedConfig;
743
+ }
744
+ return undefined;
745
+ }
679
746
  /**
680
747
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
681
748
  */
@@ -15,6 +15,7 @@ export declare class AIGateway {
15
15
  private config;
16
16
  private logger;
17
17
  private activityManager?;
18
+ private optimixerManager?;
18
19
  private messageBuilderConfig?;
19
20
  private defaultModelConfig;
20
21
  private _autoRegisterDone;
@@ -28,6 +29,7 @@ export declare class AIGateway {
28
29
  * Invoke AI request (with structured output support)
29
30
  */
30
31
  invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
32
+ private applyAdaptiveMaxTokensIfEnabled;
31
33
  /**
32
34
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
33
35
  */
@@ -17,7 +17,7 @@ export * from '@x12i/ai-providers-router';
17
17
  export { AIGateway } from './gateway.js';
18
18
  export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
19
19
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
20
- export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, hasNonZeroTokenUsage } from './gateway-utils.js';
20
+ export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
21
21
  export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
22
  export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
23
23
  export { mergeGatewayAndRequestTemplateRenderOptions, mergeTemplateRenderOptions } from './template-render-merge.js';
@@ -26,7 +26,9 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
26
26
  // (x-models was previously used for RPM/TPM tracking but is no longer integrated)
27
27
  // Re-export activity tracking primitives (Activix)
28
28
  export { Activix } from '@x12i/activix';
29
+ export { normalizeToActivixCostShape } from '@x12i/activix';
29
30
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
31
+ export { OptimixerManager } from './optimixer-manager.js';
30
32
  export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
31
33
  // Re-export logging (@x12i/logxer)
32
34
  export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
@@ -39,22 +41,5 @@ export { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from '
39
41
  export { validateAIRequest, validateJSON, extractJSON, validateResponse, diagnoseRequest, diagnoseResponse, supportsJSONMode, createTestAIRequest, createValidationTestCases, runValidationTests, formatDiagnostic, assertValidAIRequest } from './troubleshooting-helper.js';
40
42
  // Export object types library
41
43
  export { OBJECT_TYPES_LIBRARY, getObjectType, getObjectTypesForAgent } from './object-types-library.js';
42
- // Re-export outputs library integration functions
44
+ // Object-types library stubs (optional @x12i/outputs-library integration; see object-types-library-integration.ts)
43
45
  export { initializeObjectTypesLibrary, getObjectTypesLibrary, resetObjectTypesLibrary } from './object-types-library-integration.js';
44
- // Re-export outputs library types and utilities for convenience
45
- // Note: Since we use dynamic imports for the outputs library, these types may not be available
46
- // at compile time if the package isn't installed. Users can import directly from
47
- // @x12i/outputs-library if they need these types or utilities.
48
- //
49
- // Recommended: Import types and utilities directly from @x12i/outputs-library:
50
- // import type { ClassificationOutput } from '@x12i/outputs-library/types';
51
- // import { ResponseParser } from '@x12i/outputs-library/parsers';
52
- // import type { ObjectTypesLibrary, FlexMdSupport } from '@x12i/outputs-library';
53
- //
54
- // The gateway integrates with the outputs library internally via dynamic imports,
55
- // so these re-exports are optional and mainly for convenience.
56
- //
57
- // For outputs-library v3.3.1+ with flex-md support:
58
- // - ObjectTypesLibrary class with flex-md methods (getFlexMdTemplate, getFlexMdFormatSpec, etc.)
59
- // - FlexMdSupport type for object type definitions
60
- // - All flex-md methods are available on the library instance returned by getObjectTypesLibrary()
@@ -16,8 +16,8 @@ export * from '@x12i/ai-providers-router';
16
16
  export { AIGateway } from './gateway.js';
17
17
  export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
18
18
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
19
- export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
- export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, hasNonZeroTokenUsage } from './gateway-utils.js';
19
+ export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
+ export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
21
21
  export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
22
  export type { GatewayOperationalMode, GatewayDefaultModelSource, DefaultModelSubstitutionReason, ResolvedGatewayDefault } from './gateway-mode.js';
23
23
  export type { ActivityCostStatus, ResolvedActivityCost } from './gateway-utils.js';
@@ -29,8 +29,10 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
29
29
  export type { GatewayDualMemoryRoot } from './memory-path-resolution.js';
30
30
  export type { UsageTier } from './types.js';
31
31
  export { Activix } from '@x12i/activix';
32
- export type { ActivixRunContext, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
32
+ export type { ActivixRunContext, ActivixAutoCostOptions, ActivixCostShape, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
33
+ export { normalizeToActivixCostShape } from '@x12i/activix';
33
34
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
35
+ export { OptimixerManager } from './optimixer-manager.js';
34
36
  export type { ActivityIdentity } from './types.js';
35
37
  export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
36
38
  export { createLogxer, DebugLogAbstract } from '@x12i/logxer';