@x12i/ai-gateway 9.4.0 → 9.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -230,6 +230,7 @@ Live tests use `LIVE_TEST_PROVIDER` / `LIVE_TEST_MODEL` (default `openrouter` +
230
230
  | [LOGGER_INITIALIZATION.md](./docs/LOGGER_INITIALIZATION.md) | Logxer setup |
231
231
  | [flex-md-compliance.md](./docs/flex-md-compliance.md) | Output format levels |
232
232
  | [PROMPT_TEMPLATE_USAGE.md](./docs/PROMPT_TEMPLATE_USAGE.md) | Rendrix templates |
233
+ | [upstream-reports/README.md](./docs/upstream-reports/README.md) | Upstream issues (one file per package/gap) |
233
234
  | [UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md](./docs/UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md) | Parser v4 |
234
235
  | [RUNTIME_OBJECTS_OBSERVABILITY.md](./docs/RUNTIME_OBJECTS_OBSERVABILITY.md) | Runtime object keys |
235
236
  | [GRAPH_EXECUTION_SUPPORT.md](./docs/GRAPH_EXECUTION_SUPPORT.md) | Graph / node identity |
package/dist/activity-manager.d.ts CHANGED
@@ -198,6 +198,9 @@ export declare class ActivityManager {
198
198
  * @returns Activix instance or undefined if not enabled
199
199
  */
200
200
  getTracker(): Activix | undefined;
201
+ /** Await Activix init (no-op when tracking is disabled). */
202
+ getReadyTracker(): Promise<Activix | undefined>;
203
+ private logActivixBackendReady;
201
204
  /**
202
205
  * Get status of activity tracker
203
206
  */
package/dist/activity-manager.js CHANGED
@@ -376,41 +376,24 @@ export class ActivityManager {
376
376
  ? { enabled: true, overwriteOuterCost: false }
377
377
  : { enabled: true, overwriteOuterCost: false, ...config.autoCost };
378
378
  }
379
- this.activix = config.customTracker ?? new Activix(activixOptions);
380
- this.initPromise = this.activix
381
- .init()
382
- .then(() => {
383
- const ax = this.activix;
384
- if (!ax) {
385
- return;
386
- }
387
- const backend = ax.storageBackend;
388
- const mongoDb = backend === 'database' ? resolveActivixLogsDatabaseName() : undefined;
389
- const mongoUriConfigured = Boolean(resolveActivixMongoUriFromEnv());
390
- this.logger.info('Activity tracking persistence backend ready', {
391
- storageBackend: backend,
392
- mongoDatabase: mongoDb,
393
- mongoUriConfigured,
394
- mainCollection: collectionName,
395
- badRequestsCollection: badRequestsCollectionName,
396
- skillExecutionsCollection: this.skillExecutionsCollectionName,
397
- ...(backend === 'local'
398
- ? {
399
- note: 'Activix is using local playground storage, not MongoDB. The ai-actions collection will not appear in Mongo until URI is set (MONGO_URI or MONGO_LOGS_URI), Activix can ping the database, and at least one activity is written.'
400
- }
401
- : {
402
- note: 'MongoDB stores one document per activity; the ai-actions collection is created on first insert (empty collections may be hidden in some tools until then).'
403
- })
404
- });
405
- })
406
- .catch((error) => {
407
- // Init threw — disable tracker so requests are not blocked.
408
- this.logger.warn('Activity tracking enabled but Activix init failed. Activity records will not be persisted.', {
409
- error: error instanceof Error ? error.message : String(error),
410
- hint: 'Set MONGO_URI or MONGO_LOGS_URI and a database name (MONGO_LOGS_DB, MONGO_DB, MONGO_AI_LOGS_DB, or ACTIVIX_DB_NAME). See README: Activity tracking / persistence troubleshooting.'
379
+ if (config.customTracker) {
380
+ this.activix = config.customTracker;
381
+ this.initPromise = Promise.resolve().then(() => this.logActivixBackendReady(collectionName, badRequestsCollectionName));
382
+ }
383
+ else {
384
+ this.initPromise = Activix.create(activixOptions)
385
+ .then((ax) => {
386
+ this.activix = ax;
387
+ this.logActivixBackendReady(collectionName, badRequestsCollectionName);
388
+ })
389
+ .catch((error) => {
390
+ this.logger.warn('Activity tracking enabled but Activix init failed. Activity records will not be persisted.', {
391
+ error: error instanceof Error ? error.message : String(error),
392
+ hint: 'Set MONGO_URI or MONGO_LOGS_URI and a database name (MONGO_LOGS_DB, MONGO_DB, MONGO_AI_LOGS_DB, or ACTIVIX_DB_NAME). See README: Activity tracking / persistence troubleshooting.'
393
+ });
394
+ this.activix = undefined;
411
395
  });
412
- this.activix = undefined;
413
- });
396
+ }
414
397
  this.logger.debug('Activity tracking enabled with Activix', {
415
398
  collection: collectionName,
416
399
  badRequestsCollection: badRequestsCollectionName,
@@ -1215,6 +1198,36 @@ export class ActivityManager {
1215
1198
  getTracker() {
1216
1199
  return this.activix;
1217
1200
  }
1201
+ /** Await Activix init (no-op when tracking is disabled). */
1202
+ async getReadyTracker() {
1203
+ if (this.initPromise) {
1204
+ await this.initPromise;
1205
+ }
1206
+ return this.activix;
1207
+ }
1208
+ logActivixBackendReady(collectionName, badRequestsCollectionName) {
1209
+ const ax = this.activix;
1210
+ if (!ax)
1211
+ return;
1212
+ const backend = ax.storageBackend;
1213
+ const mongoDb = backend === 'database' ? resolveActivixLogsDatabaseName() : undefined;
1214
+ const mongoUriConfigured = Boolean(resolveActivixMongoUriFromEnv());
1215
+ this.logger.info('Activity tracking persistence backend ready', {
1216
+ storageBackend: backend,
1217
+ mongoDatabase: mongoDb,
1218
+ mongoUriConfigured,
1219
+ mainCollection: collectionName,
1220
+ badRequestsCollection: badRequestsCollectionName,
1221
+ skillExecutionsCollection: this.skillExecutionsCollectionName,
1222
+ ...(backend === 'local'
1223
+ ? {
1224
+ note: 'Activix is using local playground storage, not MongoDB. The ai-actions collection will not appear in Mongo until URI is set (MONGO_URI or MONGO_LOGS_URI), Activix can ping the database, and at least one activity is written.'
1225
+ }
1226
+ : {
1227
+ note: 'MongoDB stores one document per activity; the ai-actions collection is created on first insert (empty collections may be hidden in some tools until then).'
1228
+ })
1229
+ });
1230
+ }
1218
1231
  /**
1219
1232
  * Get status of activity tracker
1220
1233
  */
package/dist/gateway-config.d.ts CHANGED
@@ -6,6 +6,7 @@ import type { GatewayConfig } from './types.js';
6
6
  import type { Logxer } from '@x12i/logxer';
7
7
  import { LLMProviderRouter } from '@x12i/ai-providers-router';
8
8
  import { ActivityManager } from './activity-manager.js';
9
+ import { OptimixerManager } from './optimixer-manager.js';
9
10
  import { UsageTracker } from './usage-tracker.js';
10
11
  import type { MessageBuilderConfig } from './message-builder.js';
11
12
  import type { TemplateRenderOptions } from '@x12i/rendrix';
@@ -16,6 +17,7 @@ export interface GatewayConfigContext {
16
17
  logger: Logxer;
17
18
  router: LLMProviderRouter;
18
19
  activityManager: ActivityManager;
20
+ optimixerManager: OptimixerManager;
19
21
  usageTracker: UsageTracker;
20
22
  messageBuilderConfig: MessageBuilderConfig;
21
23
  }
@@ -45,6 +47,7 @@ export declare function initializeGatewayComponents(config: GatewayConfig): {
45
47
  logger: Logxer;
46
48
  router: LLMProviderRouter;
47
49
  activityManager: ActivityManager;
50
+ optimixerManager: OptimixerManager;
48
51
  usageTracker: UsageTracker;
49
52
  messageBuilderConfig: MessageBuilderConfig;
50
53
  defaultModelConfig: Record<string, unknown>;
package/dist/gateway-config.js CHANGED
@@ -48,6 +48,7 @@ function getDefaultsDir() {
48
48
  import { LLMProviderRouter } from '@x12i/ai-providers-router';
49
49
  import { createGatewayLogger } from './logger-factory.js';
50
50
  import { ActivityManager } from './activity-manager.js';
51
+ import { OptimixerManager } from './optimixer-manager.js';
51
52
  import { UsageTracker } from './usage-tracker.js';
52
53
  import { mergeTemplateRenderOptions } from './template-render-merge.js';
53
54
  import { GatewayRateLimiter } from './gateway-rate-limiter.js';
@@ -278,6 +279,11 @@ export function initializeGatewayComponents(config) {
278
279
  }
279
280
  })
280
281
  });
282
+ const optimixerManager = new OptimixerManager({
283
+ optimixer: config.optimixer,
284
+ logger,
285
+ getActivix: () => activityManager.getReadyTracker()
286
+ });
281
287
  const templateRendering = mergeTemplateRenderOptions(defaultTemplateRendering, config.templateRendering);
282
288
  const instructionsBlockOverrides = {
283
289
  ...(config.instructionsBlocks ?? {})
@@ -293,6 +299,7 @@ export function initializeGatewayComponents(config) {
293
299
  logger,
294
300
  router,
295
301
  activityManager,
302
+ optimixerManager,
296
303
  usageTracker,
297
304
  messageBuilderConfig,
298
305
  defaultModelConfig
package/dist/gateway-utils.d.ts CHANGED
@@ -17,6 +17,12 @@ export type MergeConfigOptions = {
17
17
  defaultModelConfig?: Record<string, unknown>;
18
18
  catalog?: AiModelsCatalogClient | null;
19
19
  };
20
+ /**
21
+ * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
22
+ */
23
+ export declare function isMaxTokensExplicitlySet(request: ChatRequest & {
24
+ useInternalDefaults?: 'skill' | 'audit';
25
+ }, config: GatewayConfig): boolean;
20
26
  /**
21
27
  * Merges config with defaults
22
28
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -165,4 +171,10 @@ export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
165
171
  * Non-serializable values become a small marker object instead of throwing.
166
172
  */
167
173
  export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
174
+ export declare function resolveFinishReasonFromRouterResponse(response: unknown): string | undefined;
175
+ export declare function buildOptimixerActualUsage(tokens: {
176
+ prompt: number;
177
+ completion: number;
178
+ total: number;
179
+ }, response: unknown, latencyMs: number): import('@x12i/optimixer').AiMaxTokensActualUsage;
168
180
  export {};
package/dist/gateway-utils.js CHANGED
@@ -55,6 +55,21 @@ async function substituteGatewayDefaultModel(merged, request, config, logger, me
55
55
  });
56
56
  applyGatewayDefaultToMerged(merged, defaults, config);
57
57
  }
58
+ /**
59
+ * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
60
+ */
61
+ export function isMaxTokensExplicitlySet(request, config) {
62
+ const useInternalDefaults = request.useInternalDefaults;
63
+ const internalDefaults = useInternalDefaults === 'skill'
64
+ ? config.internalSystemActions?.internalSkill
65
+ : useInternalDefaults === 'audit'
66
+ ? config.internalSystemActions?.skillAudit
67
+ : undefined;
68
+ return (request.config?.maxTokens !== undefined ||
69
+ request.modelConfig?.maxTokens !== undefined ||
70
+ internalDefaults?.maxTokens !== undefined ||
71
+ config.maxTokens !== undefined);
72
+ }
58
73
  /**
59
74
  * Merges config with defaults
60
75
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -175,12 +190,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
175
190
  if (!merged.model) {
176
191
  await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
177
192
  }
178
- // Auto-get maxTokens from flex-md if not explicitly set in ANY config source
179
- // Check all possible sources: request.config, internalDefaults, gateway config
180
- const maxTokensExplicitlySet = request.config?.maxTokens !== undefined ||
181
- internalDefaults?.maxTokens !== undefined ||
182
- config.maxTokens !== undefined;
183
- if (!maxTokensExplicitlySet && merged.model && merged.provider) {
193
+ const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
194
+ const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
195
+ // Auto-get maxTokens from flex-md when Optimixer is not handling adaptive max_tokens.
196
+ if (!optimixerWillPredict && !maxTokensExplicitlySet && merged.model && merged.provider) {
184
197
  // Try to get maxTokens from flex-md
185
198
  try {
186
199
  const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(merged.provider, merged.model);
@@ -219,7 +232,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
219
232
  });
220
233
  }
221
234
  }
222
- else if (!merged.maxTokens) {
235
+ else if (!merged.maxTokens && !optimixerWillPredict) {
223
236
  // If maxTokens wasn't set and wasn't auto-detected, use fallback
224
237
  // This should rarely happen, but handle edge cases
225
238
  merged.maxTokens = 2000;
@@ -228,7 +241,15 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
228
241
  model: merged.model,
229
242
  provider: merged.provider,
230
243
  maxTokens: merged.maxTokens,
231
- maxTokensExplicitlySet
244
+ maxTokensExplicitlySet,
245
+ optimixerWillPredict
246
+ });
247
+ }
248
+ else if (optimixerWillPredict) {
249
+ logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
250
+ jobId: request.identity.jobId,
251
+ model: merged.model,
252
+ provider: merged.provider
232
253
  });
233
254
  }
234
255
  else {
@@ -814,3 +835,30 @@ export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIV
814
835
  _preview: serialized.slice(0, maxChars)
815
836
  };
816
837
  }
838
+ export function resolveFinishReasonFromRouterResponse(response) {
839
+ if (response == null || typeof response !== 'object')
840
+ return undefined;
841
+ const r = response;
842
+ const meta = r.metadata != null && typeof r.metadata === 'object' ? r.metadata : undefined;
843
+ const candidates = [
844
+ r.finishReason,
845
+ r.finish_reason,
846
+ meta?.finishReason,
847
+ meta?.finish_reason
848
+ ];
849
+ for (const c of candidates) {
850
+ if (typeof c === 'string' && c.trim())
851
+ return c.trim();
852
+ }
853
+ return undefined;
854
+ }
855
+ export function buildOptimixerActualUsage(tokens, response, latencyMs) {
856
+ const finishReason = resolveFinishReasonFromRouterResponse(response);
857
+ return {
858
+ promptTokens: tokens.prompt,
859
+ completionTokens: tokens.completion,
860
+ totalTokens: tokens.total,
861
+ ...(finishReason ? { finishReason } : {}),
862
+ latencyMs
863
+ };
864
+ }
package/dist/gateway.d.ts CHANGED
@@ -15,6 +15,7 @@ export declare class AIGateway {
15
15
  private config;
16
16
  private logger;
17
17
  private activityManager?;
18
+ private optimixerManager?;
18
19
  private messageBuilderConfig?;
19
20
  private defaultModelConfig;
20
21
  private _autoRegisterDone;
@@ -28,6 +29,7 @@ export declare class AIGateway {
28
29
  * Invoke AI request (with structured output support)
29
30
  */
30
31
  invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
32
+ private applyAdaptiveMaxTokensIfEnabled;
31
33
  /**
32
34
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
33
35
  */
package/dist/gateway.js CHANGED
@@ -7,9 +7,9 @@ import { validateChatRequest, validateAIRequest } from './gateway-validation.js'
7
7
  import { ensureGatewayRequestIdentity } from './activity-manager.js';
8
8
  import { initializeGatewayComponents } from './gateway-config.js';
9
9
  import { buildMessages } from './message-builder.js';
10
- import { extractJsonFromFlexMd } from './flex-md-loader.js';
10
+ import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
11
11
  import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
12
- import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
12
+ import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
13
13
  import { getAiToolsClient } from './ai-tools-client.js';
14
14
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
15
15
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
@@ -45,6 +45,7 @@ export class AIGateway {
45
45
  config;
46
46
  logger;
47
47
  activityManager;
48
+ optimixerManager;
48
49
  messageBuilderConfig;
49
50
  defaultModelConfig = {};
50
51
  _autoRegisterDone = false;
@@ -56,6 +57,7 @@ export class AIGateway {
56
57
  this.logger = components.logger;
57
58
  this.router = components.router;
58
59
  this.activityManager = components.activityManager;
60
+ this.optimixerManager = components.optimixerManager;
59
61
  this.messageBuilderConfig = components.messageBuilderConfig;
60
62
  this.defaultModelConfig = components.defaultModelConfig ?? {};
61
63
  setGatewayRuntimeClients({
@@ -93,6 +95,7 @@ export class AIGateway {
93
95
  await autoRegisterProviders(this.router, this.logger);
94
96
  this._autoRegisterDone = true;
95
97
  }
98
+ const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
96
99
  // Start activity tracking if available
97
100
  let activity = undefined;
98
101
  if (this.activityManager) {
@@ -166,6 +169,9 @@ export class AIGateway {
166
169
  });
167
170
  }
168
171
  }
172
+ if (optimixerPrediction) {
173
+ await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
174
+ }
169
175
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
170
176
  tokens: enhancedResponse.metadata.tokens,
171
177
  costUsd: enhancedResponse.metadata.costUsd,
@@ -279,6 +285,7 @@ export class AIGateway {
279
285
  await autoRegisterProviders(this.router, this.logger);
280
286
  this._autoRegisterDone = true;
281
287
  }
288
+ const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
282
289
  // Start activity tracking if available
283
290
  let activity = undefined;
284
291
  if (this.activityManager) {
@@ -654,6 +661,9 @@ export class AIGateway {
654
661
  });
655
662
  }
656
663
  }
664
+ if (optimixerPrediction) {
665
+ await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
666
+ }
657
667
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
658
668
  tokens: enhancedResponse.metadata.tokens,
659
669
  costUsd: enhancedResponse.metadata.costUsd,
@@ -687,6 +697,52 @@ export class AIGateway {
687
697
  throw err;
688
698
  }
689
699
  }
700
+ async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
701
+ if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
702
+ return undefined;
703
+ }
704
+ const prediction = await this.optimixerManager.predictMaxTokens({
705
+ request,
706
+ mergedConfig,
707
+ messages
708
+ });
709
+ if (prediction) {
710
+ let maxTokens = prediction.recommendedMaxTokens;
711
+ const useCeiling = this.config.optimixer?.useFlexMdCeiling !== false;
712
+ if (useCeiling && mergedConfig?.model && mergedConfig?.provider) {
713
+ try {
714
+ const ceiling = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
715
+ if (typeof ceiling === 'number' && ceiling > 0 && maxTokens > ceiling) {
716
+ maxTokens = ceiling;
717
+ }
718
+ }
719
+ catch {
720
+ // Non-blocking: use uncapped prediction
721
+ }
722
+ }
723
+ mergedConfig.maxTokens = maxTokens;
724
+ request._mergedRouterConfig = mergedConfig;
725
+ this.logger.debug('Applied Optimixer recommended max_tokens', {
726
+ aiRequestId: request.aiRequestId,
727
+ recommendedMaxTokens: prediction.recommendedMaxTokens,
728
+ maxTokens,
729
+ confidence: prediction.confidence,
730
+ requestId: prediction.requestId
731
+ });
732
+ return prediction;
733
+ }
734
+ if (mergedConfig?.maxTokens === undefined && mergedConfig?.model && mergedConfig?.provider) {
735
+ try {
736
+ const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
737
+ mergedConfig.maxTokens = flexMdMaxTokens && flexMdMaxTokens > 0 ? flexMdMaxTokens : 2000;
738
+ }
739
+ catch {
740
+ mergedConfig.maxTokens = 2000;
741
+ }
742
+ request._mergedRouterConfig = mergedConfig;
743
+ }
744
+ return undefined;
745
+ }
690
746
  /**
691
747
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
692
748
  */
package/dist/index.d.ts CHANGED
@@ -32,6 +32,7 @@ export { Activix } from '@x12i/activix';
32
32
  export type { ActivixRunContext, ActivixAutoCostOptions, ActivixCostShape, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
33
33
  export { normalizeToActivixCostShape } from '@x12i/activix';
34
34
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
35
+ export { OptimixerManager } from './optimixer-manager.js';
35
36
  export type { ActivityIdentity } from './types.js';
36
37
  export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
37
38
  export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
package/dist/index.js CHANGED
@@ -28,6 +28,7 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
28
28
  export { Activix } from '@x12i/activix';
29
29
  export { normalizeToActivixCostShape } from '@x12i/activix';
30
30
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
31
+ export { OptimixerManager } from './optimixer-manager.js';
31
32
  export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
32
33
  // Re-export logging (@x12i/logxer)
33
34
  export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
package/dist/optimixer-manager.d.ts ADDED
@@ -0,0 +1,33 @@
1
+ import type { AiMaxTokensActualUsage, AiMaxTokensPredictionResult } from '@x12i/optimixer';
2
+ import type { Activix } from '@x12i/activix';
3
+ import type { Logxer } from '@x12i/logxer';
4
+ import type { ChatRequest, GatewayConfig } from './types.js';
5
+ export type OptimixerGatewayConfig = NonNullable<GatewayConfig['optimixer']>;
6
+ export interface OptimixerManagerConfig {
7
+ optimixer?: OptimixerGatewayConfig;
8
+ logger: Logxer;
9
+ getActivix: () => Promise<Activix | undefined>;
10
+ }
11
+ export type OptimixerMaxTokensContext = {
12
+ request: ChatRequest;
13
+ mergedConfig: ChatRequest['config'];
14
+ messages: Array<{
15
+ role?: string;
16
+ content?: unknown;
17
+ }>;
18
+ };
19
+ export declare class OptimixerManager {
20
+ private readonly config;
21
+ private readonly logger;
22
+ private readonly getActivix;
23
+ private optimixer?;
24
+ private initPromise?;
25
+ private readonly activixCollection;
26
+ constructor(config: OptimixerManagerConfig);
27
+ isEnabled(): boolean;
28
+ private ensureReady;
29
+ private initialize;
30
+ predictMaxTokens(ctx: OptimixerMaxTokensContext): Promise<AiMaxTokensPredictionResult | undefined>;
31
+ completePrediction(requestId: string, actual: AiMaxTokensActualUsage): Promise<void>;
32
+ shutdown(): Promise<void>;
33
+ }
package/dist/optimixer-manager.js ADDED
@@ -0,0 +1,128 @@
1
+ import { Optimixer } from '@x12i/optimixer';
2
+ import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
3
+ import { estimateMessagesTokenSizes } from './token-estimate.js';
4
+ function resolveActionTypeId(request) {
5
+ const identity = request.identity;
6
+ if (identity?.actionType && String(identity.actionType).trim()) {
7
+ return String(identity.actionType).trim();
8
+ }
9
+ if (request.taskTypeId && String(request.taskTypeId).trim()) {
10
+ return String(request.taskTypeId).trim();
11
+ }
12
+ return 'gateway.invoke';
13
+ }
14
+ function toActivixRunContext(identity) {
15
+ if (!identity)
16
+ return undefined;
17
+ return identity;
18
+ }
19
+ export class OptimixerManager {
20
+ config;
21
+ logger;
22
+ getActivix;
23
+ optimixer;
24
+ initPromise;
25
+ activixCollection;
26
+ constructor(config) {
27
+ this.config = config.optimixer;
28
+ this.logger = config.logger;
29
+ this.getActivix = config.getActivix;
30
+ this.activixCollection = resolveActivityTrackingConfig().collectionName;
31
+ }
32
+ isEnabled() {
33
+ return this.config?.enabled === true;
34
+ }
35
+ async ensureReady() {
36
+ if (!this.isEnabled())
37
+ return undefined;
38
+ if (this.optimixer)
39
+ return this.optimixer;
40
+ if (!this.initPromise) {
41
+ this.initPromise = this.initialize();
42
+ }
43
+ await this.initPromise;
44
+ return this.optimixer;
45
+ }
46
+ async initialize() {
47
+ const activix = await this.getActivix();
48
+ if (!activix) {
49
+ this.logger.warn('Optimixer enabled but Activix is unavailable; adaptive max_tokens disabled', {
50
+ activixCollection: this.activixCollection
51
+ });
52
+ return;
53
+ }
54
+ try {
55
+ this.optimixer = await Optimixer.create({
56
+ activixClient: activix,
57
+ activixCollection: this.activixCollection,
58
+ pipelines: { aiMaxTokens: { enabled: true } },
59
+ ...(typeof this.config?.warmupLimit === 'number' ? { warmupLimit: this.config.warmupLimit } : {})
60
+ });
61
+ this.logger.info('Optimixer initialized for adaptive max_tokens', {
62
+ activixCollection: this.activixCollection,
63
+ acceptableRisk: this.config?.acceptableRisk ?? 'medium'
64
+ });
65
+ }
66
+ catch (error) {
67
+ this.logger.warn('Optimixer initialization failed; adaptive max_tokens disabled', {
68
+ error: error instanceof Error ? error.message : String(error)
69
+ });
70
+ this.optimixer = undefined;
71
+ }
72
+ }
73
+ async predictMaxTokens(ctx) {
74
+ const optimixer = await this.ensureReady();
75
+ if (!optimixer)
76
+ return undefined;
77
+ const { request, mergedConfig, messages } = ctx;
78
+ const { inputSize, contextSize } = estimateMessagesTokenSizes(messages);
79
+ const acceptableRisk = this.config?.acceptableRisk ?? 'medium';
80
+ try {
81
+ return await optimixer.predictAiMaxTokens({
82
+ actionTypeId: resolveActionTypeId(request),
83
+ inputSize,
84
+ contextSize,
85
+ acceptableRisk,
86
+ runContext: toActivixRunContext(request.identity),
87
+ provider: typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined,
88
+ model: typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined
89
+ });
90
+ }
91
+ catch (error) {
92
+ this.logger.warn('Optimixer predictAiMaxTokens failed; caller should use fallback max_tokens', {
93
+ error: error instanceof Error ? error.message : String(error),
94
+ aiRequestId: request.aiRequestId
95
+ });
96
+ return undefined;
97
+ }
98
+ }
99
+ async completePrediction(requestId, actual) {
100
+ const optimixer = await this.ensureReady();
101
+ if (!optimixer)
102
+ return;
103
+ try {
104
+ await optimixer.completeAiMaxTokensPrediction({ requestId, actual });
105
+ }
106
+ catch (error) {
107
+ this.logger.warn('Optimixer completeAiMaxTokensPrediction failed (non-blocking)', {
108
+ requestId,
109
+ error: error instanceof Error ? error.message : String(error)
110
+ });
111
+ }
112
+ }
113
+ async shutdown() {
114
+ const optimixer = this.optimixer;
115
+ this.optimixer = undefined;
116
+ this.initPromise = undefined;
117
+ if (optimixer) {
118
+ try {
119
+ await optimixer.close();
120
+ }
121
+ catch (error) {
122
+ this.logger.warn('OptimixerManager shutdown: close failed (non-blocking)', {
123
+ error: error instanceof Error ? error.message : String(error)
124
+ });
125
+ }
126
+ }
127
+ }
128
+ }
package/dist/token-estimate.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Lightweight token-size estimates for Optimixer predict inputs.
3
+ * Uses a chars/4 heuristic (no tiktoken dependency).
4
+ */
5
+ export declare function estimateTextTokens(text: string): number;
6
+ export declare function estimateMessagesTokenSizes(messages: Array<{
7
+ role?: string;
8
+ content?: unknown;
9
+ }>): {
10
+ inputSize: number;
11
+ contextSize: number;
12
+ };
package/dist/token-estimate.js ADDED
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Lightweight token-size estimates for Optimixer predict inputs.
3
+ * Uses a chars/4 heuristic (no tiktoken dependency).
4
+ */
5
+ export function estimateTextTokens(text) {
6
+ const trimmed = text.trim();
7
+ if (!trimmed)
8
+ return 0;
9
+ return Math.max(1, Math.ceil(trimmed.length / 4));
10
+ }
11
+ export function estimateMessagesTokenSizes(messages) {
12
+ let inputSize = 0;
13
+ let contextSize = 0;
14
+ for (const message of messages) {
15
+ const role = typeof message.role === 'string' ? message.role.toLowerCase() : '';
16
+ const content = typeof message.content === 'string'
17
+ ? message.content
18
+ : message.content != null
19
+ ? JSON.stringify(message.content)
20
+ : '';
21
+ const tokens = estimateTextTokens(content);
22
+ if (role === 'system') {
23
+ contextSize += tokens;
24
+ }
25
+ else {
26
+ inputSize += tokens;
27
+ }
28
+ }
29
+ return { inputSize, contextSize };
30
+ }