@x12i/ai-gateway 9.4.0 → 9.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/activity-manager.d.ts +3 -0
- package/dist/activity-manager.js +47 -34
- package/dist/gateway-config.d.ts +3 -0
- package/dist/gateway-config.js +7 -0
- package/dist/gateway-utils.d.ts +12 -0
- package/dist/gateway-utils.js +56 -8
- package/dist/gateway.d.ts +2 -0
- package/dist/gateway.js +58 -2
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/optimixer-manager.d.ts +33 -0
- package/dist/optimixer-manager.js +128 -0
- package/dist/token-estimate.d.ts +12 -0
- package/dist/token-estimate.js +30 -0
- package/dist/types.d.ts +14 -0
- package/dist-cjs/activity-manager.cjs +47 -34
- package/dist-cjs/activity-manager.d.ts +3 -0
- package/dist-cjs/gateway-config.cjs +7 -0
- package/dist-cjs/gateway-config.d.ts +3 -0
- package/dist-cjs/gateway-utils.cjs +56 -8
- package/dist-cjs/gateway-utils.d.ts +12 -0
- package/dist-cjs/gateway.cjs +58 -2
- package/dist-cjs/gateway.d.ts +2 -0
- package/dist-cjs/index.cjs +1 -0
- package/dist-cjs/index.d.ts +1 -0
- package/dist-cjs/optimixer-manager.cjs +128 -0
- package/dist-cjs/optimixer-manager.d.ts +33 -0
- package/dist-cjs/token-estimate.cjs +30 -0
- package/dist-cjs/token-estimate.d.ts +12 -0
- package/dist-cjs/types.d.ts +14 -0
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -230,6 +230,7 @@ Live tests use `LIVE_TEST_PROVIDER` / `LIVE_TEST_MODEL` (default `openrouter` +
|
|
|
230
230
|
| [LOGGER_INITIALIZATION.md](./docs/LOGGER_INITIALIZATION.md) | Logxer setup |
|
|
231
231
|
| [flex-md-compliance.md](./docs/flex-md-compliance.md) | Output format levels |
|
|
232
232
|
| [PROMPT_TEMPLATE_USAGE.md](./docs/PROMPT_TEMPLATE_USAGE.md) | Rendrix templates |
|
|
233
|
+
| [upstream-reports/README.md](./docs/upstream-reports/README.md) | Upstream issues (one file per package/gap) |
|
|
233
234
|
| [UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md](./docs/UPSTREAM_TEMPLATE_RENDERING_AND_PARSER_V4.md) | Parser v4 |
|
|
234
235
|
| [RUNTIME_OBJECTS_OBSERVABILITY.md](./docs/RUNTIME_OBJECTS_OBSERVABILITY.md) | Runtime object keys |
|
|
235
236
|
| [GRAPH_EXECUTION_SUPPORT.md](./docs/GRAPH_EXECUTION_SUPPORT.md) | Graph / node identity |
|
|
@@ -198,6 +198,9 @@ export declare class ActivityManager {
|
|
|
198
198
|
* @returns Activix instance or undefined if not enabled
|
|
199
199
|
*/
|
|
200
200
|
getTracker(): Activix | undefined;
|
|
201
|
+
/** Await Activix init (no-op when tracking is disabled). */
|
|
202
|
+
getReadyTracker(): Promise<Activix | undefined>;
|
|
203
|
+
private logActivixBackendReady;
|
|
201
204
|
/**
|
|
202
205
|
* Get status of activity tracker
|
|
203
206
|
*/
|
package/dist/activity-manager.js
CHANGED
|
@@ -376,41 +376,24 @@ export class ActivityManager {
|
|
|
376
376
|
? { enabled: true, overwriteOuterCost: false }
|
|
377
377
|
: { enabled: true, overwriteOuterCost: false, ...config.autoCost };
|
|
378
378
|
}
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
.
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
badRequestsCollection: badRequestsCollectionName,
|
|
396
|
-
skillExecutionsCollection: this.skillExecutionsCollectionName,
|
|
397
|
-
...(backend === 'local'
|
|
398
|
-
? {
|
|
399
|
-
note: 'Activix is using local playground storage, not MongoDB. The ai-actions collection will not appear in Mongo until URI is set (MONGO_URI or MONGO_LOGS_URI), Activix can ping the database, and at least one activity is written.'
|
|
400
|
-
}
|
|
401
|
-
: {
|
|
402
|
-
note: 'MongoDB stores one document per activity; the ai-actions collection is created on first insert (empty collections may be hidden in some tools until then).'
|
|
403
|
-
})
|
|
404
|
-
});
|
|
405
|
-
})
|
|
406
|
-
.catch((error) => {
|
|
407
|
-
// Init threw — disable tracker so requests are not blocked.
|
|
408
|
-
this.logger.warn('Activity tracking enabled but Activix init failed. Activity records will not be persisted.', {
|
|
409
|
-
error: error instanceof Error ? error.message : String(error),
|
|
410
|
-
hint: 'Set MONGO_URI or MONGO_LOGS_URI and a database name (MONGO_LOGS_DB, MONGO_DB, MONGO_AI_LOGS_DB, or ACTIVIX_DB_NAME). See README: Activity tracking / persistence troubleshooting.'
|
|
379
|
+
if (config.customTracker) {
|
|
380
|
+
this.activix = config.customTracker;
|
|
381
|
+
this.initPromise = Promise.resolve().then(() => this.logActivixBackendReady(collectionName, badRequestsCollectionName));
|
|
382
|
+
}
|
|
383
|
+
else {
|
|
384
|
+
this.initPromise = Activix.create(activixOptions)
|
|
385
|
+
.then((ax) => {
|
|
386
|
+
this.activix = ax;
|
|
387
|
+
this.logActivixBackendReady(collectionName, badRequestsCollectionName);
|
|
388
|
+
})
|
|
389
|
+
.catch((error) => {
|
|
390
|
+
this.logger.warn('Activity tracking enabled but Activix init failed. Activity records will not be persisted.', {
|
|
391
|
+
error: error instanceof Error ? error.message : String(error),
|
|
392
|
+
hint: 'Set MONGO_URI or MONGO_LOGS_URI and a database name (MONGO_LOGS_DB, MONGO_DB, MONGO_AI_LOGS_DB, or ACTIVIX_DB_NAME). See README: Activity tracking / persistence troubleshooting.'
|
|
393
|
+
});
|
|
394
|
+
this.activix = undefined;
|
|
411
395
|
});
|
|
412
|
-
|
|
413
|
-
});
|
|
396
|
+
}
|
|
414
397
|
this.logger.debug('Activity tracking enabled with Activix', {
|
|
415
398
|
collection: collectionName,
|
|
416
399
|
badRequestsCollection: badRequestsCollectionName,
|
|
@@ -1215,6 +1198,36 @@ export class ActivityManager {
|
|
|
1215
1198
|
getTracker() {
|
|
1216
1199
|
return this.activix;
|
|
1217
1200
|
}
|
|
1201
|
+
/** Await Activix init (no-op when tracking is disabled). */
|
|
1202
|
+
async getReadyTracker() {
|
|
1203
|
+
if (this.initPromise) {
|
|
1204
|
+
await this.initPromise;
|
|
1205
|
+
}
|
|
1206
|
+
return this.activix;
|
|
1207
|
+
}
|
|
1208
|
+
logActivixBackendReady(collectionName, badRequestsCollectionName) {
|
|
1209
|
+
const ax = this.activix;
|
|
1210
|
+
if (!ax)
|
|
1211
|
+
return;
|
|
1212
|
+
const backend = ax.storageBackend;
|
|
1213
|
+
const mongoDb = backend === 'database' ? resolveActivixLogsDatabaseName() : undefined;
|
|
1214
|
+
const mongoUriConfigured = Boolean(resolveActivixMongoUriFromEnv());
|
|
1215
|
+
this.logger.info('Activity tracking persistence backend ready', {
|
|
1216
|
+
storageBackend: backend,
|
|
1217
|
+
mongoDatabase: mongoDb,
|
|
1218
|
+
mongoUriConfigured,
|
|
1219
|
+
mainCollection: collectionName,
|
|
1220
|
+
badRequestsCollection: badRequestsCollectionName,
|
|
1221
|
+
skillExecutionsCollection: this.skillExecutionsCollectionName,
|
|
1222
|
+
...(backend === 'local'
|
|
1223
|
+
? {
|
|
1224
|
+
note: 'Activix is using local playground storage, not MongoDB. The ai-actions collection will not appear in Mongo until URI is set (MONGO_URI or MONGO_LOGS_URI), Activix can ping the database, and at least one activity is written.'
|
|
1225
|
+
}
|
|
1226
|
+
: {
|
|
1227
|
+
note: 'MongoDB stores one document per activity; the ai-actions collection is created on first insert (empty collections may be hidden in some tools until then).'
|
|
1228
|
+
})
|
|
1229
|
+
});
|
|
1230
|
+
}
|
|
1218
1231
|
/**
|
|
1219
1232
|
* Get status of activity tracker
|
|
1220
1233
|
*/
|
package/dist/gateway-config.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ import type { GatewayConfig } from './types.js';
|
|
|
6
6
|
import type { Logxer } from '@x12i/logxer';
|
|
7
7
|
import { LLMProviderRouter } from '@x12i/ai-providers-router';
|
|
8
8
|
import { ActivityManager } from './activity-manager.js';
|
|
9
|
+
import { OptimixerManager } from './optimixer-manager.js';
|
|
9
10
|
import { UsageTracker } from './usage-tracker.js';
|
|
10
11
|
import type { MessageBuilderConfig } from './message-builder.js';
|
|
11
12
|
import type { TemplateRenderOptions } from '@x12i/rendrix';
|
|
@@ -16,6 +17,7 @@ export interface GatewayConfigContext {
|
|
|
16
17
|
logger: Logxer;
|
|
17
18
|
router: LLMProviderRouter;
|
|
18
19
|
activityManager: ActivityManager;
|
|
20
|
+
optimixerManager: OptimixerManager;
|
|
19
21
|
usageTracker: UsageTracker;
|
|
20
22
|
messageBuilderConfig: MessageBuilderConfig;
|
|
21
23
|
}
|
|
@@ -45,6 +47,7 @@ export declare function initializeGatewayComponents(config: GatewayConfig): {
|
|
|
45
47
|
logger: Logxer;
|
|
46
48
|
router: LLMProviderRouter;
|
|
47
49
|
activityManager: ActivityManager;
|
|
50
|
+
optimixerManager: OptimixerManager;
|
|
48
51
|
usageTracker: UsageTracker;
|
|
49
52
|
messageBuilderConfig: MessageBuilderConfig;
|
|
50
53
|
defaultModelConfig: Record<string, unknown>;
|
package/dist/gateway-config.js
CHANGED
|
@@ -48,6 +48,7 @@ function getDefaultsDir() {
|
|
|
48
48
|
import { LLMProviderRouter } from '@x12i/ai-providers-router';
|
|
49
49
|
import { createGatewayLogger } from './logger-factory.js';
|
|
50
50
|
import { ActivityManager } from './activity-manager.js';
|
|
51
|
+
import { OptimixerManager } from './optimixer-manager.js';
|
|
51
52
|
import { UsageTracker } from './usage-tracker.js';
|
|
52
53
|
import { mergeTemplateRenderOptions } from './template-render-merge.js';
|
|
53
54
|
import { GatewayRateLimiter } from './gateway-rate-limiter.js';
|
|
@@ -278,6 +279,11 @@ export function initializeGatewayComponents(config) {
|
|
|
278
279
|
}
|
|
279
280
|
})
|
|
280
281
|
});
|
|
282
|
+
const optimixerManager = new OptimixerManager({
|
|
283
|
+
optimixer: config.optimixer,
|
|
284
|
+
logger,
|
|
285
|
+
getActivix: () => activityManager.getReadyTracker()
|
|
286
|
+
});
|
|
281
287
|
const templateRendering = mergeTemplateRenderOptions(defaultTemplateRendering, config.templateRendering);
|
|
282
288
|
const instructionsBlockOverrides = {
|
|
283
289
|
...(config.instructionsBlocks ?? {})
|
|
@@ -293,6 +299,7 @@ export function initializeGatewayComponents(config) {
|
|
|
293
299
|
logger,
|
|
294
300
|
router,
|
|
295
301
|
activityManager,
|
|
302
|
+
optimixerManager,
|
|
296
303
|
usageTracker,
|
|
297
304
|
messageBuilderConfig,
|
|
298
305
|
defaultModelConfig
|
package/dist/gateway-utils.d.ts
CHANGED
|
@@ -17,6 +17,12 @@ export type MergeConfigOptions = {
|
|
|
17
17
|
defaultModelConfig?: Record<string, unknown>;
|
|
18
18
|
catalog?: AiModelsCatalogClient | null;
|
|
19
19
|
};
|
|
20
|
+
/**
|
|
21
|
+
* True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
|
|
22
|
+
*/
|
|
23
|
+
export declare function isMaxTokensExplicitlySet(request: ChatRequest & {
|
|
24
|
+
useInternalDefaults?: 'skill' | 'audit';
|
|
25
|
+
}, config: GatewayConfig): boolean;
|
|
20
26
|
/**
|
|
21
27
|
* Merges config with defaults
|
|
22
28
|
* Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
|
|
@@ -165,4 +171,10 @@ export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
|
|
|
165
171
|
* Non-serializable values become a small marker object instead of throwing.
|
|
166
172
|
*/
|
|
167
173
|
export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
|
|
174
|
+
export declare function resolveFinishReasonFromRouterResponse(response: unknown): string | undefined;
|
|
175
|
+
export declare function buildOptimixerActualUsage(tokens: {
|
|
176
|
+
prompt: number;
|
|
177
|
+
completion: number;
|
|
178
|
+
total: number;
|
|
179
|
+
}, response: unknown, latencyMs: number): import('@x12i/optimixer').AiMaxTokensActualUsage;
|
|
168
180
|
export {};
|
package/dist/gateway-utils.js
CHANGED
|
@@ -55,6 +55,21 @@ async function substituteGatewayDefaultModel(merged, request, config, logger, me
|
|
|
55
55
|
});
|
|
56
56
|
applyGatewayDefaultToMerged(merged, defaults, config);
|
|
57
57
|
}
|
|
58
|
+
/**
 * Report whether `maxTokens` was provided by any caller-controlled source, in
 * which case Optimixer should not override it.
 *
 * Sources checked: `request.config`, `request.modelConfig`, the internal
 * system-action defaults selected by `request.useInternalDefaults`
 * ('skill' -> `internalSkill`, 'audit' -> `skillAudit`), and the gateway
 * `config` itself. Any value other than `undefined` counts as "set".
 *
 * @param request Chat request, optionally carrying `useInternalDefaults`.
 * @param config Gateway configuration.
 * @returns `true` when at least one source defines `maxTokens`.
 */
export function isMaxTokensExplicitlySet(request, config) {
    let internalDefaults;
    switch (request.useInternalDefaults) {
        case 'skill':
            internalDefaults = config.internalSystemActions?.internalSkill;
            break;
        case 'audit':
            internalDefaults = config.internalSystemActions?.skillAudit;
            break;
        default:
            internalDefaults = undefined;
    }
    const optionalSources = [request.config, request.modelConfig, internalDefaults];
    if (optionalSources.some((source) => source?.maxTokens !== undefined)) {
        return true;
    }
    return config.maxTokens !== undefined;
}
|
|
58
73
|
/**
|
|
59
74
|
* Merges config with defaults
|
|
60
75
|
* Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
|
|
@@ -175,12 +190,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
175
190
|
if (!merged.model) {
|
|
176
191
|
await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
|
|
177
192
|
}
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
config.maxTokens !== undefined;
|
|
183
|
-
if (!maxTokensExplicitlySet && merged.model && merged.provider) {
|
|
193
|
+
const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
|
|
194
|
+
const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
|
|
195
|
+
// Auto-get maxTokens from flex-md when Optimixer is not handling adaptive max_tokens.
|
|
196
|
+
if (!optimixerWillPredict && !maxTokensExplicitlySet && merged.model && merged.provider) {
|
|
184
197
|
// Try to get maxTokens from flex-md
|
|
185
198
|
try {
|
|
186
199
|
const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(merged.provider, merged.model);
|
|
@@ -219,7 +232,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
219
232
|
});
|
|
220
233
|
}
|
|
221
234
|
}
|
|
222
|
-
else if (!merged.maxTokens) {
|
|
235
|
+
else if (!merged.maxTokens && !optimixerWillPredict) {
|
|
223
236
|
// If maxTokens wasn't set and wasn't auto-detected, use fallback
|
|
224
237
|
// This should rarely happen, but handle edge cases
|
|
225
238
|
merged.maxTokens = 2000;
|
|
@@ -228,7 +241,15 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
228
241
|
model: merged.model,
|
|
229
242
|
provider: merged.provider,
|
|
230
243
|
maxTokens: merged.maxTokens,
|
|
231
|
-
maxTokensExplicitlySet
|
|
244
|
+
maxTokensExplicitlySet,
|
|
245
|
+
optimixerWillPredict
|
|
246
|
+
});
|
|
247
|
+
}
|
|
248
|
+
else if (optimixerWillPredict) {
|
|
249
|
+
logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
|
|
250
|
+
jobId: request.identity.jobId,
|
|
251
|
+
model: merged.model,
|
|
252
|
+
provider: merged.provider
|
|
232
253
|
});
|
|
233
254
|
}
|
|
234
255
|
else {
|
|
@@ -814,3 +835,30 @@ export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIV
|
|
|
814
835
|
_preview: serialized.slice(0, maxChars)
|
|
815
836
|
};
|
|
816
837
|
}
|
|
838
|
+
/**
 * Pull a finish reason out of a router response, if one is present.
 *
 * Looks, in order, at `finishReason` and `finish_reason` on the response
 * itself and then at the same two keys on `response.metadata` (when that is an
 * object). The first non-blank string wins and is returned trimmed.
 *
 * @param response Router response of unknown shape.
 * @returns The trimmed finish reason, or `undefined` when none is found.
 */
export function resolveFinishReasonFromRouterResponse(response) {
    if (typeof response !== 'object' || response === null) {
        return undefined;
    }
    const metadata = typeof response.metadata === 'object' && response.metadata !== null
        ? response.metadata
        : {};
    const keys = ['finishReason', 'finish_reason'];
    // Top-level fields take precedence over metadata fields.
    for (const holder of [response, metadata]) {
        for (const key of keys) {
            const value = holder[key];
            if (typeof value !== 'string') {
                continue;
            }
            const trimmed = value.trim();
            if (trimmed) {
                return trimmed;
            }
        }
    }
    return undefined;
}
|
|
855
|
+
/**
 * Assemble the "actual usage" record handed back to Optimixer once a call has
 * completed.
 *
 * @param tokens Token counts with `prompt`, `completion` and `total` fields.
 * @param response Router response; only inspected for a finish reason.
 * @param latencyMs Elapsed time for the call, in milliseconds.
 * @returns Usage object; `finishReason` is included only when one was found.
 */
export function buildOptimixerActualUsage(tokens, response, latencyMs) {
    const finishReason = resolveFinishReasonFromRouterResponse(response);
    const { prompt, completion, total } = tokens;
    // Omit the key entirely (rather than writing undefined) when no reason is known.
    const optionalFinishReason = finishReason ? { finishReason } : {};
    return {
        promptTokens: prompt,
        completionTokens: completion,
        totalTokens: total,
        ...optionalFinishReason,
        latencyMs
    };
}
|
package/dist/gateway.d.ts
CHANGED
|
@@ -15,6 +15,7 @@ export declare class AIGateway {
|
|
|
15
15
|
private config;
|
|
16
16
|
private logger;
|
|
17
17
|
private activityManager?;
|
|
18
|
+
private optimixerManager?;
|
|
18
19
|
private messageBuilderConfig?;
|
|
19
20
|
private defaultModelConfig;
|
|
20
21
|
private _autoRegisterDone;
|
|
@@ -28,6 +29,7 @@ export declare class AIGateway {
|
|
|
28
29
|
* Invoke AI request (with structured output support)
|
|
29
30
|
*/
|
|
30
31
|
invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
|
|
32
|
+
private applyAdaptiveMaxTokensIfEnabled;
|
|
31
33
|
/**
|
|
32
34
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
33
35
|
*/
|
package/dist/gateway.js
CHANGED
|
@@ -7,9 +7,9 @@ import { validateChatRequest, validateAIRequest } from './gateway-validation.js'
|
|
|
7
7
|
import { ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
8
8
|
import { initializeGatewayComponents } from './gateway-config.js';
|
|
9
9
|
import { buildMessages } from './message-builder.js';
|
|
10
|
-
import { extractJsonFromFlexMd } from './flex-md-loader.js';
|
|
10
|
+
import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
|
|
11
11
|
import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
|
|
12
|
-
import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
|
|
12
|
+
import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
|
|
13
13
|
import { getAiToolsClient } from './ai-tools-client.js';
|
|
14
14
|
import { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
15
15
|
import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
|
|
@@ -45,6 +45,7 @@ export class AIGateway {
|
|
|
45
45
|
config;
|
|
46
46
|
logger;
|
|
47
47
|
activityManager;
|
|
48
|
+
optimixerManager;
|
|
48
49
|
messageBuilderConfig;
|
|
49
50
|
defaultModelConfig = {};
|
|
50
51
|
_autoRegisterDone = false;
|
|
@@ -56,6 +57,7 @@ export class AIGateway {
|
|
|
56
57
|
this.logger = components.logger;
|
|
57
58
|
this.router = components.router;
|
|
58
59
|
this.activityManager = components.activityManager;
|
|
60
|
+
this.optimixerManager = components.optimixerManager;
|
|
59
61
|
this.messageBuilderConfig = components.messageBuilderConfig;
|
|
60
62
|
this.defaultModelConfig = components.defaultModelConfig ?? {};
|
|
61
63
|
setGatewayRuntimeClients({
|
|
@@ -93,6 +95,7 @@ export class AIGateway {
|
|
|
93
95
|
await autoRegisterProviders(this.router, this.logger);
|
|
94
96
|
this._autoRegisterDone = true;
|
|
95
97
|
}
|
|
98
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
96
99
|
// Start activity tracking if available
|
|
97
100
|
let activity = undefined;
|
|
98
101
|
if (this.activityManager) {
|
|
@@ -166,6 +169,9 @@ export class AIGateway {
|
|
|
166
169
|
});
|
|
167
170
|
}
|
|
168
171
|
}
|
|
172
|
+
if (optimixerPrediction) {
|
|
173
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
|
|
174
|
+
}
|
|
169
175
|
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
170
176
|
tokens: enhancedResponse.metadata.tokens,
|
|
171
177
|
costUsd: enhancedResponse.metadata.costUsd,
|
|
@@ -279,6 +285,7 @@ export class AIGateway {
|
|
|
279
285
|
await autoRegisterProviders(this.router, this.logger);
|
|
280
286
|
this._autoRegisterDone = true;
|
|
281
287
|
}
|
|
288
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
282
289
|
// Start activity tracking if available
|
|
283
290
|
let activity = undefined;
|
|
284
291
|
if (this.activityManager) {
|
|
@@ -654,6 +661,9 @@ export class AIGateway {
|
|
|
654
661
|
});
|
|
655
662
|
}
|
|
656
663
|
}
|
|
664
|
+
if (optimixerPrediction) {
|
|
665
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
|
|
666
|
+
}
|
|
657
667
|
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
658
668
|
tokens: enhancedResponse.metadata.tokens,
|
|
659
669
|
costUsd: enhancedResponse.metadata.costUsd,
|
|
@@ -687,6 +697,52 @@ export class AIGateway {
|
|
|
687
697
|
throw err;
|
|
688
698
|
}
|
|
689
699
|
}
|
|
700
|
+
async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
|
|
701
|
+
if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
|
|
702
|
+
return undefined;
|
|
703
|
+
}
|
|
704
|
+
const prediction = await this.optimixerManager.predictMaxTokens({
|
|
705
|
+
request,
|
|
706
|
+
mergedConfig,
|
|
707
|
+
messages
|
|
708
|
+
});
|
|
709
|
+
if (prediction) {
|
|
710
|
+
let maxTokens = prediction.recommendedMaxTokens;
|
|
711
|
+
const useCeiling = this.config.optimixer?.useFlexMdCeiling !== false;
|
|
712
|
+
if (useCeiling && mergedConfig?.model && mergedConfig?.provider) {
|
|
713
|
+
try {
|
|
714
|
+
const ceiling = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
|
|
715
|
+
if (typeof ceiling === 'number' && ceiling > 0 && maxTokens > ceiling) {
|
|
716
|
+
maxTokens = ceiling;
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
catch {
|
|
720
|
+
// Non-blocking: use uncapped prediction
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
mergedConfig.maxTokens = maxTokens;
|
|
724
|
+
request._mergedRouterConfig = mergedConfig;
|
|
725
|
+
this.logger.debug('Applied Optimixer recommended max_tokens', {
|
|
726
|
+
aiRequestId: request.aiRequestId,
|
|
727
|
+
recommendedMaxTokens: prediction.recommendedMaxTokens,
|
|
728
|
+
maxTokens,
|
|
729
|
+
confidence: prediction.confidence,
|
|
730
|
+
requestId: prediction.requestId
|
|
731
|
+
});
|
|
732
|
+
return prediction;
|
|
733
|
+
}
|
|
734
|
+
if (mergedConfig?.maxTokens === undefined && mergedConfig?.model && mergedConfig?.provider) {
|
|
735
|
+
try {
|
|
736
|
+
const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
|
|
737
|
+
mergedConfig.maxTokens = flexMdMaxTokens && flexMdMaxTokens > 0 ? flexMdMaxTokens : 2000;
|
|
738
|
+
}
|
|
739
|
+
catch {
|
|
740
|
+
mergedConfig.maxTokens = 2000;
|
|
741
|
+
}
|
|
742
|
+
request._mergedRouterConfig = mergedConfig;
|
|
743
|
+
}
|
|
744
|
+
return undefined;
|
|
745
|
+
}
|
|
690
746
|
/**
|
|
691
747
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
692
748
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -32,6 +32,7 @@ export { Activix } from '@x12i/activix';
|
|
|
32
32
|
export type { ActivixRunContext, ActivixAutoCostOptions, ActivixCostShape, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
|
|
33
33
|
export { normalizeToActivixCostShape } from '@x12i/activix';
|
|
34
34
|
export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
35
|
+
export { OptimixerManager } from './optimixer-manager.js';
|
|
35
36
|
export type { ActivityIdentity } from './types.js';
|
|
36
37
|
export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
|
|
37
38
|
export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
|
package/dist/index.js
CHANGED
|
@@ -28,6 +28,7 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
|
|
|
28
28
|
export { Activix } from '@x12i/activix';
|
|
29
29
|
export { normalizeToActivixCostShape } from '@x12i/activix';
|
|
30
30
|
export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
|
|
31
|
+
export { OptimixerManager } from './optimixer-manager.js';
|
|
31
32
|
export { activityIdentityToLogMeta, withActivityIdentity, gatewayLogDebug } from './gateway-log-meta.js';
|
|
32
33
|
// Re-export logging (@x12i/logxer)
|
|
33
34
|
export { createLogxer, DebugLogAbstract } from '@x12i/logxer';
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { AiMaxTokensActualUsage, AiMaxTokensPredictionResult } from '@x12i/optimixer';
|
|
2
|
+
import type { Activix } from '@x12i/activix';
|
|
3
|
+
import type { Logxer } from '@x12i/logxer';
|
|
4
|
+
import type { ChatRequest, GatewayConfig } from './types.js';
|
|
5
|
+
export type OptimixerGatewayConfig = NonNullable<GatewayConfig['optimixer']>;
|
|
6
|
+
export interface OptimixerManagerConfig {
|
|
7
|
+
optimixer?: OptimixerGatewayConfig;
|
|
8
|
+
logger: Logxer;
|
|
9
|
+
getActivix: () => Promise<Activix | undefined>;
|
|
10
|
+
}
|
|
11
|
+
export type OptimixerMaxTokensContext = {
|
|
12
|
+
request: ChatRequest;
|
|
13
|
+
mergedConfig: ChatRequest['config'];
|
|
14
|
+
messages: Array<{
|
|
15
|
+
role?: string;
|
|
16
|
+
content?: unknown;
|
|
17
|
+
}>;
|
|
18
|
+
};
|
|
19
|
+
export declare class OptimixerManager {
|
|
20
|
+
private readonly config;
|
|
21
|
+
private readonly logger;
|
|
22
|
+
private readonly getActivix;
|
|
23
|
+
private optimixer?;
|
|
24
|
+
private initPromise?;
|
|
25
|
+
private readonly activixCollection;
|
|
26
|
+
constructor(config: OptimixerManagerConfig);
|
|
27
|
+
isEnabled(): boolean;
|
|
28
|
+
private ensureReady;
|
|
29
|
+
private initialize;
|
|
30
|
+
predictMaxTokens(ctx: OptimixerMaxTokensContext): Promise<AiMaxTokensPredictionResult | undefined>;
|
|
31
|
+
completePrediction(requestId: string, actual: AiMaxTokensActualUsage): Promise<void>;
|
|
32
|
+
shutdown(): Promise<void>;
|
|
33
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { Optimixer } from '@x12i/optimixer';
|
|
2
|
+
import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
|
|
3
|
+
import { estimateMessagesTokenSizes } from './token-estimate.js';
|
|
4
|
+
/**
 * Choose the action type id reported to Optimixer for a request.
 *
 * Preference order: `request.identity.actionType`, then `request.taskTypeId`,
 * then the literal 'gateway.invoke'. A candidate is used only when it is
 * truthy and its string form is non-blank; the trimmed string is returned.
 *
 * @param request Request carrying optional `identity` and `taskTypeId`.
 * @returns The resolved action type id.
 */
function resolveActionTypeId(request) {
    const candidates = [request.identity?.actionType, request.taskTypeId];
    for (const candidate of candidates) {
        if (!candidate) {
            continue;
        }
        const trimmed = String(candidate).trim();
        if (trimmed) {
            return trimmed;
        }
    }
    return 'gateway.invoke';
}
|
|
14
|
+
/**
 * Pass a request identity through as the run context for Optimixer.
 *
 * @param identity Request identity, possibly missing.
 * @returns The same object when truthy; `undefined` for any falsy input.
 */
function toActivixRunContext(identity) {
    return identity ? identity : undefined;
}
|
|
19
|
+
/**
 * Owns a lazily created Optimixer instance used for adaptive `max_tokens`.
 *
 * The instance is created on first use (not in the constructor) from the
 * Activix client supplied by `getActivix`. Every public method is written to
 * be non-blocking for the gateway: failures are logged as warnings and
 * surfaced as `undefined` / no-op rather than thrown.
 */
export class OptimixerManager {
    /** The gateway's `optimixer` settings (may be undefined). */
    config;
    /** Logger used for info/warn output. */
    logger;
    /** Async accessor returning the Activix client, or undefined when unavailable. */
    getActivix;
    /** Created Optimixer instance; undefined until initialization succeeds. */
    optimixer;
    /** In-flight or settled initialization; shared so initialization runs once. */
    initPromise;
    /** Activix collection name handed to Optimixer. */
    activixCollection;
    /**
     * @param config Object with `optimixer` settings, a `logger`, and a
     *   `getActivix` accessor.
     */
    constructor(config) {
        this.config = config.optimixer;
        this.logger = config.logger;
        this.getActivix = config.getActivix;
        this.activixCollection = resolveActivityTrackingConfig().collectionName;
    }
    /** @returns `true` only when the settings have `enabled === true`. */
    isEnabled() {
        return this.config?.enabled === true;
    }
    /**
     * Return the Optimixer instance, initializing it on first call.
     *
     * @returns The instance, or `undefined` when disabled or initialization
     *   did not produce one.
     */
    async ensureReady() {
        if (!this.isEnabled())
            return undefined;
        if (this.optimixer)
            return this.optimixer;
        if (!this.initPromise) {
            this.initPromise = this.initialize();
        }
        await this.initPromise;
        return this.optimixer;
    }
    /**
     * Create the Optimixer instance. Never rejects: every failure mode is
     * logged and leaves `this.optimixer` undefined.
     */
    async initialize() {
        let activix;
        try {
            activix = await this.getActivix();
        }
        catch (error) {
            // A rejected accessor must not poison initPromise; otherwise every
            // later predict/complete call would rethrow and break the request.
            this.logger.warn('Optimixer enabled but Activix lookup failed; adaptive max_tokens disabled', {
                activixCollection: this.activixCollection,
                error: error instanceof Error ? error.message : String(error)
            });
            return;
        }
        if (!activix) {
            this.logger.warn('Optimixer enabled but Activix is unavailable; adaptive max_tokens disabled', {
                activixCollection: this.activixCollection
            });
            return;
        }
        try {
            this.optimixer = await Optimixer.create({
                activixClient: activix,
                activixCollection: this.activixCollection,
                pipelines: { aiMaxTokens: { enabled: true } },
                // warmupLimit is forwarded only when configured as a number.
                ...(typeof this.config?.warmupLimit === 'number' ? { warmupLimit: this.config.warmupLimit } : {})
            });
            this.logger.info('Optimixer initialized for adaptive max_tokens', {
                activixCollection: this.activixCollection,
                acceptableRisk: this.config?.acceptableRisk ?? 'medium'
            });
        }
        catch (error) {
            this.logger.warn('Optimixer initialization failed; adaptive max_tokens disabled', {
                error: error instanceof Error ? error.message : String(error)
            });
            this.optimixer = undefined;
        }
    }
    /**
     * Ask Optimixer for a recommended `max_tokens` for this request.
     *
     * @param ctx Object with `request`, `mergedConfig` and `messages`.
     * @returns The prediction, or `undefined` when Optimixer is not ready or
     *   the prediction call throws (callers should use a fallback).
     */
    async predictMaxTokens(ctx) {
        const optimixer = await this.ensureReady();
        if (!optimixer)
            return undefined;
        const { request, mergedConfig, messages } = ctx;
        const { inputSize, contextSize } = estimateMessagesTokenSizes(messages);
        const acceptableRisk = this.config?.acceptableRisk ?? 'medium';
        try {
            return await optimixer.predictAiMaxTokens({
                actionTypeId: resolveActionTypeId(request),
                inputSize,
                contextSize,
                acceptableRisk,
                runContext: toActivixRunContext(request.identity),
                // Only string provider/model values are forwarded.
                provider: typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined,
                model: typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined
            });
        }
        catch (error) {
            this.logger.warn('Optimixer predictAiMaxTokens failed; caller should use fallback max_tokens', {
                error: error instanceof Error ? error.message : String(error),
                aiRequestId: request.aiRequestId
            });
            return undefined;
        }
    }
    /**
     * Report actual usage for a previously returned prediction.
     * Failures are logged and swallowed.
     *
     * @param requestId The prediction's `requestId`.
     * @param actual Actual usage record for the completed call.
     */
    async completePrediction(requestId, actual) {
        const optimixer = await this.ensureReady();
        if (!optimixer)
            return;
        try {
            await optimixer.completeAiMaxTokensPrediction({ requestId, actual });
        }
        catch (error) {
            this.logger.warn('Optimixer completeAiMaxTokensPrediction failed (non-blocking)', {
                requestId,
                error: error instanceof Error ? error.message : String(error)
            });
        }
    }
    /**
     * Drop the current instance and close it if one exists. State is cleared
     * before closing, so a later call re-initializes from scratch.
     */
    async shutdown() {
        const optimixer = this.optimixer;
        this.optimixer = undefined;
        this.initPromise = undefined;
        if (optimixer) {
            try {
                await optimixer.close();
            }
            catch (error) {
                this.logger.warn('OptimixerManager shutdown: close failed (non-blocking)', {
                    error: error instanceof Error ? error.message : String(error)
                });
            }
        }
    }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight token-size estimates for Optimixer predict inputs.
|
|
3
|
+
* Uses a chars/4 heuristic (no tiktoken dependency).
|
|
4
|
+
*/
|
|
5
|
+
export declare function estimateTextTokens(text: string): number;
|
|
6
|
+
export declare function estimateMessagesTokenSizes(messages: Array<{
|
|
7
|
+
role?: string;
|
|
8
|
+
content?: unknown;
|
|
9
|
+
}>): {
|
|
10
|
+
inputSize: number;
|
|
11
|
+
contextSize: number;
|
|
12
|
+
};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight token-size estimates for Optimixer predict inputs.
|
|
3
|
+
* Uses a chars/4 heuristic (no tiktoken dependency).
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Estimate the token count of a string with a chars/4 heuristic.
 *
 * @param text Text to measure; surrounding whitespace is ignored.
 * @returns 0 for blank text, otherwise at least 1.
 */
export function estimateTextTokens(text) {
    const trimmed = text.trim();
    if (!trimmed)
        return 0;
    return Math.max(1, Math.ceil(trimmed.length / 4));
}
/**
 * Turn arbitrary message content into text for size estimation without throwing.
 * Strings pass through; null/undefined become ''; other values are JSON
 * serialized. Values JSON cannot represent (functions, symbols, cyclic
 * structures, BigInt) contribute '' so they count as zero tokens.
 */
function stringifyMessageContent(content) {
    if (typeof content === 'string')
        return content;
    if (content == null)
        return '';
    try {
        // JSON.stringify returns undefined (not a string) for functions and symbols.
        const serialized = JSON.stringify(content);
        return typeof serialized === 'string' ? serialized : '';
    }
    catch {
        // Cyclic structures and BigInt make JSON.stringify throw; an estimate must not.
        return '';
    }
}
/**
 * Split a message list into estimated token sizes.
 *
 * Messages whose role is 'system' (case-insensitive) count toward
 * `contextSize`; every other message, including those without a string role,
 * counts toward `inputSize`.
 *
 * @param messages Messages with optional `role` and `content` of any shape.
 * @returns `{ inputSize, contextSize }` as estimated token counts.
 */
export function estimateMessagesTokenSizes(messages) {
    let inputSize = 0;
    let contextSize = 0;
    for (const message of messages) {
        const role = typeof message.role === 'string' ? message.role.toLowerCase() : '';
        const tokens = estimateTextTokens(stringifyMessageContent(message.content));
        if (role === 'system') {
            contextSize += tokens;
        }
        else {
            inputSize += tokens;
        }
    }
    return { inputSize, contextSize };
}
|