@x12i/ai-gateway 9.3.5 → 9.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ import { Optimixer } from '@x12i/optimixer';
2
+ import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
3
+ import { estimateMessagesTokenSizes } from './token-estimate.js';
4
/**
 * Chooses the action-type identifier sent to Optimixer for a request.
 *
 * Preference order:
 *   1. `request.identity.actionType`
 *   2. `request.taskTypeId`
 *   3. the literal 'gateway.invoke'
 *
 * A candidate is used only when it is truthy and its string form is not
 * blank; the value returned is always the trimmed string form.
 */
function resolveActionTypeId(request) {
    // Truthy values are stringified and trimmed; everything else becomes ''.
    const normalize = (value) => (value ? String(value).trim() : '');
    return (normalize(request.identity?.actionType) ||
        normalize(request.taskTypeId) ||
        'gateway.invoke');
}
14
/**
 * Hands the request identity to Optimixer as its run context.
 * The identity object is passed through untouched; any falsy identity
 * (missing, null, ...) is normalized to `undefined`.
 */
function toActivixRunContext(identity) {
    return identity ? identity : undefined;
}
19
/**
 * Owns a lazily created Optimixer instance used to predict an adaptive
 * `max_tokens` budget for gateway requests.
 *
 * Every public method is best-effort: failures are logged as warnings and
 * surfaced to the caller as `undefined` (or a silent no-op), never as a
 * rejected promise, so the gateway can fall back to its own max_tokens.
 */
export class OptimixerManager {
    config;
    logger;
    getActivix;
    optimixer;
    initPromise;
    activixCollection;
    /**
     * @param config - `optimixer` gateway settings, the `logger` used for
     *   diagnostics, and `getActivix`, an async accessor for the Activix client.
     */
    constructor(config) {
        this.config = config.optimixer;
        this.logger = config.logger;
        this.getActivix = config.getActivix;
        this.activixCollection = resolveActivityTrackingConfig().collectionName;
    }
    /** True only when the optimixer config explicitly sets `enabled: true`. */
    isEnabled() {
        return this.config?.enabled === true;
    }
    /**
     * Returns the Optimixer instance, starting initialization on first use.
     * Concurrent callers share one `initPromise`. Resolves to `undefined`
     * when the feature is disabled or initialization did not produce an
     * instance.
     */
    async ensureReady() {
        if (!this.isEnabled())
            return undefined;
        if (this.optimixer)
            return this.optimixer;
        if (!this.initPromise) {
            this.initPromise = this.initialize();
        }
        await this.initPromise;
        return this.optimixer;
    }
    /**
     * Creates the Optimixer instance on top of the Activix client.
     * Never rejects: every failure is logged and leaves `this.optimixer`
     * unset, which callers observe as "adaptive max_tokens disabled".
     */
    async initialize() {
        let activix;
        try {
            activix = await this.getActivix();
        }
        catch (error) {
            // A rejected getActivix() used to escape through ensureReady() and,
            // because initPromise is cached, reject every later predict/complete call.
            this.logger.warn('Optimixer initialization failed; adaptive max_tokens disabled', {
                error: error instanceof Error ? error.message : String(error)
            });
            return;
        }
        if (!activix) {
            this.logger.warn('Optimixer enabled but Activix is unavailable; adaptive max_tokens disabled', {
                activixCollection: this.activixCollection
            });
            return;
        }
        try {
            this.optimixer = await Optimixer.create({
                activixClient: activix,
                activixCollection: this.activixCollection,
                pipelines: { aiMaxTokens: { enabled: true } },
                // Forward warmupLimit only when it was configured as a number.
                ...(typeof this.config?.warmupLimit === 'number' ? { warmupLimit: this.config.warmupLimit } : {})
            });
            this.logger.info('Optimixer initialized for adaptive max_tokens', {
                activixCollection: this.activixCollection,
                acceptableRisk: this.config?.acceptableRisk ?? 'medium'
            });
        }
        catch (error) {
            this.logger.warn('Optimixer initialization failed; adaptive max_tokens disabled', {
                error: error instanceof Error ? error.message : String(error)
            });
            this.optimixer = undefined;
        }
    }
    /**
     * Asks Optimixer for a max_tokens prediction for the given request.
     *
     * @param ctx - `request`, its `mergedConfig`, and the `messages` about to be sent.
     * @returns The prediction result, or `undefined` when Optimixer is not
     *   available or the prediction fails (the caller should then use its
     *   fallback max_tokens).
     */
    async predictMaxTokens(ctx) {
        const optimixer = await this.ensureReady();
        if (!optimixer)
            return undefined;
        const { request, mergedConfig, messages } = ctx;
        const acceptableRisk = this.config?.acceptableRisk ?? 'medium';
        try {
            // Size estimation is inside the try so that unusual message content
            // cannot turn an optional optimisation into a failed request.
            const { inputSize, contextSize } = estimateMessagesTokenSizes(messages);
            return await optimixer.predictAiMaxTokens({
                actionTypeId: resolveActionTypeId(request),
                inputSize,
                contextSize,
                acceptableRisk,
                runContext: toActivixRunContext(request.identity),
                provider: typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined,
                model: typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined
            });
        }
        catch (error) {
            this.logger.warn('Optimixer predictAiMaxTokens failed; caller should use fallback max_tokens', {
                error: error instanceof Error ? error.message : String(error),
                // Optional chaining: the failure being reported may itself be a missing request.
                aiRequestId: request?.aiRequestId
            });
            return undefined;
        }
    }
    /**
     * Reports the actual usage for an earlier prediction back to Optimixer.
     * Failures are logged and swallowed (non-blocking).
     *
     * @param requestId - Identifier of the prediction being completed.
     * @param actual - Actual usage observed for that request.
     */
    async completePrediction(requestId, actual) {
        const optimixer = await this.ensureReady();
        if (!optimixer)
            return;
        try {
            await optimixer.completeAiMaxTokensPrediction({ requestId, actual });
        }
        catch (error) {
            this.logger.warn('Optimixer completeAiMaxTokensPrediction failed (non-blocking)', {
                requestId,
                error: error instanceof Error ? error.message : String(error)
            });
        }
    }
    /**
     * Closes the current Optimixer instance, if any, and resets the manager so
     * a later call can initialize again. Close errors are logged, not thrown.
     *
     * NOTE(review): an initialize() still in flight when this runs will assign
     * `this.optimixer` afterwards; confirm callers never shut down mid-init.
     */
    async shutdown() {
        const optimixer = this.optimixer;
        // Reset state before closing so new calls never see a closing instance.
        this.optimixer = undefined;
        this.initPromise = undefined;
        if (optimixer) {
            try {
                await optimixer.close();
            }
            catch (error) {
                this.logger.warn('OptimixerManager shutdown: close failed (non-blocking)', {
                    error: error instanceof Error ? error.message : String(error)
                });
            }
        }
    }
}
@@ -0,0 +1,33 @@
1
+ import type { AiMaxTokensActualUsage, AiMaxTokensPredictionResult } from '@x12i/optimixer';
2
+ import type { Activix } from '@x12i/activix';
3
+ import type { Logxer } from '@x12i/logxer';
4
+ import type { ChatRequest, GatewayConfig } from './types.js';
5
/** The `optimixer` section of {@link GatewayConfig}, with `undefined` removed. */
export type OptimixerGatewayConfig = NonNullable<GatewayConfig['optimixer']>;
6
/** Constructor options for {@link OptimixerManager}. */
export interface OptimixerManagerConfig {
    /** Logger that receives the manager's info and warning messages. */
    logger: Logxer;
    /** Async accessor for the Activix client; may resolve to `undefined`. */
    getActivix: () => Promise<Activix | undefined>;
    /** Optimixer settings taken from the gateway config; optional. */
    optimixer?: OptimixerGatewayConfig;
}
11
/** Input to {@link OptimixerManager.predictMaxTokens}. */
export type OptimixerMaxTokensContext = {
    /** The chat request a prediction is wanted for. */
    request: ChatRequest;
    /** Config for the request, typed as `ChatRequest['config']`. */
    mergedConfig: ChatRequest['config'];
    /** Messages whose sizes are estimated; `content` may be any value. */
    messages: {
        role?: string;
        content?: unknown;
    }[];
};
19
/**
 * Manages the Optimixer instance the gateway uses for adaptive `max_tokens`
 * prediction.
 */
export declare class OptimixerManager {
    private readonly config;
    private readonly logger;
    private readonly getActivix;
    private readonly activixCollection;
    private optimixer?;
    private initPromise?;
    constructor(config: OptimixerManagerConfig);
    /** Whether the optimixer feature is switched on. */
    isEnabled(): boolean;
    private ensureReady;
    private initialize;
    /** Resolves to a prediction result, or `undefined` when none is available. */
    predictMaxTokens(ctx: OptimixerMaxTokensContext): Promise<AiMaxTokensPredictionResult | undefined>;
    /** Reports actual usage for the prediction identified by `requestId`. */
    completePrediction(requestId: string, actual: AiMaxTokensActualUsage): Promise<void>;
    /** Releases the managed Optimixer instance. */
    shutdown(): Promise<void>;
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Lightweight token-size estimates for Optimixer predict inputs.
3
+ * Uses a chars/4 heuristic (no tiktoken dependency).
4
+ */
5
/**
 * Estimates the token count of a piece of text using the chars/4 heuristic.
 *
 * Leading and trailing whitespace is ignored. Blank text yields 0; any other
 * text yields at least 1. Non-string input (for example the `undefined` that
 * `JSON.stringify` returns for functions and symbols) is treated as empty
 * text instead of throwing a TypeError.
 *
 * @param text - Text to measure.
 * @returns Estimated number of tokens (a non-negative integer).
 */
export function estimateTextTokens(text) {
    if (typeof text !== 'string')
        return 0;
    const trimmed = text.trim();
    if (!trimmed)
        return 0;
    // Non-empty text has length >= 1, so the ceiling is always at least 1.
    return Math.ceil(trimmed.length / 4);
}
11
/**
 * Splits the estimated token size of a message list into two buckets.
 *
 * Messages whose role is 'system' (case-insensitive) count towards
 * `contextSize`; all other messages count towards `inputSize`. String
 * content is measured directly; other non-null content is measured by its
 * JSON serialization.
 *
 * This is a best-effort estimate and does not throw on odd content: a
 * missing list, null entries, and content that cannot be serialized
 * (circular structures, BigInt, functions, symbols) all contribute 0.
 *
 * @param messages - Messages to measure (`role` and `content` are optional).
 * @returns `{ inputSize, contextSize }` in estimated tokens.
 */
export function estimateMessagesTokenSizes(messages) {
    let inputSize = 0;
    let contextSize = 0;
    for (const message of messages ?? []) {
        if (message == null)
            continue;
        const role = typeof message.role === 'string' ? message.role.toLowerCase() : '';
        let content = '';
        if (typeof message.content === 'string') {
            content = message.content;
        }
        else if (message.content != null) {
            try {
                // JSON.stringify returns undefined for functions and symbols.
                content = JSON.stringify(message.content) ?? '';
            }
            catch {
                // Circular references and BigInt values cannot be serialized.
                content = '';
            }
        }
        const tokens = estimateTextTokens(content);
        if (role === 'system') {
            contextSize += tokens;
        }
        else {
            inputSize += tokens;
        }
    }
    return { inputSize, contextSize };
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Lightweight token-size estimates for Optimixer predict inputs.
3
+ * Uses a chars/4 heuristic (no tiktoken dependency).
4
+ */
5
/** Estimates the token count of `text` with the chars/4 heuristic. */
export declare function estimateTextTokens(text: string): number;
6
/**
 * Estimates token sizes for a message list, reported as separate
 * `inputSize` and `contextSize` totals.
 */
export declare function estimateMessagesTokenSizes(messages: {
    role?: string;
    content?: unknown;
}[]): {
    inputSize: number;
    contextSize: number;
};
@@ -73,6 +73,17 @@ export type GatewayTraceAttempt = {
73
73
  };
74
74
  modelUsed?: string;
75
75
  costUsd?: number;
76
+ /** Billing state for this attempt (trace mode; mirrors top-level {@link EnhancedLLMResponse.metadata.costStatus}). */
77
+ costStatus?: 'priced' | 'unpriced';
78
+ costBreakdown?: {
79
+ promptCostUsd: number;
80
+ completionCostUsd: number;
81
+ cachingCostUsd?: number;
82
+ reasoningCostUsd?: number;
83
+ audioCostUsd?: number;
84
+ imageCostUsd?: number;
85
+ requestFlatCostUsd?: number;
86
+ };
76
87
  ok: boolean;
77
88
  error?: {
78
89
  name: string;
@@ -88,6 +99,22 @@ export type GatewayTraceAttempt = {
88
99
  * Allowlisted merged router/generation config returned in {@link EnhancedLLMResponse.metadata}
89
100
  * when `diagnostics.mode === 'trace'`. Omits arbitrary extras and secrets.
90
101
  */
102
/**
 * Single usage-and-billing summary object exposed on
 * {@link EnhancedLLMResponse.metadata} when `diagnostics.mode === 'trace'`,
 * intended for orchestrators / Run Analysis.
 */
export type GatewayTraceUsageSummary = {
    /** Token counts for the invocation. */
    tokens: {
        prompt: number;
        completion: number;
        total: number;
    };
    maxTokensRequested?: number;
    costUsd?: number;
    /** NOTE(review): relationship to `costUsd` is not visible here; confirm. */
    cost?: number;
    costStatus?: 'priced' | 'unpriced';
    /** Same shape as the per-attempt breakdown on {@link GatewayTraceAttempt}. */
    costBreakdown?: GatewayTraceAttempt['costBreakdown'];
};
91
118
  export type GatewayTraceMergedConfig = Partial<Pick<ModelConfig, 'model' | 'modelId' | 'provider' | 'temperature' | 'maxTokens' | 'topP' | 'frequencyPenalty' | 'presencePenalty' | 'stop'>>;
92
119
  /**
93
120
  * Normalized observability payload attached to thrown errors from {@link AIGateway.invoke}
@@ -348,19 +375,35 @@ export interface GatewayConfig extends Omit<RouterConfig, 'defaultEngine' | 'log
348
375
  mode?: 'dev' | 'debug' | 'prod';
349
376
  /**
350
377
  * @x12i/ai-tools integration: catalog model resolution (request) and cost calculation (response).
378
+ * Pricing catalogs load from open-assets JSON (remote with bundled fallback).
351
379
  */
352
380
  aiTools?: {
353
381
  /** @default true */
354
382
  enabled?: boolean;
355
- /** Inject Catalox; otherwise `createCataloxFromEnv()` from `@x12i/catalox/firebase`. */
356
- catalox?: import('@x12i/catalox').Catalox;
383
+ /** In-memory catalog cache TTL (ms). Default in ai-tools is 24h. */
357
384
  cacheTtlMs?: number;
385
+ /** Use bundled catalog JSON only (offline / tests). */
386
+ bundledOnly?: boolean;
358
387
  /** @default true */
359
388
  resolveModels?: boolean;
360
389
  /** @default true */
361
390
  calculateCost?: boolean;
362
391
  costIncludeBreakdown?: boolean;
363
392
  };
393
+ /**
394
+ * Adaptive `max_tokens` via @x12i/optimixer (embedded Activix mode).
395
+ * When enabled, the gateway predicts completion budget before each LLM call unless
396
+ * the caller explicitly sets `maxTokens` on the request / modelConfig / gateway config.
397
+ */
398
+ optimixer?: {
399
+ /** @default false */
400
+ enabled?: boolean;
401
+ acceptableRisk?: 'very-low' | 'low' | 'medium' | 'high' | number;
402
+ /** Cap predicted max tokens with flex-md model limit when available. @default true */
403
+ useFlexMdCeiling?: boolean;
404
+ /** Passed to Optimixer warmup on create. */
405
+ warmupLimit?: number;
406
+ };
364
407
  /**
365
408
  * InstructionsBlocks overrides
366
409
  * Key: block name, Value: block content
@@ -1009,6 +1052,11 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
1009
1052
  * Ordered, authoritative attempts across retries and fallbacks (trace mode).
1010
1053
  */
1011
1054
  attempts?: GatewayTraceAttempt[];
1055
+ /**
1056
+ * Final usage + billing for the invocation (trace mode). Mirrors successful-attempt
1057
+ * tokens and {@link costUsd} / {@link costStatus} after router passthrough + catalog pricing.
1058
+ */
1059
+ usage?: GatewayTraceUsageSummary;
1012
1060
  /**
1013
1061
  * Merged gateway/router generation config actually used for the invocation (after
1014
1062
  * {@link mergeConfig}: modelConfig / request.config / defaults / flex-md maxTokens).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@x12i/ai-gateway",
3
- "version": "9.3.5",
3
+ "version": "9.5.2",
4
4
  "description": "AI Gateway - Unified interface for LLM provider routing and management",
5
5
  "type": "module",
6
6
  "exports": {
@@ -19,39 +19,16 @@
19
19
  "build:cjs": "tsc -p tsconfig.cjs.json",
20
20
  "rename:cjs": "node scripts/rename-cjs.cjs",
21
21
  "verify:cjs": "node scripts/verify-cjs-deps.cjs",
22
- "copy:defaults": "node -e \"const fs=require('fs');const path=require('path');const src=path.join('src','defaults');const dist=path.join('dist','defaults');const distCjs=path.join('dist-cjs','defaults');const testCompiled=path.join('.tests-compiled','src','defaults');[dist,distCjs,testCompiled].forEach(dir=>{if(!fs.existsSync(dir))fs.mkdirSync(dir,{recursive:true});fs.readdirSync(src).filter(f=>f.endsWith('.json')).forEach(f=>fs.copyFileSync(path.join(src,f),path.join(dir,f)));});fs.copyFileSync('config.defaults.json',path.join('dist','config.defaults.json'));fs.copyFileSync('config.defaults.json',path.join('dist-cjs','config.defaults.json'));\n\"",
22
+ "copy:defaults": "node scripts/copy-defaults.cjs",
23
23
  "prepublishOnly": "rimraf dist dist-cjs && npm run build",
24
24
  "prepack": "rimraf dist dist-cjs && npm run build",
25
25
  "test": "node .tests/run-all.js",
26
- "test:basic": "node .tests/basic-usage.test.js",
27
- "test:prompt": "node .tests/prompt-input.test.js",
28
- "test:memory": "node .tests/working-memory.test.js",
29
- "test:types": "node .tests/generic-types.test.js",
30
- "test:compile": "tsc -p tsconfig.test.json",
31
- "test:object-types": "npm run test:compile && node .tests-compiled/.tests/object-types.test.js",
32
- "test:real": "npm run test:compile && node .tests-compiled/.tests/test.js",
33
- "test:real:comprehensive": "npm run test:real && npm test",
34
- "test:custom-instructions": "npm run test:compile && node .tests-compiled/.tests/test-custom-instructions-only.js",
35
- "test:diagnostic": "tsc -p tsconfig.test.json && node .tests-compiled/.tests/diagnostic-test.js",
36
- "test:config": "node .tests/config-overrides.test.js",
37
- "test:multi": "node .tests/multi-provider.test.js",
38
- "test:activities": "node .tests/ai-activities.test.js",
39
- "test:activities:standalone": "node .tests/test-activities-standalone.js",
40
- "test:focused": "npm run test:compile && node .tests-compiled/.tests/focused-scenarios.test.js",
41
- "test:compile:db": "npx tsc .tests/database-verification.test.ts --outDir .tests-compiled --module commonjs --target ES2020 --esModuleInterop --skipLibCheck --resolveJsonModule",
42
- "test:db-verification": "npm run test:compile:db && cross-env ACTIVITY_TRACKING_ENABLE_LOGGING=true node .tests-compiled/database-verification.test.js",
43
- "test:template-parsing": "npm run test:compile && node .tests-compiled/template-parsing-full.test.js",
44
- "test:template-features": "npm run test:compile && node .tests-compiled/template-parsing-features.test.js",
45
- "test:schema-format": "npm run test:compile && node .tests-compiled/simple-schema-format.test.js",
46
- "test:object-enrichment": "npm run test:compile && node .tests-compiled/object-types-enrichment.test.js",
47
- "test:erc": "npm run test:compile && node .tests-compiled/.tests/erc-compliance.test.js",
26
+ "test:ai-tools": "tsx .tests/ai-tools-gateway.test.ts",
27
+ "test:ai-tools:live": "tsx .tests/ai-tools-live.test.ts",
48
28
  "test:flex-md-parsing": "tsx .tests/flex-md-parsing-scenarios.test.ts",
49
29
  "test:flex-md-esm-regression": "npm run build:esm && node .tests/flex-md-esm-regression.test.mjs",
50
- "test:openai": "npm run test:compile && node .tests-compiled/.tests/callOpenAI.js",
51
- "test:ai-tools": "node .tests/ai-tools-gateway.test.js",
52
- "test:ai-tools:live": "tsx .tests/ai-tools-live.test.ts",
53
- "test:live": "npm run test:ai-tools:live && npm run test:openai",
54
- "test:prepublish": "npm run build && npm test && npm run test:ai-tools && npm run test:live",
30
+ "test:live": "npm run test:ai-tools:live",
31
+ "test:prepublish": "npm run build && npm test",
55
32
  "deps:latest": "npx npm-check-updates -u && npm install"
56
33
  },
57
34
  "keywords": [
@@ -64,14 +41,12 @@
64
41
  "author": "x12i",
65
42
  "license": "mit",
66
43
  "dependencies": {
67
- "@aws-sdk/s3-request-presigner": "^3.953.0",
68
- "@x12i/activix": "^7.1.2",
44
+ "@x12i/activix": "^8.0.0",
69
45
  "@x12i/ai-providers-router": "^4.8.0",
70
- "@x12i/ai-tools": "^1.0.3",
71
- "@x12i/catalox": "^4.2.0",
72
- "@x12i/env": "^4.0.1",
46
+ "@x12i/ai-tools": "^2.0.0",
73
47
  "@x12i/flex-md": "^4.8.0",
74
48
  "@x12i/logxer": "^4.3.5",
49
+ "@x12i/optimixer": "^0.1.0",
75
50
  "@x12i/rendrix": "^4.3.0"
76
51
  },
77
52
  "devDependencies": {
@@ -87,8 +62,7 @@
87
62
  "dist",
88
63
  "dist-cjs",
89
64
  "config.defaults.json",
90
- "README.md",
91
- "CONTENT_RESOLVER_UPSTREAM_GUIDE.md"
65
+ "README.md"
92
66
  ],
93
67
  "repository": {
94
68
  "type": "git",