npm - @x12i/ai-gateway - Version diff - 9.3.5 → 9.5.2 - Mend

@x12i/ai-gateway 9.3.5 → 9.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/README.md +151 -4147
package/dist/activity-manager.d.ts +9 -1
package/dist/activity-manager.js +85 -81
package/dist/ai-tools-client.js +4 -12
package/dist/gateway-config.d.ts +3 -0
package/dist/gateway-config.js +19 -1
package/dist/gateway-utils.d.ts +34 -2
package/dist/gateway-utils.js +204 -35
package/dist/gateway.d.ts +2 -0
package/dist/gateway.js +69 -2
package/dist/index.d.ts +5 -3
package/dist/index.js +4 -19
package/dist/optimixer-manager.d.ts +33 -0
package/dist/optimixer-manager.js +128 -0
package/dist/token-estimate.d.ts +12 -0
package/dist/token-estimate.js +30 -0
package/dist/types.d.ts +50 -2
package/dist-cjs/activity-manager.cjs +85 -81
package/dist-cjs/activity-manager.d.ts +9 -1
package/dist-cjs/ai-tools-client.cjs +4 -12
package/dist-cjs/gateway-config.cjs +19 -1
package/dist-cjs/gateway-config.d.ts +3 -0
package/dist-cjs/gateway-utils.cjs +204 -35
package/dist-cjs/gateway-utils.d.ts +34 -2
package/dist-cjs/gateway.cjs +69 -2
package/dist-cjs/gateway.d.ts +2 -0
package/dist-cjs/index.cjs +4 -19
package/dist-cjs/index.d.ts +5 -3
package/dist-cjs/optimixer-manager.cjs +128 -0
package/dist-cjs/optimixer-manager.d.ts +33 -0
package/dist-cjs/token-estimate.cjs +30 -0
package/dist-cjs/token-estimate.d.ts +12 -0
package/dist-cjs/types.d.ts +50 -2
package/package.json +10 -36

package/dist-cjs/optimixer-manager.cjs ADDED Viewed

@@ -0,0 +1,128 @@
+import { Optimixer } from '@x12i/optimixer';
+import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
+import { estimateMessagesTokenSizes } from './token-estimate.js';
+/**
+ * Picks the action-type identifier reported to Optimixer for a request.
+ *
+ * Order of preference: a non-blank `request.identity.actionType`, then a
+ * non-blank `request.taskTypeId`, then the literal 'gateway.invoke'.
+ * Candidates are stringified and trimmed before use.
+ */
+function resolveActionTypeId(request) {
+    const actionType = request.identity?.actionType;
+    const fromIdentity = actionType ? String(actionType).trim() : '';
+    if (fromIdentity) {
+        return fromIdentity;
+    }
+    const taskTypeId = request.taskTypeId;
+    const fromTask = taskTypeId ? String(taskTypeId).trim() : '';
+    return fromTask || 'gateway.invoke';
+}
+/**
+ * Forwards the request identity as the run context handed to Optimixer,
+ * collapsing any falsy identity to `undefined`.
+ */
+function toActivixRunContext(identity) {
+    return identity ? identity : undefined;
+}
+/**
+ * Best-effort bridge between the gateway and @x12i/optimixer for adaptive
+ * `max_tokens` prediction.
+ *
+ * The public methods degrade to a no-op / `undefined` result when the feature is
+ * disabled, when Activix is unavailable, or when Optimixer fails; failures are
+ * logged as warnings instead of being thrown at the caller.
+ */
+export class OptimixerManager {
+    config;
+    logger;
+    getActivix;
+    optimixer;
+    initPromise;
+    activixCollection;
+    /**
+     * @param config - `{ optimixer, logger, getActivix }`: the gateway's optimixer
+     *   settings, the logger to report to, and a lazy resolver for the Activix client.
+     */
+    constructor(config) {
+        this.config = config.optimixer;
+        this.logger = config.logger;
+        this.getActivix = config.getActivix;
+        this.activixCollection = resolveActivityTrackingConfig().collectionName;
+    }
+    /** @returns `true` only when the optimixer settings carry `enabled: true`. */
+    isEnabled() {
+        return this.config?.enabled === true;
+    }
+    /**
+     * Lazily creates the Optimixer instance; concurrent callers share one
+     * initialization. A failed initialization is not retried until `shutdown()`
+     * resets the manager.
+     *
+     * @returns the ready instance, or `undefined` when disabled or when
+     *   initialization did not produce one.
+     */
+    async ensureReady() {
+        if (!this.isEnabled())
+            return undefined;
+        if (this.optimixer)
+            return this.optimixer;
+        if (!this.initPromise) {
+            this.initPromise = this.initialize();
+        }
+        await this.initPromise;
+        return this.optimixer;
+    }
+    /**
+     * Resolves Activix and creates the Optimixer instance. Never rejects: every
+     * failure is logged and leaves `this.optimixer` unset.
+     */
+    async initialize() {
+        try {
+            // The Activix lookup sits inside the try so that a rejected lookup is logged
+            // here instead of rejecting initPromise (and, with it, every later call).
+            const activix = await this.getActivix();
+            if (!activix) {
+                this.logger.warn('Optimixer enabled but Activix is unavailable; adaptive max_tokens disabled', {
+                    activixCollection: this.activixCollection
+                });
+                return;
+            }
+            this.optimixer = await Optimixer.create({
+                activixClient: activix,
+                activixCollection: this.activixCollection,
+                pipelines: { aiMaxTokens: { enabled: true } },
+                ...(typeof this.config?.warmupLimit === 'number' ? { warmupLimit: this.config.warmupLimit } : {})
+            });
+            this.logger.info('Optimixer initialized for adaptive max_tokens', {
+                activixCollection: this.activixCollection,
+                acceptableRisk: this.config?.acceptableRisk ?? 'medium'
+            });
+        }
+        catch (error) {
+            this.logger.warn('Optimixer initialization failed; adaptive max_tokens disabled', {
+                error: error instanceof Error ? error.message : String(error)
+            });
+            this.optimixer = undefined;
+        }
+    }
+    /**
+     * Asks Optimixer for a completion-token budget for the given request.
+     *
+     * @param ctx - `{ request, mergedConfig, messages }` for the upcoming LLM call.
+     * @returns the prediction result, or `undefined` when the feature is disabled,
+     *   not initialized, or anything fails (the caller then uses its fallback).
+     */
+    async predictMaxTokens(ctx) {
+        try {
+            const optimixer = await this.ensureReady();
+            if (!optimixer)
+                return undefined;
+            const { request, mergedConfig, messages } = ctx;
+            // Size estimation runs inside the try: malformed messages must trigger the
+            // fallback path, not an exception in the middle of the caller's invoke.
+            const { inputSize, contextSize } = estimateMessagesTokenSizes(messages);
+            const acceptableRisk = this.config?.acceptableRisk ?? 'medium';
+            return await optimixer.predictAiMaxTokens({
+                actionTypeId: resolveActionTypeId(request),
+                inputSize,
+                contextSize,
+                acceptableRisk,
+                runContext: toActivixRunContext(request.identity),
+                provider: typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined,
+                model: typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined
+            });
+        }
+        catch (error) {
+            this.logger.warn('Optimixer predictAiMaxTokens failed; caller should use fallback max_tokens', {
+                error: error instanceof Error ? error.message : String(error),
+                aiRequestId: ctx?.request?.aiRequestId
+            });
+            return undefined;
+        }
+    }
+    /**
+     * Reports the actual usage for an earlier prediction. Non-blocking: failures
+     * are logged and swallowed.
+     *
+     * @param requestId - Identifier of the prediction being completed.
+     * @param actual - Actual usage payload forwarded to Optimixer unchanged.
+     */
+    async completePrediction(requestId, actual) {
+        try {
+            const optimixer = await this.ensureReady();
+            if (!optimixer)
+                return;
+            await optimixer.completeAiMaxTokensPrediction({ requestId, actual });
+        }
+        catch (error) {
+            this.logger.warn('Optimixer completeAiMaxTokensPrediction failed (non-blocking)', {
+                requestId,
+                error: error instanceof Error ? error.message : String(error)
+            });
+        }
+    }
+    /**
+     * Closes the Optimixer instance (if any) and resets the manager so a later
+     * call can initialize again. Close failures are logged, not thrown.
+     */
+    async shutdown() {
+        // Let an in-flight initialization settle first; otherwise it would store a
+        // fresh Optimixer after the state was cleared and that instance would never
+        // be closed.
+        const pendingInit = this.initPromise;
+        if (pendingInit) {
+            try {
+                await pendingInit;
+            }
+            catch {
+                // Initialization failures are reported by initialize(); nothing to close.
+            }
+        }
+        const optimixer = this.optimixer;
+        this.optimixer = undefined;
+        this.initPromise = undefined;
+        if (optimixer) {
+            try {
+                await optimixer.close();
+            }
+            catch (error) {
+                this.logger.warn('OptimixerManager shutdown: close failed (non-blocking)', {
+                    error: error instanceof Error ? error.message : String(error)
+                });
+            }
+        }
+    }
+}

package/dist-cjs/optimixer-manager.d.ts ADDED Viewed

@@ -0,0 +1,33 @@
+import type { AiMaxTokensActualUsage, AiMaxTokensPredictionResult } from '@x12i/optimixer';
+import type { Activix } from '@x12i/activix';
+import type { Logxer } from '@x12i/logxer';
+import type { ChatRequest, GatewayConfig } from './types.js';
+/** The `optimixer` section of {@link GatewayConfig}, with `null` / `undefined` excluded. */
+export type OptimixerGatewayConfig = NonNullable<GatewayConfig['optimixer']>;
+/** Constructor options for the OptimixerManager class. */
+export interface OptimixerManagerConfig {
+    /** Optimixer settings from the gateway config; the manager reports itself enabled only when `enabled` is `true`. */
+    optimixer?: OptimixerGatewayConfig;
+    /** Logger that receives the manager's initialization and failure messages. */
+    logger: Logxer;
+    /** Lazily resolves the Activix client; resolving to `undefined` leaves adaptive max_tokens disabled. */
+    getActivix: () => Promise<Activix | undefined>;
+}
+/** Input for `OptimixerManager.predictMaxTokens`. */
+export type OptimixerMaxTokensContext = {
+    /** Originating request; its identity / taskTypeId select the action type, and its `aiRequestId` is logged on failure. */
+    request: ChatRequest;
+    /** Merged generation config; string `provider` and `model` values are forwarded to the prediction. */
+    mergedConfig: ChatRequest['config'];
+    /** Messages whose sizes are estimated: `system` role counts as context, every other role as input. */
+    messages: Array<{
+        role?: string;
+        content?: unknown;
+    }>;
+};
+/**
+ * Wraps @x12i/optimixer for adaptive `max_tokens` prediction in the gateway.
+ * Optimixer call failures in predict / complete / shutdown are logged as warnings.
+ */
+export declare class OptimixerManager {
+    private readonly config;
+    private readonly logger;
+    private readonly getActivix;
+    /** Lazily created Optimixer instance; unset until initialization succeeds and again after shutdown. */
+    private optimixer?;
+    /** Pending or settled initialization, shared so concurrent callers initialize once. */
+    private initPromise?;
+    /** Activix collection name taken from the activity-tracking config. */
+    private readonly activixCollection;
+    constructor(config: OptimixerManagerConfig);
+    /** Whether the optimixer settings have `enabled === true`. */
+    isEnabled(): boolean;
+    private ensureReady;
+    private initialize;
+    /** Predicts a completion-token budget; resolves to `undefined` when disabled, not initialized, or when the prediction call fails. */
+    predictMaxTokens(ctx: OptimixerMaxTokensContext): Promise<AiMaxTokensPredictionResult | undefined>;
+    /** Reports actual usage for an earlier prediction; errors from the Optimixer call are logged, not thrown. */
+    completePrediction(requestId: string, actual: AiMaxTokensActualUsage): Promise<void>;
+    /** Closes the Optimixer instance (if any) and resets the manager so it can initialize again. */
+    shutdown(): Promise<void>;
+}

package/dist-cjs/token-estimate.cjs ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Lightweight token-size estimates for Optimixer predict inputs.
+ * Uses a chars/4 heuristic (no tiktoken dependency).
+ */
+/**
+ * Estimates the token count of a piece of text as `ceil(trimmedLength / 4)`.
+ *
+ * @param text - Text to measure. Non-string values (including `null` /
+ *   `undefined`) count as empty instead of throwing on `.trim()`.
+ * @returns 0 for blank input, otherwise an integer >= 1.
+ */
+export function estimateTextTokens(text) {
+    if (typeof text !== 'string')
+        return 0;
+    const length = text.trim().length;
+    // ceil(length / 4) is already >= 1 for any non-empty string.
+    return length === 0 ? 0 : Math.ceil(length / 4);
+}
+/**
+ * Sums estimated token sizes over chat messages, split by role.
+ *
+ * Messages whose role is `system` (case-insensitive) count toward `contextSize`;
+ * every other message counts toward `inputSize`. String content is measured
+ * directly, other non-null content through its JSON serialization.
+ *
+ * This is a best-effort estimate and does not throw on odd input: a non-array
+ * `messages`, null entries, and content that cannot be serialized (functions,
+ * symbols, cyclic objects, BigInt) all contribute 0.
+ *
+ * @param messages - Array of `{ role?, content? }` messages.
+ * @returns `{ inputSize, contextSize }` in estimated tokens.
+ */
+export function estimateMessagesTokenSizes(messages) {
+    let inputSize = 0;
+    let contextSize = 0;
+    if (!Array.isArray(messages))
+        return { inputSize, contextSize };
+    for (const message of messages) {
+        if (message == null)
+            continue;
+        const role = typeof message.role === 'string' ? message.role.toLowerCase() : '';
+        let content = '';
+        if (typeof message.content === 'string') {
+            content = message.content;
+        }
+        else if (message.content != null) {
+            try {
+                // JSON.stringify returns undefined for functions/symbols and throws on
+                // cycles/BigInt; neither may reach estimateTextTokens or the caller.
+                content = JSON.stringify(message.content) ?? '';
+            }
+            catch {
+                content = '';
+            }
+        }
+        const tokens = estimateTextTokens(content);
+        if (role === 'system') {
+            contextSize += tokens;
+        }
+        else {
+            inputSize += tokens;
+        }
+    }
+    return { inputSize, contextSize };
+}

package/dist-cjs/token-estimate.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * Lightweight token-size estimates for Optimixer predict inputs.
+ * Uses a chars/4 heuristic (no tiktoken dependency).
+ */
+/** Estimates tokens for `text` as `ceil(trimmedLength / 4)`; blank text yields 0. */
+export declare function estimateTextTokens(text: string): number;
+/**
+ * Sums estimated tokens per message: `system`-role messages (case-insensitive) count toward
+ * `contextSize`, all others toward `inputSize`. Non-string content is measured through its
+ * JSON serialization; null / undefined content counts as 0.
+ */
+export declare function estimateMessagesTokenSizes(messages: Array<{
+    role?: string;
+    content?: unknown;
+}>): {
+    inputSize: number;
+    contextSize: number;
+};

package/dist-cjs/types.d.ts CHANGED Viewed

@@ -73,6 +73,17 @@ export type GatewayTraceAttempt = {
     };
     modelUsed?: string;
     costUsd?: number;
+    /** Billing state for this attempt (trace mode; mirrors top-level {@link EnhancedLLMResponse.metadata.costStatus}). */
+    costStatus?: 'priced' | 'unpriced';
+    costBreakdown?: {
+        promptCostUsd: number;
+        completionCostUsd: number;
+        cachingCostUsd?: number;
+        reasoningCostUsd?: number;
+        audioCostUsd?: number;
+        imageCostUsd?: number;
+        requestFlatCostUsd?: number;
+    };
     ok: boolean;
     error?: {
         name: string;
@@ -88,6 +99,22 @@ export type GatewayTraceAttempt = {
  * Allowlisted merged router/generation config returned in {@link EnhancedLLMResponse.metadata}
  * when `diagnostics.mode === 'trace'`. Omits arbitrary extras and secrets.
  */
+/**
+ * Consolidated usage + billing summary on {@link EnhancedLLMResponse.metadata} when
+ * `diagnostics.mode === 'trace'` (single object for orchestrators / Run Analysis).
+ */
+export type GatewayTraceUsageSummary = {
+    /** Token counts for the invocation. */
+    tokens: {
+        prompt: number;
+        completion: number;
+        total: number;
+    };
+    /** NOTE(review): presumably the max_tokens value sent to the provider — confirm against the gateway code. */
+    maxTokensRequested?: number;
+    costUsd?: number;
+    /** NOTE(review): relationship to `costUsd` is not stated in this declaration — confirm whether it is an alias. */
+    cost?: number;
+    /** Billing state: `'priced'` or `'unpriced'`. */
+    costStatus?: 'priced' | 'unpriced';
+    /** Same shape as the per-attempt breakdown on {@link GatewayTraceAttempt}. */
+    costBreakdown?: GatewayTraceAttempt['costBreakdown'];
+};
+/**
+ * Allowlisted merged router/generation config returned in {@link EnhancedLLMResponse.metadata}
+ * when `diagnostics.mode === 'trace'`. Omits arbitrary extras and secrets.
+ * (Restated here because the comment that used to sit directly above this type is now
+ * separated from it by the GatewayTraceUsageSummary declaration.)
+ */
 export type GatewayTraceMergedConfig = Partial<Pick<ModelConfig, 'model' | 'modelId' | 'provider' | 'temperature' | 'maxTokens' | 'topP' | 'frequencyPenalty' | 'presencePenalty' | 'stop'>>;
 /**
  * Normalized observability payload attached to thrown errors from {@link AIGateway.invoke}
@@ -348,19 +375,35 @@ export interface GatewayConfig extends Omit<RouterConfig, 'defaultEngine' | 'log
     mode?: 'dev' | 'debug' | 'prod';
     /**
      * @x12i/ai-tools integration: catalog model resolution (request) and cost calculation (response).
+     * Pricing catalogs load from open-assets JSON (remote with bundled fallback).
      */
     aiTools?: {
         /** @default true */
         enabled?: boolean;
-        /** Inject Catalox; otherwise `createCataloxFromEnv()` from `@x12i/catalox/firebase`. */
-        catalox?: import('@x12i/catalox').Catalox;
+        /** In-memory catalog cache TTL (ms). Default in ai-tools is 24h. */
         cacheTtlMs?: number;
+        /** Use bundled catalog JSON only (offline / tests). */
+        bundledOnly?: boolean;
         /** @default true */
         resolveModels?: boolean;
         /** @default true */
         calculateCost?: boolean;
         costIncludeBreakdown?: boolean;
     };
+    /**
+     * Adaptive `max_tokens` via @x12i/optimixer (embedded Activix mode).
+     * When enabled, the gateway predicts completion budget before each LLM call unless
+     * the caller explicitly sets `maxTokens` on the request / modelConfig / gateway config.
+     */
+    optimixer?: {
+        /** @default false */
+        enabled?: boolean;
+        acceptableRisk?: 'very-low' | 'low' | 'medium' | 'high' | number;
+        /** Cap predicted max tokens with flex-md model limit when available. @default true */
+        useFlexMdCeiling?: boolean;
+        /** Passed to Optimixer warmup on create. */
+        warmupLimit?: number;
+    };
     /**
      * InstructionsBlocks overrides
      * Key: block name, Value: block content
@@ -1009,6 +1052,11 @@ export interface EnhancedLLMResponse<TContent = unknown> extends Omit<AIResponse
          * Ordered, authoritative attempts across retries and fallbacks (trace mode).
          */
         attempts?: GatewayTraceAttempt[];
+        /**
+         * Final usage + billing for the invocation (trace mode). Mirrors successful-attempt
+         * tokens and {@link costUsd} / {@link costStatus} after router passthrough + catalog pricing.
+         */
+        usage?: GatewayTraceUsageSummary;
         /**
          * Merged gateway/router generation config actually used for the invocation (after
          * {@link mergeConfig}: modelConfig / request.config / defaults / flex-md maxTokens).

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@x12i/ai-gateway",
-  "version": "9.3.5",
+  "version": "9.5.2",
   "description": "AI Gateway - Unified interface for LLM provider routing and management",
   "type": "module",
   "exports": {
@@ -19,39 +19,16 @@
     "build:cjs": "tsc -p tsconfig.cjs.json",
     "rename:cjs": "node scripts/rename-cjs.cjs",
     "verify:cjs": "node scripts/verify-cjs-deps.cjs",
-    "copy:defaults": "node -e \"const fs=require('fs');const path=require('path');const src=path.join('src','defaults');const dist=path.join('dist','defaults');const distCjs=path.join('dist-cjs','defaults');const testCompiled=path.join('.tests-compiled','src','defaults');[dist,distCjs,testCompiled].forEach(dir=>{if(!fs.existsSync(dir))fs.mkdirSync(dir,{recursive:true});fs.readdirSync(src).filter(f=>f.endsWith('.json')).forEach(f=>fs.copyFileSync(path.join(src,f),path.join(dir,f)));});fs.copyFileSync('config.defaults.json',path.join('dist','config.defaults.json'));fs.copyFileSync('config.defaults.json',path.join('dist-cjs','config.defaults.json'));\n\"",
+    "copy:defaults": "node scripts/copy-defaults.cjs",
     "prepublishOnly": "rimraf dist dist-cjs && npm run build",
     "prepack": "rimraf dist dist-cjs && npm run build",
     "test": "node .tests/run-all.js",
-    "test:basic": "node .tests/basic-usage.test.js",
-    "test:prompt": "node .tests/prompt-input.test.js",
-    "test:memory": "node .tests/working-memory.test.js",
-    "test:types": "node .tests/generic-types.test.js",
-    "test:compile": "tsc -p tsconfig.test.json",
-    "test:object-types": "npm run test:compile && node .tests-compiled/.tests/object-types.test.js",
-    "test:real": "npm run test:compile && node .tests-compiled/.tests/test.js",
-    "test:real:comprehensive": "npm run test:real && npm test",
-    "test:custom-instructions": "npm run test:compile && node .tests-compiled/.tests/test-custom-instructions-only.js",
-    "test:diagnostic": "tsc -p tsconfig.test.json && node .tests-compiled/.tests/diagnostic-test.js",
-    "test:config": "node .tests/config-overrides.test.js",
-    "test:multi": "node .tests/multi-provider.test.js",
-    "test:activities": "node .tests/ai-activities.test.js",
-    "test:activities:standalone": "node .tests/test-activities-standalone.js",
-    "test:focused": "npm run test:compile && node .tests-compiled/.tests/focused-scenarios.test.js",
-    "test:compile:db": "npx tsc .tests/database-verification.test.ts --outDir .tests-compiled --module commonjs --target ES2020 --esModuleInterop --skipLibCheck --resolveJsonModule",
-    "test:db-verification": "npm run test:compile:db && cross-env ACTIVITY_TRACKING_ENABLE_LOGGING=true node .tests-compiled/database-verification.test.js",
-    "test:template-parsing": "npm run test:compile && node .tests-compiled/template-parsing-full.test.js",
-    "test:template-features": "npm run test:compile && node .tests-compiled/template-parsing-features.test.js",
-    "test:schema-format": "npm run test:compile && node .tests-compiled/simple-schema-format.test.js",
-    "test:object-enrichment": "npm run test:compile && node .tests-compiled/object-types-enrichment.test.js",
-    "test:erc": "npm run test:compile && node .tests-compiled/.tests/erc-compliance.test.js",
+    "test:ai-tools": "tsx .tests/ai-tools-gateway.test.ts",
+    "test:ai-tools:live": "tsx .tests/ai-tools-live.test.ts",
     "test:flex-md-parsing": "tsx .tests/flex-md-parsing-scenarios.test.ts",
     "test:flex-md-esm-regression": "npm run build:esm && node .tests/flex-md-esm-regression.test.mjs",
-    "test:openai": "npm run test:compile && node .tests-compiled/.tests/callOpenAI.js",
-    "test:ai-tools": "node .tests/ai-tools-gateway.test.js",
-    "test:ai-tools:live": "tsx .tests/ai-tools-live.test.ts",
-    "test:live": "npm run test:ai-tools:live && npm run test:openai",
-    "test:prepublish": "npm run build && npm test && npm run test:ai-tools && npm run test:live",
+    "test:live": "npm run test:ai-tools:live",
+    "test:prepublish": "npm run build && npm test",
     "deps:latest": "npx npm-check-updates -u && npm install"
   },
   "keywords": [
@@ -64,14 +41,12 @@
   "author": "x12i",
   "license": "mit",
   "dependencies": {
-    "@aws-sdk/s3-request-presigner": "^3.953.0",
-    "@x12i/activix": "^7.1.2",
+    "@x12i/activix": "^8.0.0",
     "@x12i/ai-providers-router": "^4.8.0",
-    "@x12i/ai-tools": "^1.0.3",
-    "@x12i/catalox": "^4.2.0",
-    "@x12i/env": "^4.0.1",
+    "@x12i/ai-tools": "^2.0.0",
     "@x12i/flex-md": "^4.8.0",
     "@x12i/logxer": "^4.3.5",
+    "@x12i/optimixer": "^0.1.0",
     "@x12i/rendrix": "^4.3.0"
   },
   "devDependencies": {
@@ -87,8 +62,7 @@
     "dist",
     "dist-cjs",
     "config.defaults.json",
-    "README.md",
-    "CONTENT_RESOLVER_UPSTREAM_GUIDE.md"
+    "README.md"
   ],
   "repository": {
     "type": "git",