@x12i/ai-gateway 9.7.9 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -12
- package/dist/defaults/log-diagnostics.json +0 -68
- package/dist/gateway-config.d.ts +1 -15
- package/dist/gateway-config.js +17 -134
- package/dist/gateway-defaults.d.ts +23 -0
- package/dist/gateway-defaults.js +29 -0
- package/dist/gateway-log-diagnostics.d.ts +0 -4
- package/dist/gateway-log-diagnostics.js +1 -5
- package/dist/gateway-log-levels.d.ts +0 -1
- package/dist/gateway-log-levels.js +0 -1
- package/dist/gateway-messages.js +0 -3
- package/dist/gateway-meta.js +12 -10
- package/dist/gateway-mode.d.ts +3 -26
- package/dist/gateway-mode.js +3 -48
- package/dist/gateway-retry.js +7 -6
- package/dist/gateway-utils.d.ts +1 -19
- package/dist/gateway-utils.js +37 -199
- package/dist/gateway.d.ts +0 -3
- package/dist/gateway.js +4 -63
- package/dist/index.d.ts +4 -6
- package/dist/index.js +4 -7
- package/dist/instruction-errors.d.ts +9 -1
- package/dist/instruction-errors.js +15 -1
- package/dist/instruction-optimizer.js +5 -1
- package/dist/message-builder.d.ts +0 -6
- package/dist/message-builder.js +4 -145
- package/dist/types.d.ts +16 -57
- package/dist-cjs/defaults/log-diagnostics.json +0 -68
- package/dist-cjs/gateway-config.cjs +17 -134
- package/dist-cjs/gateway-config.d.ts +1 -15
- package/dist-cjs/gateway-defaults.cjs +29 -0
- package/dist-cjs/gateway-defaults.d.ts +23 -0
- package/dist-cjs/gateway-log-diagnostics.cjs +1 -5
- package/dist-cjs/gateway-log-diagnostics.d.ts +0 -4
- package/dist-cjs/gateway-log-levels.cjs +0 -1
- package/dist-cjs/gateway-log-levels.d.ts +0 -1
- package/dist-cjs/gateway-messages.cjs +0 -3
- package/dist-cjs/gateway-meta.cjs +12 -10
- package/dist-cjs/gateway-mode.cjs +3 -48
- package/dist-cjs/gateway-mode.d.ts +3 -26
- package/dist-cjs/gateway-retry.cjs +7 -6
- package/dist-cjs/gateway-utils.cjs +37 -199
- package/dist-cjs/gateway-utils.d.ts +1 -19
- package/dist-cjs/gateway.cjs +4 -63
- package/dist-cjs/gateway.d.ts +0 -3
- package/dist-cjs/index.cjs +4 -7
- package/dist-cjs/index.d.ts +4 -6
- package/dist-cjs/instruction-errors.cjs +15 -1
- package/dist-cjs/instruction-errors.d.ts +9 -1
- package/dist-cjs/instruction-optimizer.cjs +5 -1
- package/dist-cjs/message-builder.cjs +4 -145
- package/dist-cjs/message-builder.d.ts +0 -6
- package/dist-cjs/types.d.ts +16 -57
- package/package.json +1 -2
- package/dist/defaults/instructions-blocks.json +0 -61
- package/dist/defaults/model-config.json +0 -15
- package/dist/gateway-instructions.d.ts +0 -30
- package/dist/gateway-instructions.js +0 -62
- package/dist/gateway-rate-limiter-constants.d.ts +0 -16
- package/dist/gateway-rate-limiter-constants.js +0 -16
- package/dist/gateway-rate-limiter.d.ts +0 -56
- package/dist/gateway-rate-limiter.js +0 -107
- package/dist/optimixer-manager.d.ts +0 -33
- package/dist/optimixer-manager.js +0 -142
- package/dist/token-estimate.d.ts +0 -12
- package/dist/token-estimate.js +0 -30
- package/dist-cjs/defaults/instructions-blocks.json +0 -61
- package/dist-cjs/defaults/model-config.json +0 -15
- package/dist-cjs/gateway-instructions.cjs +0 -62
- package/dist-cjs/gateway-instructions.d.ts +0 -30
- package/dist-cjs/gateway-rate-limiter-constants.cjs +0 -16
- package/dist-cjs/gateway-rate-limiter-constants.d.ts +0 -16
- package/dist-cjs/gateway-rate-limiter.cjs +0 -107
- package/dist-cjs/gateway-rate-limiter.d.ts +0 -56
- package/dist-cjs/optimixer-manager.cjs +0 -142
- package/dist-cjs/optimixer-manager.d.ts +0 -33
- package/dist-cjs/token-estimate.cjs +0 -30
- package/dist-cjs/token-estimate.d.ts +0 -12
package/dist/gateway-meta.js
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
* Gateway Meta Operations Module
|
|
3
3
|
* Handles meta operations like instruction optimization and testing
|
|
4
4
|
*/
|
|
5
|
-
import {
|
|
5
|
+
import { GATEWAY_DEFAULT_TEMPERATURE } from './gateway-defaults.js';
|
|
6
|
+
import { MaxTokensRequiredError } from './instruction-errors.js';
|
|
6
7
|
/**
|
|
7
8
|
* Test instructions by running them and analyzing the response
|
|
8
9
|
*/
|
|
@@ -10,14 +11,15 @@ export async function testInstructions(instructions, testInput, expectedSchema,
|
|
|
10
11
|
// Get internal system action config (instruction audit)
|
|
11
12
|
const internalConfig = config.internalSystemActions?.instructionAudit;
|
|
12
13
|
const defaultEngine = config.defaultEngine || 'openai';
|
|
13
|
-
const
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
} = options;
|
|
18
|
-
if (!model) {
|
|
19
|
-
throw new Error('Model must be provided in options.model or configured as default');
|
|
14
|
+
const resolvedModel = options.model ?? internalConfig?.model;
|
|
15
|
+
const resolvedProvider = options.provider ?? internalConfig?.engine ?? defaultEngine;
|
|
16
|
+
if (!resolvedModel) {
|
|
17
|
+
throw new Error('Model must be provided in options.model or internalSystemActions.instructionAudit.model');
|
|
20
18
|
}
|
|
19
|
+
if (typeof internalConfig?.maxTokens !== 'number' || internalConfig.maxTokens <= 0) {
|
|
20
|
+
throw new MaxTokensRequiredError('maxTokens must be set in internalSystemActions.instructionAudit for testInstructions');
|
|
21
|
+
}
|
|
22
|
+
const { agentId = 'instruction-tester', model = resolvedModel, provider = resolvedProvider } = options;
|
|
21
23
|
const aiRequestId = `test-instructions-${Date.now()}`;
|
|
22
24
|
const runtimeIdentity = {
|
|
23
25
|
...options.identity,
|
|
@@ -37,8 +39,8 @@ export async function testInstructions(instructions, testInput, expectedSchema,
|
|
|
37
39
|
config: {
|
|
38
40
|
model,
|
|
39
41
|
provider,
|
|
40
|
-
temperature: internalConfig?.temperature ??
|
|
41
|
-
maxTokens: internalConfig
|
|
42
|
+
temperature: internalConfig?.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
|
|
43
|
+
maxTokens: internalConfig.maxTokens
|
|
42
44
|
}
|
|
43
45
|
};
|
|
44
46
|
// Run the test
|
package/dist/gateway-mode.d.ts
CHANGED
|
@@ -1,21 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Gateway operational mode (prod vs dev/debug)
|
|
2
|
+
* Gateway operational mode (prod vs dev/debug).
|
|
3
3
|
*/
|
|
4
|
-
import type {
|
|
5
|
-
import type { ActivityIdentity, GatewayConfig } from './types.js';
|
|
4
|
+
import type { GatewayConfig } from './types.js';
|
|
6
5
|
export type GatewayOperationalMode = 'prod' | 'debug' | 'dev';
|
|
7
|
-
export type GatewayDefaultModelSource = 'env' | 'model-config.json' | 'code';
|
|
8
|
-
export type DefaultModelSubstitutionReason = 'no_model_provided' | 'model_resolution_failed' | 'ai_tools_unavailable';
|
|
9
|
-
/** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
|
|
10
|
-
export declare const CODE_DEFAULT_MODEL = "cheap";
|
|
11
|
-
export type ResolvedGatewayDefault = {
|
|
12
|
-
model: string;
|
|
13
|
-
provider?: string;
|
|
14
|
-
source: GatewayDefaultModelSource;
|
|
15
|
-
};
|
|
16
6
|
/**
|
|
17
7
|
* Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
|
|
18
|
-
*
|
|
8
|
+
* Affects ai-tools model resolution strictness — does not substitute missing models.
|
|
19
9
|
*/
|
|
20
10
|
export declare function getGatewayOperationalMode(config?: Pick<GatewayConfig, 'mode'>): GatewayOperationalMode;
|
|
21
11
|
export declare function isProdGatewayMode(mode: GatewayOperationalMode): boolean;
|
|
@@ -26,16 +16,3 @@ export declare function parseModelProviderSpec(spec: string): {
|
|
|
26
16
|
provider?: string;
|
|
27
17
|
model: string;
|
|
28
18
|
};
|
|
29
|
-
/**
|
|
30
|
-
* Default model priority: AI_GATEWAY_DEFAULT_MODEL → model-config.json → code constant.
|
|
31
|
-
*/
|
|
32
|
-
export declare function resolveGatewayDefaultModel(defaultModelConfig?: Record<string, unknown>, gatewayDefaultEngine?: string): ResolvedGatewayDefault;
|
|
33
|
-
export declare function warnDefaultModelSubstitution(logger: Logxer, identity: Partial<ActivityIdentity> | undefined, details: {
|
|
34
|
-
reason: DefaultModelSubstitutionReason;
|
|
35
|
-
mode: GatewayOperationalMode;
|
|
36
|
-
defaultSource: GatewayDefaultModelSource;
|
|
37
|
-
defaultProvider?: string;
|
|
38
|
-
defaultModel: string;
|
|
39
|
-
originalProvider?: string;
|
|
40
|
-
originalModel?: string;
|
|
41
|
-
}): void;
|
package/dist/gateway-mode.js
CHANGED
|
@@ -1,13 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Gateway operational mode (prod vs dev/debug)
|
|
2
|
+
* Gateway operational mode (prod vs dev/debug).
|
|
3
3
|
*/
|
|
4
|
-
import { gatewayLogDebug } from './gateway-log-meta.js';
|
|
5
|
-
import { fieldEvidence, GatewayLogCode, gatewayWarnCode } from './gateway-log-diagnostics.js';
|
|
6
|
-
/** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
|
|
7
|
-
export const CODE_DEFAULT_MODEL = 'cheap';
|
|
8
4
|
/**
|
|
9
5
|
* Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
|
|
10
|
-
*
|
|
6
|
+
* Affects ai-tools model resolution strictness — does not substitute missing models.
|
|
11
7
|
*/
|
|
12
8
|
export function getGatewayOperationalMode(config) {
|
|
13
9
|
if (config?.mode) {
|
|
@@ -29,7 +25,7 @@ export function isProdGatewayMode(mode) {
|
|
|
29
25
|
export function parseModelProviderSpec(spec) {
|
|
30
26
|
const trimmed = spec.trim();
|
|
31
27
|
if (!trimmed) {
|
|
32
|
-
|
|
28
|
+
throw new Error('Model spec must be a non-empty string');
|
|
33
29
|
}
|
|
34
30
|
const slash = trimmed.indexOf('/');
|
|
35
31
|
if (slash === -1) {
|
|
@@ -42,44 +38,3 @@ export function parseModelProviderSpec(spec) {
|
|
|
42
38
|
}
|
|
43
39
|
return { provider: first, model: rest };
|
|
44
40
|
}
|
|
45
|
-
/**
|
|
46
|
-
* Default model priority: AI_GATEWAY_DEFAULT_MODEL → model-config.json → code constant.
|
|
47
|
-
*/
|
|
48
|
-
export function resolveGatewayDefaultModel(defaultModelConfig, gatewayDefaultEngine) {
|
|
49
|
-
const envSpec = process.env.AI_GATEWAY_DEFAULT_MODEL?.trim();
|
|
50
|
-
if (envSpec) {
|
|
51
|
-
const parsed = parseModelProviderSpec(envSpec);
|
|
52
|
-
return { model: parsed.model, provider: parsed.provider, source: 'env' };
|
|
53
|
-
}
|
|
54
|
-
const jsonModel = typeof defaultModelConfig?.defaultModel === 'string' ? defaultModelConfig.defaultModel : undefined;
|
|
55
|
-
if (jsonModel) {
|
|
56
|
-
const parsed = parseModelProviderSpec(jsonModel);
|
|
57
|
-
const jsonEngine = typeof defaultModelConfig?.defaultEngine === 'string'
|
|
58
|
-
? defaultModelConfig.defaultEngine
|
|
59
|
-
: gatewayDefaultEngine;
|
|
60
|
-
return {
|
|
61
|
-
model: parsed.model,
|
|
62
|
-
provider: parsed.provider ?? jsonEngine,
|
|
63
|
-
source: 'model-config.json'
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
|
-
return {
|
|
67
|
-
model: CODE_DEFAULT_MODEL,
|
|
68
|
-
provider: gatewayDefaultEngine,
|
|
69
|
-
source: 'code'
|
|
70
|
-
};
|
|
71
|
-
}
|
|
72
|
-
export function warnDefaultModelSubstitution(logger, identity, details) {
|
|
73
|
-
gatewayWarnCode(logger, GatewayLogCode.DEFAULT_MODEL_SUBSTITUTED, identity, {
|
|
74
|
-
...details,
|
|
75
|
-
debugKind: gatewayLogDebug.anomaly,
|
|
76
|
-
evidence: [
|
|
77
|
-
fieldEvidence('defaultModel', details.defaultModel),
|
|
78
|
-
fieldEvidence('defaultSource', details.defaultSource),
|
|
79
|
-
fieldEvidence('reason', details.reason),
|
|
80
|
-
fieldEvidence('mode', details.mode),
|
|
81
|
-
...(details.originalModel ? [fieldEvidence('originalModel', details.originalModel)] : []),
|
|
82
|
-
...(details.originalProvider ? [fieldEvidence('originalProvider', details.originalProvider)] : [])
|
|
83
|
-
]
|
|
84
|
-
});
|
|
85
|
-
}
|
package/dist/gateway-retry.js
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* NOTE: Retry delays use SIMPLE SLEEP (not smart rate limiting).
|
|
6
6
|
* Between-calls rate limiting is handled separately in gateway-rate-limiter.ts (smart).
|
|
7
7
|
*/
|
|
8
|
+
import { GATEWAY_DEFAULT_RETRY } from './gateway-defaults.js';
|
|
8
9
|
import { exceptionEvidence, fieldEvidence, GatewayLogCode, gatewayWarnCode } from './gateway-log-diagnostics.js';
|
|
9
10
|
/**
|
|
10
11
|
* Determines if an error is a network error (fetch failed, DNS, connectivity)
|
|
@@ -95,12 +96,12 @@ export function sleep(ms) {
|
|
|
95
96
|
* Returns response and retry metadata
|
|
96
97
|
*/
|
|
97
98
|
export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
|
|
98
|
-
const maxRetries = retryConfig.maxRetries ??
|
|
99
|
-
const initialDelay = retryConfig.initialDelay ??
|
|
100
|
-
const maxDelay = retryConfig.maxDelay ??
|
|
101
|
-
const backoffMultiplier = retryConfig.backoffMultiplier ??
|
|
102
|
-
const enableJitter = retryConfig.enableJitter ??
|
|
103
|
-
const throttlingDelay = retryConfig.throttlingDelay ??
|
|
99
|
+
const maxRetries = retryConfig.maxRetries ?? GATEWAY_DEFAULT_RETRY.maxRetries;
|
|
100
|
+
const initialDelay = retryConfig.initialDelay ?? GATEWAY_DEFAULT_RETRY.initialDelay;
|
|
101
|
+
const maxDelay = retryConfig.maxDelay ?? GATEWAY_DEFAULT_RETRY.maxDelay;
|
|
102
|
+
const backoffMultiplier = retryConfig.backoffMultiplier ?? GATEWAY_DEFAULT_RETRY.backoffMultiplier;
|
|
103
|
+
const enableJitter = retryConfig.enableJitter ?? GATEWAY_DEFAULT_RETRY.enableJitter;
|
|
104
|
+
const throttlingDelay = retryConfig.throttlingDelay ?? GATEWAY_DEFAULT_RETRY.throttlingDelay;
|
|
104
105
|
let lastError;
|
|
105
106
|
const retryAttempts = [];
|
|
106
107
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
package/dist/gateway-utils.d.ts
CHANGED
|
@@ -14,16 +14,9 @@ export declare function generateMD5Hash(text: string): string;
|
|
|
14
14
|
*/
|
|
15
15
|
export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer): Promise<string>;
|
|
16
16
|
export type MergeConfigOptions = {
|
|
17
|
-
defaultModelConfig?: Record<string, unknown>;
|
|
18
17
|
catalog?: AiModelsCatalogClient | null;
|
|
19
18
|
routingEnv?: OpenRouterRoutingConfig;
|
|
20
19
|
};
|
|
21
|
-
/**
|
|
22
|
-
* True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
|
|
23
|
-
*/
|
|
24
|
-
export declare function isMaxTokensExplicitlySet(request: ChatRequest & {
|
|
25
|
-
useInternalDefaults?: 'skill' | 'audit';
|
|
26
|
-
}, config: GatewayConfig): boolean;
|
|
27
20
|
/**
|
|
28
21
|
* Merges config with defaults
|
|
29
22
|
* Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
|
|
@@ -199,17 +192,6 @@ export declare function buildInvokeRejectionMetadata(args: {
|
|
|
199
192
|
error?: unknown;
|
|
200
193
|
}): GatewayInvokeRejectionMetadata;
|
|
201
194
|
export declare function attachGatewayInvokeRejectionMetadata(err: Error, metadata: GatewayInvokeRejectionMetadata): void;
|
|
202
|
-
|
|
203
|
-
export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
|
|
204
|
-
/**
|
|
205
|
-
* Size-cap a provider/router payload before storing on an activity record.
|
|
206
|
-
* Non-serializable values become a small marker object instead of throwing.
|
|
207
|
-
*/
|
|
195
|
+
export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS } from './gateway-defaults.js';
|
|
208
196
|
export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
|
|
209
197
|
export declare function resolveFinishReasonFromRouterResponse(response: unknown): string | undefined;
|
|
210
|
-
export declare function buildOptimixerActualUsage(tokens: {
|
|
211
|
-
prompt: number;
|
|
212
|
-
completion: number;
|
|
213
|
-
total: number;
|
|
214
|
-
}, response: unknown, latencyMs: number): import('@x12i/optimixer').AiMaxTokensActualUsage;
|
|
215
|
-
export {};
|
package/dist/gateway-utils.js
CHANGED
|
@@ -7,10 +7,12 @@ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
|
|
|
7
7
|
import { ModelResolutionError, isKnownProfileOrShortcut } from '@x12i/ai-tools';
|
|
8
8
|
import { extractHttpStatusCode } from './gateway-retry.js';
|
|
9
9
|
import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
|
|
10
|
-
import {
|
|
11
|
-
import { getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
|
|
10
|
+
import { MaxTokensRequiredError, ModelRequiredError } from './instruction-errors.js';
|
|
12
11
|
import { applyModelResolution, buildModelResolverOptions } from './ai-tools-client.js';
|
|
13
|
-
import {
|
|
12
|
+
import { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, GATEWAY_DEFAULT_FREQUENCY_PENALTY, GATEWAY_DEFAULT_PRESENCE_PENALTY, GATEWAY_DEFAULT_TEMPERATURE, GATEWAY_DEFAULT_TOP_P } from './gateway-defaults.js';
|
|
13
|
+
function getPreParsedInstructions(instructions) {
|
|
14
|
+
return instructions ?? '';
|
|
15
|
+
}
|
|
14
16
|
/**
|
|
15
17
|
* Generates MD5 hash of a string
|
|
16
18
|
*/
|
|
@@ -35,83 +37,6 @@ export async function ensureTaskTypeId(request, logger) {
|
|
|
35
37
|
});
|
|
36
38
|
return taskTypeId;
|
|
37
39
|
}
|
|
38
|
-
function applyGatewayDefaultToMerged(merged, defaults, config) {
|
|
39
|
-
merged.model = defaults.model;
|
|
40
|
-
if (defaults.provider) {
|
|
41
|
-
merged.provider = defaults.provider;
|
|
42
|
-
}
|
|
43
|
-
else if (!merged.provider) {
|
|
44
|
-
merged.provider = config.defaultEngine;
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
async function substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, reason, original) {
|
|
48
|
-
const operationalMode = getGatewayOperationalMode(config);
|
|
49
|
-
const defaults = resolveGatewayDefaultModel(mergeOptions?.defaultModelConfig, config.defaultEngine);
|
|
50
|
-
warnDefaultModelSubstitution(logger, request.identity, {
|
|
51
|
-
reason,
|
|
52
|
-
mode: operationalMode,
|
|
53
|
-
defaultSource: defaults.source,
|
|
54
|
-
defaultProvider: defaults.provider ?? merged.provider,
|
|
55
|
-
defaultModel: defaults.model,
|
|
56
|
-
originalProvider: original?.provider ?? merged.provider,
|
|
57
|
-
originalModel: original?.model
|
|
58
|
-
});
|
|
59
|
-
applyGatewayDefaultToMerged(merged, defaults, config);
|
|
60
|
-
}
|
|
61
|
-
async function tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original) {
|
|
62
|
-
const resolveModels = config.aiTools?.resolveModels !== false;
|
|
63
|
-
const catalog = mergeOptions?.catalog;
|
|
64
|
-
if (!resolveModels || !catalog || !merged.model) {
|
|
65
|
-
return;
|
|
66
|
-
}
|
|
67
|
-
try {
|
|
68
|
-
const resolverOptions = buildModelResolverOptions(config, mergeOptions?.routingEnv);
|
|
69
|
-
const resolution = await catalog.resolveModel({
|
|
70
|
-
provider: merged.provider,
|
|
71
|
-
model: merged.model,
|
|
72
|
-
}, resolverOptions);
|
|
73
|
-
if (!resolution.found) {
|
|
74
|
-
return;
|
|
75
|
-
}
|
|
76
|
-
applyModelResolution(merged, resolution, config.defaultEngine, merged.model);
|
|
77
|
-
request._modelResolution = {
|
|
78
|
-
modelId: resolution.modelId,
|
|
79
|
-
routedViaOpenRouter: resolution.routedViaOpenRouter,
|
|
80
|
-
confidence: resolution.confidence,
|
|
81
|
-
resolvedVia: resolution.resolvedVia,
|
|
82
|
-
originalProvider: original?.provider ?? merged.provider,
|
|
83
|
-
originalModel: original?.model ?? merged.model
|
|
84
|
-
};
|
|
85
|
-
logger.verbose('Catalog resolved substituted default model', {
|
|
86
|
-
jobId: request.identity.jobId,
|
|
87
|
-
model: merged.model,
|
|
88
|
-
provider: merged.provider,
|
|
89
|
-
resolvedModelId: resolution.modelId
|
|
90
|
-
});
|
|
91
|
-
}
|
|
92
|
-
catch {
|
|
93
|
-
// Prod keeps the substituted bare default when re-resolution fails.
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
async function substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, reason, original) {
|
|
97
|
-
await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, reason, original);
|
|
98
|
-
await tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original);
|
|
99
|
-
}
|
|
100
|
-
/**
|
|
101
|
-
* True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
|
|
102
|
-
*/
|
|
103
|
-
export function isMaxTokensExplicitlySet(request, config) {
|
|
104
|
-
const useInternalDefaults = request.useInternalDefaults;
|
|
105
|
-
const internalDefaults = useInternalDefaults === 'skill'
|
|
106
|
-
? config.internalSystemActions?.internalSkill
|
|
107
|
-
: useInternalDefaults === 'audit'
|
|
108
|
-
? config.internalSystemActions?.skillAudit
|
|
109
|
-
: undefined;
|
|
110
|
-
return (request.config?.maxTokens !== undefined ||
|
|
111
|
-
request.modelConfig?.maxTokens !== undefined ||
|
|
112
|
-
internalDefaults?.maxTokens !== undefined ||
|
|
113
|
-
config.maxTokens !== undefined);
|
|
114
|
-
}
|
|
115
40
|
/**
|
|
116
41
|
* Merges config with defaults
|
|
117
42
|
* Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
|
|
@@ -135,7 +60,6 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
135
60
|
useInternalDefaults,
|
|
136
61
|
hasInternalDefaults: !!internalDefaults
|
|
137
62
|
});
|
|
138
|
-
const operationalMode = getGatewayOperationalMode(config);
|
|
139
63
|
const resolveModels = config.aiTools?.resolveModels !== false;
|
|
140
64
|
// Priority: modelConfig > request.config > internalSystemActions[useInternalDefaults] > gateway defaults
|
|
141
65
|
// First, merge modelConfig into a config-like object if present
|
|
@@ -152,36 +76,29 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
152
76
|
...Object.fromEntries(Object.entries(request.modelConfig).filter(([key]) => !['model', 'modelId', 'provider', 'temperature', 'maxTokens', 'topP', 'frequencyPenalty', 'presencePenalty', 'stop'].includes(key)))
|
|
153
77
|
} : undefined;
|
|
154
78
|
const merged = {
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
presencePenalty: config.presencePenalty ?? 0.0,
|
|
161
|
-
// Apply internal system action defaults (medium priority) if useInternalDefaults is set
|
|
79
|
+
temperature: config.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
|
|
80
|
+
topP: config.topP ?? GATEWAY_DEFAULT_TOP_P,
|
|
81
|
+
frequencyPenalty: config.frequencyPenalty ?? GATEWAY_DEFAULT_FREQUENCY_PENALTY,
|
|
82
|
+
presencePenalty: config.presencePenalty ?? GATEWAY_DEFAULT_PRESENCE_PENALTY,
|
|
83
|
+
...(config.maxTokens !== undefined ? { maxTokens: config.maxTokens } : {}),
|
|
162
84
|
...(internalDefaults ? {
|
|
163
85
|
model: internalDefaults.model,
|
|
164
86
|
provider: internalDefaults.engine || config.defaultEngine,
|
|
165
|
-
temperature: internalDefaults.temperature ?? config.temperature ??
|
|
166
|
-
// maxTokens from internalDefaults only if explicitly set, otherwise will be auto-detected
|
|
87
|
+
temperature: internalDefaults.temperature ?? config.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
|
|
167
88
|
...(internalDefaults.maxTokens !== undefined ? { maxTokens: internalDefaults.maxTokens } : {})
|
|
168
89
|
} : {}),
|
|
169
|
-
// Request config overrides (higher priority)
|
|
170
90
|
...request.config,
|
|
171
|
-
// ModelConfig overrides (highest priority) - merge only defined values
|
|
172
91
|
...(modelConfigAsConfig ? Object.fromEntries(Object.entries(modelConfigAsConfig).filter(([_, value]) => value !== undefined)) : {}),
|
|
173
|
-
// Model resolved below (catalog, default chain, or explicit pass-through)
|
|
174
92
|
model: modelConfigAsConfig?.model || request.config?.model || internalDefaults?.model,
|
|
175
|
-
// Ensure provider is set: modelConfig > request.config > internalDefaults > gateway default
|
|
176
93
|
provider: modelConfigAsConfig?.provider || request.config?.provider || internalDefaults?.engine || config.defaultEngine
|
|
177
94
|
};
|
|
178
95
|
const explicitModel = merged.model;
|
|
179
96
|
const originalProvider = merged.provider;
|
|
180
97
|
const originalModel = explicitModel;
|
|
181
98
|
if (!explicitModel) {
|
|
182
|
-
|
|
99
|
+
throw new ModelRequiredError();
|
|
183
100
|
}
|
|
184
|
-
|
|
101
|
+
if (resolveModels && mergeOptions?.catalog) {
|
|
185
102
|
try {
|
|
186
103
|
const resolverOptions = buildModelResolverOptions(config, mergeOptions?.routingEnv);
|
|
187
104
|
const resolution = await mergeOptions.catalog.resolveModel({
|
|
@@ -208,9 +125,6 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
208
125
|
resolvedVia: resolution.resolvedVia
|
|
209
126
|
});
|
|
210
127
|
}
|
|
211
|
-
else if (isProdGatewayMode(operationalMode)) {
|
|
212
|
-
await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'model_resolution_failed', { provider: originalProvider, model: originalModel });
|
|
213
|
-
}
|
|
214
128
|
else {
|
|
215
129
|
throw buildModelResolutionFailureError(explicitModel, merged.provider, resolution);
|
|
216
130
|
}
|
|
@@ -219,95 +133,34 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
|
|
|
219
133
|
if (error instanceof ModelResolutionError) {
|
|
220
134
|
throw error;
|
|
221
135
|
}
|
|
222
|
-
if (
|
|
223
|
-
await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
|
|
224
|
-
}
|
|
225
|
-
else {
|
|
136
|
+
if (error instanceof ModelProfileUnroutableError) {
|
|
226
137
|
throw error;
|
|
227
138
|
}
|
|
139
|
+
throw error;
|
|
228
140
|
}
|
|
229
141
|
}
|
|
230
|
-
else if (resolveModels && !mergeOptions?.catalog && isProdGatewayMode(operationalMode)) {
|
|
231
|
-
await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
|
|
232
|
-
}
|
|
233
142
|
if (!merged.model) {
|
|
234
|
-
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
merged.maxTokens = 2000;
|
|
256
|
-
logger.debug('Using fallback maxTokens (flex-md unavailable or no model info)', {
|
|
257
|
-
jobId: request.identity.jobId,
|
|
258
|
-
model: merged.model,
|
|
259
|
-
provider: merged.provider,
|
|
260
|
-
maxTokens: merged.maxTokens,
|
|
261
|
-
note: 'Set maxTokens explicitly in config for custom values.'
|
|
262
|
-
});
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
catch (error) {
|
|
266
|
-
// Error loading flex-md or getting model info - use fallback
|
|
267
|
-
merged.maxTokens = 2000;
|
|
268
|
-
logger.debug('Using fallback maxTokens (flex-md error)', {
|
|
269
|
-
jobId: request.identity.jobId,
|
|
270
|
-
model: merged.model,
|
|
271
|
-
provider: merged.provider,
|
|
272
|
-
maxTokens: merged.maxTokens,
|
|
273
|
-
error: error instanceof Error ? error.message : String(error),
|
|
274
|
-
note: 'Set maxTokens explicitly in config for custom values.'
|
|
275
|
-
});
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
else if (!merged.maxTokens && !optimixerWillPredict) {
|
|
279
|
-
// If maxTokens wasn't set and wasn't auto-detected, use fallback
|
|
280
|
-
// This should rarely happen, but handle edge cases
|
|
281
|
-
merged.maxTokens = 2000;
|
|
282
|
-
logger.debug('Using fallback maxTokens (not auto-detected and not explicitly set)', {
|
|
283
|
-
jobId: request.identity.jobId,
|
|
284
|
-
model: merged.model,
|
|
285
|
-
provider: merged.provider,
|
|
286
|
-
maxTokens: merged.maxTokens,
|
|
287
|
-
maxTokensExplicitlySet,
|
|
288
|
-
optimixerWillPredict
|
|
289
|
-
});
|
|
290
|
-
}
|
|
291
|
-
else if (optimixerWillPredict) {
|
|
292
|
-
logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
|
|
293
|
-
jobId: request.identity.jobId,
|
|
294
|
-
model: merged.model,
|
|
295
|
-
provider: merged.provider
|
|
296
|
-
});
|
|
297
|
-
}
|
|
298
|
-
else {
|
|
299
|
-
// maxTokens was explicitly set - log which source
|
|
300
|
-
const source = request.config?.maxTokens !== undefined ? 'request.config' :
|
|
301
|
-
internalDefaults?.maxTokens !== undefined ? `internalSystemActions.${useInternalDefaults}` :
|
|
302
|
-
'gateway.config';
|
|
303
|
-
logger.debug('Using explicitly set maxTokens', {
|
|
304
|
-
jobId: request.identity.jobId,
|
|
305
|
-
model: merged.model,
|
|
306
|
-
provider: merged.provider,
|
|
307
|
-
maxTokens: merged.maxTokens,
|
|
308
|
-
source
|
|
309
|
-
});
|
|
310
|
-
}
|
|
143
|
+
throw new ModelRequiredError();
|
|
144
|
+
}
|
|
145
|
+
if (typeof merged.maxTokens !== 'number' || !Number.isFinite(merged.maxTokens) || merged.maxTokens <= 0) {
|
|
146
|
+
throw new MaxTokensRequiredError();
|
|
147
|
+
}
|
|
148
|
+
const maxTokensSource = request.config?.maxTokens !== undefined
|
|
149
|
+
? 'request.config'
|
|
150
|
+
: request.modelConfig?.maxTokens !== undefined
|
|
151
|
+
? 'modelConfig'
|
|
152
|
+
: internalDefaults?.maxTokens !== undefined
|
|
153
|
+
? `internalSystemActions.${useInternalDefaults}`
|
|
154
|
+
: config.maxTokens !== undefined
|
|
155
|
+
? 'gateway.config'
|
|
156
|
+
: 'unknown';
|
|
157
|
+
logger.debug('Using maxTokens', {
|
|
158
|
+
jobId: request.identity.jobId,
|
|
159
|
+
model: merged.model,
|
|
160
|
+
provider: merged.provider,
|
|
161
|
+
maxTokens: merged.maxTokens,
|
|
162
|
+
source: maxTokensSource
|
|
163
|
+
});
|
|
311
164
|
logger.debug('Config merged', {
|
|
312
165
|
jobId: request.identity.jobId,
|
|
313
166
|
finalModel: merged.model,
|
|
@@ -1014,12 +867,7 @@ export function buildInvokeRejectionMetadata(args) {
|
|
|
1014
867
|
export function attachGatewayInvokeRejectionMetadata(err, metadata) {
|
|
1015
868
|
err.metadata = metadata;
|
|
1016
869
|
}
|
|
1017
|
-
|
|
1018
|
-
export const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512_000;
|
|
1019
|
-
/**
|
|
1020
|
-
* Size-cap a provider/router payload before storing on an activity record.
|
|
1021
|
-
* Non-serializable values become a small marker object instead of throwing.
|
|
1022
|
-
*/
|
|
870
|
+
export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS } from './gateway-defaults.js';
|
|
1023
871
|
export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS) {
|
|
1024
872
|
if (payload == null)
|
|
1025
873
|
return payload;
|
|
@@ -1055,13 +903,3 @@ export function resolveFinishReasonFromRouterResponse(response) {
|
|
|
1055
903
|
}
|
|
1056
904
|
return undefined;
|
|
1057
905
|
}
|
|
1058
|
-
export function buildOptimixerActualUsage(tokens, response, latencyMs) {
|
|
1059
|
-
const finishReason = resolveFinishReasonFromRouterResponse(response);
|
|
1060
|
-
return {
|
|
1061
|
-
promptTokens: tokens.prompt,
|
|
1062
|
-
completionTokens: tokens.completion,
|
|
1063
|
-
totalTokens: tokens.total,
|
|
1064
|
-
...(finishReason ? { finishReason } : {}),
|
|
1065
|
-
latencyMs
|
|
1066
|
-
};
|
|
1067
|
-
}
|
package/dist/gateway.d.ts
CHANGED
|
@@ -15,9 +15,7 @@ export declare class AIGateway {
|
|
|
15
15
|
private config;
|
|
16
16
|
private logger;
|
|
17
17
|
private activityManager?;
|
|
18
|
-
private optimixerManager?;
|
|
19
18
|
private messageBuilderConfig?;
|
|
20
|
-
private defaultModelConfig;
|
|
21
19
|
private _autoRegisterDone;
|
|
22
20
|
private _aiToolsClient;
|
|
23
21
|
private readonly preferOpenRouter;
|
|
@@ -31,7 +29,6 @@ export declare class AIGateway {
|
|
|
31
29
|
* Invoke AI request (with structured output support)
|
|
32
30
|
*/
|
|
33
31
|
invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
|
|
34
|
-
private applyAdaptiveMaxTokensIfEnabled;
|
|
35
32
|
/**
|
|
36
33
|
* Build simple messages from request (instructions and prompt as literal template text; no registry).
|
|
37
34
|
*/
|