@x12i/ai-gateway 9.7.9 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +67 -12
  2. package/dist/defaults/log-diagnostics.json +0 -68
  3. package/dist/gateway-config.d.ts +1 -15
  4. package/dist/gateway-config.js +17 -134
  5. package/dist/gateway-defaults.d.ts +23 -0
  6. package/dist/gateway-defaults.js +29 -0
  7. package/dist/gateway-log-diagnostics.d.ts +0 -4
  8. package/dist/gateway-log-diagnostics.js +1 -5
  9. package/dist/gateway-log-levels.d.ts +0 -1
  10. package/dist/gateway-log-levels.js +0 -1
  11. package/dist/gateway-messages.js +0 -3
  12. package/dist/gateway-meta.js +12 -10
  13. package/dist/gateway-mode.d.ts +3 -26
  14. package/dist/gateway-mode.js +3 -48
  15. package/dist/gateway-retry.js +7 -6
  16. package/dist/gateway-utils.d.ts +1 -19
  17. package/dist/gateway-utils.js +37 -199
  18. package/dist/gateway.d.ts +0 -3
  19. package/dist/gateway.js +4 -63
  20. package/dist/index.d.ts +4 -6
  21. package/dist/index.js +4 -7
  22. package/dist/instruction-errors.d.ts +9 -1
  23. package/dist/instruction-errors.js +15 -1
  24. package/dist/instruction-optimizer.js +5 -1
  25. package/dist/message-builder.d.ts +0 -6
  26. package/dist/message-builder.js +4 -145
  27. package/dist/types.d.ts +16 -57
  28. package/dist-cjs/defaults/log-diagnostics.json +0 -68
  29. package/dist-cjs/gateway-config.cjs +17 -134
  30. package/dist-cjs/gateway-config.d.ts +1 -15
  31. package/dist-cjs/gateway-defaults.cjs +29 -0
  32. package/dist-cjs/gateway-defaults.d.ts +23 -0
  33. package/dist-cjs/gateway-log-diagnostics.cjs +1 -5
  34. package/dist-cjs/gateway-log-diagnostics.d.ts +0 -4
  35. package/dist-cjs/gateway-log-levels.cjs +0 -1
  36. package/dist-cjs/gateway-log-levels.d.ts +0 -1
  37. package/dist-cjs/gateway-messages.cjs +0 -3
  38. package/dist-cjs/gateway-meta.cjs +12 -10
  39. package/dist-cjs/gateway-mode.cjs +3 -48
  40. package/dist-cjs/gateway-mode.d.ts +3 -26
  41. package/dist-cjs/gateway-retry.cjs +7 -6
  42. package/dist-cjs/gateway-utils.cjs +37 -199
  43. package/dist-cjs/gateway-utils.d.ts +1 -19
  44. package/dist-cjs/gateway.cjs +4 -63
  45. package/dist-cjs/gateway.d.ts +0 -3
  46. package/dist-cjs/index.cjs +4 -7
  47. package/dist-cjs/index.d.ts +4 -6
  48. package/dist-cjs/instruction-errors.cjs +15 -1
  49. package/dist-cjs/instruction-errors.d.ts +9 -1
  50. package/dist-cjs/instruction-optimizer.cjs +5 -1
  51. package/dist-cjs/message-builder.cjs +4 -145
  52. package/dist-cjs/message-builder.d.ts +0 -6
  53. package/dist-cjs/types.d.ts +16 -57
  54. package/package.json +1 -2
  55. package/dist/defaults/instructions-blocks.json +0 -61
  56. package/dist/defaults/model-config.json +0 -15
  57. package/dist/gateway-instructions.d.ts +0 -30
  58. package/dist/gateway-instructions.js +0 -62
  59. package/dist/gateway-rate-limiter-constants.d.ts +0 -16
  60. package/dist/gateway-rate-limiter-constants.js +0 -16
  61. package/dist/gateway-rate-limiter.d.ts +0 -56
  62. package/dist/gateway-rate-limiter.js +0 -107
  63. package/dist/optimixer-manager.d.ts +0 -33
  64. package/dist/optimixer-manager.js +0 -142
  65. package/dist/token-estimate.d.ts +0 -12
  66. package/dist/token-estimate.js +0 -30
  67. package/dist-cjs/defaults/instructions-blocks.json +0 -61
  68. package/dist-cjs/defaults/model-config.json +0 -15
  69. package/dist-cjs/gateway-instructions.cjs +0 -62
  70. package/dist-cjs/gateway-instructions.d.ts +0 -30
  71. package/dist-cjs/gateway-rate-limiter-constants.cjs +0 -16
  72. package/dist-cjs/gateway-rate-limiter-constants.d.ts +0 -16
  73. package/dist-cjs/gateway-rate-limiter.cjs +0 -107
  74. package/dist-cjs/gateway-rate-limiter.d.ts +0 -56
  75. package/dist-cjs/optimixer-manager.cjs +0 -142
  76. package/dist-cjs/optimixer-manager.d.ts +0 -33
  77. package/dist-cjs/token-estimate.cjs +0 -30
  78. package/dist-cjs/token-estimate.d.ts +0 -12
@@ -2,7 +2,8 @@
2
2
  * Gateway Meta Operations Module
3
3
  * Handles meta operations like instruction optimization and testing
4
4
  */
5
- import { CODE_DEFAULT_MODEL } from './gateway-mode.js';
5
+ import { GATEWAY_DEFAULT_TEMPERATURE } from './gateway-defaults.js';
6
+ import { MaxTokensRequiredError } from './instruction-errors.js';
6
7
  /**
7
8
  * Test instructions by running them and analyzing the response
8
9
  */
@@ -10,14 +11,15 @@ export async function testInstructions(instructions, testInput, expectedSchema,
10
11
  // Get internal system action config (instruction audit)
11
12
  const internalConfig = config.internalSystemActions?.instructionAudit;
12
13
  const defaultEngine = config.defaultEngine || 'openai';
13
- const defaultModel = internalConfig?.model || CODE_DEFAULT_MODEL;
14
- const defaultProvider = internalConfig?.engine || defaultEngine;
15
- const { agentId = 'instruction-tester', model = options.model || defaultModel, // Use internal config default if not provided
16
- provider = options.provider || defaultProvider // Use internal config default if not provided
17
- } = options;
18
- if (!model) {
19
- throw new Error('Model must be provided in options.model or configured as default');
14
+ const resolvedModel = options.model ?? internalConfig?.model;
15
+ const resolvedProvider = options.provider ?? internalConfig?.engine ?? defaultEngine;
16
+ if (!resolvedModel) {
17
+ throw new Error('Model must be provided in options.model or internalSystemActions.instructionAudit.model');
20
18
  }
19
+ if (typeof internalConfig?.maxTokens !== 'number' || internalConfig.maxTokens <= 0) {
20
+ throw new MaxTokensRequiredError('maxTokens must be set in internalSystemActions.instructionAudit for testInstructions');
21
+ }
22
+ const { agentId = 'instruction-tester', model = resolvedModel, provider = resolvedProvider } = options;
21
23
  const aiRequestId = `test-instructions-${Date.now()}`;
22
24
  const runtimeIdentity = {
23
25
  ...options.identity,
@@ -37,8 +39,8 @@ export async function testInstructions(instructions, testInput, expectedSchema,
37
39
  config: {
38
40
  model,
39
41
  provider,
40
- temperature: internalConfig?.temperature ?? 0.7, // Use internal config or default
41
- maxTokens: internalConfig?.maxTokens ?? 2000 // Use internal config or default
42
+ temperature: internalConfig?.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
43
+ maxTokens: internalConfig.maxTokens
42
44
  }
43
45
  };
44
46
  // Run the test
@@ -1,21 +1,11 @@
1
1
  /**
2
- * Gateway operational mode (prod vs dev/debug) and default model resolution.
2
+ * Gateway operational mode (prod vs dev/debug).
3
3
  */
4
- import type { Logxer } from '@x12i/logxer';
5
- import type { ActivityIdentity, GatewayConfig } from './types.js';
4
+ import type { GatewayConfig } from './types.js';
6
5
  export type GatewayOperationalMode = 'prod' | 'debug' | 'dev';
7
- export type GatewayDefaultModelSource = 'env' | 'model-config.json' | 'code';
8
- export type DefaultModelSubstitutionReason = 'no_model_provided' | 'model_resolution_failed' | 'ai_tools_unavailable';
9
- /** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
10
- export declare const CODE_DEFAULT_MODEL = "cheap";
11
- export type ResolvedGatewayDefault = {
12
- model: string;
13
- provider?: string;
14
- source: GatewayDefaultModelSource;
15
- };
16
6
  /**
17
7
  * Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
18
- * Only `prod` allows silent default-model substitution; all other values are strict.
8
+ * Affects ai-tools model resolution strictness; does not substitute missing models.
19
9
  */
20
10
  export declare function getGatewayOperationalMode(config?: Pick<GatewayConfig, 'mode'>): GatewayOperationalMode;
21
11
  export declare function isProdGatewayMode(mode: GatewayOperationalMode): boolean;
@@ -26,16 +16,3 @@ export declare function parseModelProviderSpec(spec: string): {
26
16
  provider?: string;
27
17
  model: string;
28
18
  };
29
- /**
30
- * Default model priority: AI_GATEWAY_DEFAULT_MODEL → model-config.json → code constant.
31
- */
32
- export declare function resolveGatewayDefaultModel(defaultModelConfig?: Record<string, unknown>, gatewayDefaultEngine?: string): ResolvedGatewayDefault;
33
- export declare function warnDefaultModelSubstitution(logger: Logxer, identity: Partial<ActivityIdentity> | undefined, details: {
34
- reason: DefaultModelSubstitutionReason;
35
- mode: GatewayOperationalMode;
36
- defaultSource: GatewayDefaultModelSource;
37
- defaultProvider?: string;
38
- defaultModel: string;
39
- originalProvider?: string;
40
- originalModel?: string;
41
- }): void;
@@ -1,13 +1,9 @@
1
1
  /**
2
- * Gateway operational mode (prod vs dev/debug) and default model resolution.
2
+ * Gateway operational mode (prod vs dev/debug).
3
3
  */
4
- import { gatewayLogDebug } from './gateway-log-meta.js';
5
- import { fieldEvidence, GatewayLogCode, gatewayWarnCode } from './gateway-log-diagnostics.js';
6
- /** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
7
- export const CODE_DEFAULT_MODEL = 'cheap';
8
4
  /**
9
5
  * Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
10
- * Only `prod` allows silent default-model substitution; all other values are strict.
6
+ * Affects ai-tools model resolution strictness; does not substitute missing models.
11
7
  */
12
8
  export function getGatewayOperationalMode(config) {
13
9
  if (config?.mode) {
@@ -29,7 +25,7 @@ export function isProdGatewayMode(mode) {
29
25
  export function parseModelProviderSpec(spec) {
30
26
  const trimmed = spec.trim();
31
27
  if (!trimmed) {
32
- return { model: CODE_DEFAULT_MODEL };
28
+ throw new Error('Model spec must be a non-empty string');
33
29
  }
34
30
  const slash = trimmed.indexOf('/');
35
31
  if (slash === -1) {
@@ -42,44 +38,3 @@ export function parseModelProviderSpec(spec) {
42
38
  }
43
39
  return { provider: first, model: rest };
44
40
  }
45
- /**
46
- * Default model priority: AI_GATEWAY_DEFAULT_MODEL → model-config.json → code constant.
47
- */
48
- export function resolveGatewayDefaultModel(defaultModelConfig, gatewayDefaultEngine) {
49
- const envSpec = process.env.AI_GATEWAY_DEFAULT_MODEL?.trim();
50
- if (envSpec) {
51
- const parsed = parseModelProviderSpec(envSpec);
52
- return { model: parsed.model, provider: parsed.provider, source: 'env' };
53
- }
54
- const jsonModel = typeof defaultModelConfig?.defaultModel === 'string' ? defaultModelConfig.defaultModel : undefined;
55
- if (jsonModel) {
56
- const parsed = parseModelProviderSpec(jsonModel);
57
- const jsonEngine = typeof defaultModelConfig?.defaultEngine === 'string'
58
- ? defaultModelConfig.defaultEngine
59
- : gatewayDefaultEngine;
60
- return {
61
- model: parsed.model,
62
- provider: parsed.provider ?? jsonEngine,
63
- source: 'model-config.json'
64
- };
65
- }
66
- return {
67
- model: CODE_DEFAULT_MODEL,
68
- provider: gatewayDefaultEngine,
69
- source: 'code'
70
- };
71
- }
72
- export function warnDefaultModelSubstitution(logger, identity, details) {
73
- gatewayWarnCode(logger, GatewayLogCode.DEFAULT_MODEL_SUBSTITUTED, identity, {
74
- ...details,
75
- debugKind: gatewayLogDebug.anomaly,
76
- evidence: [
77
- fieldEvidence('defaultModel', details.defaultModel),
78
- fieldEvidence('defaultSource', details.defaultSource),
79
- fieldEvidence('reason', details.reason),
80
- fieldEvidence('mode', details.mode),
81
- ...(details.originalModel ? [fieldEvidence('originalModel', details.originalModel)] : []),
82
- ...(details.originalProvider ? [fieldEvidence('originalProvider', details.originalProvider)] : [])
83
- ]
84
- });
85
- }
@@ -5,6 +5,7 @@
5
5
  * NOTE: Retry delays use SIMPLE SLEEP (not smart rate limiting).
6
6
  * Between-calls rate limiting is handled separately in gateway-rate-limiter.ts (smart).
7
7
  */
8
+ import { GATEWAY_DEFAULT_RETRY } from './gateway-defaults.js';
8
9
  import { exceptionEvidence, fieldEvidence, GatewayLogCode, gatewayWarnCode } from './gateway-log-diagnostics.js';
9
10
  /**
10
11
  * Determines if an error is a network error (fetch failed, DNS, connectivity)
@@ -95,12 +96,12 @@ export function sleep(ms) {
95
96
  * Returns response and retry metadata
96
97
  */
97
98
  export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
98
- const maxRetries = retryConfig.maxRetries ?? 3;
99
- const initialDelay = retryConfig.initialDelay ?? 1000;
100
- const maxDelay = retryConfig.maxDelay ?? 30000;
101
- const backoffMultiplier = retryConfig.backoffMultiplier ?? 2;
102
- const enableJitter = retryConfig.enableJitter ?? true;
103
- const throttlingDelay = retryConfig.throttlingDelay ?? 5000;
99
+ const maxRetries = retryConfig.maxRetries ?? GATEWAY_DEFAULT_RETRY.maxRetries;
100
+ const initialDelay = retryConfig.initialDelay ?? GATEWAY_DEFAULT_RETRY.initialDelay;
101
+ const maxDelay = retryConfig.maxDelay ?? GATEWAY_DEFAULT_RETRY.maxDelay;
102
+ const backoffMultiplier = retryConfig.backoffMultiplier ?? GATEWAY_DEFAULT_RETRY.backoffMultiplier;
103
+ const enableJitter = retryConfig.enableJitter ?? GATEWAY_DEFAULT_RETRY.enableJitter;
104
+ const throttlingDelay = retryConfig.throttlingDelay ?? GATEWAY_DEFAULT_RETRY.throttlingDelay;
104
105
  let lastError;
105
106
  const retryAttempts = [];
106
107
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
@@ -14,16 +14,9 @@ export declare function generateMD5Hash(text: string): string;
14
14
  */
15
15
  export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer): Promise<string>;
16
16
  export type MergeConfigOptions = {
17
- defaultModelConfig?: Record<string, unknown>;
18
17
  catalog?: AiModelsCatalogClient | null;
19
18
  routingEnv?: OpenRouterRoutingConfig;
20
19
  };
21
- /**
22
- * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
23
- */
24
- export declare function isMaxTokensExplicitlySet(request: ChatRequest & {
25
- useInternalDefaults?: 'skill' | 'audit';
26
- }, config: GatewayConfig): boolean;
27
20
  /**
28
21
  * Merges config with defaults
29
22
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -199,17 +192,6 @@ export declare function buildInvokeRejectionMetadata(args: {
199
192
  error?: unknown;
200
193
  }): GatewayInvokeRejectionMetadata;
201
194
  export declare function attachGatewayInvokeRejectionMetadata(err: Error, metadata: GatewayInvokeRejectionMetadata): void;
202
- /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
203
- export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
204
- /**
205
- * Size-cap a provider/router payload before storing on an activity record.
206
- * Non-serializable values become a small marker object instead of throwing.
207
- */
195
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS } from './gateway-defaults.js';
208
196
  export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
209
197
  export declare function resolveFinishReasonFromRouterResponse(response: unknown): string | undefined;
210
- export declare function buildOptimixerActualUsage(tokens: {
211
- prompt: number;
212
- completion: number;
213
- total: number;
214
- }, response: unknown, latencyMs: number): import('@x12i/optimixer').AiMaxTokensActualUsage;
215
- export {};
@@ -7,10 +7,12 @@ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
7
7
  import { ModelResolutionError, isKnownProfileOrShortcut } from '@x12i/ai-tools';
8
8
  import { extractHttpStatusCode } from './gateway-retry.js';
9
9
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
10
- import { getPreParsedInstructions } from './gateway-instructions.js';
11
- import { getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
10
+ import { MaxTokensRequiredError, ModelRequiredError } from './instruction-errors.js';
12
11
  import { applyModelResolution, buildModelResolverOptions } from './ai-tools-client.js';
13
- import { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, warnDefaultModelSubstitution } from './gateway-mode.js';
12
+ import { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, GATEWAY_DEFAULT_FREQUENCY_PENALTY, GATEWAY_DEFAULT_PRESENCE_PENALTY, GATEWAY_DEFAULT_TEMPERATURE, GATEWAY_DEFAULT_TOP_P } from './gateway-defaults.js';
13
+ function getPreParsedInstructions(instructions) {
14
+ return instructions ?? '';
15
+ }
14
16
  /**
15
17
  * Generates MD5 hash of a string
16
18
  */
@@ -35,83 +37,6 @@ export async function ensureTaskTypeId(request, logger) {
35
37
  });
36
38
  return taskTypeId;
37
39
  }
38
- function applyGatewayDefaultToMerged(merged, defaults, config) {
39
- merged.model = defaults.model;
40
- if (defaults.provider) {
41
- merged.provider = defaults.provider;
42
- }
43
- else if (!merged.provider) {
44
- merged.provider = config.defaultEngine;
45
- }
46
- }
47
- async function substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, reason, original) {
48
- const operationalMode = getGatewayOperationalMode(config);
49
- const defaults = resolveGatewayDefaultModel(mergeOptions?.defaultModelConfig, config.defaultEngine);
50
- warnDefaultModelSubstitution(logger, request.identity, {
51
- reason,
52
- mode: operationalMode,
53
- defaultSource: defaults.source,
54
- defaultProvider: defaults.provider ?? merged.provider,
55
- defaultModel: defaults.model,
56
- originalProvider: original?.provider ?? merged.provider,
57
- originalModel: original?.model
58
- });
59
- applyGatewayDefaultToMerged(merged, defaults, config);
60
- }
61
- async function tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original) {
62
- const resolveModels = config.aiTools?.resolveModels !== false;
63
- const catalog = mergeOptions?.catalog;
64
- if (!resolveModels || !catalog || !merged.model) {
65
- return;
66
- }
67
- try {
68
- const resolverOptions = buildModelResolverOptions(config, mergeOptions?.routingEnv);
69
- const resolution = await catalog.resolveModel({
70
- provider: merged.provider,
71
- model: merged.model,
72
- }, resolverOptions);
73
- if (!resolution.found) {
74
- return;
75
- }
76
- applyModelResolution(merged, resolution, config.defaultEngine, merged.model);
77
- request._modelResolution = {
78
- modelId: resolution.modelId,
79
- routedViaOpenRouter: resolution.routedViaOpenRouter,
80
- confidence: resolution.confidence,
81
- resolvedVia: resolution.resolvedVia,
82
- originalProvider: original?.provider ?? merged.provider,
83
- originalModel: original?.model ?? merged.model
84
- };
85
- logger.verbose('Catalog resolved substituted default model', {
86
- jobId: request.identity.jobId,
87
- model: merged.model,
88
- provider: merged.provider,
89
- resolvedModelId: resolution.modelId
90
- });
91
- }
92
- catch {
93
- // Prod keeps the substituted bare default when re-resolution fails.
94
- }
95
- }
96
- async function substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, reason, original) {
97
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, reason, original);
98
- await tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original);
99
- }
100
- /**
101
- * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
102
- */
103
- export function isMaxTokensExplicitlySet(request, config) {
104
- const useInternalDefaults = request.useInternalDefaults;
105
- const internalDefaults = useInternalDefaults === 'skill'
106
- ? config.internalSystemActions?.internalSkill
107
- : useInternalDefaults === 'audit'
108
- ? config.internalSystemActions?.skillAudit
109
- : undefined;
110
- return (request.config?.maxTokens !== undefined ||
111
- request.modelConfig?.maxTokens !== undefined ||
112
- internalDefaults?.maxTokens !== undefined ||
113
- config.maxTokens !== undefined);
114
- }
115
40
  /**
116
41
  * Merges config with defaults
117
42
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -135,7 +60,6 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
135
60
  useInternalDefaults,
136
61
  hasInternalDefaults: !!internalDefaults
137
62
  });
138
- const operationalMode = getGatewayOperationalMode(config);
139
63
  const resolveModels = config.aiTools?.resolveModels !== false;
140
64
  // Priority: modelConfig > request.config > internalSystemActions[useInternalDefaults] > gateway defaults
141
65
  // First, merge modelConfig into a config-like object if present
@@ -152,36 +76,29 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
152
76
  ...Object.fromEntries(Object.entries(request.modelConfig).filter(([key]) => !['model', 'modelId', 'provider', 'temperature', 'maxTokens', 'topP', 'frequencyPenalty', 'presencePenalty', 'stop'].includes(key)))
153
77
  } : undefined;
154
78
  const merged = {
155
- // Start with gateway defaults (lowest priority)
156
- temperature: config.temperature ?? 0.7,
157
- // maxTokens will be set from flex-md if available, otherwise fallback to 2000
158
- topP: config.topP ?? 1.0,
159
- frequencyPenalty: config.frequencyPenalty ?? 0.0,
160
- presencePenalty: config.presencePenalty ?? 0.0,
161
- // Apply internal system action defaults (medium priority) if useInternalDefaults is set
79
+ temperature: config.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
80
+ topP: config.topP ?? GATEWAY_DEFAULT_TOP_P,
81
+ frequencyPenalty: config.frequencyPenalty ?? GATEWAY_DEFAULT_FREQUENCY_PENALTY,
82
+ presencePenalty: config.presencePenalty ?? GATEWAY_DEFAULT_PRESENCE_PENALTY,
83
+ ...(config.maxTokens !== undefined ? { maxTokens: config.maxTokens } : {}),
162
84
  ...(internalDefaults ? {
163
85
  model: internalDefaults.model,
164
86
  provider: internalDefaults.engine || config.defaultEngine,
165
- temperature: internalDefaults.temperature ?? config.temperature ?? 0.7,
166
- // maxTokens from internalDefaults only if explicitly set, otherwise will be auto-detected
87
+ temperature: internalDefaults.temperature ?? config.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
167
88
  ...(internalDefaults.maxTokens !== undefined ? { maxTokens: internalDefaults.maxTokens } : {})
168
89
  } : {}),
169
- // Request config overrides (higher priority)
170
90
  ...request.config,
171
- // ModelConfig overrides (highest priority) - merge only defined values
172
91
  ...(modelConfigAsConfig ? Object.fromEntries(Object.entries(modelConfigAsConfig).filter(([_, value]) => value !== undefined)) : {}),
173
- // Model resolved below (catalog, default chain, or explicit pass-through)
174
92
  model: modelConfigAsConfig?.model || request.config?.model || internalDefaults?.model,
175
- // Ensure provider is set: modelConfig > request.config > internalDefaults > gateway default
176
93
  provider: modelConfigAsConfig?.provider || request.config?.provider || internalDefaults?.engine || config.defaultEngine
177
94
  };
178
95
  const explicitModel = merged.model;
179
96
  const originalProvider = merged.provider;
180
97
  const originalModel = explicitModel;
181
98
  if (!explicitModel) {
182
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
99
+ throw new ModelRequiredError();
183
100
  }
184
- else if (resolveModels && mergeOptions?.catalog) {
101
+ if (resolveModels && mergeOptions?.catalog) {
185
102
  try {
186
103
  const resolverOptions = buildModelResolverOptions(config, mergeOptions?.routingEnv);
187
104
  const resolution = await mergeOptions.catalog.resolveModel({
@@ -208,9 +125,6 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
208
125
  resolvedVia: resolution.resolvedVia
209
126
  });
210
127
  }
211
- else if (isProdGatewayMode(operationalMode)) {
212
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'model_resolution_failed', { provider: originalProvider, model: originalModel });
213
- }
214
128
  else {
215
129
  throw buildModelResolutionFailureError(explicitModel, merged.provider, resolution);
216
130
  }
@@ -219,95 +133,34 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
219
133
  if (error instanceof ModelResolutionError) {
220
134
  throw error;
221
135
  }
222
- if (isProdGatewayMode(operationalMode)) {
223
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
224
- }
225
- else {
136
+ if (error instanceof ModelProfileUnroutableError) {
226
137
  throw error;
227
138
  }
139
+ throw error;
228
140
  }
229
141
  }
230
- else if (resolveModels && !mergeOptions?.catalog && isProdGatewayMode(operationalMode)) {
231
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
232
- }
233
142
  if (!merged.model) {
234
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
235
- }
236
- const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
237
- const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
238
- // Auto-get maxTokens from flex-md when Optimixer is not handling adaptive max_tokens.
239
- if (!optimixerWillPredict && !maxTokensExplicitlySet && merged.model && merged.provider) {
240
- // Try to get maxTokens from flex-md
241
- try {
242
- const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(merged.provider, merged.model);
243
- if (flexMdMaxTokens && flexMdMaxTokens > 0) {
244
- merged.maxTokens = flexMdMaxTokens;
245
- logger.debug('Using maxTokens from flex-md', {
246
- jobId: request.identity.jobId,
247
- model: merged.model,
248
- provider: merged.provider,
249
- maxTokens: merged.maxTokens,
250
- source: 'flex-md'
251
- });
252
- }
253
- else {
254
- // flex-md doesn't have model info or returned invalid value - use fallback
255
- merged.maxTokens = 2000;
256
- logger.debug('Using fallback maxTokens (flex-md unavailable or no model info)', {
257
- jobId: request.identity.jobId,
258
- model: merged.model,
259
- provider: merged.provider,
260
- maxTokens: merged.maxTokens,
261
- note: 'Set maxTokens explicitly in config for custom values.'
262
- });
263
- }
264
- }
265
- catch (error) {
266
- // Error loading flex-md or getting model info - use fallback
267
- merged.maxTokens = 2000;
268
- logger.debug('Using fallback maxTokens (flex-md error)', {
269
- jobId: request.identity.jobId,
270
- model: merged.model,
271
- provider: merged.provider,
272
- maxTokens: merged.maxTokens,
273
- error: error instanceof Error ? error.message : String(error),
274
- note: 'Set maxTokens explicitly in config for custom values.'
275
- });
276
- }
277
- }
278
- else if (!merged.maxTokens && !optimixerWillPredict) {
279
- // If maxTokens wasn't set and wasn't auto-detected, use fallback
280
- // This should rarely happen, but handle edge cases
281
- merged.maxTokens = 2000;
282
- logger.debug('Using fallback maxTokens (not auto-detected and not explicitly set)', {
283
- jobId: request.identity.jobId,
284
- model: merged.model,
285
- provider: merged.provider,
286
- maxTokens: merged.maxTokens,
287
- maxTokensExplicitlySet,
288
- optimixerWillPredict
289
- });
290
- }
291
- else if (optimixerWillPredict) {
292
- logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
293
- jobId: request.identity.jobId,
294
- model: merged.model,
295
- provider: merged.provider
296
- });
297
- }
298
- else {
299
- // maxTokens was explicitly set - log which source
300
- const source = request.config?.maxTokens !== undefined ? 'request.config' :
301
- internalDefaults?.maxTokens !== undefined ? `internalSystemActions.${useInternalDefaults}` :
302
- 'gateway.config';
303
- logger.debug('Using explicitly set maxTokens', {
304
- jobId: request.identity.jobId,
305
- model: merged.model,
306
- provider: merged.provider,
307
- maxTokens: merged.maxTokens,
308
- source
309
- });
310
- }
143
+ throw new ModelRequiredError();
144
+ }
145
+ if (typeof merged.maxTokens !== 'number' || !Number.isFinite(merged.maxTokens) || merged.maxTokens <= 0) {
146
+ throw new MaxTokensRequiredError();
147
+ }
148
+ const maxTokensSource = request.config?.maxTokens !== undefined
149
+ ? 'request.config'
150
+ : request.modelConfig?.maxTokens !== undefined
151
+ ? 'modelConfig'
152
+ : internalDefaults?.maxTokens !== undefined
153
+ ? `internalSystemActions.${useInternalDefaults}`
154
+ : config.maxTokens !== undefined
155
+ ? 'gateway.config'
156
+ : 'unknown';
157
+ logger.debug('Using maxTokens', {
158
+ jobId: request.identity.jobId,
159
+ model: merged.model,
160
+ provider: merged.provider,
161
+ maxTokens: merged.maxTokens,
162
+ source: maxTokensSource
163
+ });
311
164
  logger.debug('Config merged', {
312
165
  jobId: request.identity.jobId,
313
166
  finalModel: merged.model,
@@ -1014,12 +867,7 @@ export function buildInvokeRejectionMetadata(args) {
1014
867
  export function attachGatewayInvokeRejectionMetadata(err, metadata) {
1015
868
  err.metadata = metadata;
1016
869
  }
1017
- /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
1018
- export const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512_000;
1019
- /**
1020
- * Size-cap a provider/router payload before storing on an activity record.
1021
- * Non-serializable values become a small marker object instead of throwing.
1022
- */
870
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS } from './gateway-defaults.js';
1023
871
  export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS) {
1024
872
  if (payload == null)
1025
873
  return payload;
@@ -1055,13 +903,3 @@ export function resolveFinishReasonFromRouterResponse(response) {
1055
903
  }
1056
904
  return undefined;
1057
905
  }
1058
- export function buildOptimixerActualUsage(tokens, response, latencyMs) {
1059
- const finishReason = resolveFinishReasonFromRouterResponse(response);
1060
- return {
1061
- promptTokens: tokens.prompt,
1062
- completionTokens: tokens.completion,
1063
- totalTokens: tokens.total,
1064
- ...(finishReason ? { finishReason } : {}),
1065
- latencyMs
1066
- };
1067
- }
package/dist/gateway.d.ts CHANGED
@@ -15,9 +15,7 @@ export declare class AIGateway {
15
15
  private config;
16
16
  private logger;
17
17
  private activityManager?;
18
- private optimixerManager?;
19
18
  private messageBuilderConfig?;
20
- private defaultModelConfig;
21
19
  private _autoRegisterDone;
22
20
  private _aiToolsClient;
23
21
  private readonly preferOpenRouter;
@@ -31,7 +29,6 @@ export declare class AIGateway {
31
29
  * Invoke AI request (with structured output support)
32
30
  */
33
31
  invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
34
- private applyAdaptiveMaxTokensIfEnabled;
35
32
  /**
36
33
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
37
34
  */