@x12i/ai-gateway 9.7.8 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +67 -12
  2. package/dist/defaults/log-diagnostics.json +0 -68
  3. package/dist/gateway-config.d.ts +1 -15
  4. package/dist/gateway-config.js +17 -134
  5. package/dist/gateway-defaults.d.ts +23 -0
  6. package/dist/gateway-defaults.js +29 -0
  7. package/dist/gateway-log-diagnostics.d.ts +0 -4
  8. package/dist/gateway-log-diagnostics.js +1 -5
  9. package/dist/gateway-log-levels.d.ts +0 -1
  10. package/dist/gateway-log-levels.js +0 -1
  11. package/dist/gateway-messages.js +0 -3
  12. package/dist/gateway-meta.js +12 -10
  13. package/dist/gateway-mode.d.ts +3 -26
  14. package/dist/gateway-mode.js +3 -48
  15. package/dist/gateway-retry.js +7 -6
  16. package/dist/gateway-utils.d.ts +1 -19
  17. package/dist/gateway-utils.js +37 -199
  18. package/dist/gateway.d.ts +0 -3
  19. package/dist/gateway.js +4 -63
  20. package/dist/index.d.ts +4 -6
  21. package/dist/index.js +4 -7
  22. package/dist/instruction-errors.d.ts +9 -1
  23. package/dist/instruction-errors.js +15 -1
  24. package/dist/instruction-optimizer.js +5 -1
  25. package/dist/message-builder.d.ts +0 -6
  26. package/dist/message-builder.js +4 -145
  27. package/dist/types.d.ts +16 -57
  28. package/dist-cjs/defaults/log-diagnostics.json +0 -68
  29. package/dist-cjs/gateway-config.cjs +17 -134
  30. package/dist-cjs/gateway-config.d.ts +1 -15
  31. package/dist-cjs/gateway-defaults.cjs +29 -0
  32. package/dist-cjs/gateway-defaults.d.ts +23 -0
  33. package/dist-cjs/gateway-log-diagnostics.cjs +1 -5
  34. package/dist-cjs/gateway-log-diagnostics.d.ts +0 -4
  35. package/dist-cjs/gateway-log-levels.cjs +0 -1
  36. package/dist-cjs/gateway-log-levels.d.ts +0 -1
  37. package/dist-cjs/gateway-messages.cjs +0 -3
  38. package/dist-cjs/gateway-meta.cjs +12 -10
  39. package/dist-cjs/gateway-mode.cjs +3 -48
  40. package/dist-cjs/gateway-mode.d.ts +3 -26
  41. package/dist-cjs/gateway-retry.cjs +7 -6
  42. package/dist-cjs/gateway-utils.cjs +37 -199
  43. package/dist-cjs/gateway-utils.d.ts +1 -19
  44. package/dist-cjs/gateway.cjs +4 -63
  45. package/dist-cjs/gateway.d.ts +0 -3
  46. package/dist-cjs/index.cjs +4 -7
  47. package/dist-cjs/index.d.ts +4 -6
  48. package/dist-cjs/instruction-errors.cjs +15 -1
  49. package/dist-cjs/instruction-errors.d.ts +9 -1
  50. package/dist-cjs/instruction-optimizer.cjs +5 -1
  51. package/dist-cjs/message-builder.cjs +4 -145
  52. package/dist-cjs/message-builder.d.ts +0 -6
  53. package/dist-cjs/types.d.ts +16 -57
  54. package/package.json +2 -3
  55. package/dist/defaults/instructions-blocks.json +0 -61
  56. package/dist/defaults/model-config.json +0 -15
  57. package/dist/gateway-instructions.d.ts +0 -30
  58. package/dist/gateway-instructions.js +0 -62
  59. package/dist/gateway-rate-limiter-constants.d.ts +0 -16
  60. package/dist/gateway-rate-limiter-constants.js +0 -16
  61. package/dist/gateway-rate-limiter.d.ts +0 -56
  62. package/dist/gateway-rate-limiter.js +0 -107
  63. package/dist/optimixer-manager.d.ts +0 -33
  64. package/dist/optimixer-manager.js +0 -142
  65. package/dist/token-estimate.d.ts +0 -12
  66. package/dist/token-estimate.js +0 -30
  67. package/dist-cjs/defaults/instructions-blocks.json +0 -61
  68. package/dist-cjs/defaults/model-config.json +0 -15
  69. package/dist-cjs/gateway-instructions.cjs +0 -62
  70. package/dist-cjs/gateway-instructions.d.ts +0 -30
  71. package/dist-cjs/gateway-rate-limiter-constants.cjs +0 -16
  72. package/dist-cjs/gateway-rate-limiter-constants.d.ts +0 -16
  73. package/dist-cjs/gateway-rate-limiter.cjs +0 -107
  74. package/dist-cjs/gateway-rate-limiter.d.ts +0 -56
  75. package/dist-cjs/optimixer-manager.cjs +0 -142
  76. package/dist-cjs/optimixer-manager.d.ts +0 -33
  77. package/dist-cjs/token-estimate.cjs +0 -30
  78. package/dist-cjs/token-estimate.d.ts +0 -12
@@ -5,6 +5,7 @@
5
5
  * NOTE: Retry delays use SIMPLE SLEEP (not smart rate limiting).
6
6
  * Between-calls rate limiting is handled separately in gateway-rate-limiter.ts (smart).
7
7
  */
8
+ import { GATEWAY_DEFAULT_RETRY } from './gateway-defaults.js';
8
9
  import { exceptionEvidence, fieldEvidence, GatewayLogCode, gatewayWarnCode } from './gateway-log-diagnostics.js';
9
10
  /**
10
11
  * Determines if an error is a network error (fetch failed, DNS, connectivity)
@@ -95,12 +96,12 @@ export function sleep(ms) {
95
96
  * Returns response and retry metadata
96
97
  */
97
98
  export async function invokeWithRetry(routerRequest, retryConfig, jobId, router, logger, hooks) {
98
- const maxRetries = retryConfig.maxRetries ?? 3;
99
- const initialDelay = retryConfig.initialDelay ?? 1000;
100
- const maxDelay = retryConfig.maxDelay ?? 30000;
101
- const backoffMultiplier = retryConfig.backoffMultiplier ?? 2;
102
- const enableJitter = retryConfig.enableJitter ?? true;
103
- const throttlingDelay = retryConfig.throttlingDelay ?? 5000;
99
+ const maxRetries = retryConfig.maxRetries ?? GATEWAY_DEFAULT_RETRY.maxRetries;
100
+ const initialDelay = retryConfig.initialDelay ?? GATEWAY_DEFAULT_RETRY.initialDelay;
101
+ const maxDelay = retryConfig.maxDelay ?? GATEWAY_DEFAULT_RETRY.maxDelay;
102
+ const backoffMultiplier = retryConfig.backoffMultiplier ?? GATEWAY_DEFAULT_RETRY.backoffMultiplier;
103
+ const enableJitter = retryConfig.enableJitter ?? GATEWAY_DEFAULT_RETRY.enableJitter;
104
+ const throttlingDelay = retryConfig.throttlingDelay ?? GATEWAY_DEFAULT_RETRY.throttlingDelay;
104
105
  let lastError;
105
106
  const retryAttempts = [];
106
107
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
@@ -7,10 +7,12 @@ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
7
7
  import { ModelResolutionError, isKnownProfileOrShortcut } from '@x12i/ai-tools';
8
8
  import { extractHttpStatusCode } from './gateway-retry.js';
9
9
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
10
- import { getPreParsedInstructions } from './gateway-instructions.js';
11
- import { getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
10
+ import { MaxTokensRequiredError, ModelRequiredError } from './instruction-errors.js';
12
11
  import { applyModelResolution, buildModelResolverOptions } from './ai-tools-client.js';
13
- import { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, warnDefaultModelSubstitution } from './gateway-mode.js';
12
+ import { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, GATEWAY_DEFAULT_FREQUENCY_PENALTY, GATEWAY_DEFAULT_PRESENCE_PENALTY, GATEWAY_DEFAULT_TEMPERATURE, GATEWAY_DEFAULT_TOP_P } from './gateway-defaults.js';
13
+ function getPreParsedInstructions(instructions) {
14
+ return instructions ?? '';
15
+ }
14
16
  /**
15
17
  * Generates MD5 hash of a string
16
18
  */
@@ -35,83 +37,6 @@ export async function ensureTaskTypeId(request, logger) {
35
37
  });
36
38
  return taskTypeId;
37
39
  }
38
- function applyGatewayDefaultToMerged(merged, defaults, config) {
39
- merged.model = defaults.model;
40
- if (defaults.provider) {
41
- merged.provider = defaults.provider;
42
- }
43
- else if (!merged.provider) {
44
- merged.provider = config.defaultEngine;
45
- }
46
- }
47
- async function substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, reason, original) {
48
- const operationalMode = getGatewayOperationalMode(config);
49
- const defaults = resolveGatewayDefaultModel(mergeOptions?.defaultModelConfig, config.defaultEngine);
50
- warnDefaultModelSubstitution(logger, request.identity, {
51
- reason,
52
- mode: operationalMode,
53
- defaultSource: defaults.source,
54
- defaultProvider: defaults.provider ?? merged.provider,
55
- defaultModel: defaults.model,
56
- originalProvider: original?.provider ?? merged.provider,
57
- originalModel: original?.model
58
- });
59
- applyGatewayDefaultToMerged(merged, defaults, config);
60
- }
61
- async function tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original) {
62
- const resolveModels = config.aiTools?.resolveModels !== false;
63
- const catalog = mergeOptions?.catalog;
64
- if (!resolveModels || !catalog || !merged.model) {
65
- return;
66
- }
67
- try {
68
- const resolverOptions = buildModelResolverOptions(config, mergeOptions?.routingEnv);
69
- const resolution = await catalog.resolveModel({
70
- provider: merged.provider,
71
- model: merged.model,
72
- }, resolverOptions);
73
- if (!resolution.found) {
74
- return;
75
- }
76
- applyModelResolution(merged, resolution, config.defaultEngine, merged.model);
77
- request._modelResolution = {
78
- modelId: resolution.modelId,
79
- routedViaOpenRouter: resolution.routedViaOpenRouter,
80
- confidence: resolution.confidence,
81
- resolvedVia: resolution.resolvedVia,
82
- originalProvider: original?.provider ?? merged.provider,
83
- originalModel: original?.model ?? merged.model
84
- };
85
- logger.verbose('Catalog resolved substituted default model', {
86
- jobId: request.identity.jobId,
87
- model: merged.model,
88
- provider: merged.provider,
89
- resolvedModelId: resolution.modelId
90
- });
91
- }
92
- catch {
93
- // Prod keeps the substituted bare default when re-resolution fails.
94
- }
95
- }
96
- async function substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, reason, original) {
97
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, reason, original);
98
- await tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original);
99
- }
100
- /**
101
- * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
102
- */
103
- export function isMaxTokensExplicitlySet(request, config) {
104
- const useInternalDefaults = request.useInternalDefaults;
105
- const internalDefaults = useInternalDefaults === 'skill'
106
- ? config.internalSystemActions?.internalSkill
107
- : useInternalDefaults === 'audit'
108
- ? config.internalSystemActions?.skillAudit
109
- : undefined;
110
- return (request.config?.maxTokens !== undefined ||
111
- request.modelConfig?.maxTokens !== undefined ||
112
- internalDefaults?.maxTokens !== undefined ||
113
- config.maxTokens !== undefined);
114
- }
115
40
  /**
116
41
  * Merges config with defaults
117
42
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -135,7 +60,6 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
135
60
  useInternalDefaults,
136
61
  hasInternalDefaults: !!internalDefaults
137
62
  });
138
- const operationalMode = getGatewayOperationalMode(config);
139
63
  const resolveModels = config.aiTools?.resolveModels !== false;
140
64
  // Priority: modelConfig > request.config > internalSystemActions[useInternalDefaults] > gateway defaults
141
65
  // First, merge modelConfig into a config-like object if present
@@ -152,36 +76,29 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
152
76
  ...Object.fromEntries(Object.entries(request.modelConfig).filter(([key]) => !['model', 'modelId', 'provider', 'temperature', 'maxTokens', 'topP', 'frequencyPenalty', 'presencePenalty', 'stop'].includes(key)))
153
77
  } : undefined;
154
78
  const merged = {
155
- // Start with gateway defaults (lowest priority)
156
- temperature: config.temperature ?? 0.7,
157
- // maxTokens will be set from flex-md if available, otherwise fallback to 2000
158
- topP: config.topP ?? 1.0,
159
- frequencyPenalty: config.frequencyPenalty ?? 0.0,
160
- presencePenalty: config.presencePenalty ?? 0.0,
161
- // Apply internal system action defaults (medium priority) if useInternalDefaults is set
79
+ temperature: config.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
80
+ topP: config.topP ?? GATEWAY_DEFAULT_TOP_P,
81
+ frequencyPenalty: config.frequencyPenalty ?? GATEWAY_DEFAULT_FREQUENCY_PENALTY,
82
+ presencePenalty: config.presencePenalty ?? GATEWAY_DEFAULT_PRESENCE_PENALTY,
83
+ ...(config.maxTokens !== undefined ? { maxTokens: config.maxTokens } : {}),
162
84
  ...(internalDefaults ? {
163
85
  model: internalDefaults.model,
164
86
  provider: internalDefaults.engine || config.defaultEngine,
165
- temperature: internalDefaults.temperature ?? config.temperature ?? 0.7,
166
- // maxTokens from internalDefaults only if explicitly set, otherwise will be auto-detected
87
+ temperature: internalDefaults.temperature ?? config.temperature ?? GATEWAY_DEFAULT_TEMPERATURE,
167
88
  ...(internalDefaults.maxTokens !== undefined ? { maxTokens: internalDefaults.maxTokens } : {})
168
89
  } : {}),
169
- // Request config overrides (higher priority)
170
90
  ...request.config,
171
- // ModelConfig overrides (highest priority) - merge only defined values
172
91
  ...(modelConfigAsConfig ? Object.fromEntries(Object.entries(modelConfigAsConfig).filter(([_, value]) => value !== undefined)) : {}),
173
- // Model resolved below (catalog, default chain, or explicit pass-through)
174
92
  model: modelConfigAsConfig?.model || request.config?.model || internalDefaults?.model,
175
- // Ensure provider is set: modelConfig > request.config > internalDefaults > gateway default
176
93
  provider: modelConfigAsConfig?.provider || request.config?.provider || internalDefaults?.engine || config.defaultEngine
177
94
  };
178
95
  const explicitModel = merged.model;
179
96
  const originalProvider = merged.provider;
180
97
  const originalModel = explicitModel;
181
98
  if (!explicitModel) {
182
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
99
+ throw new ModelRequiredError();
183
100
  }
184
- else if (resolveModels && mergeOptions?.catalog) {
101
+ if (resolveModels && mergeOptions?.catalog) {
185
102
  try {
186
103
  const resolverOptions = buildModelResolverOptions(config, mergeOptions?.routingEnv);
187
104
  const resolution = await mergeOptions.catalog.resolveModel({
@@ -208,9 +125,6 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
208
125
  resolvedVia: resolution.resolvedVia
209
126
  });
210
127
  }
211
- else if (isProdGatewayMode(operationalMode)) {
212
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'model_resolution_failed', { provider: originalProvider, model: originalModel });
213
- }
214
128
  else {
215
129
  throw buildModelResolutionFailureError(explicitModel, merged.provider, resolution);
216
130
  }
@@ -219,95 +133,34 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
219
133
  if (error instanceof ModelResolutionError) {
220
134
  throw error;
221
135
  }
222
- if (isProdGatewayMode(operationalMode)) {
223
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
224
- }
225
- else {
136
+ if (error instanceof ModelProfileUnroutableError) {
226
137
  throw error;
227
138
  }
139
+ throw error;
228
140
  }
229
141
  }
230
- else if (resolveModels && !mergeOptions?.catalog && isProdGatewayMode(operationalMode)) {
231
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
232
- }
233
142
  if (!merged.model) {
234
- await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
235
- }
236
- const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
237
- const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
238
- // Auto-get maxTokens from flex-md when Optimixer is not handling adaptive max_tokens.
239
- if (!optimixerWillPredict && !maxTokensExplicitlySet && merged.model && merged.provider) {
240
- // Try to get maxTokens from flex-md
241
- try {
242
- const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(merged.provider, merged.model);
243
- if (flexMdMaxTokens && flexMdMaxTokens > 0) {
244
- merged.maxTokens = flexMdMaxTokens;
245
- logger.debug('Using maxTokens from flex-md', {
246
- jobId: request.identity.jobId,
247
- model: merged.model,
248
- provider: merged.provider,
249
- maxTokens: merged.maxTokens,
250
- source: 'flex-md'
251
- });
252
- }
253
- else {
254
- // flex-md doesn't have model info or returned invalid value - use fallback
255
- merged.maxTokens = 2000;
256
- logger.debug('Using fallback maxTokens (flex-md unavailable or no model info)', {
257
- jobId: request.identity.jobId,
258
- model: merged.model,
259
- provider: merged.provider,
260
- maxTokens: merged.maxTokens,
261
- note: 'Set maxTokens explicitly in config for custom values.'
262
- });
263
- }
264
- }
265
- catch (error) {
266
- // Error loading flex-md or getting model info - use fallback
267
- merged.maxTokens = 2000;
268
- logger.debug('Using fallback maxTokens (flex-md error)', {
269
- jobId: request.identity.jobId,
270
- model: merged.model,
271
- provider: merged.provider,
272
- maxTokens: merged.maxTokens,
273
- error: error instanceof Error ? error.message : String(error),
274
- note: 'Set maxTokens explicitly in config for custom values.'
275
- });
276
- }
277
- }
278
- else if (!merged.maxTokens && !optimixerWillPredict) {
279
- // If maxTokens wasn't set and wasn't auto-detected, use fallback
280
- // This should rarely happen, but handle edge cases
281
- merged.maxTokens = 2000;
282
- logger.debug('Using fallback maxTokens (not auto-detected and not explicitly set)', {
283
- jobId: request.identity.jobId,
284
- model: merged.model,
285
- provider: merged.provider,
286
- maxTokens: merged.maxTokens,
287
- maxTokensExplicitlySet,
288
- optimixerWillPredict
289
- });
290
- }
291
- else if (optimixerWillPredict) {
292
- logger.debug('Deferring maxTokens to Optimixer predictAiMaxTokens', {
293
- jobId: request.identity.jobId,
294
- model: merged.model,
295
- provider: merged.provider
296
- });
297
- }
298
- else {
299
- // maxTokens was explicitly set - log which source
300
- const source = request.config?.maxTokens !== undefined ? 'request.config' :
301
- internalDefaults?.maxTokens !== undefined ? `internalSystemActions.${useInternalDefaults}` :
302
- 'gateway.config';
303
- logger.debug('Using explicitly set maxTokens', {
304
- jobId: request.identity.jobId,
305
- model: merged.model,
306
- provider: merged.provider,
307
- maxTokens: merged.maxTokens,
308
- source
309
- });
310
- }
143
+ throw new ModelRequiredError();
144
+ }
145
+ if (typeof merged.maxTokens !== 'number' || !Number.isFinite(merged.maxTokens) || merged.maxTokens <= 0) {
146
+ throw new MaxTokensRequiredError();
147
+ }
148
+ const maxTokensSource = request.config?.maxTokens !== undefined
149
+ ? 'request.config'
150
+ : request.modelConfig?.maxTokens !== undefined
151
+ ? 'modelConfig'
152
+ : internalDefaults?.maxTokens !== undefined
153
+ ? `internalSystemActions.${useInternalDefaults}`
154
+ : config.maxTokens !== undefined
155
+ ? 'gateway.config'
156
+ : 'unknown';
157
+ logger.debug('Using maxTokens', {
158
+ jobId: request.identity.jobId,
159
+ model: merged.model,
160
+ provider: merged.provider,
161
+ maxTokens: merged.maxTokens,
162
+ source: maxTokensSource
163
+ });
311
164
  logger.debug('Config merged', {
312
165
  jobId: request.identity.jobId,
313
166
  finalModel: merged.model,
@@ -1014,12 +867,7 @@ export function buildInvokeRejectionMetadata(args) {
1014
867
  export function attachGatewayInvokeRejectionMetadata(err, metadata) {
1015
868
  err.metadata = metadata;
1016
869
  }
1017
- /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
1018
- export const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512_000;
1019
- /**
1020
- * Size-cap a provider/router payload before storing on an activity record.
1021
- * Non-serializable values become a small marker object instead of throwing.
1022
- */
870
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS } from './gateway-defaults.js';
1023
871
  export function capActivityFullResponsePayload(payload, maxChars = DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS) {
1024
872
  if (payload == null)
1025
873
  return payload;
@@ -1055,13 +903,3 @@ export function resolveFinishReasonFromRouterResponse(response) {
1055
903
  }
1056
904
  return undefined;
1057
905
  }
1058
- export function buildOptimixerActualUsage(tokens, response, latencyMs) {
1059
- const finishReason = resolveFinishReasonFromRouterResponse(response);
1060
- return {
1061
- promptTokens: tokens.prompt,
1062
- completionTokens: tokens.completion,
1063
- totalTokens: tokens.total,
1064
- ...(finishReason ? { finishReason } : {}),
1065
- latencyMs
1066
- };
1067
- }
@@ -14,16 +14,9 @@ export declare function generateMD5Hash(text: string): string;
14
14
  */
15
15
  export declare function ensureTaskTypeId(request: ChatRequest, logger: Logxer): Promise<string>;
16
16
  export type MergeConfigOptions = {
17
- defaultModelConfig?: Record<string, unknown>;
18
17
  catalog?: AiModelsCatalogClient | null;
19
18
  routingEnv?: OpenRouterRoutingConfig;
20
19
  };
21
- /**
22
- * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
23
- */
24
- export declare function isMaxTokensExplicitlySet(request: ChatRequest & {
25
- useInternalDefaults?: 'skill' | 'audit';
26
- }, config: GatewayConfig): boolean;
27
20
  /**
28
21
  * Merges config with defaults
29
22
  * Supports using internal system action defaults (internalSkill or skillAudit) when useInternalDefaults is set
@@ -199,17 +192,6 @@ export declare function buildInvokeRejectionMetadata(args: {
199
192
  error?: unknown;
200
193
  }): GatewayInvokeRejectionMetadata;
201
194
  export declare function attachGatewayInvokeRejectionMetadata(err: Error, metadata: GatewayInvokeRejectionMetadata): void;
202
- /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
203
- export declare const DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS = 512000;
204
- /**
205
- * Size-cap a provider/router payload before storing on an activity record.
206
- * Non-serializable values become a small marker object instead of throwing.
207
- */
195
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS } from './gateway-defaults.js';
208
196
  export declare function capActivityFullResponsePayload(payload: unknown, maxChars?: number): unknown;
209
197
  export declare function resolveFinishReasonFromRouterResponse(response: unknown): string | undefined;
210
- export declare function buildOptimixerActualUsage(tokens: {
211
- prompt: number;
212
- completion: number;
213
- total: number;
214
- }, response: unknown, latencyMs: number): import('@x12i/optimixer').AiMaxTokensActualUsage;
215
- export {};
@@ -7,10 +7,11 @@ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
7
7
  import { validateChatRequest, validateAIRequest } from './gateway-validation.js';
8
8
  import { ensureGatewayRequestIdentity } from './activity-manager.js';
9
9
  import { initializeGatewayComponents } from './gateway-config.js';
10
+ import { resolveRetryConfig } from './gateway-defaults.js';
10
11
  import { buildMessages } from './message-builder.js';
11
- import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
12
+ import { extractJsonFromFlexMd } from './flex-md-loader.js';
12
13
  import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
13
- import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
14
+ import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
14
15
  import { getAiToolsClient } from './ai-tools-client.js';
15
16
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
16
17
  import { applyOpenRouterInvokePolicy } from './ai-tools-client.js';
@@ -56,9 +57,7 @@ export class AIGateway {
56
57
  config;
57
58
  logger;
58
59
  activityManager;
59
- optimixerManager;
60
60
  messageBuilderConfig;
61
- defaultModelConfig = {};
62
61
  _autoRegisterDone = false;
63
62
  _aiToolsClient = null;
64
63
  preferOpenRouter;
@@ -70,9 +69,7 @@ export class AIGateway {
70
69
  this.logger = components.logger;
71
70
  this.router = components.router;
72
71
  this.activityManager = components.activityManager;
73
- this.optimixerManager = components.optimixerManager;
74
72
  this.messageBuilderConfig = components.messageBuilderConfig;
75
- this.defaultModelConfig = components.defaultModelConfig ?? {};
76
73
  this.preferOpenRouter = components.preferOpenRouter;
77
74
  this.openRouterApiKey = components.openRouterApiKey;
78
75
  setGatewayRuntimeClients({
@@ -101,7 +98,6 @@ export class AIGateway {
101
98
  // Merge config (modelConfig > request.config > gateway defaults)
102
99
  const aiTools = await this.getAiTools();
103
100
  const mergedConfig = await mergeConfig(request, this.config, this.logger, {
104
- defaultModelConfig: this.defaultModelConfig,
105
101
  catalog: aiTools?.catalog ?? null,
106
102
  routingEnv: aiTools?.routingEnv,
107
103
  });
@@ -118,7 +114,6 @@ export class AIGateway {
118
114
  await autoRegisterProviders(this.router, this.logger);
119
115
  this._autoRegisterDone = true;
120
116
  }
121
- const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
122
117
  // Start activity tracking if available
123
118
  let activity = undefined;
124
119
  if (this.activityManager) {
@@ -192,9 +187,6 @@ export class AIGateway {
192
187
  });
193
188
  }
194
189
  }
195
- if (optimixerPrediction) {
196
- await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
197
- }
198
190
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
199
191
  tokens: enhancedResponse.metadata.tokens,
200
192
  costUsd: enhancedResponse.metadata.costUsd,
@@ -298,7 +290,6 @@ export class AIGateway {
298
290
  // Merge config (modelConfig > request.config > gateway defaults)
299
291
  const aiTools = await this.getAiTools();
300
292
  const mergedConfig = await mergeConfig(request, this.config, this.logger, {
301
- defaultModelConfig: this.defaultModelConfig,
302
293
  catalog: aiTools?.catalog ?? null,
303
294
  routingEnv: aiTools?.routingEnv,
304
295
  });
@@ -318,7 +309,6 @@ export class AIGateway {
318
309
  await autoRegisterProviders(this.router, this.logger);
319
310
  this._autoRegisterDone = true;
320
311
  }
321
- const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
322
312
  // Start activity tracking if available
323
313
  let activity = undefined;
324
314
  if (this.activityManager) {
@@ -417,7 +407,7 @@ export class AIGateway {
417
407
  model: candidate.model
418
408
  }
419
409
  }
420
- }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
410
+ }, resolveRetryConfig(request, this.config), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
421
411
  onTryStart: ({ retryIndex, startedAt }) => {
422
412
  const idx = traceAttempts.push({
423
413
  timing: { startedAt, endedAt: startedAt, durationMs: 0 },
@@ -727,9 +717,6 @@ export class AIGateway {
727
717
  });
728
718
  }
729
719
  }
730
- if (optimixerPrediction) {
731
- await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
732
- }
733
720
  warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
734
721
  tokens: enhancedResponse.metadata.tokens,
735
722
  costUsd: enhancedResponse.metadata.costUsd,
@@ -765,52 +752,6 @@ export class AIGateway {
765
752
  }
766
753
  });
767
754
  }
768
- async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
769
- if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
770
- return undefined;
771
- }
772
- const prediction = await this.optimixerManager.predictMaxTokens({
773
- request,
774
- mergedConfig,
775
- messages
776
- });
777
- if (prediction) {
778
- let maxTokens = prediction.recommendedMaxTokens;
779
- const useCeiling = this.config.optimixer?.useFlexMdCeiling !== false;
780
- if (useCeiling && mergedConfig?.model && mergedConfig?.provider) {
781
- try {
782
- const ceiling = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
783
- if (typeof ceiling === 'number' && ceiling > 0 && maxTokens > ceiling) {
784
- maxTokens = ceiling;
785
- }
786
- }
787
- catch {
788
- // Non-blocking: use uncapped prediction
789
- }
790
- }
791
- mergedConfig.maxTokens = maxTokens;
792
- request._mergedRouterConfig = mergedConfig;
793
- this.logger.debug('Applied Optimixer recommended max_tokens', {
794
- aiRequestId: request.aiRequestId,
795
- recommendedMaxTokens: prediction.recommendedMaxTokens,
796
- maxTokens,
797
- confidence: prediction.confidence,
798
- requestId: prediction.requestId
799
- });
800
- return prediction;
801
- }
802
- if (mergedConfig?.maxTokens === undefined && mergedConfig?.model && mergedConfig?.provider) {
803
- try {
804
- const flexMdMaxTokens = await getModelMaxTokensFromFlexMd(mergedConfig.provider, mergedConfig.model);
805
- mergedConfig.maxTokens = flexMdMaxTokens && flexMdMaxTokens > 0 ? flexMdMaxTokens : 2000;
806
- }
807
- catch {
808
- mergedConfig.maxTokens = 2000;
809
- }
810
- request._mergedRouterConfig = mergedConfig;
811
- }
812
- return undefined;
813
- }
814
755
  /**
815
756
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
816
757
  */
@@ -15,9 +15,7 @@ export declare class AIGateway {
15
15
  private config;
16
16
  private logger;
17
17
  private activityManager?;
18
- private optimixerManager?;
19
18
  private messageBuilderConfig?;
20
- private defaultModelConfig;
21
19
  private _autoRegisterDone;
22
20
  private _aiToolsClient;
23
21
  private readonly preferOpenRouter;
@@ -31,7 +29,6 @@ export declare class AIGateway {
31
29
  * Invoke AI request (with structured output support)
32
30
  */
33
31
  invoke<TContent = unknown>(request: AIInvokeRequest): Promise<EnhancedLLMResponse<TContent>>;
34
- private applyAdaptiveMaxTokensIfEnabled;
35
32
  /**
36
33
  * Build simple messages from request (instructions and prompt as literal template text; no registry).
37
34
  */
@@ -15,10 +15,11 @@ export { ProviderNotFoundError, FallbackExhaustedError } from '@x12i/ai-provider
15
15
  export * from '@x12i/ai-providers-router';
16
16
  // Export enhanced gateway
17
17
  export { AIGateway } from './gateway.js';
18
- export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
18
+ export { InstructionNotFoundError, InstructionBackendError, ModelRequiredError, MaxTokensRequiredError } from './instruction-errors.js';
19
19
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
20
20
  export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
- export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
21
+ export { getGatewayOperationalMode, isProdGatewayMode, parseModelProviderSpec } from './gateway-mode.js';
22
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, GATEWAY_DEFAULT_FREQUENCY_PENALTY, GATEWAY_DEFAULT_PRESENCE_PENALTY, GATEWAY_DEFAULT_RETRY, GATEWAY_DEFAULT_TEMPERATURE, GATEWAY_DEFAULT_TOP_P, resolveRetryConfig } from './gateway-defaults.js';
22
23
  export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
23
24
  export { mergeGatewayAndRequestTemplateRenderOptions, mergeTemplateRenderOptions } from './template-render-merge.js';
24
25
  export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory, coalesceMergedInputBucket, extractCallerInputsBag, mapSmartInputPathsInputsToInput, parseLooseJsonObject, prepareWorkingMemoryForTemplateRender, resolveGatewayMemoryPathValue } from './memory-path-resolution.js';
@@ -28,7 +29,6 @@ export { GATEWAY_DUAL_MEMORY_ROOTS, buildMemoryResolutionRootFromWorkingMemory,
28
29
  export { Activix } from '@x12i/activix';
29
30
  export { normalizeToActivixCostShape } from '@x12i/activix';
30
31
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
31
- export { OptimixerManager } from './optimixer-manager.js';
32
32
  export { activityIdentityToLogContext, activityIdentityToLogMeta, withActivityIdentity, withGatewayLogContext, gatewayLogDebug } from './gateway-log-meta.js';
33
33
  export { createGatewayLogger, resolveGatewayVerboseEnabled } from './logger-factory.js';
34
34
  export { GATEWAY_LOG_ENV_PREFIX, GATEWAY_LOGXER_PACKAGE, GATEWAY_STACK_LOG_PREFIXES, initializeGatewayPackageLogLevels, resetGatewayPackageLogLevelsInit } from './gateway-log-levels.js';
@@ -38,10 +38,7 @@ export { createLogxer, DebugLogAbstract, runWithLogContext, getStationRuntimeIde
38
38
  export { ROUTER_LOG_ENV_PREFIX } from '@x12i/ai-providers-router';
39
39
  // Runtime observability surface (leaf package: no downstream runtime objects)
40
40
  export { runtimeObjects } from './runtime-objects.js';
41
- // Export rate limiter
42
- export { GatewayRateLimiter } from './gateway-rate-limiter.js';
43
- export { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from './gateway-rate-limiter-constants.js';
44
- // Export troubleshooting helpers
41
+ // Runtime observability surface (leaf package: no downstream runtime objects)
45
42
  export { validateAIRequest, validateJSON, extractJSON, validateResponse, diagnoseRequest, diagnoseResponse, supportsJSONMode, createTestAIRequest, createValidationTestCases, runValidationTests, formatDiagnostic, assertValidAIRequest } from './troubleshooting-helper.js';
46
43
  // Export object types library
47
44
  export { OBJECT_TYPES_LIBRARY, getObjectType, getObjectTypesForAgent } from './object-types-library.js';
@@ -14,12 +14,13 @@ export type { RequestInterceptor, ResponseInterceptor } from '@x12i/ai-providers
14
14
  export type { UsageTracker } from '@x12i/ai-providers-router';
15
15
  export * from '@x12i/ai-providers-router';
16
16
  export { AIGateway } from './gateway.js';
17
- export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
17
+ export { InstructionNotFoundError, InstructionBackendError, ModelRequiredError, MaxTokensRequiredError } from './instruction-errors.js';
18
18
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
19
19
  export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayFallbackAttempt, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
20
  export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
- export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
- export type { GatewayOperationalMode, GatewayDefaultModelSource, DefaultModelSubstitutionReason, ResolvedGatewayDefault } from './gateway-mode.js';
21
+ export { getGatewayOperationalMode, isProdGatewayMode, parseModelProviderSpec } from './gateway-mode.js';
22
+ export type { GatewayOperationalMode } from './gateway-mode.js';
23
+ export { DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, GATEWAY_DEFAULT_FREQUENCY_PENALTY, GATEWAY_DEFAULT_PRESENCE_PENALTY, GATEWAY_DEFAULT_RETRY, GATEWAY_DEFAULT_TEMPERATURE, GATEWAY_DEFAULT_TOP_P, resolveRetryConfig } from './gateway-defaults.js';
23
24
  export type { ActivityCostStatus, ResolvedActivityCost } from './gateway-utils.js';
24
25
  export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
25
26
  export type { OutputContractSpec } from './output-contract-normalizer.js';
@@ -32,7 +33,6 @@ export { Activix } from '@x12i/activix';
32
33
  export type { ActivixRunContext, ActivixAutoCostOptions, ActivixCostShape, FindByRunContextCriteria, GetJobActivitiesInput, GetJobActivitiesResult } from '@x12i/activix';
33
34
  export { normalizeToActivixCostShape } from '@x12i/activix';
34
35
  export { ActivityManager, ensureGatewayRequestIdentity } from './activity-manager.js';
35
- export { OptimixerManager } from './optimixer-manager.js';
36
36
  export type { ActivityIdentity } from './types.js';
37
37
  export { activityIdentityToLogContext, activityIdentityToLogMeta, withActivityIdentity, withGatewayLogContext, gatewayLogDebug } from './gateway-log-meta.js';
38
38
  export { createGatewayLogger, resolveGatewayVerboseEnabled } from './logger-factory.js';
@@ -45,8 +45,6 @@ export { ROUTER_LOG_ENV_PREFIX } from '@x12i/ai-providers-router';
45
45
  export type { Logxer, LogMeta, RuntimeIdentity, LogRuntimeContext, GetJobLogsInput, GetJobLogsResult, QueryableLogLine, LogDiagnostics, DiagnosticEvidence, ScopeCriteria, ScopeLogsResult, StackLoggingOptions, PackageLogLevelsConfig, PackageLogLevelSetting } from '@x12i/logxer';
46
46
  export { runtimeObjects } from './runtime-objects.js';
47
47
  export type { ActivixQueryableClient, LogxerQueryableClient, PackageRuntimeObjects, RuntimeObjects } from './runtime-objects.js';
48
- export { GatewayRateLimiter } from './gateway-rate-limiter.js';
49
- export { DEFAULT_RATE_LIMIT_MIN_INTERVAL_MS, DEFAULT_RATE_LIMIT_ENABLED } from './gateway-rate-limiter-constants.js';
50
48
  export { validateAIRequest, validateJSON, extractJSON, validateResponse, diagnoseRequest, diagnoseResponse, supportsJSONMode, createTestAIRequest, createValidationTestCases, runValidationTests, formatDiagnostic, assertValidAIRequest } from './troubleshooting-helper.js';
51
49
  export type { ValidationResult, DiagnosticInfo } from './troubleshooting-helper.js';
52
50
  export { OBJECT_TYPES_LIBRARY, getObjectType, getObjectTypesForAgent } from './object-types-library.js';
@@ -1,6 +1,20 @@
1
1
  /**
2
- * Errors for instruction / prompt template and instructions-block resolution.
2
+ * Errors for instruction / prompt template resolution.
3
3
  */
4
+ export class ModelRequiredError extends Error {
5
+ code = 'MODEL_REQUIRED';
6
+ constructor(message = 'model is required on every invoke request (request.config.model or request.modelConfig.model)') {
7
+ super(message);
8
+ this.name = 'ModelRequiredError';
9
+ }
10
+ }
11
+ export class MaxTokensRequiredError extends Error {
12
+ code = 'MAX_TOKENS_REQUIRED';
13
+ constructor(message = 'maxTokens is required on every invoke request (request.config.maxTokens, request.modelConfig.maxTokens, GatewayConfig.maxTokens, or internalSystemActions)') {
14
+ super(message);
15
+ this.name = 'MaxTokensRequiredError';
16
+ }
17
+ }
4
18
  export class InstructionNotFoundError extends Error {
5
19
  key;
6
20
  backend;
@@ -1,6 +1,14 @@
1
1
  /**
2
- * Errors for instruction / prompt template and instructions-block resolution.
2
+ * Errors for instruction / prompt template resolution.
3
3
  */
4
+ export declare class ModelRequiredError extends Error {
5
+ readonly code = "MODEL_REQUIRED";
6
+ constructor(message?: string);
7
+ }
8
+ export declare class MaxTokensRequiredError extends Error {
9
+ readonly code = "MAX_TOKENS_REQUIRED";
10
+ constructor(message?: string);
11
+ }
4
12
  export declare class InstructionNotFoundError extends Error {
5
13
  key: string;
6
14
  backend: string;