@x12i/ai-gateway 9.5.3 → 9.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -79,7 +79,7 @@ const response = await gateway.invoke({
79
79
  agentId: 'agent-456'
80
80
  },
81
81
  workingMemory: { input: 'Hello!' },
82
- config: { model: 'openai/gpt-4o-mini', provider: 'openrouter' }
82
+ config: { model: 'cheap', provider: 'openrouter' }
83
83
  });
84
84
 
85
85
  console.log(response.content, response.metadata?.costUsd, response.metadata?.tokens);
@@ -87,7 +87,7 @@ console.log(response.content, response.metadata?.costUsd, response.metadata?.tok
87
87
 
88
88
  ### Providers without manual `register()`
89
89
 
90
- - **OpenRouter:** Set `OPEN_ROUTER_KEY` or `OPENROUTER_API_KEY` (unless `USE_OPENROUTER=false`). The gateway can lazy-register on first invoke.
90
+ - **OpenRouter:** Set **`OPENROUTER_API_KEY`** in `.env` (unless `USE_OPENROUTER=false`). The gateway passes this key into the router on init and lazy-registers on first invoke. **`OPEN_ROUTER_KEY`** is a legacy alias still read if `OPENROUTER_API_KEY` is unset — prefer **`OPENROUTER_API_KEY`** so **`@x12i/ai-tools`** model resolution sets `routedViaOpenRouter` correctly (ai-tools does not read `OPEN_ROUTER_KEY`).
91
91
  - **Direct providers:** Set `OPENAI_API_KEY`, `GROK_API_KEY`, etc. Same lazy registration.
92
92
 
93
93
  Load `.env` before constructing the gateway if another package creates it first.
@@ -217,7 +217,7 @@ Set via constructor `mode` or env `mode` / `MODE`.
217
217
  | `npm run test:flex-md-esm-regression` | ESM build regression for flex-md |
218
218
  | `npm run test:prepublish` | `build` + `npm test` |
219
219
 
220
- Live tests use `LIVE_TEST_PROVIDER` / `LIVE_TEST_MODEL` (default `openrouter` + `openai/gpt-4o-mini`). Set `LIVE_SKIP_INVOKE=1` to skip the LLM call.
220
+ Live tests use `LIVE_TEST_PROVIDER` / `LIVE_TEST_MODEL` (default `openrouter` + `cheap`, an ai-tools profile alias). Set `LIVE_SKIP_INVOKE=1` to skip the LLM call.
221
221
 
222
222
  ---
223
223
 
@@ -1,6 +1,6 @@
1
1
  {
2
- "defaultEngine": "openai",
3
- "defaultModel": "gpt-5-nano",
2
+ "defaultEngine": "openrouter",
3
+ "defaultModel": "cheap",
4
4
  "temperature": 0.7,
5
5
  "topP": 1.0,
6
6
  "frequencyPenalty": 0.0,
@@ -13,4 +13,3 @@
13
13
  "throttlingDelay": 5000
14
14
  }
15
15
  }
16
-
@@ -197,7 +197,7 @@ export function initializeGatewayComponents(config) {
197
197
  // Prefer explicit config from consumer (e.g. ai-skills) to avoid env-loading timing; fall back to process.env.
198
198
  const explicitOpenRouterKey = config.openrouter?.apiKey;
199
199
  const isExplicitKey = typeof explicitOpenRouterKey === 'string' && !explicitOpenRouterKey.startsWith('ENV.');
200
- const openRouterKey = isExplicitKey ? explicitOpenRouterKey : (process.env.OPEN_ROUTER_KEY ?? process.env.OPENROUTER_API_KEY);
200
+ const openRouterKey = isExplicitKey ? explicitOpenRouterKey : (process.env.OPENROUTER_API_KEY ?? process.env.OPEN_ROUTER_KEY);
201
201
  const useOpenRouter = config.openRouter?.enabled !== undefined ? config.openRouter?.enabled : process.env.USE_OPENROUTER;
202
202
  if (openRouterKey && useOpenRouter !== false && useOpenRouter !== 'false') {
203
203
  routerConfig.openRouter = { enabled: true };
@@ -2,6 +2,7 @@
2
2
  * Gateway Meta Operations Module
3
3
  * Handles meta operations like instruction optimization and testing
4
4
  */
5
+ import { CODE_DEFAULT_MODEL } from './gateway-mode.js';
5
6
  /**
6
7
  * Test instructions by running them and analyzing the response
7
8
  */
@@ -9,7 +10,7 @@ export async function testInstructions(instructions, testInput, expectedSchema,
9
10
  // Get internal system action config (instruction audit)
10
11
  const internalConfig = config.internalSystemActions?.instructionAudit;
11
12
  const defaultEngine = config.defaultEngine || 'openai';
12
- const defaultModel = internalConfig?.model || 'gpt-5-nano';
13
+ const defaultModel = internalConfig?.model || CODE_DEFAULT_MODEL;
13
14
  const defaultProvider = internalConfig?.engine || defaultEngine;
14
15
  const { agentId = 'instruction-tester', model = options.model || defaultModel, // Use internal config default if not provided
15
16
  provider = options.provider || defaultProvider // Use internal config default if not provided
@@ -6,7 +6,8 @@ import type { ActivityIdentity, GatewayConfig } from './types.js';
6
6
  export type GatewayOperationalMode = 'prod' | 'debug' | 'dev';
7
7
  export type GatewayDefaultModelSource = 'env' | 'model-config.json' | 'code';
8
8
  export type DefaultModelSubstitutionReason = 'no_model_provided' | 'model_resolution_failed' | 'ai_tools_unavailable';
9
- export declare const CODE_DEFAULT_MODEL = "gpt-5-nano";
9
+ /** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
10
+ export declare const CODE_DEFAULT_MODEL = "cheap";
10
11
  export type ResolvedGatewayDefault = {
11
12
  model: string;
12
13
  provider?: string;
@@ -2,7 +2,8 @@
2
2
  * Gateway operational mode (prod vs dev/debug) and default model resolution.
3
3
  */
4
4
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
5
- export const CODE_DEFAULT_MODEL = 'gpt-5-nano';
5
+ /** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
6
+ export const CODE_DEFAULT_MODEL = 'cheap';
6
7
  /**
7
8
  * Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
8
9
  * Only `prod` allows silent default-model substitution; all other values are strict.
@@ -146,7 +146,7 @@ export async function autoRegisterProviders(router, logger) {
146
146
  optionalEnvVars: PROVIDER_CONFIGS
147
147
  .filter(p => p.optional)
148
148
  .map(p => p.envVar),
149
- openRouter: 'Set OPEN_ROUTER_KEY or OPENROUTER_API_KEY (and do not set USE_OPENROUTER=false) to use OpenRouter without registering a provider.',
149
+ openRouter: 'Set OPENROUTER_API_KEY (and do not set USE_OPENROUTER=false) to use OpenRouter without registering a provider. Legacy OPEN_ROUTER_KEY is still accepted.',
150
150
  note: 'You can still manually register providers using gateway.register(provider)'
151
151
  });
152
152
  }
@@ -2,7 +2,7 @@
2
2
  * Gateway Utilities Module
3
3
  * Handles utility functions
4
4
  */
5
- import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayInvokeRejectionMetadata, GatewayTraceAttempt, GatewayTraceMergedConfig, GatewayTraceRequestIds, GatewayTraceUsageSummary, ModelConfig } from './types.js';
5
+ import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayFallbackAttempt, GatewayInvokeRejectionMetadata, GatewayTraceAttempt, GatewayTraceMergedConfig, GatewayTraceRequestIds, GatewayTraceUsageSummary, ModelConfig } from './types.js';
6
6
  import type { Logxer } from '@x12i/logxer';
7
7
  import { type AiCostResult, type AiModelsCatalogClient, type CostCalculator } from '@x12i/ai-tools';
8
8
  /**
@@ -155,6 +155,39 @@ export declare function pickEffectiveModelConfigFromInvokeRequest(request: Pick<
155
155
  */
156
156
  export declare function tryExtractRouterLikePayloadFromErrorChain(error: unknown, maxDepth?: number): unknown;
157
157
  export declare function pickRequestIdsFromRouterLike(gatewayAiRequestId: string | undefined, routerLike: unknown): GatewayTraceRequestIds | undefined;
158
+ /** Error code hint when a bundled profile name cannot be routed to a catalog target. */
159
+ export declare const MODEL_PROFILE_UNROUTABLE = "MODEL_PROFILE_UNROUTABLE";
160
+ export declare class ModelProfileUnroutableError extends Error {
161
+ readonly profileAlias: string;
162
+ readonly provider: string | undefined;
163
+ readonly code = "MODEL_PROFILE_UNROUTABLE";
164
+ constructor(profileAlias: string, provider: string | undefined, cause?: unknown);
165
+ }
166
+ type ModelResolutionCandidate = {
167
+ provider: string;
168
+ model: string;
169
+ };
170
+ /**
171
+ * Build rejection-metadata fallback attempts from trace-mode {@link GatewayTraceAttempt}s.
172
+ */
173
+ export declare function buildGatewayFallbackAttemptsFromTrace(traceAttempts: GatewayTraceAttempt[], candidates: ModelResolutionCandidate[], lastError?: Error): GatewayFallbackAttempt[];
174
+ /** Human-readable exhaustion message for trace fallback chains and rejection logs. */
175
+ export declare function formatFallbackExhaustionMessage(attempts: GatewayFallbackAttempt[], candidates: ModelResolutionCandidate[]): string;
176
+ export declare function mapGatewayFallbackAttemptsToRouter(attempts: GatewayFallbackAttempt[]): Array<{
177
+ provider: string;
178
+ model?: string;
179
+ httpStatus?: number;
180
+ error: Error;
181
+ responsePreview?: string;
182
+ }>;
183
+ /**
184
+ * Log profile alias vs OpenRouter model id actually sent to the router after catalog resolution.
185
+ */
186
+ export declare function logResolvedModelRouting(logger: Logxer, request: ChatRequest, mergedConfig: ChatRequest['config']): void;
187
+ /**
188
+ * Walk `error` and `error.cause` for {@link FallbackExhaustedError.attempts}.
189
+ */
190
+ export declare function tryExtractFallbackAttemptsFromErrorChain(error: unknown, maxDepth?: number): GatewayFallbackAttempt[] | undefined;
158
191
  export declare function buildInvokeRejectionMetadata(args: {
159
192
  request: Pick<AIInvokeRequest, 'aiRequestId' | 'identity' | 'config' | 'modelConfig'>;
160
193
  taskTypeId: string;
@@ -162,6 +195,7 @@ export declare function buildInvokeRejectionMetadata(args: {
162
195
  mergedConfig?: unknown;
163
196
  partialRouterPayload?: unknown;
164
197
  gatewayAiRequestId?: string;
198
+ error?: unknown;
165
199
  }): GatewayInvokeRejectionMetadata;
166
200
  export declare function attachGatewayInvokeRejectionMetadata(err: Error, metadata: GatewayInvokeRejectionMetadata): void;
167
201
  /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
@@ -3,7 +3,10 @@
3
3
  * Handles utility functions
4
4
  */
5
5
  import * as crypto from 'crypto';
6
- import { ModelResolutionError } from '@x12i/ai-tools';
6
+ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
7
+ import { ModelResolutionError, isKnownProfileOrShortcut } from '@x12i/ai-tools';
8
+ import { extractHttpStatusCode } from './gateway-retry.js';
9
+ import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
7
10
  import { getPreParsedInstructions } from './gateway-instructions.js';
8
11
  import { getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
9
12
  import { applyModelResolution } from './ai-tools-client.js';
@@ -55,6 +58,44 @@ async function substituteGatewayDefaultModel(merged, request, config, logger, me
55
58
  });
56
59
  applyGatewayDefaultToMerged(merged, defaults, config);
57
60
  }
61
+ async function tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original) {
62
+ const resolveModels = config.aiTools?.resolveModels !== false;
63
+ const catalog = mergeOptions?.catalog;
64
+ if (!resolveModels || !catalog || !merged.model) {
65
+ return;
66
+ }
67
+ try {
68
+ const resolution = await catalog.resolveModel({
69
+ provider: merged.provider,
70
+ model: merged.model
71
+ });
72
+ if (!resolution.found) {
73
+ return;
74
+ }
75
+ applyModelResolution(merged, resolution, config.defaultEngine);
76
+ request._modelResolution = {
77
+ modelId: resolution.modelId,
78
+ routedViaOpenRouter: resolution.routedViaOpenRouter,
79
+ confidence: resolution.confidence,
80
+ resolvedVia: resolution.resolvedVia,
81
+ originalProvider: original?.provider ?? merged.provider,
82
+ originalModel: original?.model ?? merged.model
83
+ };
84
+ logger.verbose('Catalog resolved substituted default model', {
85
+ jobId: request.identity.jobId,
86
+ model: merged.model,
87
+ provider: merged.provider,
88
+ resolvedModelId: resolution.modelId
89
+ });
90
+ }
91
+ catch {
92
+ // Prod keeps the substituted bare default when re-resolution fails.
93
+ }
94
+ }
/**
 * Substitute the gateway default model onto `merged`, then attempt a
 * best-effort catalog resolution of that substituted model.
 *
 * The two steps run strictly in sequence; `original` (the caller's
 * pre-substitution `{ provider, model }`, if any) is forwarded to both.
 */
async function substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, reason, original) {
    // Step 1: put the default model in place.
    await substituteGatewayDefaultModel(
        merged,
        request,
        config,
        logger,
        mergeOptions,
        reason,
        original
    );
    // Step 2: try to resolve the substituted model through the catalog.
    await tryResolveSubstitutedDefaultModel(
        merged,
        request,
        config,
        logger,
        mergeOptions,
        original
    );
}
58
99
  /**
59
100
  * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
60
101
  */
@@ -137,7 +178,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
137
178
  const originalProvider = merged.provider;
138
179
  const originalModel = explicitModel;
139
180
  if (!explicitModel) {
140
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
181
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
141
182
  }
142
183
  else if (resolveModels && mergeOptions?.catalog) {
143
184
  try {
@@ -166,10 +207,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
166
207
  });
167
208
  }
168
209
  else if (isProdGatewayMode(operationalMode)) {
169
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'model_resolution_failed', { provider: originalProvider, model: originalModel });
210
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'model_resolution_failed', { provider: originalProvider, model: originalModel });
170
211
  }
171
212
  else {
172
- throw new ModelResolutionError({ provider: merged.provider, model: explicitModel }, resolution);
213
+ throw buildModelResolutionFailureError(explicitModel, merged.provider, resolution);
173
214
  }
174
215
  }
175
216
  catch (error) {
@@ -177,7 +218,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
177
218
  throw error;
178
219
  }
179
220
  if (isProdGatewayMode(operationalMode)) {
180
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
221
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
181
222
  }
182
223
  else {
183
224
  throw error;
@@ -185,10 +226,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
185
226
  }
186
227
  }
187
228
  else if (resolveModels && !mergeOptions?.catalog && isProdGatewayMode(operationalMode)) {
188
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
229
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
189
230
  }
190
231
  if (!merged.model) {
191
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
232
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
192
233
  }
193
234
  const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
194
235
  const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
@@ -783,6 +824,162 @@ export function pickRequestIdsFromRouterLike(gatewayAiRequestId, routerLike) {
783
824
  }
784
825
  return out;
785
826
  }
/** Error code hint when a bundled profile name cannot be routed to a catalog target. */
export const MODEL_PROFILE_UNROUTABLE = 'MODEL_PROFILE_UNROUTABLE';
/**
 * Error describing a profile alias that could not be routed to a catalog target.
 *
 * Carries the alias, the provider (when one was given), a stable `code`
 * equal to {@link MODEL_PROFILE_UNROUTABLE}, and optionally the underlying
 * `cause`.
 */
export class ModelProfileUnroutableError extends Error {
    /** Profile alias that failed to route. */
    profileAlias;
    /** Provider the alias was requested for; may be `undefined`. */
    provider;
    /** Machine-readable error code. */
    code = MODEL_PROFILE_UNROUTABLE;
    /**
     * @param profileAlias Profile alias that could not be routed.
     * @param provider Provider name; omitted from the message when falsy.
     * @param cause Underlying error; only attached when not `undefined`.
     */
    constructor(profileAlias, provider, cause) {
        super([
            `${MODEL_PROFILE_UNROUTABLE}: profile "${profileAlias}" is retired or has no routable catalog target`,
            provider ? ` (provider: "${provider}")` : '',
            '. Update @x12i/ai-profiles or choose another profile alias.'
        ].join(''));
        this.profileAlias = profileAlias;
        this.provider = provider;
        this.name = 'ModelProfileUnroutableError';
        if (cause === undefined) {
            return;
        }
        this.cause = cause;
    }
}
/**
 * Choose the error to throw when a caller-supplied model fails resolution.
 *
 * A `ModelResolutionError` is always constructed. When `explicitModel` is a
 * known profile or shortcut (per `isKnownProfileOrShortcut`), it is wrapped as
 * the `cause` of a `ModelProfileUnroutableError`; otherwise it is returned as is.
 */
function buildModelResolutionFailureError(explicitModel, provider, resolution) {
    const resolutionError = new ModelResolutionError({ provider, model: explicitModel }, resolution);
    return isKnownProfileOrShortcut(explicitModel)
        ? new ModelProfileUnroutableError(explicitModel, provider, resolutionError)
        : resolutionError;
}
/**
 * Build rejection-metadata fallback attempts from trace-mode {@link GatewayTraceAttempt}s.
 *
 * Produces one entry per candidate, in candidate order. For each candidate the
 * most recent failed trace attempt with a matching `routing.fallbackIndex`
 * supplies the error message and raw payload preview. When no message is
 * available, the last candidate falls back to `lastError.message` (if given)
 * and every other candidate to `'invoke failed'`.
 */
export function buildGatewayFallbackAttemptsFromTrace(traceAttempts, candidates, lastError) {
    /** Render a raw provider payload as a preview capped at 500 characters. */
    const previewOf = (raw) => {
        if (raw === undefined) {
            return undefined;
        }
        try {
            const text = typeof raw === 'string' ? raw : JSON.stringify(raw);
            return text.length > 500 ? text.slice(0, 500) + '…' : text;
        }
        catch {
            return '[Unserializable]';
        }
    };
    // Later failures overwrite earlier ones, so each index keeps its latest failed attempt.
    const latestFailureByIndex = new Map();
    traceAttempts
        .filter((attempt) => !attempt.ok)
        .forEach((attempt) => {
            latestFailureByIndex.set(attempt.routing.fallbackIndex, attempt);
        });
    const finalIndex = candidates.length - 1;
    return candidates.map((candidate, position) => {
        const failure = latestFailureByIndex.get(position);
        let message = failure?.error?.message;
        if (message === undefined || message === null) {
            message = position === finalIndex && lastError ? lastError.message : 'invoke failed';
        }
        const httpStatus = extractHttpStatusCode(new Error(message));
        const responsePreview = previewOf(failure?.rawProviderPayload);
        return {
            provider: candidate.provider,
            model: candidate.model,
            ...(httpStatus === undefined ? {} : { httpStatus }),
            error: message,
            ...(responsePreview === undefined ? {} : { responsePreview })
        };
    });
}
/**
 * Human-readable exhaustion message for trace fallback chains and rejection logs.
 *
 * Format:
 * `All fallback candidates failed (<n> tried[; provider(s) note]). Attempts: <details>.`
 *
 * - The provider note lists the distinct candidate providers in first-seen
 *   order (`providers tried: a → b`), or a single `provider: a`, or nothing
 *   when there are no candidates.
 * - Each attempt is rendered as `[provider/model HTTP <status>] <error> body=<preview>`,
 *   with the model, status and body parts omitted when absent; attempts are
 *   joined with `; `. With no attempts the details read
 *   `no attempt details recorded`.
 *
 * Every available response preview already appears inline as `body=…`, so no
 * separate "last response preview" suffix is appended.
 *
 * @param attempts Fallback attempts (`provider`, optional `model`, `httpStatus`, `responsePreview`, and `error` text).
 * @param candidates Candidates that were tried (`provider`, `model`).
 * @returns The formatted message.
 */
export function formatFallbackExhaustionMessage(attempts, candidates) {
    const providersTried = [...new Set(candidates.map((c) => c.provider))];
    let providerNote = '';
    if (providersTried.length > 1) {
        providerNote = `; providers tried: ${providersTried.join(' → ')}`;
    }
    else if (providersTried.length === 1) {
        providerNote = `; provider: ${providersTried[0]}`;
    }
    const detail = attempts
        .map((a) => {
            const target = a.model ? `${a.provider}/${a.model}` : a.provider;
            const status = a.httpStatus !== undefined ? ` HTTP ${a.httpStatus}` : '';
            const preview = a.responsePreview ? ` body=${a.responsePreview}` : '';
            return `[${target}${status}] ${a.error}${preview}`;
        })
        .join('; ');
    return (`All fallback candidates failed (${candidates.length} tried${providerNote}). ` +
        `Attempts: ${detail || 'no attempt details recorded'}.`);
}
/**
 * Convert gateway fallback attempts into the attempt shape used to construct
 * a router `FallbackExhaustedError`.
 *
 * Each attempt's `error` text is wrapped in a fresh `Error`; all other fields
 * are copied through unchanged (keys are always present, even when their
 * value is `undefined`).
 */
export function mapGatewayFallbackAttemptsToRouter(attempts) {
    const toRouterAttempt = ({ provider, model, httpStatus, error, responsePreview }) => ({
        provider,
        model,
        httpStatus,
        error: new Error(error),
        responsePreview
    });
    return attempts.map((attempt) => toRouterAttempt(attempt));
}
/**
 * Log profile alias vs OpenRouter model id actually sent to the router after catalog resolution.
 *
 * Nothing is logged when `request._modelResolution` carries neither a
 * `modelId` nor an `originalModel`, or when the call is not on the OpenRouter
 * path (neither `routedViaOpenRouter === true` nor provider `'openrouter'`).
 * Missing alias / model id values fall back to `mergedConfig.model`.
 */
export function logResolvedModelRouting(logger, request, mergedConfig) {
    const resolution = request._modelResolution;
    const hasRoutingInfo = resolution?.modelId || resolution?.originalModel !== undefined;
    if (!hasRoutingInfo) {
        return;
    }
    const provider = mergedConfig?.provider;
    const viaOpenRouter = resolution.routedViaOpenRouter === true || provider === 'openrouter';
    if (!viaOpenRouter) {
        return;
    }
    const payload = {
        profileAlias: resolution.originalModel ?? mergedConfig?.model,
        invokedOpenRouterModelId: resolution.modelId ?? mergedConfig?.model,
        provider,
        routedViaOpenRouter: resolution.routedViaOpenRouter,
        resolvedVia: resolution.resolvedVia,
        debugKind: gatewayLogDebug.trace
    };
    logger.info('OpenRouter routing: profile alias resolved to model id for invoke', withActivityIdentity(request.identity, payload));
}
/**
 * Convert router-side fallback attempts into plain gateway attempt records.
 *
 * `provider` is stringified and `error` is reduced to its message (or its
 * string form when it is not an `Error`). `model`, `httpStatus` and
 * `responsePreview` are only included when they are not `undefined`.
 */
function mapRouterFallbackAttempts(attempts) {
    // Yields `{ [key]: value }` only for defined values so absent fields leave no key behind.
    const whenDefined = (key, value) => (value === undefined ? {} : { [key]: value });
    return attempts.map(({ provider, model, httpStatus, error, responsePreview }) => {
        const errorText = error instanceof Error ? error.message : String(error);
        return {
            provider: String(provider),
            ...whenDefined('model', model),
            ...whenDefined('httpStatus', httpStatus),
            error: errorText,
            ...whenDefined('responsePreview', responsePreview)
        };
    });
}
/**
 * Pull fallback attempts off a single error value, if it carries any.
 *
 * Accepts either a real `FallbackExhaustedError` instance or any object whose
 * `name` is `'FallbackExhaustedError'` and whose `attempts` is a non-empty
 * array. Returns `undefined` for anything else.
 */
function extractFallbackAttemptsFromError(error) {
    const isRouterInstance = error instanceof FallbackExhaustedError && error.attempts.length > 0;
    if (isRouterInstance) {
        return mapRouterFallbackAttempts(error.attempts);
    }
    if (error == null || typeof error !== 'object') {
        return undefined;
    }
    // Duck-typed match: same name and a non-empty attempts array, without relying on `instanceof`.
    const looksExhausted = error.name === 'FallbackExhaustedError' &&
        Array.isArray(error.attempts) &&
        error.attempts.length > 0;
    return looksExhausted ? mapRouterFallbackAttempts(error.attempts) : undefined;
}
/**
 * Walk `error` and `error.cause` for {@link FallbackExhaustedError.attempts}.
 *
 * Visits at most `maxDepth` links and stops at the first non-object value or
 * at an object already visited. Returns the first non-empty attempts list
 * found, or `undefined`.
 */
export function tryExtractFallbackAttemptsFromErrorChain(error, maxDepth = 8) {
    const visited = new Set();
    let node = error;
    let depth = 0;
    while (depth < maxDepth && node != null && typeof node === 'object' && !visited.has(node)) {
        visited.add(node);
        const found = extractFallbackAttemptsFromError(node);
        if (found?.length) {
            return found;
        }
        node = node.cause;
        depth += 1;
    }
    return undefined;
}
786
983
  export function buildInvokeRejectionMetadata(args) {
787
984
  const gid = args.gatewayAiRequestId ?? args.request.aiRequestId;
788
985
  const partial = args.partialRouterPayload;
@@ -796,6 +993,9 @@ export function buildInvokeRejectionMetadata(args) {
796
993
  tokens = undefined;
797
994
  }
798
995
  const requestIds = pickRequestIdsFromRouterLike(gid, partial);
996
+ const fallbackAttempts = args.error !== undefined
997
+ ? tryExtractFallbackAttemptsFromErrorChain(args.error)
998
+ : undefined;
799
999
  return {
800
1000
  aiRequestId: args.request.aiRequestId,
801
1001
  identity: args.request.identity,
@@ -805,6 +1005,7 @@ export function buildInvokeRejectionMetadata(args) {
805
1005
  ...(effective !== undefined ? { effectiveModelConfig: effective } : {}),
806
1006
  ...(tokens !== undefined ? { tokens } : {}),
807
1007
  ...(requestIds !== undefined ? { requestIds } : {}),
1008
+ ...(fallbackAttempts !== undefined ? { fallbackAttempts } : {}),
808
1009
  ...(mc === undefined ? { mergeConfigUnavailable: true } : {})
809
1010
  };
810
1011
  }
package/dist/gateway.js CHANGED
@@ -3,13 +3,14 @@
3
3
  *
4
4
  * Simplified AI Gateway - Clean proxy implementation
5
5
  */
6
+ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
6
7
  import { validateChatRequest, validateAIRequest } from './gateway-validation.js';
7
8
  import { ensureGatewayRequestIdentity } from './activity-manager.js';
8
9
  import { initializeGatewayComponents } from './gateway-config.js';
9
10
  import { buildMessages } from './message-builder.js';
10
11
  import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
11
12
  import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
12
- import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
13
+ import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
13
14
  import { getAiToolsClient } from './ai-tools-client.js';
14
15
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
15
16
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
@@ -17,7 +18,7 @@ import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
17
18
  import { invokeWithRetry } from './gateway-retry.js';
18
19
  /** Error message thrown by the router when no provider is registered or specified */
19
20
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
20
- const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
21
+ const NO_PROVIDER_HINT = ' Set OPENROUTER_API_KEY in the environment to use OpenRouter (legacy OPEN_ROUTER_KEY is still read as fallback), or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
21
22
  /** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
22
23
  function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
23
24
  const { tokens, costUsd, cost } = meta;
@@ -277,6 +278,7 @@ export class AIGateway {
277
278
  catalog: aiTools?.catalog ?? null
278
279
  });
279
280
  request._mergedRouterConfig = mergedConfig;
281
+ logResolvedModelRouting(this.logger, request, mergedConfig);
280
282
  const diagnosticsMode = request.diagnostics?.mode;
281
283
  const traceEnabled = diagnosticsMode === 'trace';
282
284
  const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
@@ -468,7 +470,20 @@ export class AIGateway {
468
470
  }
469
471
  }
470
472
  if (!response) {
471
- throw lastError ?? new Error('All fallback candidates failed');
473
+ const fallbackAttempts = buildGatewayFallbackAttemptsFromTrace(traceAttempts, deduped, lastError);
474
+ const providersTried = [...new Set(deduped.map((c) => c.provider))];
475
+ this.logger.error('Trace fallback chain exhausted', withActivityIdentity(request.identity, {
476
+ providersTried,
477
+ candidates: deduped,
478
+ fallbackAttempts,
479
+ debugKind: gatewayLogDebug.anomaly
480
+ }));
481
+ const exhausted = new FallbackExhaustedError(mapGatewayFallbackAttemptsToRouter(fallbackAttempts));
482
+ exhausted.message = formatFallbackExhaustionMessage(fallbackAttempts, deduped);
483
+ if (lastError) {
484
+ exhausted.cause = lastError;
485
+ }
486
+ throw exhausted;
472
487
  }
473
488
  // Summary counts + final request ids.
474
489
  traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
@@ -565,11 +580,14 @@ export class AIGateway {
565
580
  tokens = second;
566
581
  }
567
582
  }
568
- const costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
583
+ let costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
569
584
  mergedConfig,
570
585
  calculator: aiTools?.calculator ?? null,
571
586
  calculateCost: this.config.aiTools?.calculateCost
572
587
  });
588
+ if (!costCompletion.costStatus && hasNonZeroTokenUsage(tokens)) {
589
+ costCompletion = { ...costCompletion, costStatus: 'unpriced' };
590
+ }
573
591
  const routerMetaForCost = routerResponse?.metadata || {};
574
592
  const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
575
593
  const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
@@ -685,7 +703,8 @@ export class AIGateway {
685
703
  startTime,
686
704
  mergedConfig,
687
705
  partialRouterPayload: partial,
688
- gatewayAiRequestId: request.aiRequestId
706
+ gatewayAiRequestId: request.aiRequestId,
707
+ error: err
689
708
  });
690
709
  attachGatewayInvokeRejectionMetadata(err, rejectMeta);
691
710
  if (err.message.includes(NO_PROVIDER_ERROR)) {
package/dist/index.d.ts CHANGED
@@ -16,8 +16,8 @@ export * from '@x12i/ai-providers-router';
16
16
  export { AIGateway } from './gateway.js';
17
17
  export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
18
18
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
19
- export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
- export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
19
+ export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayFallbackAttempt, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
+ export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
21
  export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
22
  export type { GatewayOperationalMode, GatewayDefaultModelSource, DefaultModelSubstitutionReason, ResolvedGatewayDefault } from './gateway-mode.js';
23
23
  export type { ActivityCostStatus, ResolvedActivityCost } from './gateway-utils.js';
package/dist/index.js CHANGED
@@ -17,7 +17,7 @@ export * from '@x12i/ai-providers-router';
17
17
  export { AIGateway } from './gateway.js';
18
18
  export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
19
19
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
20
- export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
20
+ export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
21
  export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
22
  export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
23
23
  export { mergeGatewayAndRequestTemplateRenderOptions, mergeTemplateRenderOptions } from './template-render-merge.js';
@@ -1,14 +1,15 @@
1
1
  import { Optimixer } from '@x12i/optimixer';
2
2
  import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
3
3
  import { estimateMessagesTokenSizes } from './token-estimate.js';
4
- function resolveActionTypeId(request) {
4
+ /** Optimixer bucket key: prefer taskTypeId (template), then identity actionType, else gateway default. */
5
+ function resolveTemplateId(request) {
6
+ if (request.taskTypeId && String(request.taskTypeId).trim()) {
7
+ return String(request.taskTypeId).trim();
8
+ }
5
9
  const identity = request.identity;
6
10
  if (identity?.actionType && String(identity.actionType).trim()) {
7
11
  return String(identity.actionType).trim();
8
12
  }
9
- if (request.taskTypeId && String(request.taskTypeId).trim()) {
10
- return String(request.taskTypeId).trim();
11
- }
12
13
  return 'gateway.invoke';
13
14
  }
14
15
  function toActivixRunContext(identity) {
@@ -77,15 +78,18 @@ export class OptimixerManager {
77
78
  const { request, mergedConfig, messages } = ctx;
78
79
  const { inputSize, contextSize } = estimateMessagesTokenSizes(messages);
79
80
  const acceptableRisk = this.config?.acceptableRisk ?? 'medium';
81
+ const provider = typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined;
82
+ const model = typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined;
80
83
  try {
81
84
  return await optimixer.predictAiMaxTokens({
82
- actionTypeId: resolveActionTypeId(request),
85
+ templateId: resolveTemplateId(request),
83
86
  inputSize,
84
87
  contextSize,
85
88
  acceptableRisk,
86
89
  runContext: toActivixRunContext(request.identity),
87
- provider: typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined,
88
- model: typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined
90
+ ...(provider || model
91
+ ? { modelProfile: { ...(provider ? { provider } : {}), ...(model ? { model } : {}) } }
92
+ : {})
89
93
  });
90
94
  }
91
95
  catch (error) {
package/dist/types.d.ts CHANGED
@@ -138,12 +138,25 @@ export type GatewayInvokeRejectionMetadata = {
138
138
  region?: string;
139
139
  effectiveModelConfig?: Partial<Pick<ModelConfig, 'model' | 'modelId' | 'provider' | 'temperature' | 'maxTokens' | 'topP'>>;
140
140
  requestIds?: GatewayTraceRequestIds;
141
+ /**
142
+ * Fallback candidates tried before exhaustion (non-trace {@link AIGateway.invoke} only).
143
+ * Sourced from {@link FallbackExhaustedError.attempts} on the router error chain.
144
+ */
145
+ fallbackAttempts?: GatewayFallbackAttempt[];
141
146
  /**
142
147
  * True when {@link mergeConfig} did not run (e.g. message-building threw first).
143
148
  * Routing facts may only reflect request.config / modelConfig, not flex-md defaults.
144
149
  */
145
150
  mergeConfigUnavailable?: true;
146
151
  };
152
+ /** Serializable slice of a router fallback attempt for rejection metadata. */
153
+ export type GatewayFallbackAttempt = {
154
+ provider: string;
155
+ model?: string;
156
+ httpStatus?: number;
157
+ error: string;
158
+ responsePreview?: string;
159
+ };
147
160
  /**
148
161
  * Identity object used for activity linkage.
149
162
  * On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
@@ -1,6 +1,6 @@
1
1
  {
2
- "defaultEngine": "openai",
3
- "defaultModel": "gpt-5-nano",
2
+ "defaultEngine": "openrouter",
3
+ "defaultModel": "cheap",
4
4
  "temperature": 0.7,
5
5
  "topP": 1.0,
6
6
  "frequencyPenalty": 0.0,
@@ -13,4 +13,3 @@
13
13
  "throttlingDelay": 5000
14
14
  }
15
15
  }
16
-
@@ -197,7 +197,7 @@ export function initializeGatewayComponents(config) {
197
197
  // Prefer explicit config from consumer (e.g. ai-skills) to avoid env-loading timing; fall back to process.env.
198
198
  const explicitOpenRouterKey = config.openrouter?.apiKey;
199
199
  const isExplicitKey = typeof explicitOpenRouterKey === 'string' && !explicitOpenRouterKey.startsWith('ENV.');
200
- const openRouterKey = isExplicitKey ? explicitOpenRouterKey : (process.env.OPEN_ROUTER_KEY ?? process.env.OPENROUTER_API_KEY);
200
+ const openRouterKey = isExplicitKey ? explicitOpenRouterKey : (process.env.OPENROUTER_API_KEY ?? process.env.OPEN_ROUTER_KEY);
201
201
  const useOpenRouter = config.openRouter?.enabled !== undefined ? config.openRouter?.enabled : process.env.USE_OPENROUTER;
202
202
  if (openRouterKey && useOpenRouter !== false && useOpenRouter !== 'false') {
203
203
  routerConfig.openRouter = { enabled: true };
@@ -2,6 +2,7 @@
2
2
  * Gateway Meta Operations Module
3
3
  * Handles meta operations like instruction optimization and testing
4
4
  */
5
+ import { CODE_DEFAULT_MODEL } from './gateway-mode.js';
5
6
  /**
6
7
  * Test instructions by running them and analyzing the response
7
8
  */
@@ -9,7 +10,7 @@ export async function testInstructions(instructions, testInput, expectedSchema,
9
10
  // Get internal system action config (instruction audit)
10
11
  const internalConfig = config.internalSystemActions?.instructionAudit;
11
12
  const defaultEngine = config.defaultEngine || 'openai';
12
- const defaultModel = internalConfig?.model || 'gpt-5-nano';
13
+ const defaultModel = internalConfig?.model || CODE_DEFAULT_MODEL;
13
14
  const defaultProvider = internalConfig?.engine || defaultEngine;
14
15
  const { agentId = 'instruction-tester', model = options.model || defaultModel, // Use internal config default if not provided
15
16
  provider = options.provider || defaultProvider // Use internal config default if not provided
@@ -2,7 +2,8 @@
2
2
  * Gateway operational mode (prod vs dev/debug) and default model resolution.
3
3
  */
4
4
  import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
5
- export const CODE_DEFAULT_MODEL = 'gpt-5-nano';
5
+ /** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
6
+ export const CODE_DEFAULT_MODEL = 'cheap';
6
7
  /**
7
8
  * Operational mode: `GatewayConfig.mode` overrides `process.env.mode` / `MODE`.
8
9
  * Only `prod` allows silent default-model substitution; all other values are strict.
@@ -6,7 +6,8 @@ import type { ActivityIdentity, GatewayConfig } from './types.js';
6
6
  export type GatewayOperationalMode = 'prod' | 'debug' | 'dev';
7
7
  export type GatewayDefaultModelSource = 'env' | 'model-config.json' | 'code';
8
8
  export type DefaultModelSubstitutionReason = 'no_model_provided' | 'model_resolution_failed' | 'ai_tools_unavailable';
9
- export declare const CODE_DEFAULT_MODEL = "gpt-5-nano";
9
+ /** Profile name resolved via ai-tools + {@link @x12i/ai-profiles} when catalog is enabled. */
10
+ export declare const CODE_DEFAULT_MODEL = "cheap";
10
11
  export type ResolvedGatewayDefault = {
11
12
  model: string;
12
13
  provider?: string;
@@ -146,7 +146,7 @@ export async function autoRegisterProviders(router, logger) {
146
146
  optionalEnvVars: PROVIDER_CONFIGS
147
147
  .filter(p => p.optional)
148
148
  .map(p => p.envVar),
149
- openRouter: 'Set OPEN_ROUTER_KEY or OPENROUTER_API_KEY (and do not set USE_OPENROUTER=false) to use OpenRouter without registering a provider.',
149
+ openRouter: 'Set OPENROUTER_API_KEY (and do not set USE_OPENROUTER=false) to use OpenRouter without registering a provider. Legacy OPEN_ROUTER_KEY is still accepted.',
150
150
  note: 'You can still manually register providers using gateway.register(provider)'
151
151
  });
152
152
  }
@@ -3,7 +3,10 @@
3
3
  * Handles utility functions
4
4
  */
5
5
  import * as crypto from 'crypto';
6
- import { ModelResolutionError } from '@x12i/ai-tools';
6
+ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
7
+ import { ModelResolutionError, isKnownProfileOrShortcut } from '@x12i/ai-tools';
8
+ import { extractHttpStatusCode } from './gateway-retry.js';
9
+ import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
7
10
  import { getPreParsedInstructions } from './gateway-instructions.js';
8
11
  import { getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
9
12
  import { applyModelResolution } from './ai-tools-client.js';
@@ -55,6 +58,44 @@ async function substituteGatewayDefaultModel(merged, request, config, logger, me
55
58
  });
56
59
  applyGatewayDefaultToMerged(merged, defaults, config);
57
60
  }
61
+ async function tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original) {
62
+ const resolveModels = config.aiTools?.resolveModels !== false;
63
+ const catalog = mergeOptions?.catalog;
64
+ if (!resolveModels || !catalog || !merged.model) {
65
+ return;
66
+ }
67
+ try {
68
+ const resolution = await catalog.resolveModel({
69
+ provider: merged.provider,
70
+ model: merged.model
71
+ });
72
+ if (!resolution.found) {
73
+ return;
74
+ }
75
+ applyModelResolution(merged, resolution, config.defaultEngine);
76
+ request._modelResolution = {
77
+ modelId: resolution.modelId,
78
+ routedViaOpenRouter: resolution.routedViaOpenRouter,
79
+ confidence: resolution.confidence,
80
+ resolvedVia: resolution.resolvedVia,
81
+ originalProvider: original?.provider ?? merged.provider,
82
+ originalModel: original?.model ?? merged.model
83
+ };
84
+ logger.verbose('Catalog resolved substituted default model', {
85
+ jobId: request.identity.jobId,
86
+ model: merged.model,
87
+ provider: merged.provider,
88
+ resolvedModelId: resolution.modelId
89
+ });
90
+ }
91
+ catch {
92
+ // Prod keeps the substituted bare default when re-resolution fails.
93
+ }
94
+ }
95
+ async function substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, reason, original) {
96
+ await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, reason, original);
97
+ await tryResolveSubstitutedDefaultModel(merged, request, config, logger, mergeOptions, original);
98
+ }
58
99
  /**
59
100
  * True when any caller-controlled config source set `maxTokens` (Optimixer should not override).
60
101
  */
@@ -137,7 +178,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
137
178
  const originalProvider = merged.provider;
138
179
  const originalModel = explicitModel;
139
180
  if (!explicitModel) {
140
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
181
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
141
182
  }
142
183
  else if (resolveModels && mergeOptions?.catalog) {
143
184
  try {
@@ -166,10 +207,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
166
207
  });
167
208
  }
168
209
  else if (isProdGatewayMode(operationalMode)) {
169
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'model_resolution_failed', { provider: originalProvider, model: originalModel });
210
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'model_resolution_failed', { provider: originalProvider, model: originalModel });
170
211
  }
171
212
  else {
172
- throw new ModelResolutionError({ provider: merged.provider, model: explicitModel }, resolution);
213
+ throw buildModelResolutionFailureError(explicitModel, merged.provider, resolution);
173
214
  }
174
215
  }
175
216
  catch (error) {
@@ -177,7 +218,7 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
177
218
  throw error;
178
219
  }
179
220
  if (isProdGatewayMode(operationalMode)) {
180
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
221
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
181
222
  }
182
223
  else {
183
224
  throw error;
@@ -185,10 +226,10 @@ export async function mergeConfig(request, config, logger, mergeOptions) {
185
226
  }
186
227
  }
187
228
  else if (resolveModels && !mergeOptions?.catalog && isProdGatewayMode(operationalMode)) {
188
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
229
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'ai_tools_unavailable', { provider: originalProvider, model: originalModel });
189
230
  }
190
231
  if (!merged.model) {
191
- await substituteGatewayDefaultModel(merged, request, config, logger, mergeOptions, 'no_model_provided');
232
+ await substituteGatewayDefaultModelAndResolve(merged, request, config, logger, mergeOptions, 'no_model_provided');
192
233
  }
193
234
  const maxTokensExplicitlySet = isMaxTokensExplicitlySet(request, config);
194
235
  const optimixerWillPredict = config.optimixer?.enabled === true && !maxTokensExplicitlySet;
@@ -783,6 +824,162 @@ export function pickRequestIdsFromRouterLike(gatewayAiRequestId, routerLike) {
783
824
  }
784
825
  return out;
785
826
  }
827
+ /** Error code hint when a bundled profile name cannot be routed to a catalog target. */
828
+ export const MODEL_PROFILE_UNROUTABLE = 'MODEL_PROFILE_UNROUTABLE';
829
+ export class ModelProfileUnroutableError extends Error {
830
+ profileAlias;
831
+ provider;
832
+ code = MODEL_PROFILE_UNROUTABLE;
833
+ constructor(profileAlias, provider, cause) {
834
+ super(`${MODEL_PROFILE_UNROUTABLE}: profile "${profileAlias}" is retired or has no routable catalog target` +
835
+ (provider ? ` (provider: "${provider}")` : '') +
836
+ '. Update @x12i/ai-profiles or choose another profile alias.');
837
+ this.profileAlias = profileAlias;
838
+ this.provider = provider;
839
+ this.name = 'ModelProfileUnroutableError';
840
+ if (cause !== undefined) {
841
+ this.cause = cause;
842
+ }
843
+ }
844
+ }
845
+ function buildModelResolutionFailureError(explicitModel, provider, resolution) {
846
+ const base = new ModelResolutionError({ provider, model: explicitModel }, resolution);
847
+ if (isKnownProfileOrShortcut(explicitModel)) {
848
+ return new ModelProfileUnroutableError(explicitModel, provider, base);
849
+ }
850
+ return base;
851
+ }
852
+ /**
853
+ * Build rejection-metadata fallback attempts from trace-mode {@link GatewayTraceAttempt}s.
854
+ */
855
+ export function buildGatewayFallbackAttemptsFromTrace(traceAttempts, candidates, lastError) {
856
+ const lastFailedByIndex = new Map();
857
+ for (const attempt of traceAttempts) {
858
+ if (!attempt.ok) {
859
+ lastFailedByIndex.set(attempt.routing.fallbackIndex, attempt);
860
+ }
861
+ }
862
+ return candidates.map((candidate, index) => {
863
+ const failed = lastFailedByIndex.get(index);
864
+ const errMsg = failed?.error?.message ??
865
+ (index === candidates.length - 1 && lastError ? lastError.message : 'invoke failed');
866
+ const httpStatus = extractHttpStatusCode(new Error(errMsg));
867
+ let responsePreview;
868
+ const raw = failed?.rawProviderPayload;
869
+ if (raw !== undefined) {
870
+ try {
871
+ const rawStr = typeof raw === 'string' ? raw : JSON.stringify(raw);
872
+ responsePreview = rawStr.length <= 500 ? rawStr : rawStr.slice(0, 500) + '…';
873
+ }
874
+ catch {
875
+ responsePreview = '[Unserializable]';
876
+ }
877
+ }
878
+ return {
879
+ provider: candidate.provider,
880
+ model: candidate.model,
881
+ ...(httpStatus !== undefined ? { httpStatus } : {}),
882
+ error: errMsg,
883
+ ...(responsePreview !== undefined ? { responsePreview } : {})
884
+ };
885
+ });
886
+ }
887
+ /** Human-readable exhaustion message for trace fallback chains and rejection logs. */
888
+ export function formatFallbackExhaustionMessage(attempts, candidates) {
889
+ const providersTried = [...new Set(candidates.map((c) => c.provider))];
890
+ const providerNote = providersTried.length > 1
891
+ ? `; providers tried: ${providersTried.join(' → ')}`
892
+ : providersTried.length === 1
893
+ ? `; provider: ${providersTried[0]}`
894
+ : '';
895
+ const detail = attempts
896
+ .map((a) => {
897
+ const model = a.model ? `${a.provider}/${a.model}` : a.provider;
898
+ const status = a.httpStatus !== undefined ? ` HTTP ${a.httpStatus}` : '';
899
+ const preview = a.responsePreview ? ` body=${a.responsePreview}` : '';
900
+ return `[${model}${status}] ${a.error}${preview}`;
901
+ })
902
+ .join('; ');
903
+ const last = attempts[attempts.length - 1];
904
+ const lastBody = last?.responsePreview && !detail.includes(last.responsePreview)
905
+ ? ` Last response preview: ${last.responsePreview}`
906
+ : '';
907
+ return (`All fallback candidates failed (${candidates.length} tried${providerNote}). ` +
908
+ `Attempts: ${detail || 'no attempt details recorded'}.${lastBody}`);
909
+ }
910
+ export function mapGatewayFallbackAttemptsToRouter(attempts) {
911
+ return attempts.map((a) => ({
912
+ provider: a.provider,
913
+ model: a.model,
914
+ httpStatus: a.httpStatus,
915
+ error: new Error(a.error),
916
+ responsePreview: a.responsePreview
917
+ }));
918
+ }
919
+ /**
920
+ * Log profile alias vs OpenRouter model id actually sent to the router after catalog resolution.
921
+ */
922
+ export function logResolvedModelRouting(logger, request, mergedConfig) {
923
+ const res = request._modelResolution;
924
+ if (!res?.modelId && res?.originalModel === undefined) {
925
+ return;
926
+ }
927
+ const profileAlias = res.originalModel ?? mergedConfig?.model;
928
+ const invokedModelId = res.modelId ?? mergedConfig?.model;
929
+ const provider = mergedConfig?.provider;
930
+ const openRouterPath = res.routedViaOpenRouter === true || provider === 'openrouter';
931
+ if (!openRouterPath) {
932
+ return;
933
+ }
934
+ logger.info('OpenRouter routing: profile alias resolved to model id for invoke', withActivityIdentity(request.identity, {
935
+ profileAlias,
936
+ invokedOpenRouterModelId: invokedModelId,
937
+ provider,
938
+ routedViaOpenRouter: res.routedViaOpenRouter,
939
+ resolvedVia: res.resolvedVia,
940
+ debugKind: gatewayLogDebug.trace
941
+ }));
942
+ }
943
+ function mapRouterFallbackAttempts(attempts) {
944
+ return attempts.map((attempt) => ({
945
+ provider: String(attempt.provider),
946
+ ...(attempt.model !== undefined ? { model: attempt.model } : {}),
947
+ ...(attempt.httpStatus !== undefined ? { httpStatus: attempt.httpStatus } : {}),
948
+ error: attempt.error instanceof Error ? attempt.error.message : String(attempt.error),
949
+ ...(attempt.responsePreview !== undefined ? { responsePreview: attempt.responsePreview } : {})
950
+ }));
951
+ }
952
+ function extractFallbackAttemptsFromError(error) {
953
+ if (error instanceof FallbackExhaustedError && error.attempts.length > 0) {
954
+ return mapRouterFallbackAttempts(error.attempts);
955
+ }
956
+ if (error != null && typeof error === 'object') {
957
+ const record = error;
958
+ if (record.name === 'FallbackExhaustedError' && Array.isArray(record.attempts) && record.attempts.length > 0) {
959
+ return mapRouterFallbackAttempts(record.attempts);
960
+ }
961
+ }
962
+ return undefined;
963
+ }
964
+ /**
965
+ * Walk `error` and `error.cause` for {@link FallbackExhaustedError.attempts}.
966
+ */
967
+ export function tryExtractFallbackAttemptsFromErrorChain(error, maxDepth = 8) {
968
+ const seen = new Set();
969
+ let cur = error;
970
+ for (let i = 0; i < maxDepth && cur != null; i++) {
971
+ if (typeof cur !== 'object')
972
+ break;
973
+ if (seen.has(cur))
974
+ break;
975
+ seen.add(cur);
976
+ const attempts = extractFallbackAttemptsFromError(cur);
977
+ if (attempts?.length)
978
+ return attempts;
979
+ cur = cur.cause;
980
+ }
981
+ return undefined;
982
+ }
786
983
  export function buildInvokeRejectionMetadata(args) {
787
984
  const gid = args.gatewayAiRequestId ?? args.request.aiRequestId;
788
985
  const partial = args.partialRouterPayload;
@@ -796,6 +993,9 @@ export function buildInvokeRejectionMetadata(args) {
796
993
  tokens = undefined;
797
994
  }
798
995
  const requestIds = pickRequestIdsFromRouterLike(gid, partial);
996
+ const fallbackAttempts = args.error !== undefined
997
+ ? tryExtractFallbackAttemptsFromErrorChain(args.error)
998
+ : undefined;
799
999
  return {
800
1000
  aiRequestId: args.request.aiRequestId,
801
1001
  identity: args.request.identity,
@@ -805,6 +1005,7 @@ export function buildInvokeRejectionMetadata(args) {
805
1005
  ...(effective !== undefined ? { effectiveModelConfig: effective } : {}),
806
1006
  ...(tokens !== undefined ? { tokens } : {}),
807
1007
  ...(requestIds !== undefined ? { requestIds } : {}),
1008
+ ...(fallbackAttempts !== undefined ? { fallbackAttempts } : {}),
808
1009
  ...(mc === undefined ? { mergeConfigUnavailable: true } : {})
809
1010
  };
810
1011
  }
@@ -2,7 +2,7 @@
2
2
  * Gateway Utilities Module
3
3
  * Handles utility functions
4
4
  */
5
- import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayInvokeRejectionMetadata, GatewayTraceAttempt, GatewayTraceMergedConfig, GatewayTraceRequestIds, GatewayTraceUsageSummary, ModelConfig } from './types.js';
5
+ import type { AIInvokeRequest, ChatRequest, GatewayConfig, GatewayFallbackAttempt, GatewayInvokeRejectionMetadata, GatewayTraceAttempt, GatewayTraceMergedConfig, GatewayTraceRequestIds, GatewayTraceUsageSummary, ModelConfig } from './types.js';
6
6
  import type { Logxer } from '@x12i/logxer';
7
7
  import { type AiCostResult, type AiModelsCatalogClient, type CostCalculator } from '@x12i/ai-tools';
8
8
  /**
@@ -155,6 +155,39 @@ export declare function pickEffectiveModelConfigFromInvokeRequest(request: Pick<
155
155
  */
156
156
  export declare function tryExtractRouterLikePayloadFromErrorChain(error: unknown, maxDepth?: number): unknown;
157
157
  export declare function pickRequestIdsFromRouterLike(gatewayAiRequestId: string | undefined, routerLike: unknown): GatewayTraceRequestIds | undefined;
158
+ /** Error code hint when a bundled profile name cannot be routed to a catalog target. */
159
+ export declare const MODEL_PROFILE_UNROUTABLE = "MODEL_PROFILE_UNROUTABLE";
160
+ export declare class ModelProfileUnroutableError extends Error {
161
+ readonly profileAlias: string;
162
+ readonly provider: string | undefined;
163
+ readonly code = "MODEL_PROFILE_UNROUTABLE";
164
+ constructor(profileAlias: string, provider: string | undefined, cause?: unknown);
165
+ }
166
+ type ModelResolutionCandidate = {
167
+ provider: string;
168
+ model: string;
169
+ };
170
+ /**
171
+ * Build rejection-metadata fallback attempts from trace-mode {@link GatewayTraceAttempt}s.
172
+ */
173
+ export declare function buildGatewayFallbackAttemptsFromTrace(traceAttempts: GatewayTraceAttempt[], candidates: ModelResolutionCandidate[], lastError?: Error): GatewayFallbackAttempt[];
174
+ /** Human-readable exhaustion message for trace fallback chains and rejection logs. */
175
+ export declare function formatFallbackExhaustionMessage(attempts: GatewayFallbackAttempt[], candidates: ModelResolutionCandidate[]): string;
176
+ export declare function mapGatewayFallbackAttemptsToRouter(attempts: GatewayFallbackAttempt[]): Array<{
177
+ provider: string;
178
+ model?: string;
179
+ httpStatus?: number;
180
+ error: Error;
181
+ responsePreview?: string;
182
+ }>;
183
+ /**
184
+ * Log profile alias vs OpenRouter model id actually sent to the router after catalog resolution.
185
+ */
186
+ export declare function logResolvedModelRouting(logger: Logxer, request: ChatRequest, mergedConfig: ChatRequest['config']): void;
187
+ /**
188
+ * Walk `error` and `error.cause` for {@link FallbackExhaustedError.attempts}.
189
+ */
190
+ export declare function tryExtractFallbackAttemptsFromErrorChain(error: unknown, maxDepth?: number): GatewayFallbackAttempt[] | undefined;
158
191
  export declare function buildInvokeRejectionMetadata(args: {
159
192
  request: Pick<AIInvokeRequest, 'aiRequestId' | 'identity' | 'config' | 'modelConfig'>;
160
193
  taskTypeId: string;
@@ -162,6 +195,7 @@ export declare function buildInvokeRejectionMetadata(args: {
162
195
  mergedConfig?: unknown;
163
196
  partialRouterPayload?: unknown;
164
197
  gatewayAiRequestId?: string;
198
+ error?: unknown;
165
199
  }): GatewayInvokeRejectionMetadata;
166
200
  export declare function attachGatewayInvokeRejectionMetadata(err: Error, metadata: GatewayInvokeRejectionMetadata): void;
167
201
  /** Default JSON string length cap for Activix `content.fullResponse` when diagnostics allow storing it. */
@@ -3,13 +3,14 @@
3
3
  *
4
4
  * Simplified AI Gateway - Clean proxy implementation
5
5
  */
6
+ import { FallbackExhaustedError } from '@x12i/ai-providers-router';
6
7
  import { validateChatRequest, validateAIRequest } from './gateway-validation.js';
7
8
  import { ensureGatewayRequestIdentity } from './activity-manager.js';
8
9
  import { initializeGatewayComponents } from './gateway-config.js';
9
10
  import { buildMessages } from './message-builder.js';
10
11
  import { extractJsonFromFlexMd, getModelMaxTokensFromFlexMd } from './flex-md-loader.js';
11
12
  import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
12
- import { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, capActivityFullResponsePayload, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
13
+ import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
13
14
  import { getAiToolsClient } from './ai-tools-client.js';
14
15
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
15
16
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
@@ -17,7 +18,7 @@ import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
17
18
  import { invokeWithRetry } from './gateway-retry.js';
18
19
  /** Error message thrown by the router when no provider is registered or specified */
19
20
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
20
- const NO_PROVIDER_HINT = ' Set OPEN_ROUTER_KEY (or OPENROUTER_API_KEY) in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
21
+ const NO_PROVIDER_HINT = ' Set OPENROUTER_API_KEY in the environment to use OpenRouter (legacy OPEN_ROUTER_KEY is still read as fallback), or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
21
22
  /** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
22
23
  function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
23
24
  const { tokens, costUsd, cost } = meta;
@@ -277,6 +278,7 @@ export class AIGateway {
277
278
  catalog: aiTools?.catalog ?? null
278
279
  });
279
280
  request._mergedRouterConfig = mergedConfig;
281
+ logResolvedModelRouting(this.logger, request, mergedConfig);
280
282
  const diagnosticsMode = request.diagnostics?.mode;
281
283
  const traceEnabled = diagnosticsMode === 'trace';
282
284
  const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
@@ -468,7 +470,20 @@ export class AIGateway {
468
470
  }
469
471
  }
470
472
  if (!response) {
471
- throw lastError ?? new Error('All fallback candidates failed');
473
+ const fallbackAttempts = buildGatewayFallbackAttemptsFromTrace(traceAttempts, deduped, lastError);
474
+ const providersTried = [...new Set(deduped.map((c) => c.provider))];
475
+ this.logger.error('Trace fallback chain exhausted', withActivityIdentity(request.identity, {
476
+ providersTried,
477
+ candidates: deduped,
478
+ fallbackAttempts,
479
+ debugKind: gatewayLogDebug.anomaly
480
+ }));
481
+ const exhausted = new FallbackExhaustedError(mapGatewayFallbackAttemptsToRouter(fallbackAttempts));
482
+ exhausted.message = formatFallbackExhaustionMessage(fallbackAttempts, deduped);
483
+ if (lastError) {
484
+ exhausted.cause = lastError;
485
+ }
486
+ throw exhausted;
472
487
  }
473
488
  // Summary counts + final request ids.
474
489
  traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
@@ -565,11 +580,14 @@ export class AIGateway {
565
580
  tokens = second;
566
581
  }
567
582
  }
568
- const costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
583
+ let costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
569
584
  mergedConfig,
570
585
  calculator: aiTools?.calculator ?? null,
571
586
  calculateCost: this.config.aiTools?.calculateCost
572
587
  });
588
+ if (!costCompletion.costStatus && hasNonZeroTokenUsage(tokens)) {
589
+ costCompletion = { ...costCompletion, costStatus: 'unpriced' };
590
+ }
573
591
  const routerMetaForCost = routerResponse?.metadata || {};
574
592
  const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
575
593
  const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
@@ -685,7 +703,8 @@ export class AIGateway {
685
703
  startTime,
686
704
  mergedConfig,
687
705
  partialRouterPayload: partial,
688
- gatewayAiRequestId: request.aiRequestId
706
+ gatewayAiRequestId: request.aiRequestId,
707
+ error: err
689
708
  });
690
709
  attachGatewayInvokeRejectionMetadata(err, rejectMeta);
691
710
  if (err.message.includes(NO_PROVIDER_ERROR)) {
@@ -17,7 +17,7 @@ export * from '@x12i/ai-providers-router';
17
17
  export { AIGateway } from './gateway.js';
18
18
  export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
19
19
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
20
- export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
20
+ export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
21
  export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
22
  export { contractSpecToFieldKeys, enrichParsedContentForOutputContract, resolveOutputContractFieldKeys } from './output-contract-normalizer.js';
23
23
  export { mergeGatewayAndRequestTemplateRenderOptions, mergeTemplateRenderOptions } from './template-render-merge.js';
@@ -16,8 +16,8 @@ export * from '@x12i/ai-providers-router';
16
16
  export { AIGateway } from './gateway.js';
17
17
  export { InstructionNotFoundError, InstructionBackendError } from './instruction-errors.js';
18
18
  export { autoRegisterProviders } from './gateway-provider-auto-register.js';
19
- export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
- export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage } from './gateway-utils.js';
19
+ export type { GatewayConfig, ProviderModelRef, ModelConfig, RetryConfig, ChatRequest, AIInvokeRequest, AIRequest, GatewayActionType, GatewayInvokeRejectionMetadata, GatewayFallbackAttempt, GatewayTraceRequestIds, GatewayTraceAttempt, GatewayTraceUsageSummary, GatewayTraceMergedConfig, EnhancedLLMResponse, InstructionMetadata, ValidationRule, TemplateRenderOptions, SmartInputConfig, SmartInputRenderOptions } from './types.js';
20
+ export { attachGatewayInvokeRejectionMetadata, buildInvokeRejectionMetadata, tryExtractRouterLikePayloadFromErrorChain, tryExtractFallbackAttemptsFromErrorChain, pickRequestIdsFromRouterLike, resolveActivityCostCompletion, resolveCostCompletionForActivity, resolveCostCompletionWithAiTools, buildGatewayPricingRecord, mapAiCostResultToResolvedActivityCost, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, hasNonZeroTokenUsage, MODEL_PROFILE_UNROUTABLE, ModelProfileUnroutableError, buildGatewayFallbackAttemptsFromTrace, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter } from './gateway-utils.js';
21
21
  export { getGatewayOperationalMode, isProdGatewayMode, resolveGatewayDefaultModel, parseModelProviderSpec, CODE_DEFAULT_MODEL } from './gateway-mode.js';
22
22
  export type { GatewayOperationalMode, GatewayDefaultModelSource, DefaultModelSubstitutionReason, ResolvedGatewayDefault } from './gateway-mode.js';
23
23
  export type { ActivityCostStatus, ResolvedActivityCost } from './gateway-utils.js';
@@ -1,14 +1,15 @@
1
1
  import { Optimixer } from '@x12i/optimixer';
2
2
  import { resolveActivityTrackingConfig } from './config/activity-tracking-config.js';
3
3
  import { estimateMessagesTokenSizes } from './token-estimate.js';
4
- function resolveActionTypeId(request) {
4
+ /** Optimixer bucket key: prefer taskTypeId (template), then identity actionType, else gateway default. */
5
+ function resolveTemplateId(request) {
6
+ if (request.taskTypeId && String(request.taskTypeId).trim()) {
7
+ return String(request.taskTypeId).trim();
8
+ }
5
9
  const identity = request.identity;
6
10
  if (identity?.actionType && String(identity.actionType).trim()) {
7
11
  return String(identity.actionType).trim();
8
12
  }
9
- if (request.taskTypeId && String(request.taskTypeId).trim()) {
10
- return String(request.taskTypeId).trim();
11
- }
12
13
  return 'gateway.invoke';
13
14
  }
14
15
  function toActivixRunContext(identity) {
@@ -77,15 +78,18 @@ export class OptimixerManager {
77
78
  const { request, mergedConfig, messages } = ctx;
78
79
  const { inputSize, contextSize } = estimateMessagesTokenSizes(messages);
79
80
  const acceptableRisk = this.config?.acceptableRisk ?? 'medium';
81
+ const provider = typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined;
82
+ const model = typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined;
80
83
  try {
81
84
  return await optimixer.predictAiMaxTokens({
82
- actionTypeId: resolveActionTypeId(request),
85
+ templateId: resolveTemplateId(request),
83
86
  inputSize,
84
87
  contextSize,
85
88
  acceptableRisk,
86
89
  runContext: toActivixRunContext(request.identity),
87
- provider: typeof mergedConfig?.provider === 'string' ? mergedConfig.provider : undefined,
88
- model: typeof mergedConfig?.model === 'string' ? mergedConfig.model : undefined
90
+ ...(provider || model
91
+ ? { modelProfile: { ...(provider ? { provider } : {}), ...(model ? { model } : {}) } }
92
+ : {})
89
93
  });
90
94
  }
91
95
  catch (error) {
@@ -138,12 +138,25 @@ export type GatewayInvokeRejectionMetadata = {
138
138
  region?: string;
139
139
  effectiveModelConfig?: Partial<Pick<ModelConfig, 'model' | 'modelId' | 'provider' | 'temperature' | 'maxTokens' | 'topP'>>;
140
140
  requestIds?: GatewayTraceRequestIds;
141
+ /**
142
+ * Fallback candidates tried before exhaustion (non-trace {@link AIGateway.invoke} only).
143
+ * Sourced from {@link FallbackExhaustedError.attempts} on the router error chain.
144
+ */
145
+ fallbackAttempts?: GatewayFallbackAttempt[];
141
146
  /**
142
147
  * True when {@link mergeConfig} did not run (e.g. message-building threw first).
143
148
  * Routing facts may only reflect request.config / modelConfig, not flex-md defaults.
144
149
  */
145
150
  mergeConfigUnavailable?: true;
146
151
  };
152
+ /** Serializable slice of a router fallback attempt for rejection metadata. */
153
+ export type GatewayFallbackAttempt = {
154
+ provider: string;
155
+ model?: string;
156
+ httpStatus?: number;
157
+ error: string;
158
+ responsePreview?: string;
159
+ };
147
160
  /**
148
161
  * Identity object used for activity linkage.
149
162
  * On gateway requests/responses it lives on `identity`. When activity tracking persists via Activix v5+,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@x12i/ai-gateway",
3
- "version": "9.5.3",
3
+ "version": "9.6.1",
4
4
  "description": "AI Gateway - Unified interface for LLM provider routing and management",
5
5
  "type": "module",
6
6
  "exports": {
@@ -41,12 +41,12 @@
41
41
  "author": "x12i",
42
42
  "license": "mit",
43
43
  "dependencies": {
44
- "@x12i/activix": "^8.0.0",
45
- "@x12i/ai-providers-router": "^4.8.0",
46
- "@x12i/ai-tools": "^2.0.0",
44
+ "@x12i/activix": "^8.0.7",
45
+ "@x12i/ai-providers-router": "^4.8.5",
46
+ "@x12i/ai-tools": "^2.0.4",
47
47
  "@x12i/flex-md": "^4.8.0",
48
48
  "@x12i/logxer": "^4.3.5",
49
- "@x12i/optimixer": "^0.1.0",
49
+ "@x12i/optimixer": "^2.0.1",
50
50
  "@x12i/rendrix": "^4.3.0"
51
51
  },
52
52
  "devDependencies": {