@x12i/ai-gateway 9.6.3 → 9.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +10 -3
  2. package/dist/ai-tools-client.d.ts +27 -3
  3. package/dist/ai-tools-client.js +54 -8
  4. package/dist/gateway-config.d.ts +2 -0
  5. package/dist/gateway-config.js +16 -10
  6. package/dist/gateway-log-meta.d.ts +5 -1
  7. package/dist/gateway-log-meta.js +19 -1
  8. package/dist/gateway-provider-auto-register.js +1 -1
  9. package/dist/gateway-utils.d.ts +2 -1
  10. package/dist/gateway-utils.js +9 -7
  11. package/dist/gateway.d.ts +2 -0
  12. package/dist/gateway.js +601 -578
  13. package/dist/index.d.ts +4 -3
  14. package/dist/index.js +3 -2
  15. package/dist/logger-factory.d.ts +2 -0
  16. package/dist/logger-factory.js +11 -14
  17. package/dist/openrouter-routing.d.ts +12 -0
  18. package/dist/openrouter-routing.js +27 -0
  19. package/dist/runtime-objects.d.ts +2 -19
  20. package/dist/types.d.ts +4 -1
  21. package/dist-cjs/ai-tools-client.cjs +54 -8
  22. package/dist-cjs/ai-tools-client.d.ts +27 -3
  23. package/dist-cjs/gateway-config.cjs +16 -10
  24. package/dist-cjs/gateway-config.d.ts +2 -0
  25. package/dist-cjs/gateway-log-meta.cjs +19 -1
  26. package/dist-cjs/gateway-log-meta.d.ts +5 -1
  27. package/dist-cjs/gateway-provider-auto-register.cjs +1 -1
  28. package/dist-cjs/gateway-utils.cjs +9 -7
  29. package/dist-cjs/gateway-utils.d.ts +2 -1
  30. package/dist-cjs/gateway.cjs +601 -578
  31. package/dist-cjs/gateway.d.ts +2 -0
  32. package/dist-cjs/index.cjs +3 -2
  33. package/dist-cjs/index.d.ts +4 -3
  34. package/dist-cjs/logger-factory.cjs +11 -14
  35. package/dist-cjs/logger-factory.d.ts +2 -0
  36. package/dist-cjs/openrouter-routing.cjs +27 -0
  37. package/dist-cjs/openrouter-routing.d.ts +12 -0
  38. package/dist-cjs/runtime-objects.d.ts +2 -19
  39. package/dist-cjs/types.d.ts +4 -1
  40. package/package.json +5 -5
@@ -13,12 +13,13 @@ import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys }
13
13
  import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
14
14
  import { getAiToolsClient } from './ai-tools-client.js';
15
15
  import { autoRegisterProviders } from './gateway-provider-auto-register.js';
16
+ import { applyOpenRouterInvokePolicy } from './ai-tools-client.js';
16
17
  import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
17
- import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
18
+ import { gatewayLogDebug, withActivityIdentity, withGatewayLogContext } from './gateway-log-meta.js';
18
19
  import { invokeWithRetry } from './gateway-retry.js';
19
20
  /** Error message thrown by the router when no provider is registered or specified */
20
21
  const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
21
- const NO_PROVIDER_HINT = ' Set OPENROUTER_API_KEY in the environment to use OpenRouter (legacy OPEN_ROUTER_KEY is still read as fallback), or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
22
+ const NO_PROVIDER_HINT = ' Set OPENROUTER_API_KEY in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
22
23
  /** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
23
24
  function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
24
25
  const { tokens, costUsd, cost } = meta;
@@ -51,6 +52,8 @@ export class AIGateway {
51
52
  defaultModelConfig = {};
52
53
  _autoRegisterDone = false;
53
54
  _aiToolsClient = null;
55
+ preferOpenRouter;
56
+ openRouterApiKey;
54
57
  constructor(config = {}, activityManager) {
55
58
  this.config = config;
56
59
  this.activityManager = activityManager;
@@ -61,6 +64,8 @@ export class AIGateway {
61
64
  this.optimixerManager = components.optimixerManager;
62
65
  this.messageBuilderConfig = components.messageBuilderConfig;
63
66
  this.defaultModelConfig = components.defaultModelConfig ?? {};
67
+ this.preferOpenRouter = components.preferOpenRouter;
68
+ this.openRouterApiKey = components.openRouterApiKey;
64
69
  setGatewayRuntimeClients({
65
70
  activix: this.activityManager?.getTracker(),
66
71
  logger: this.logger
@@ -74,647 +79,665 @@ export class AIGateway {
74
79
  * Invoke chat request (without structured output requirements)
75
80
  */
76
81
  async invokeChat(request) {
77
- const startTime = Date.now();
78
82
  // Basic validation
79
83
  validateChatRequest(request);
80
84
  ensureGatewayRequestIdentity(request, undefined, this.logger);
81
85
  setGatewayLastJobId(resolveRuntimeJobId(request));
82
- // Generate simple task type ID
83
- const taskTypeId = request.taskTypeId || `task-${Date.now()}`;
84
- // Simple message construction
85
- const messages = this.buildSimpleMessages(request);
86
- // Merge config (modelConfig > request.config > gateway defaults)
87
- const aiTools = await this.getAiTools();
88
- const mergedConfig = await mergeConfig(request, this.config, this.logger, {
89
- defaultModelConfig: this.defaultModelConfig,
90
- catalog: aiTools?.catalog ?? null
91
- });
92
- // Activix start snapshot must match what the router receives (modelConfig-only callers omit request.config.model).
93
- request._mergedRouterConfig = mergedConfig;
94
- // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
95
- if (!this._autoRegisterDone) {
96
- await autoRegisterProviders(this.router, this.logger);
97
- this._autoRegisterDone = true;
98
- }
99
- const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
100
- // Start activity tracking if available
101
- let activity = undefined;
102
- if (this.activityManager) {
103
- try {
104
- activity = await this.activityManager.startActivity(request, startTime);
105
- }
106
- catch (activityError) {
107
- // Log activity tracking error but don't fail the request
108
- this.logger.warn('Failed to start activity tracking', {
109
- aiRequestId: request.aiRequestId,
110
- error: activityError instanceof Error ? activityError.message : String(activityError)
111
- });
112
- }
113
- }
114
- try {
115
- // Call router directly with merged config
116
- const response = await this.router.invoke({
117
- request: {
118
- messages,
119
- config: mergedConfig,
120
- identity: request.identity
121
- },
122
- mode: 'sync'
86
+ return withGatewayLogContext(request.identity, async () => {
87
+ const startTime = Date.now();
88
+ // Generate simple task type ID
89
+ const taskTypeId = request.taskTypeId || `task-${Date.now()}`;
90
+ // Simple message construction
91
+ const messages = this.buildSimpleMessages(request);
92
+ // Merge config (modelConfig > request.config > gateway defaults)
93
+ const aiTools = await this.getAiTools();
94
+ const mergedConfig = await mergeConfig(request, this.config, this.logger, {
95
+ defaultModelConfig: this.defaultModelConfig,
96
+ catalog: aiTools?.catalog ?? null,
97
+ routingEnv: aiTools?.routingEnv,
123
98
  });
124
- const metaChat = response?.metadata || {};
125
- const tokensChat = extractTokenUsageFromRouterResponse(response);
126
- const costCompletionChat = await resolveCostCompletionWithAiTools(response, tokensChat, {
127
- mergedConfig,
128
- calculator: aiTools?.calculator ?? null,
129
- calculateCost: this.config.aiTools?.calculateCost
99
+ // Activix start snapshot must match what the router receives (modelConfig-only callers omit request.config.model).
100
+ request._mergedRouterConfig = mergedConfig;
101
+ applyOpenRouterInvokePolicy(mergedConfig, {
102
+ preferOpenRouter: this.preferOpenRouter,
103
+ openRouterApiKey: this.openRouterApiKey,
104
+ routingEnv: aiTools?.routingEnv,
105
+ resolution: request._modelResolution,
130
106
  });
131
- // Create enhanced response
132
- const enhancedResponse = {
133
- content: response.content || '',
134
- metadata: {
135
- aiRequestId: request.aiRequestId,
136
- identity: request.identity,
137
- latencyMs: Date.now() - startTime,
138
- tokens: tokensChat,
139
- taskTypeId,
140
- agentType: 'chat',
141
- ...(costCompletionChat.costStatus === 'priced'
142
- ? {
143
- costUsd: costCompletionChat.cost,
144
- ...(typeof metaChat.cost === 'number'
145
- ? { cost: metaChat.cost }
146
- : { cost: costCompletionChat.cost })
147
- }
148
- : {}),
149
- ...(costCompletionChat.costStatus ? { costStatus: costCompletionChat.costStatus } : {}),
150
- ...(costCompletionChat.costBreakdown
151
- ? { costBreakdown: costCompletionChat.costBreakdown }
152
- : {})
153
- }
154
- };
155
- // Track activity success if activity was started
156
- if (activity) {
107
+ // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
108
+ if (!this._autoRegisterDone) {
109
+ await autoRegisterProviders(this.router, this.logger);
110
+ this._autoRegisterDone = true;
111
+ }
112
+ const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
113
+ // Start activity tracking if available
114
+ let activity = undefined;
115
+ if (this.activityManager) {
157
116
  try {
158
- await this.activityManager.logSuccess(activity, {
159
- ...costCompletionChat,
160
- response: enhancedResponse,
161
- endTime: Date.now(),
162
- duration: Date.now() - startTime
163
- });
117
+ activity = await this.activityManager.startActivity(request, startTime);
164
118
  }
165
119
  catch (activityError) {
166
120
  // Log activity tracking error but don't fail the request
167
- this.logger.warn('Failed to track activity success', {
121
+ this.logger.warn('Failed to start activity tracking', {
168
122
  aiRequestId: request.aiRequestId,
169
123
  error: activityError instanceof Error ? activityError.message : String(activityError)
170
124
  });
171
125
  }
172
126
  }
173
- if (optimixerPrediction) {
174
- await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
127
+ try {
128
+ // Call router directly with merged config
129
+ const response = await this.router.invoke({
130
+ request: {
131
+ messages,
132
+ config: mergedConfig,
133
+ identity: request.identity
134
+ },
135
+ mode: 'sync'
136
+ });
137
+ const metaChat = response?.metadata || {};
138
+ const tokensChat = extractTokenUsageFromRouterResponse(response);
139
+ const costCompletionChat = await resolveCostCompletionWithAiTools(response, tokensChat, {
140
+ mergedConfig,
141
+ calculator: aiTools?.calculator ?? null,
142
+ calculateCost: this.config.aiTools?.calculateCost
143
+ });
144
+ // Create enhanced response
145
+ const enhancedResponse = {
146
+ content: response.content || '',
147
+ metadata: {
148
+ aiRequestId: request.aiRequestId,
149
+ identity: request.identity,
150
+ latencyMs: Date.now() - startTime,
151
+ tokens: tokensChat,
152
+ taskTypeId,
153
+ agentType: 'chat',
154
+ ...(costCompletionChat.costStatus === 'priced'
155
+ ? {
156
+ costUsd: costCompletionChat.cost,
157
+ ...(typeof metaChat.cost === 'number'
158
+ ? { cost: metaChat.cost }
159
+ : { cost: costCompletionChat.cost })
160
+ }
161
+ : {}),
162
+ ...(costCompletionChat.costStatus ? { costStatus: costCompletionChat.costStatus } : {}),
163
+ ...(costCompletionChat.costBreakdown
164
+ ? { costBreakdown: costCompletionChat.costBreakdown }
165
+ : {})
166
+ }
167
+ };
168
+ // Track activity success if activity was started
169
+ if (activity) {
170
+ try {
171
+ await this.activityManager.logSuccess(activity, {
172
+ ...costCompletionChat,
173
+ response: enhancedResponse,
174
+ endTime: Date.now(),
175
+ duration: Date.now() - startTime
176
+ });
177
+ }
178
+ catch (activityError) {
179
+ // Log activity tracking error but don't fail the request
180
+ this.logger.warn('Failed to track activity success', {
181
+ aiRequestId: request.aiRequestId,
182
+ error: activityError instanceof Error ? activityError.message : String(activityError)
183
+ });
184
+ }
185
+ }
186
+ if (optimixerPrediction) {
187
+ await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
188
+ }
189
+ warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
190
+ tokens: enhancedResponse.metadata.tokens,
191
+ costUsd: enhancedResponse.metadata.costUsd,
192
+ cost: enhancedResponse.metadata.cost
193
+ }, 'invokeChat');
194
+ return enhancedResponse;
175
195
  }
176
- warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
177
- tokens: enhancedResponse.metadata.tokens,
178
- costUsd: enhancedResponse.metadata.costUsd,
179
- cost: enhancedResponse.metadata.cost
180
- }, 'invokeChat');
181
- return enhancedResponse;
182
- }
183
- catch (error) {
184
- const err = error instanceof Error ? error : new Error(String(error));
185
- if (err.message.includes(NO_PROVIDER_ERROR)) {
186
- throw new Error(err.message + NO_PROVIDER_HINT);
196
+ catch (error) {
197
+ const err = error instanceof Error ? error : new Error(String(error));
198
+ if (err.message.includes(NO_PROVIDER_ERROR)) {
199
+ throw new Error(err.message + NO_PROVIDER_HINT);
200
+ }
201
+ throw err;
187
202
  }
188
- throw err;
189
- }
203
+ });
190
204
  }
191
205
  /**
192
206
  * Invoke AI request (with structured output support)
193
207
  */
194
208
  async invoke(request) {
195
- const startTime = Date.now();
196
209
  // Basic validation
197
210
  validateAIRequest(request);
198
211
  ensureGatewayRequestIdentity(request, undefined, this.logger);
199
212
  setGatewayLastJobId(resolveRuntimeJobId(request));
200
- // Generate simple task type ID
201
- const taskTypeId = request.taskTypeId || `task-${Date.now()}`;
202
- // Resolve instructions and build messages using proper components
203
- let resolvedRequest = request;
204
- let messages = [];
205
- // Create parsedSnapshot to store parsed request data and messages
206
- const parsedSnapshot = {};
207
- // Use proper instruction resolution and message building
208
- let builtMessages;
209
- try {
210
- builtMessages = await buildMessages(request, this.messageBuilderConfig, {
211
- parsedSnapshot
212
- });
213
- messages = builtMessages.messages;
214
- resolvedRequest = request;
215
- }
216
- catch (error) {
217
- // If message building fails (e.g., prompt/instruction resolution error), log as bad request
218
- const err = error instanceof Error ? error : new Error(String(error));
219
- const endTime = Date.now();
220
- const duration = endTime - startTime;
221
- // Determine if this is a prompt/instruction resolution error
222
- // If a key was provided but couldn't be resolved to content, it's a bad request
223
- const errWithCode = err; // Type assertion for error with optional code property
224
- const isResolutionError = err.name === 'InstructionNotFoundError' ||
225
- err.name === 'InstructionBackendError' ||
226
- err.name === 'TemplateResolutionError' ||
227
- errWithCode.code === 'PROMPT_NOT_FOUND' ||
228
- errWithCode.code === 'PROMPT_RESOLUTION_ERROR' ||
229
- errWithCode.code === 'PROMPT_RENDERED_EMPTY' ||
230
- errWithCode.code === 'TEMPLATE_RESOLUTION_ERROR' ||
231
- errWithCode.code === 'TEMPLATE_VARIABLE_MISSING' ||
232
- err.message.includes('Failed to resolve') ||
233
- err.message.includes('Failed to render prompt template') ||
234
- err.message.includes('not found') ||
235
- err.message.includes('Instruction not found') ||
236
- err.message.includes('Prompt not found');
237
- if (isResolutionError && this.activityManager) {
238
- // Log to bad requests collection
239
- await this.activityManager.logBadRequest(request, err, {
240
- endTime,
241
- duration,
242
- error: err.message,
243
- errorType: errWithCode.code || 'MessageBuildError',
244
- diagnosticInfo: {
245
- errorCode: errWithCode.code,
246
- errorName: err.name,
247
- failureType: 'validation-failure',
248
- stage: 'message-building',
249
- prompt: request.prompt,
250
- instructions: typeof request.instructions === 'string' ? request.instructions.substring(0, 100) : '(object)'
251
- },
252
- failureType: 'validation-failure'
253
- }, startTime);
254
- }
255
- const rejectMeta = buildInvokeRejectionMetadata({
256
- request,
257
- taskTypeId,
258
- startTime,
259
- gatewayAiRequestId: request.aiRequestId
260
- });
261
- attachGatewayInvokeRejectionMetadata(err, rejectMeta);
262
- // Re-throw the error so it propagates to the caller
263
- throw err;
264
- }
265
- // Store messages in parsedSnapshot for activity tracking
266
- parsedSnapshot.messages = messages;
267
- // parsed.instructions and parsed.prompt are set by buildMessages to the resolved/rendered content
268
- // (after key resolution and Rendrix). Do not overwrite with raw request keys.
269
- if (parsedSnapshot.context === undefined) {
270
- parsedSnapshot.context = request.context;
271
- }
272
- // Attach parsedSnapshot to request for activity tracking
273
- request._parsedRequest = parsedSnapshot;
274
- // Merge config (modelConfig > request.config > gateway defaults)
275
- const aiTools = await this.getAiTools();
276
- const mergedConfig = await mergeConfig(request, this.config, this.logger, {
277
- defaultModelConfig: this.defaultModelConfig,
278
- catalog: aiTools?.catalog ?? null
279
- });
280
- request._mergedRouterConfig = mergedConfig;
281
- logResolvedModelRouting(this.logger, request, mergedConfig);
282
- const diagnosticsMode = request.diagnostics?.mode;
283
- const traceEnabled = diagnosticsMode === 'trace';
284
- const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
285
- // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
286
- if (!this._autoRegisterDone) {
287
- await autoRegisterProviders(this.router, this.logger);
288
- this._autoRegisterDone = true;
289
- }
290
- const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
291
- // Start activity tracking if available
292
- let activity = undefined;
293
- if (this.activityManager) {
213
+ return withGatewayLogContext(request.identity, async () => {
214
+ const startTime = Date.now();
215
+ // Generate simple task type ID
216
+ const taskTypeId = request.taskTypeId || `task-${Date.now()}`;
217
+ // Resolve instructions and build messages using proper components
218
+ let resolvedRequest = request;
219
+ let messages = [];
220
+ // Create parsedSnapshot to store parsed request data and messages
221
+ const parsedSnapshot = {};
222
+ // Use proper instruction resolution and message building
223
+ let builtMessages;
294
224
  try {
295
- activity = await this.activityManager.startActivity(request, startTime);
296
- }
297
- catch (activityError) {
298
- // Log activity tracking error but don't fail the request
299
- this.logger.warn('Failed to start activity tracking', {
300
- aiRequestId: request.aiRequestId,
301
- error: activityError instanceof Error ? activityError.message : String(activityError)
225
+ builtMessages = await buildMessages(request, this.messageBuilderConfig, {
226
+ parsedSnapshot
302
227
  });
228
+ messages = builtMessages.messages;
229
+ resolvedRequest = request;
303
230
  }
304
- }
305
- try {
306
- let response;
307
- let traceAttempts;
308
- let traceRetryCount;
309
- let traceFallbackCount;
310
- let traceRequestIds;
311
- let providerCallLatencyMs;
312
- if (!traceEnabled) {
313
- // Default minimal behavior (no extra allocations/payload).
314
- response = await this.router.invoke({
315
- request: {
316
- messages,
317
- config: mergedConfig,
318
- identity: request.identity
319
- },
320
- mode: 'sync'
231
+ catch (error) {
232
+ // If message building fails (e.g., prompt/instruction resolution error), log as bad request
233
+ const err = error instanceof Error ? error : new Error(String(error));
234
+ const endTime = Date.now();
235
+ const duration = endTime - startTime;
236
+ // Determine if this is a prompt/instruction resolution error
237
+ // If a key was provided but couldn't be resolved to content, it's a bad request
238
+ const errWithCode = err; // Type assertion for error with optional code property
239
+ const isResolutionError = err.name === 'InstructionNotFoundError' ||
240
+ err.name === 'InstructionBackendError' ||
241
+ err.name === 'TemplateResolutionError' ||
242
+ errWithCode.code === 'PROMPT_NOT_FOUND' ||
243
+ errWithCode.code === 'PROMPT_RESOLUTION_ERROR' ||
244
+ errWithCode.code === 'PROMPT_RENDERED_EMPTY' ||
245
+ errWithCode.code === 'TEMPLATE_RESOLUTION_ERROR' ||
246
+ errWithCode.code === 'TEMPLATE_VARIABLE_MISSING' ||
247
+ err.message.includes('Failed to resolve') ||
248
+ err.message.includes('Failed to render prompt template') ||
249
+ err.message.includes('not found') ||
250
+ err.message.includes('Instruction not found') ||
251
+ err.message.includes('Prompt not found');
252
+ if (isResolutionError && this.activityManager) {
253
+ // Log to bad requests collection
254
+ await this.activityManager.logBadRequest(request, err, {
255
+ endTime,
256
+ duration,
257
+ error: err.message,
258
+ errorType: errWithCode.code || 'MessageBuildError',
259
+ diagnosticInfo: {
260
+ errorCode: errWithCode.code,
261
+ errorName: err.name,
262
+ failureType: 'validation-failure',
263
+ stage: 'message-building',
264
+ prompt: request.prompt,
265
+ instructions: typeof request.instructions === 'string' ? request.instructions.substring(0, 100) : '(object)'
266
+ },
267
+ failureType: 'validation-failure'
268
+ }, startTime);
269
+ }
270
+ const rejectMeta = buildInvokeRejectionMetadata({
271
+ request,
272
+ taskTypeId,
273
+ startTime,
274
+ gatewayAiRequestId: request.aiRequestId
321
275
  });
276
+ attachGatewayInvokeRejectionMetadata(err, rejectMeta);
277
+ // Re-throw the error so it propagates to the caller
278
+ throw err;
322
279
  }
323
- else {
324
- const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
325
- const capErrorMessage = (s) => capString(s, 500);
326
- const safeJsonStringify = (value) => {
327
- try {
328
- return JSON.stringify(value);
329
- }
330
- catch {
331
- return '[Unserializable]';
332
- }
333
- };
334
- const gatewayAiRequestId = request.aiRequestId;
335
- const baseRequest = {
336
- request: {
337
- messages,
338
- config: mergedConfig,
339
- identity: request.identity
340
- },
341
- mode: 'sync'
342
- };
343
- // Build deterministic provider/model candidate chain.
344
- const candidates = [];
345
- const primaryProvider = mergedConfig?.provider;
346
- const primaryModel = mergedConfig?.model;
347
- if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
348
- candidates.push({ provider: primaryProvider, model: primaryModel });
280
+ // Store messages in parsedSnapshot for activity tracking
281
+ parsedSnapshot.messages = messages;
282
+ // parsed.instructions and parsed.prompt are set by buildMessages to the resolved/rendered content
283
+ // (after key resolution and Rendrix). Do not overwrite with raw request keys.
284
+ if (parsedSnapshot.context === undefined) {
285
+ parsedSnapshot.context = request.context;
286
+ }
287
+ // Attach parsedSnapshot to request for activity tracking
288
+ request._parsedRequest = parsedSnapshot;
289
+ // Merge config (modelConfig > request.config > gateway defaults)
290
+ const aiTools = await this.getAiTools();
291
+ const mergedConfig = await mergeConfig(request, this.config, this.logger, {
292
+ defaultModelConfig: this.defaultModelConfig,
293
+ catalog: aiTools?.catalog ?? null,
294
+ routingEnv: aiTools?.routingEnv,
295
+ });
296
+ request._mergedRouterConfig = mergedConfig;
297
+ applyOpenRouterInvokePolicy(mergedConfig, {
298
+ preferOpenRouter: this.preferOpenRouter,
299
+ openRouterApiKey: this.openRouterApiKey,
300
+ routingEnv: aiTools?.routingEnv,
301
+ resolution: request._modelResolution,
302
+ });
303
+ logResolvedModelRouting(this.logger, request, mergedConfig);
304
+ const diagnosticsMode = request.diagnostics?.mode;
305
+ const traceEnabled = diagnosticsMode === 'trace';
306
+ const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
307
+ // Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
308
+ if (!this._autoRegisterDone) {
309
+ await autoRegisterProviders(this.router, this.logger);
310
+ this._autoRegisterDone = true;
311
+ }
312
+ const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
313
+ // Start activity tracking if available
314
+ let activity = undefined;
315
+ if (this.activityManager) {
316
+ try {
317
+ activity = await this.activityManager.startActivity(request, startTime);
318
+ }
319
+ catch (activityError) {
320
+ // Log activity tracking error but don't fail the request
321
+ this.logger.warn('Failed to start activity tracking', {
322
+ aiRequestId: request.aiRequestId,
323
+ error: activityError instanceof Error ? activityError.message : String(activityError)
324
+ });
349
325
  }
350
- const defaultTarget = this.config?.defaultTarget;
351
- if (defaultTarget?.engine && defaultTarget?.model) {
352
- candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
326
+ }
327
+ try {
328
+ let response;
329
+ let traceAttempts;
330
+ let traceRetryCount;
331
+ let traceFallbackCount;
332
+ let traceRequestIds;
333
+ let providerCallLatencyMs;
334
+ if (!traceEnabled) {
335
+ // Default minimal behavior (no extra allocations/payload).
336
+ response = await this.router.invoke({
337
+ request: {
338
+ messages,
339
+ config: mergedConfig,
340
+ identity: request.identity
341
+ },
342
+ mode: 'sync'
343
+ });
353
344
  }
354
- const fallbackChain = this.config?.fallbackChain;
355
- if (Array.isArray(fallbackChain)) {
356
- for (const item of fallbackChain) {
357
- if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
358
- candidates.push({ provider: String(item.engine), model: String(item.model) });
345
+ else {
346
+ const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
347
+ const capErrorMessage = (s) => capString(s, 500);
348
+ const safeJsonStringify = (value) => {
349
+ try {
350
+ return JSON.stringify(value);
359
351
  }
352
+ catch {
353
+ return '[Unserializable]';
354
+ }
355
+ };
356
+ const gatewayAiRequestId = request.aiRequestId;
357
+ const baseRequest = {
358
+ request: {
359
+ messages,
360
+ config: mergedConfig,
361
+ identity: request.identity
362
+ },
363
+ mode: 'sync'
364
+ };
365
+ // Build deterministic provider/model candidate chain.
366
+ const candidates = [];
367
+ const primaryProvider = mergedConfig?.provider;
368
+ const primaryModel = mergedConfig?.model;
369
+ if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
370
+ candidates.push({ provider: primaryProvider, model: primaryModel });
360
371
  }
361
- }
362
- // De-dup while preserving order.
363
- const seen = new Set();
364
- const deduped = candidates.filter(c => {
365
- const key = `${c.provider}::${c.model}`;
366
- if (seen.has(key))
367
- return false;
368
- seen.add(key);
369
- return true;
370
- });
371
- traceAttempts = [];
372
- // Attempt execution across fallbacks (authoritative ordering).
373
- let lastError;
374
- for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
375
- const candidate = deduped[fallbackIndex];
376
- // Track per-retry attempt objects through retry hooks.
377
- const attemptIndexByRetry = new Map();
378
- try {
379
- const result = await invokeWithRetry({
380
- ...baseRequest,
381
- request: {
382
- ...baseRequest.request,
383
- config: {
384
- ...mergedConfig,
385
- provider: candidate.provider,
386
- model: candidate.model
387
- }
372
+ const defaultTarget = this.config?.defaultTarget;
373
+ if (defaultTarget?.engine && defaultTarget?.model) {
374
+ candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
375
+ }
376
+ const fallbackChain = this.config?.fallbackChain;
377
+ if (Array.isArray(fallbackChain)) {
378
+ for (const item of fallbackChain) {
379
+ if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
380
+ candidates.push({ provider: String(item.engine), model: String(item.model) });
388
381
  }
389
- }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
390
- onTryStart: ({ retryIndex, startedAt }) => {
391
- const idx = traceAttempts.push({
392
- timing: { startedAt, endedAt: startedAt, durationMs: 0 },
393
- routing: {
382
+ }
383
+ }
384
+ // De-dup while preserving order.
385
+ const seen = new Set();
386
+ const deduped = candidates.filter(c => {
387
+ const key = `${c.provider}::${c.model}`;
388
+ if (seen.has(key))
389
+ return false;
390
+ seen.add(key);
391
+ return true;
392
+ });
393
+ traceAttempts = [];
394
+ // Attempt execution across fallbacks (authoritative ordering).
395
+ let lastError;
396
+ for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
397
+ const candidate = deduped[fallbackIndex];
398
+ // Track per-retry attempt objects through retry hooks.
399
+ const attemptIndexByRetry = new Map();
400
+ try {
401
+ const result = await invokeWithRetry({
402
+ ...baseRequest,
403
+ request: {
404
+ ...baseRequest.request,
405
+ config: {
406
+ ...mergedConfig,
394
407
  provider: candidate.provider,
395
- requestIds: { gatewayAiRequestId },
396
- retryIndex,
397
- fallbackIndex
398
- },
399
- usage: {
400
- tokens: { prompt: 0, completion: 0, total: 0 },
401
- maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
402
- },
403
- modelUsed: candidate.model,
404
- ok: false
405
- }) - 1;
406
- attemptIndexByRetry.set(retryIndex, idx);
407
- },
408
- onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
409
- const idx = attemptIndexByRetry.get(retryIndex);
410
- if (idx === undefined)
411
- return;
412
- const a = traceAttempts[idx];
413
- a.timing.endedAt = endedAt;
414
- a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
415
- a.ok = ok;
416
- const respAny = tryResp;
417
- if (ok && respAny) {
418
- const meta = respAny.metadata || {};
419
- const tokenCounts = extractTokenUsageFromRouterResponse(respAny);
420
- a.usage = {
421
- tokens: tokenCounts,
422
- maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
423
- ? meta.maxTokensRequested
424
- : typeof mergedConfig?.maxTokens === 'number'
425
- ? mergedConfig.maxTokens
426
- : undefined
427
- };
428
- a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
429
- if (typeof meta?.region === 'string')
430
- a.routing.region = meta.region;
431
- const requestIds = {
432
- gatewayAiRequestId,
433
- routerRequestId: respAny.requestId || meta?.requestId
434
- };
435
- if (typeof meta?.providerRequestId === 'string')
436
- requestIds.providerRequestId = meta.providerRequestId;
437
- if (typeof meta?.openrouterRequestId === 'string')
438
- requestIds.openrouterRequestId = meta.openrouterRequestId;
439
- if (meta?.requestIds && typeof meta.requestIds === 'object') {
440
- for (const [k, v] of Object.entries(meta.requestIds)) {
441
- if (typeof v === 'string')
442
- requestIds[k] = v;
408
+ model: candidate.model
409
+ }
410
+ }
411
+ }, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
412
+ onTryStart: ({ retryIndex, startedAt }) => {
413
+ const idx = traceAttempts.push({
414
+ timing: { startedAt, endedAt: startedAt, durationMs: 0 },
415
+ routing: {
416
+ provider: candidate.provider,
417
+ requestIds: { gatewayAiRequestId },
418
+ retryIndex,
419
+ fallbackIndex
420
+ },
421
+ usage: {
422
+ tokens: { prompt: 0, completion: 0, total: 0 },
423
+ maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
424
+ },
425
+ modelUsed: candidate.model,
426
+ ok: false
427
+ }) - 1;
428
+ attemptIndexByRetry.set(retryIndex, idx);
429
+ },
430
+ onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
431
+ const idx = attemptIndexByRetry.get(retryIndex);
432
+ if (idx === undefined)
433
+ return;
434
+ const a = traceAttempts[idx];
435
+ a.timing.endedAt = endedAt;
436
+ a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
437
+ a.ok = ok;
438
+ const respAny = tryResp;
439
+ if (ok && respAny) {
440
+ const meta = respAny.metadata || {};
441
+ const tokenCounts = extractTokenUsageFromRouterResponse(respAny);
442
+ a.usage = {
443
+ tokens: tokenCounts,
444
+ maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
445
+ ? meta.maxTokensRequested
446
+ : typeof mergedConfig?.maxTokens === 'number'
447
+ ? mergedConfig.maxTokens
448
+ : undefined
449
+ };
450
+ a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
451
+ if (typeof meta?.region === 'string')
452
+ a.routing.region = meta.region;
453
+ const requestIds = {
454
+ gatewayAiRequestId,
455
+ routerRequestId: respAny.requestId || meta?.requestId
456
+ };
457
+ if (typeof meta?.providerRequestId === 'string')
458
+ requestIds.providerRequestId = meta.providerRequestId;
459
+ if (typeof meta?.openrouterRequestId === 'string')
460
+ requestIds.openrouterRequestId = meta.openrouterRequestId;
461
+ if (meta?.requestIds && typeof meta.requestIds === 'object') {
462
+ for (const [k, v] of Object.entries(meta.requestIds)) {
463
+ if (typeof v === 'string')
464
+ requestIds[k] = v;
465
+ }
466
+ }
467
+ a.routing.requestIds = requestIds;
468
+ a.modelUsed =
469
+ meta?.modelUsed || meta?.model || respAny.model || candidate.model;
470
+ const attemptCostUsd = extractCostUsdFromRouterResponse(respAny);
471
+ if (typeof attemptCostUsd === 'number')
472
+ a.costUsd = attemptCostUsd;
473
+ if (includeRawProviderPayload) {
474
+ // Size-capped preview only.
475
+ const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
476
+ const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
477
+ a.rawProviderPayload = capString(rawStr, 4000);
443
478
  }
444
479
  }
445
- a.routing.requestIds = requestIds;
446
- a.modelUsed =
447
- meta?.modelUsed || meta?.model || respAny.model || candidate.model;
448
- const attemptCostUsd = extractCostUsdFromRouterResponse(respAny);
449
- if (typeof attemptCostUsd === 'number')
450
- a.costUsd = attemptCostUsd;
451
- if (includeRawProviderPayload) {
452
- // Size-capped preview only.
453
- const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
454
- const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
455
- a.rawProviderPayload = capString(rawStr, 4000);
480
+ else if (tryErr) {
481
+ a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
456
482
  }
457
483
  }
458
- else if (tryErr) {
459
- a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
460
- }
461
- }
462
- });
463
- response = result.response;
464
- lastError = undefined;
465
- break; // success => stop fallback chain
484
+ });
485
+ response = result.response;
486
+ lastError = undefined;
487
+ break; // success => stop fallback chain
488
+ }
489
+ catch (err) {
490
+ lastError = err instanceof Error ? err : new Error(String(err));
491
+ continue;
492
+ }
466
493
  }
467
- catch (err) {
468
- lastError = err instanceof Error ? err : new Error(String(err));
469
- continue;
494
+ if (!response) {
495
+ const fallbackAttempts = buildGatewayFallbackAttemptsFromTrace(traceAttempts, deduped, lastError);
496
+ const providersTried = [...new Set(deduped.map((c) => c.provider))];
497
+ this.logger.error('Trace fallback chain exhausted', withActivityIdentity(request.identity, {
498
+ providersTried,
499
+ candidates: deduped,
500
+ fallbackAttempts,
501
+ debugKind: gatewayLogDebug.anomaly
502
+ }));
503
+ const exhausted = new FallbackExhaustedError(mapGatewayFallbackAttemptsToRouter(fallbackAttempts));
504
+ exhausted.message = formatFallbackExhaustionMessage(fallbackAttempts, deduped);
505
+ if (lastError) {
506
+ exhausted.cause = lastError;
507
+ }
508
+ throw exhausted;
470
509
  }
471
- }
472
- if (!response) {
473
- const fallbackAttempts = buildGatewayFallbackAttemptsFromTrace(traceAttempts, deduped, lastError);
474
- const providersTried = [...new Set(deduped.map((c) => c.provider))];
475
- this.logger.error('Trace fallback chain exhausted', withActivityIdentity(request.identity, {
476
- providersTried,
477
- candidates: deduped,
478
- fallbackAttempts,
479
- debugKind: gatewayLogDebug.anomaly
480
- }));
481
- const exhausted = new FallbackExhaustedError(mapGatewayFallbackAttemptsToRouter(fallbackAttempts));
482
- exhausted.message = formatFallbackExhaustionMessage(fallbackAttempts, deduped);
483
- if (lastError) {
484
- exhausted.cause = lastError;
510
+ // Summary counts + final request ids.
511
+ traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
512
+ const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
513
+ traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
514
+ const finalResp = response;
515
+ const finalMeta = finalResp?.metadata || {};
516
+ traceRequestIds = {
517
+ gatewayAiRequestId,
518
+ routerRequestId: finalResp?.requestId || finalMeta?.requestId
519
+ };
520
+ if (typeof finalMeta?.providerRequestId === 'string')
521
+ traceRequestIds.providerRequestId = finalMeta.providerRequestId;
522
+ if (typeof finalMeta?.openrouterRequestId === 'string')
523
+ traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
524
+ if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
525
+ for (const [k, v] of Object.entries(finalMeta.requestIds)) {
526
+ if (typeof v === 'string')
527
+ traceRequestIds[k] = v;
528
+ }
485
529
  }
486
- throw exhausted;
530
+ const lastOk = [...traceAttempts].reverse().find(a => a.ok);
531
+ providerCallLatencyMs = lastOk?.timing?.durationMs;
487
532
  }
488
- // Summary counts + final request ids.
489
- traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
490
- const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
491
- traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
492
- const finalResp = response;
493
- const finalMeta = finalResp?.metadata || {};
494
- traceRequestIds = {
495
- gatewayAiRequestId,
496
- routerRequestId: finalResp?.requestId || finalMeta?.requestId
497
- };
498
- if (typeof finalMeta?.providerRequestId === 'string')
499
- traceRequestIds.providerRequestId = finalMeta.providerRequestId;
500
- if (typeof finalMeta?.openrouterRequestId === 'string')
501
- traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
502
- if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
503
- for (const [k, v] of Object.entries(finalMeta.requestIds)) {
504
- if (typeof v === 'string')
505
- traceRequestIds[k] = v;
506
- }
533
+ // Contract output processing removed - expectedSchema no longer supported
534
+ // Create enhanced response - extract content properly from router response
535
+ const routerResponse = response;
536
+ // Extract content from router response - router returns outputText, not content
537
+ let content = routerResponse.content || routerResponse.outputText || '';
538
+ // If content is still empty, try to extract from ai-activities metadata
539
+ if (!content && routerResponse.metadata?.['ai-activities-response']?.outputText) {
540
+ content = routerResponse.metadata['ai-activities-response'].outputText;
507
541
  }
508
- const lastOk = [...traceAttempts].reverse().find(a => a.ok);
509
- providerCallLatencyMs = lastOk?.timing?.durationMs;
510
- }
511
- // Contract output processing removed - expectedSchema no longer supported
512
- // Create enhanced response - extract content properly from router response
513
- const routerResponse = response;
514
- // Extract content from router response - router returns outputText, not content
515
- let content = routerResponse.content || routerResponse.outputText || '';
516
- // If content is still empty, try to extract from ai-activities metadata
517
- if (!content && routerResponse.metadata?.['ai-activities-response']?.outputText) {
518
- content = routerResponse.metadata['ai-activities-response'].outputText;
519
- }
520
- // Parse content using available parsers (flex-md, content normalizer, etc.)
521
- let parsedContent = undefined;
522
- let contentType = undefined;
523
- let parsingMethod = undefined;
524
- // Actually use flex-md parsing - extract structured data from markdown
525
- try {
526
- this.logger.debug('Attempting flex-md extraction', withActivityIdentity(request.identity, {
527
- contentLength: content.length,
528
- hasInstructions: !!resolvedRequest.instructions,
529
- debugKind: gatewayLogDebug.intent
530
- }));
531
- // Let flex-md extract structured data from the response content
532
- const extractionResult = await extractJsonFromFlexMd(content, this.logger);
533
- this.logger.debug('Flex-md extraction result', withActivityIdentity(request.identity, {
534
- hasResult: !!extractionResult,
535
- hasJson: !!(extractionResult && extractionResult.json),
536
- method: extractionResult?.method,
537
- jsonType: extractionResult?.json ? typeof extractionResult.json : 'none',
538
- debugKind: gatewayLogDebug.state
539
- }));
540
- if (extractionResult && extractionResult.json) {
541
- // Successfully extracted structured data
542
- parsedContent = extractionResult.json;
543
- this.logger.info('Flex-md extraction successful - parsed into structured object', withActivityIdentity(request.identity, {
544
- method: extractionResult.method,
545
- extractedKeys: Object.keys(extractionResult.json),
546
- debugKind: gatewayLogDebug.event
542
+ // Parse content using available parsers (flex-md, content normalizer, etc.)
543
+ let parsedContent = undefined;
544
+ let contentType = undefined;
545
+ let parsingMethod = undefined;
546
+ // Actually use flex-md parsing - extract structured data from markdown
547
+ try {
548
+ this.logger.debug('Attempting flex-md extraction', withActivityIdentity(request.identity, {
549
+ contentLength: content.length,
550
+ hasInstructions: !!resolvedRequest.instructions,
551
+ debugKind: gatewayLogDebug.intent
552
+ }));
553
+ // Let flex-md extract structured data from the response content
554
+ const extractionResult = await extractJsonFromFlexMd(content, this.logger);
555
+ this.logger.debug('Flex-md extraction result', withActivityIdentity(request.identity, {
556
+ hasResult: !!extractionResult,
557
+ hasJson: !!(extractionResult && extractionResult.json),
558
+ method: extractionResult?.method,
559
+ jsonType: extractionResult?.json ? typeof extractionResult.json : 'none',
560
+ debugKind: gatewayLogDebug.state
547
561
  }));
562
+ if (extractionResult && extractionResult.json) {
563
+ // Successfully extracted structured data
564
+ parsedContent = extractionResult.json;
565
+ this.logger.info('Flex-md extraction successful - parsed into structured object', withActivityIdentity(request.identity, {
566
+ method: extractionResult.method,
567
+ extractedKeys: Object.keys(extractionResult.json),
568
+ debugKind: gatewayLogDebug.event
569
+ }));
570
+ }
571
+ else {
572
+ // Extraction failed, fall back to raw text wrapper
573
+ this.logger.warn('Flex-md extraction failed - no structured data extracted', withActivityIdentity(request.identity, {
574
+ hasResult: !!extractionResult,
575
+ method: extractionResult?.method || 'none',
576
+ debugKind: gatewayLogDebug.anomaly
577
+ }));
578
+ parsedContent = { rawText: content };
579
+ }
548
580
  }
549
- else {
581
+ catch (extractionError) {
550
582
  // Extraction failed, fall back to raw text wrapper
551
- this.logger.warn('Flex-md extraction failed - no structured data extracted', withActivityIdentity(request.identity, {
552
- hasResult: !!extractionResult,
553
- method: extractionResult?.method || 'none',
583
+ const errorMessage = extractionError instanceof Error ? extractionError.message : String(extractionError);
584
+ this.logger.warn('Flex-md extraction failed - flex-md library compatibility issue', withActivityIdentity(request.identity, {
585
+ error: errorMessage,
586
+ issue: 'flex-md uses require() in ES module context - needs fixing in flex-md-loader.ts',
587
+ fallback: 'using rawText wrapper',
554
588
  debugKind: gatewayLogDebug.anomaly
555
589
  }));
556
590
  parsedContent = { rawText: content };
557
591
  }
558
- }
559
- catch (extractionError) {
560
- // Extraction failed, fall back to raw text wrapper
561
- const errorMessage = extractionError instanceof Error ? extractionError.message : String(extractionError);
562
- this.logger.warn('Flex-md extraction failed - flex-md library compatibility issue', withActivityIdentity(request.identity, {
563
- error: errorMessage,
564
- issue: 'flex-md uses require() in ES module context - needs fixing in flex-md-loader.ts',
565
- fallback: 'using rawText wrapper',
566
- debugKind: gatewayLogDebug.anomaly
567
- }));
568
- parsedContent = { rawText: content };
569
- }
570
- contentType = 'structured';
571
- parsingMethod = 'flex-md';
572
- const outputContractKeys = resolveOutputContractFieldKeys(request);
573
- parsedContent = await enrichParsedContentForOutputContract(parsedContent, content, outputContractKeys, this.logger);
574
- let tokens = extractTokenUsageFromRouterResponse(routerResponse);
575
- if (!(tokens.prompt || tokens.completion || tokens.total)) {
576
- const alt = routerResponse?.rawResponse ?? routerResponse?.raw;
577
- if (alt != null && typeof alt === 'object' && alt !== routerResponse) {
578
- const second = extractTokenUsageFromRouterResponse(alt);
579
- if (second.prompt || second.completion || second.total)
580
- tokens = second;
592
+ contentType = 'structured';
593
+ parsingMethod = 'flex-md';
594
+ const outputContractKeys = resolveOutputContractFieldKeys(request);
595
+ parsedContent = await enrichParsedContentForOutputContract(parsedContent, content, outputContractKeys, this.logger);
596
+ let tokens = extractTokenUsageFromRouterResponse(routerResponse);
597
+ if (!(tokens.prompt || tokens.completion || tokens.total)) {
598
+ const alt = routerResponse?.rawResponse ?? routerResponse?.raw;
599
+ if (alt != null && typeof alt === 'object' && alt !== routerResponse) {
600
+ const second = extractTokenUsageFromRouterResponse(alt);
601
+ if (second.prompt || second.completion || second.total)
602
+ tokens = second;
603
+ }
581
604
  }
582
- }
583
- let costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
584
- mergedConfig,
585
- calculator: aiTools?.calculator ?? null,
586
- calculateCost: this.config.aiTools?.calculateCost
587
- });
588
- if (!costCompletion.costStatus && hasNonZeroTokenUsage(tokens)) {
589
- costCompletion = { ...costCompletion, costStatus: 'unpriced' };
590
- }
591
- const routerMetaForCost = routerResponse?.metadata || {};
592
- const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
593
- const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
594
- const traceMergedRouterSnapshot = traceEnabled ? pickTraceMergedRouterConfig(mergedConfig) : undefined;
595
- if (traceEnabled && traceAttempts) {
596
- await enrichTraceAttemptsWithBilling(traceAttempts, costCompletion, {
605
+ let costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
597
606
  mergedConfig,
598
607
  calculator: aiTools?.calculator ?? null,
599
608
  calculateCost: this.config.aiTools?.calculateCost
600
609
  });
601
- }
602
- const traceUsageSummary = traceEnabled
603
- ? buildTraceUsageSummary(tokens, costCompletion, routingMetadataSlice.maxTokensRequested)
604
- : undefined;
605
- const enhancedResponse = {
606
- content: content,
607
- parsedContent: parsedContent,
608
- metadata: {
609
- aiRequestId: request.aiRequestId,
610
- identity: request.identity,
611
- latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
612
- tokens: tokens,
613
- taskTypeId,
614
- agentType: 'ai',
615
- contentType,
616
- parsingMethod,
617
- ...routingMetadataSlice,
618
- ...(effectiveModelConfig !== undefined ? { effectiveModelConfig } : {}),
619
- ...(costCompletion.costStatus === 'priced'
620
- ? {
621
- costUsd: costCompletion.cost,
622
- ...(typeof routerMetaForCost.cost === 'number'
623
- ? { cost: routerMetaForCost.cost }
624
- : { cost: costCompletion.cost })
625
- }
626
- : {}),
627
- ...(costCompletion.costStatus ? { costStatus: costCompletion.costStatus } : {}),
628
- ...(costCompletion.costBreakdown ? { costBreakdown: costCompletion.costBreakdown } : {}),
629
- ...(traceEnabled
630
- ? {
631
- requestIds: traceRequestIds,
632
- retryCount: traceRetryCount,
633
- fallbackCount: traceFallbackCount,
634
- attempts: traceAttempts,
635
- ...(traceUsageSummary !== undefined ? { usage: traceUsageSummary } : {}),
636
- ...(traceMergedRouterSnapshot !== undefined
637
- ? { mergedRouterConfig: traceMergedRouterSnapshot }
638
- : {})
639
- }
640
- : {})
610
+ if (!costCompletion.costStatus && hasNonZeroTokenUsage(tokens)) {
611
+ costCompletion = { ...costCompletion, costStatus: 'unpriced' };
641
612
  }
642
- };
643
- // Track activity success if activity was started
644
- if (activity) {
645
- try {
646
- const diag = request.diagnostics;
647
- const includeFullProviderBlob = diag?.includeFullProviderResponseInActivity !== false;
648
- const maxFullChars = typeof diag?.activityFullResponseMaxChars === 'number' && diag.activityFullResponseMaxChars > 0
649
- ? diag.activityFullResponseMaxChars
650
- : DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS;
651
- const rawFull = routerResponse.rawResponse || routerResponse;
652
- const fullResponseForActivity = includeFullProviderBlob
653
- ? capActivityFullResponsePayload(rawFull, maxFullChars)
654
- : undefined;
655
- // Create activity response with proper structure for ActivityTracker
656
- const activityResponse = {
657
- content: {
658
- rawContent: content, // Store the actual response content as rawContent
659
- ...(fullResponseForActivity !== undefined ? { fullResponse: fullResponseForActivity } : {})
660
- },
661
- parsed: parsedContent, // Include parsed content in activity record
662
- metadata: enhancedResponse.metadata,
663
- status: 'success',
664
- error: null,
665
- usage: tokens
666
- };
667
- await this.activityManager.logSuccess(activity, {
668
- ...costCompletion,
669
- response: activityResponse,
670
- endTime: Date.now(),
671
- duration: Date.now() - startTime
613
+ const routerMetaForCost = routerResponse?.metadata || {};
614
+ const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
615
+ const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
616
+ const traceMergedRouterSnapshot = traceEnabled ? pickTraceMergedRouterConfig(mergedConfig) : undefined;
617
+ if (traceEnabled && traceAttempts) {
618
+ await enrichTraceAttemptsWithBilling(traceAttempts, costCompletion, {
619
+ mergedConfig,
620
+ calculator: aiTools?.calculator ?? null,
621
+ calculateCost: this.config.aiTools?.calculateCost
672
622
  });
673
623
  }
674
- catch (activityError) {
675
- // Log activity tracking error but don't fail the request
676
- this.logger.warn('Failed to track activity success', {
624
+ const traceUsageSummary = traceEnabled
625
+ ? buildTraceUsageSummary(tokens, costCompletion, routingMetadataSlice.maxTokensRequested)
626
+ : undefined;
627
+ const enhancedResponse = {
628
+ content: content,
629
+ parsedContent: parsedContent,
630
+ metadata: {
677
631
  aiRequestId: request.aiRequestId,
678
- error: activityError instanceof Error ? activityError.message : String(activityError)
679
- });
632
+ identity: request.identity,
633
+ latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
634
+ tokens: tokens,
635
+ taskTypeId,
636
+ agentType: 'ai',
637
+ contentType,
638
+ parsingMethod,
639
+ ...routingMetadataSlice,
640
+ ...(effectiveModelConfig !== undefined ? { effectiveModelConfig } : {}),
641
+ ...(costCompletion.costStatus === 'priced'
642
+ ? {
643
+ costUsd: costCompletion.cost,
644
+ ...(typeof routerMetaForCost.cost === 'number'
645
+ ? { cost: routerMetaForCost.cost }
646
+ : { cost: costCompletion.cost })
647
+ }
648
+ : {}),
649
+ ...(costCompletion.costStatus ? { costStatus: costCompletion.costStatus } : {}),
650
+ ...(costCompletion.costBreakdown ? { costBreakdown: costCompletion.costBreakdown } : {}),
651
+ ...(traceEnabled
652
+ ? {
653
+ requestIds: traceRequestIds,
654
+ retryCount: traceRetryCount,
655
+ fallbackCount: traceFallbackCount,
656
+ attempts: traceAttempts,
657
+ ...(traceUsageSummary !== undefined ? { usage: traceUsageSummary } : {}),
658
+ ...(traceMergedRouterSnapshot !== undefined
659
+ ? { mergedRouterConfig: traceMergedRouterSnapshot }
660
+ : {})
661
+ }
662
+ : {})
663
+ }
664
+ };
665
+ // Track activity success if activity was started
666
+ if (activity) {
667
+ try {
668
+ const diag = request.diagnostics;
669
+ const includeFullProviderBlob = diag?.includeFullProviderResponseInActivity !== false;
670
+ const maxFullChars = typeof diag?.activityFullResponseMaxChars === 'number' && diag.activityFullResponseMaxChars > 0
671
+ ? diag.activityFullResponseMaxChars
672
+ : DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS;
673
+ const rawFull = routerResponse.rawResponse || routerResponse;
674
+ const fullResponseForActivity = includeFullProviderBlob
675
+ ? capActivityFullResponsePayload(rawFull, maxFullChars)
676
+ : undefined;
677
+ // Create activity response with proper structure for ActivityTracker
678
+ const activityResponse = {
679
+ content: {
680
+ rawContent: content, // Store the actual response content as rawContent
681
+ ...(fullResponseForActivity !== undefined ? { fullResponse: fullResponseForActivity } : {})
682
+ },
683
+ parsed: parsedContent, // Include parsed content in activity record
684
+ metadata: enhancedResponse.metadata,
685
+ status: 'success',
686
+ error: null,
687
+ usage: tokens
688
+ };
689
+ await this.activityManager.logSuccess(activity, {
690
+ ...costCompletion,
691
+ response: activityResponse,
692
+ endTime: Date.now(),
693
+ duration: Date.now() - startTime
694
+ });
695
+ }
696
+ catch (activityError) {
697
+ // Log activity tracking error but don't fail the request
698
+ this.logger.warn('Failed to track activity success', {
699
+ aiRequestId: request.aiRequestId,
700
+ error: activityError instanceof Error ? activityError.message : String(activityError)
701
+ });
702
+ }
680
703
  }
704
+ if (optimixerPrediction) {
705
+ await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
706
+ }
707
+ warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
708
+ tokens: enhancedResponse.metadata.tokens,
709
+ costUsd: enhancedResponse.metadata.costUsd,
710
+ cost: enhancedResponse.metadata.cost
711
+ }, 'invoke');
712
+ this.logger.debug('gateway: enhancedResponse', withActivityIdentity(request.identity, {
713
+ latencyMs: enhancedResponse.metadata?.latencyMs,
714
+ contentType: enhancedResponse.metadata?.contentType,
715
+ debugKind: gatewayLogDebug.state
716
+ }));
717
+ return enhancedResponse;
681
718
  }
682
- if (optimixerPrediction) {
683
- await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
684
- }
685
- warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
686
- tokens: enhancedResponse.metadata.tokens,
687
- costUsd: enhancedResponse.metadata.costUsd,
688
- cost: enhancedResponse.metadata.cost
689
- }, 'invoke');
690
- this.logger.debug('gateway: enhancedResponse', withActivityIdentity(request.identity, {
691
- latencyMs: enhancedResponse.metadata?.latencyMs,
692
- contentType: enhancedResponse.metadata?.contentType,
693
- debugKind: gatewayLogDebug.state
694
- }));
695
- return enhancedResponse;
696
- }
697
- catch (error) {
698
- const err = error instanceof Error ? error : new Error(String(error));
699
- const partial = tryExtractRouterLikePayloadFromErrorChain(err);
700
- const rejectMeta = buildInvokeRejectionMetadata({
701
- request,
702
- taskTypeId,
703
- startTime,
704
- mergedConfig,
705
- partialRouterPayload: partial,
706
- gatewayAiRequestId: request.aiRequestId,
707
- error: err
708
- });
709
- attachGatewayInvokeRejectionMetadata(err, rejectMeta);
710
- if (err.message.includes(NO_PROVIDER_ERROR)) {
711
- const wrapped = new Error(err.message + NO_PROVIDER_HINT);
712
- wrapped.cause = err;
713
- attachGatewayInvokeRejectionMetadata(wrapped, rejectMeta);
714
- throw wrapped;
719
+ catch (error) {
720
+ const err = error instanceof Error ? error : new Error(String(error));
721
+ const partial = tryExtractRouterLikePayloadFromErrorChain(err);
722
+ const rejectMeta = buildInvokeRejectionMetadata({
723
+ request,
724
+ taskTypeId,
725
+ startTime,
726
+ mergedConfig,
727
+ partialRouterPayload: partial,
728
+ gatewayAiRequestId: request.aiRequestId,
729
+ error: err
730
+ });
731
+ attachGatewayInvokeRejectionMetadata(err, rejectMeta);
732
+ if (err.message.includes(NO_PROVIDER_ERROR)) {
733
+ const wrapped = new Error(err.message + NO_PROVIDER_HINT);
734
+ wrapped.cause = err;
735
+ attachGatewayInvokeRejectionMetadata(wrapped, rejectMeta);
736
+ throw wrapped;
737
+ }
738
+ throw err;
715
739
  }
716
- throw err;
717
- }
740
+ });
718
741
  }
719
742
  async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
720
743
  if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {