@x12i/ai-gateway 9.6.3 → 9.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -3
- package/dist/ai-tools-client.d.ts +27 -3
- package/dist/ai-tools-client.js +54 -8
- package/dist/gateway-config.d.ts +2 -0
- package/dist/gateway-config.js +16 -10
- package/dist/gateway-log-meta.d.ts +5 -1
- package/dist/gateway-log-meta.js +19 -1
- package/dist/gateway-provider-auto-register.js +1 -1
- package/dist/gateway-utils.d.ts +2 -1
- package/dist/gateway-utils.js +9 -7
- package/dist/gateway.d.ts +2 -0
- package/dist/gateway.js +601 -578
- package/dist/index.d.ts +4 -3
- package/dist/index.js +3 -2
- package/dist/logger-factory.d.ts +2 -0
- package/dist/logger-factory.js +11 -14
- package/dist/openrouter-routing.d.ts +12 -0
- package/dist/openrouter-routing.js +27 -0
- package/dist/runtime-objects.d.ts +2 -19
- package/dist/types.d.ts +4 -1
- package/dist-cjs/ai-tools-client.cjs +54 -8
- package/dist-cjs/ai-tools-client.d.ts +27 -3
- package/dist-cjs/gateway-config.cjs +16 -10
- package/dist-cjs/gateway-config.d.ts +2 -0
- package/dist-cjs/gateway-log-meta.cjs +19 -1
- package/dist-cjs/gateway-log-meta.d.ts +5 -1
- package/dist-cjs/gateway-provider-auto-register.cjs +1 -1
- package/dist-cjs/gateway-utils.cjs +9 -7
- package/dist-cjs/gateway-utils.d.ts +2 -1
- package/dist-cjs/gateway.cjs +601 -578
- package/dist-cjs/gateway.d.ts +2 -0
- package/dist-cjs/index.cjs +3 -2
- package/dist-cjs/index.d.ts +4 -3
- package/dist-cjs/logger-factory.cjs +11 -14
- package/dist-cjs/logger-factory.d.ts +2 -0
- package/dist-cjs/openrouter-routing.cjs +27 -0
- package/dist-cjs/openrouter-routing.d.ts +12 -0
- package/dist-cjs/runtime-objects.d.ts +2 -19
- package/dist-cjs/types.d.ts +4 -1
- package/package.json +5 -5
package/dist-cjs/gateway.cjs
CHANGED
|
@@ -13,12 +13,13 @@ import { enrichParsedContentForOutputContract, resolveOutputContractFieldKeys }
|
|
|
13
13
|
import { attachGatewayInvokeRejectionMetadata, buildGatewayFallbackAttemptsFromTrace, buildInvokeRejectionMetadata, capActivityFullResponsePayload, formatFallbackExhaustionMessage, logResolvedModelRouting, mapGatewayFallbackAttemptsToRouter, hasNonZeroTokenUsage, DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS, extractCostUsdFromRouterResponse, extractTokenUsageFromRouterResponse, mergeConfig, pickEffectiveModelConfigForMetadata, pickInvokeRoutingMetadataSlice, pickTraceMergedRouterConfig, resolveCostCompletionWithAiTools, buildOptimixerActualUsage, buildTraceUsageSummary, enrichTraceAttemptsWithBilling, isMaxTokensExplicitlySet, tryExtractRouterLikePayloadFromErrorChain } from './gateway-utils.js';
|
|
14
14
|
import { getAiToolsClient } from './ai-tools-client.js';
|
|
15
15
|
import { autoRegisterProviders } from './gateway-provider-auto-register.js';
|
|
16
|
+
import { applyOpenRouterInvokePolicy } from './ai-tools-client.js';
|
|
16
17
|
import { setGatewayLastJobId, setGatewayRuntimeClients } from './runtime-objects.js';
|
|
17
|
-
import { gatewayLogDebug, withActivityIdentity } from './gateway-log-meta.js';
|
|
18
|
+
import { gatewayLogDebug, withActivityIdentity, withGatewayLogContext } from './gateway-log-meta.js';
|
|
18
19
|
import { invokeWithRetry } from './gateway-retry.js';
|
|
19
20
|
/** Error message thrown by the router when no provider is registered or specified */
|
|
20
21
|
const NO_PROVIDER_ERROR = 'No provider specified and no providers registered';
|
|
21
|
-
const NO_PROVIDER_HINT = ' Set OPENROUTER_API_KEY in the environment to use OpenRouter
|
|
22
|
+
const NO_PROVIDER_HINT = ' Set OPENROUTER_API_KEY in the environment to use OpenRouter, or register a provider with the router (e.g. via autoRegisterProviders or gateway config).';
|
|
22
23
|
/** Warn when a successful call reports no tokens and/or explicit zero cost (often missing adapter metadata). */
|
|
23
24
|
function warnIfSuccessfulInvokeReportsZeroUsageOrCost(logger, identity, meta, invokeKind) {
|
|
24
25
|
const { tokens, costUsd, cost } = meta;
|
|
@@ -51,6 +52,8 @@ export class AIGateway {
|
|
|
51
52
|
defaultModelConfig = {};
|
|
52
53
|
_autoRegisterDone = false;
|
|
53
54
|
_aiToolsClient = null;
|
|
55
|
+
preferOpenRouter;
|
|
56
|
+
openRouterApiKey;
|
|
54
57
|
constructor(config = {}, activityManager) {
|
|
55
58
|
this.config = config;
|
|
56
59
|
this.activityManager = activityManager;
|
|
@@ -61,6 +64,8 @@ export class AIGateway {
|
|
|
61
64
|
this.optimixerManager = components.optimixerManager;
|
|
62
65
|
this.messageBuilderConfig = components.messageBuilderConfig;
|
|
63
66
|
this.defaultModelConfig = components.defaultModelConfig ?? {};
|
|
67
|
+
this.preferOpenRouter = components.preferOpenRouter;
|
|
68
|
+
this.openRouterApiKey = components.openRouterApiKey;
|
|
64
69
|
setGatewayRuntimeClients({
|
|
65
70
|
activix: this.activityManager?.getTracker(),
|
|
66
71
|
logger: this.logger
|
|
@@ -74,647 +79,665 @@ export class AIGateway {
|
|
|
74
79
|
* Invoke chat request (without structured output requirements)
|
|
75
80
|
*/
|
|
76
81
|
async invokeChat(request) {
|
|
77
|
-
const startTime = Date.now();
|
|
78
82
|
// Basic validation
|
|
79
83
|
validateChatRequest(request);
|
|
80
84
|
ensureGatewayRequestIdentity(request, undefined, this.logger);
|
|
81
85
|
setGatewayLastJobId(resolveRuntimeJobId(request));
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
// Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
|
|
95
|
-
if (!this._autoRegisterDone) {
|
|
96
|
-
await autoRegisterProviders(this.router, this.logger);
|
|
97
|
-
this._autoRegisterDone = true;
|
|
98
|
-
}
|
|
99
|
-
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
100
|
-
// Start activity tracking if available
|
|
101
|
-
let activity = undefined;
|
|
102
|
-
if (this.activityManager) {
|
|
103
|
-
try {
|
|
104
|
-
activity = await this.activityManager.startActivity(request, startTime);
|
|
105
|
-
}
|
|
106
|
-
catch (activityError) {
|
|
107
|
-
// Log activity tracking error but don't fail the request
|
|
108
|
-
this.logger.warn('Failed to start activity tracking', {
|
|
109
|
-
aiRequestId: request.aiRequestId,
|
|
110
|
-
error: activityError instanceof Error ? activityError.message : String(activityError)
|
|
111
|
-
});
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
try {
|
|
115
|
-
// Call router directly with merged config
|
|
116
|
-
const response = await this.router.invoke({
|
|
117
|
-
request: {
|
|
118
|
-
messages,
|
|
119
|
-
config: mergedConfig,
|
|
120
|
-
identity: request.identity
|
|
121
|
-
},
|
|
122
|
-
mode: 'sync'
|
|
86
|
+
return withGatewayLogContext(request.identity, async () => {
|
|
87
|
+
const startTime = Date.now();
|
|
88
|
+
// Generate simple task type ID
|
|
89
|
+
const taskTypeId = request.taskTypeId || `task-${Date.now()}`;
|
|
90
|
+
// Simple message construction
|
|
91
|
+
const messages = this.buildSimpleMessages(request);
|
|
92
|
+
// Merge config (modelConfig > request.config > gateway defaults)
|
|
93
|
+
const aiTools = await this.getAiTools();
|
|
94
|
+
const mergedConfig = await mergeConfig(request, this.config, this.logger, {
|
|
95
|
+
defaultModelConfig: this.defaultModelConfig,
|
|
96
|
+
catalog: aiTools?.catalog ?? null,
|
|
97
|
+
routingEnv: aiTools?.routingEnv,
|
|
123
98
|
});
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
99
|
+
// Activix start snapshot must match what the router receives (modelConfig-only callers omit request.config.model).
|
|
100
|
+
request._mergedRouterConfig = mergedConfig;
|
|
101
|
+
applyOpenRouterInvokePolicy(mergedConfig, {
|
|
102
|
+
preferOpenRouter: this.preferOpenRouter,
|
|
103
|
+
openRouterApiKey: this.openRouterApiKey,
|
|
104
|
+
routingEnv: aiTools?.routingEnv,
|
|
105
|
+
resolution: request._modelResolution,
|
|
130
106
|
});
|
|
131
|
-
//
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
agentType: 'chat',
|
|
141
|
-
...(costCompletionChat.costStatus === 'priced'
|
|
142
|
-
? {
|
|
143
|
-
costUsd: costCompletionChat.cost,
|
|
144
|
-
...(typeof metaChat.cost === 'number'
|
|
145
|
-
? { cost: metaChat.cost }
|
|
146
|
-
: { cost: costCompletionChat.cost })
|
|
147
|
-
}
|
|
148
|
-
: {}),
|
|
149
|
-
...(costCompletionChat.costStatus ? { costStatus: costCompletionChat.costStatus } : {}),
|
|
150
|
-
...(costCompletionChat.costBreakdown
|
|
151
|
-
? { costBreakdown: costCompletionChat.costBreakdown }
|
|
152
|
-
: {})
|
|
153
|
-
}
|
|
154
|
-
};
|
|
155
|
-
// Track activity success if activity was started
|
|
156
|
-
if (activity) {
|
|
107
|
+
// Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
|
|
108
|
+
if (!this._autoRegisterDone) {
|
|
109
|
+
await autoRegisterProviders(this.router, this.logger);
|
|
110
|
+
this._autoRegisterDone = true;
|
|
111
|
+
}
|
|
112
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
113
|
+
// Start activity tracking if available
|
|
114
|
+
let activity = undefined;
|
|
115
|
+
if (this.activityManager) {
|
|
157
116
|
try {
|
|
158
|
-
await this.activityManager.
|
|
159
|
-
...costCompletionChat,
|
|
160
|
-
response: enhancedResponse,
|
|
161
|
-
endTime: Date.now(),
|
|
162
|
-
duration: Date.now() - startTime
|
|
163
|
-
});
|
|
117
|
+
activity = await this.activityManager.startActivity(request, startTime);
|
|
164
118
|
}
|
|
165
119
|
catch (activityError) {
|
|
166
120
|
// Log activity tracking error but don't fail the request
|
|
167
|
-
this.logger.warn('Failed to
|
|
121
|
+
this.logger.warn('Failed to start activity tracking', {
|
|
168
122
|
aiRequestId: request.aiRequestId,
|
|
169
123
|
error: activityError instanceof Error ? activityError.message : String(activityError)
|
|
170
124
|
});
|
|
171
125
|
}
|
|
172
126
|
}
|
|
173
|
-
|
|
174
|
-
|
|
127
|
+
try {
|
|
128
|
+
// Call router directly with merged config
|
|
129
|
+
const response = await this.router.invoke({
|
|
130
|
+
request: {
|
|
131
|
+
messages,
|
|
132
|
+
config: mergedConfig,
|
|
133
|
+
identity: request.identity
|
|
134
|
+
},
|
|
135
|
+
mode: 'sync'
|
|
136
|
+
});
|
|
137
|
+
const metaChat = response?.metadata || {};
|
|
138
|
+
const tokensChat = extractTokenUsageFromRouterResponse(response);
|
|
139
|
+
const costCompletionChat = await resolveCostCompletionWithAiTools(response, tokensChat, {
|
|
140
|
+
mergedConfig,
|
|
141
|
+
calculator: aiTools?.calculator ?? null,
|
|
142
|
+
calculateCost: this.config.aiTools?.calculateCost
|
|
143
|
+
});
|
|
144
|
+
// Create enhanced response
|
|
145
|
+
const enhancedResponse = {
|
|
146
|
+
content: response.content || '',
|
|
147
|
+
metadata: {
|
|
148
|
+
aiRequestId: request.aiRequestId,
|
|
149
|
+
identity: request.identity,
|
|
150
|
+
latencyMs: Date.now() - startTime,
|
|
151
|
+
tokens: tokensChat,
|
|
152
|
+
taskTypeId,
|
|
153
|
+
agentType: 'chat',
|
|
154
|
+
...(costCompletionChat.costStatus === 'priced'
|
|
155
|
+
? {
|
|
156
|
+
costUsd: costCompletionChat.cost,
|
|
157
|
+
...(typeof metaChat.cost === 'number'
|
|
158
|
+
? { cost: metaChat.cost }
|
|
159
|
+
: { cost: costCompletionChat.cost })
|
|
160
|
+
}
|
|
161
|
+
: {}),
|
|
162
|
+
...(costCompletionChat.costStatus ? { costStatus: costCompletionChat.costStatus } : {}),
|
|
163
|
+
...(costCompletionChat.costBreakdown
|
|
164
|
+
? { costBreakdown: costCompletionChat.costBreakdown }
|
|
165
|
+
: {})
|
|
166
|
+
}
|
|
167
|
+
};
|
|
168
|
+
// Track activity success if activity was started
|
|
169
|
+
if (activity) {
|
|
170
|
+
try {
|
|
171
|
+
await this.activityManager.logSuccess(activity, {
|
|
172
|
+
...costCompletionChat,
|
|
173
|
+
response: enhancedResponse,
|
|
174
|
+
endTime: Date.now(),
|
|
175
|
+
duration: Date.now() - startTime
|
|
176
|
+
});
|
|
177
|
+
}
|
|
178
|
+
catch (activityError) {
|
|
179
|
+
// Log activity tracking error but don't fail the request
|
|
180
|
+
this.logger.warn('Failed to track activity success', {
|
|
181
|
+
aiRequestId: request.aiRequestId,
|
|
182
|
+
error: activityError instanceof Error ? activityError.message : String(activityError)
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
if (optimixerPrediction) {
|
|
187
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokensChat, response, Date.now() - startTime));
|
|
188
|
+
}
|
|
189
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
190
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
191
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
192
|
+
cost: enhancedResponse.metadata.cost
|
|
193
|
+
}, 'invokeChat');
|
|
194
|
+
return enhancedResponse;
|
|
175
195
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
}
|
|
183
|
-
catch (error) {
|
|
184
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
185
|
-
if (err.message.includes(NO_PROVIDER_ERROR)) {
|
|
186
|
-
throw new Error(err.message + NO_PROVIDER_HINT);
|
|
196
|
+
catch (error) {
|
|
197
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
198
|
+
if (err.message.includes(NO_PROVIDER_ERROR)) {
|
|
199
|
+
throw new Error(err.message + NO_PROVIDER_HINT);
|
|
200
|
+
}
|
|
201
|
+
throw err;
|
|
187
202
|
}
|
|
188
|
-
|
|
189
|
-
}
|
|
203
|
+
});
|
|
190
204
|
}
|
|
191
205
|
/**
|
|
192
206
|
* Invoke AI request (with structured output support)
|
|
193
207
|
*/
|
|
194
208
|
async invoke(request) {
|
|
195
|
-
const startTime = Date.now();
|
|
196
209
|
// Basic validation
|
|
197
210
|
validateAIRequest(request);
|
|
198
211
|
ensureGatewayRequestIdentity(request, undefined, this.logger);
|
|
199
212
|
setGatewayLastJobId(resolveRuntimeJobId(request));
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
builtMessages
|
|
211
|
-
parsedSnapshot
|
|
212
|
-
});
|
|
213
|
-
messages = builtMessages.messages;
|
|
214
|
-
resolvedRequest = request;
|
|
215
|
-
}
|
|
216
|
-
catch (error) {
|
|
217
|
-
// If message building fails (e.g., prompt/instruction resolution error), log as bad request
|
|
218
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
219
|
-
const endTime = Date.now();
|
|
220
|
-
const duration = endTime - startTime;
|
|
221
|
-
// Determine if this is a prompt/instruction resolution error
|
|
222
|
-
// If a key was provided but couldn't be resolved to content, it's a bad request
|
|
223
|
-
const errWithCode = err; // Type assertion for error with optional code property
|
|
224
|
-
const isResolutionError = err.name === 'InstructionNotFoundError' ||
|
|
225
|
-
err.name === 'InstructionBackendError' ||
|
|
226
|
-
err.name === 'TemplateResolutionError' ||
|
|
227
|
-
errWithCode.code === 'PROMPT_NOT_FOUND' ||
|
|
228
|
-
errWithCode.code === 'PROMPT_RESOLUTION_ERROR' ||
|
|
229
|
-
errWithCode.code === 'PROMPT_RENDERED_EMPTY' ||
|
|
230
|
-
errWithCode.code === 'TEMPLATE_RESOLUTION_ERROR' ||
|
|
231
|
-
errWithCode.code === 'TEMPLATE_VARIABLE_MISSING' ||
|
|
232
|
-
err.message.includes('Failed to resolve') ||
|
|
233
|
-
err.message.includes('Failed to render prompt template') ||
|
|
234
|
-
err.message.includes('not found') ||
|
|
235
|
-
err.message.includes('Instruction not found') ||
|
|
236
|
-
err.message.includes('Prompt not found');
|
|
237
|
-
if (isResolutionError && this.activityManager) {
|
|
238
|
-
// Log to bad requests collection
|
|
239
|
-
await this.activityManager.logBadRequest(request, err, {
|
|
240
|
-
endTime,
|
|
241
|
-
duration,
|
|
242
|
-
error: err.message,
|
|
243
|
-
errorType: errWithCode.code || 'MessageBuildError',
|
|
244
|
-
diagnosticInfo: {
|
|
245
|
-
errorCode: errWithCode.code,
|
|
246
|
-
errorName: err.name,
|
|
247
|
-
failureType: 'validation-failure',
|
|
248
|
-
stage: 'message-building',
|
|
249
|
-
prompt: request.prompt,
|
|
250
|
-
instructions: typeof request.instructions === 'string' ? request.instructions.substring(0, 100) : '(object)'
|
|
251
|
-
},
|
|
252
|
-
failureType: 'validation-failure'
|
|
253
|
-
}, startTime);
|
|
254
|
-
}
|
|
255
|
-
const rejectMeta = buildInvokeRejectionMetadata({
|
|
256
|
-
request,
|
|
257
|
-
taskTypeId,
|
|
258
|
-
startTime,
|
|
259
|
-
gatewayAiRequestId: request.aiRequestId
|
|
260
|
-
});
|
|
261
|
-
attachGatewayInvokeRejectionMetadata(err, rejectMeta);
|
|
262
|
-
// Re-throw the error so it propagates to the caller
|
|
263
|
-
throw err;
|
|
264
|
-
}
|
|
265
|
-
// Store messages in parsedSnapshot for activity tracking
|
|
266
|
-
parsedSnapshot.messages = messages;
|
|
267
|
-
// parsed.instructions and parsed.prompt are set by buildMessages to the resolved/rendered content
|
|
268
|
-
// (after key resolution and Rendrix). Do not overwrite with raw request keys.
|
|
269
|
-
if (parsedSnapshot.context === undefined) {
|
|
270
|
-
parsedSnapshot.context = request.context;
|
|
271
|
-
}
|
|
272
|
-
// Attach parsedSnapshot to request for activity tracking
|
|
273
|
-
request._parsedRequest = parsedSnapshot;
|
|
274
|
-
// Merge config (modelConfig > request.config > gateway defaults)
|
|
275
|
-
const aiTools = await this.getAiTools();
|
|
276
|
-
const mergedConfig = await mergeConfig(request, this.config, this.logger, {
|
|
277
|
-
defaultModelConfig: this.defaultModelConfig,
|
|
278
|
-
catalog: aiTools?.catalog ?? null
|
|
279
|
-
});
|
|
280
|
-
request._mergedRouterConfig = mergedConfig;
|
|
281
|
-
logResolvedModelRouting(this.logger, request, mergedConfig);
|
|
282
|
-
const diagnosticsMode = request.diagnostics?.mode;
|
|
283
|
-
const traceEnabled = diagnosticsMode === 'trace';
|
|
284
|
-
const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
|
|
285
|
-
// Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
|
|
286
|
-
if (!this._autoRegisterDone) {
|
|
287
|
-
await autoRegisterProviders(this.router, this.logger);
|
|
288
|
-
this._autoRegisterDone = true;
|
|
289
|
-
}
|
|
290
|
-
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
291
|
-
// Start activity tracking if available
|
|
292
|
-
let activity = undefined;
|
|
293
|
-
if (this.activityManager) {
|
|
213
|
+
return withGatewayLogContext(request.identity, async () => {
|
|
214
|
+
const startTime = Date.now();
|
|
215
|
+
// Generate simple task type ID
|
|
216
|
+
const taskTypeId = request.taskTypeId || `task-${Date.now()}`;
|
|
217
|
+
// Resolve instructions and build messages using proper components
|
|
218
|
+
let resolvedRequest = request;
|
|
219
|
+
let messages = [];
|
|
220
|
+
// Create parsedSnapshot to store parsed request data and messages
|
|
221
|
+
const parsedSnapshot = {};
|
|
222
|
+
// Use proper instruction resolution and message building
|
|
223
|
+
let builtMessages;
|
|
294
224
|
try {
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
catch (activityError) {
|
|
298
|
-
// Log activity tracking error but don't fail the request
|
|
299
|
-
this.logger.warn('Failed to start activity tracking', {
|
|
300
|
-
aiRequestId: request.aiRequestId,
|
|
301
|
-
error: activityError instanceof Error ? activityError.message : String(activityError)
|
|
225
|
+
builtMessages = await buildMessages(request, this.messageBuilderConfig, {
|
|
226
|
+
parsedSnapshot
|
|
302
227
|
});
|
|
228
|
+
messages = builtMessages.messages;
|
|
229
|
+
resolvedRequest = request;
|
|
303
230
|
}
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
231
|
+
catch (error) {
|
|
232
|
+
// If message building fails (e.g., prompt/instruction resolution error), log as bad request
|
|
233
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
234
|
+
const endTime = Date.now();
|
|
235
|
+
const duration = endTime - startTime;
|
|
236
|
+
// Determine if this is a prompt/instruction resolution error
|
|
237
|
+
// If a key was provided but couldn't be resolved to content, it's a bad request
|
|
238
|
+
const errWithCode = err; // Type assertion for error with optional code property
|
|
239
|
+
const isResolutionError = err.name === 'InstructionNotFoundError' ||
|
|
240
|
+
err.name === 'InstructionBackendError' ||
|
|
241
|
+
err.name === 'TemplateResolutionError' ||
|
|
242
|
+
errWithCode.code === 'PROMPT_NOT_FOUND' ||
|
|
243
|
+
errWithCode.code === 'PROMPT_RESOLUTION_ERROR' ||
|
|
244
|
+
errWithCode.code === 'PROMPT_RENDERED_EMPTY' ||
|
|
245
|
+
errWithCode.code === 'TEMPLATE_RESOLUTION_ERROR' ||
|
|
246
|
+
errWithCode.code === 'TEMPLATE_VARIABLE_MISSING' ||
|
|
247
|
+
err.message.includes('Failed to resolve') ||
|
|
248
|
+
err.message.includes('Failed to render prompt template') ||
|
|
249
|
+
err.message.includes('not found') ||
|
|
250
|
+
err.message.includes('Instruction not found') ||
|
|
251
|
+
err.message.includes('Prompt not found');
|
|
252
|
+
if (isResolutionError && this.activityManager) {
|
|
253
|
+
// Log to bad requests collection
|
|
254
|
+
await this.activityManager.logBadRequest(request, err, {
|
|
255
|
+
endTime,
|
|
256
|
+
duration,
|
|
257
|
+
error: err.message,
|
|
258
|
+
errorType: errWithCode.code || 'MessageBuildError',
|
|
259
|
+
diagnosticInfo: {
|
|
260
|
+
errorCode: errWithCode.code,
|
|
261
|
+
errorName: err.name,
|
|
262
|
+
failureType: 'validation-failure',
|
|
263
|
+
stage: 'message-building',
|
|
264
|
+
prompt: request.prompt,
|
|
265
|
+
instructions: typeof request.instructions === 'string' ? request.instructions.substring(0, 100) : '(object)'
|
|
266
|
+
},
|
|
267
|
+
failureType: 'validation-failure'
|
|
268
|
+
}, startTime);
|
|
269
|
+
}
|
|
270
|
+
const rejectMeta = buildInvokeRejectionMetadata({
|
|
271
|
+
request,
|
|
272
|
+
taskTypeId,
|
|
273
|
+
startTime,
|
|
274
|
+
gatewayAiRequestId: request.aiRequestId
|
|
321
275
|
});
|
|
276
|
+
attachGatewayInvokeRejectionMetadata(err, rejectMeta);
|
|
277
|
+
// Re-throw the error so it propagates to the caller
|
|
278
|
+
throw err;
|
|
322
279
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
280
|
+
// Store messages in parsedSnapshot for activity tracking
|
|
281
|
+
parsedSnapshot.messages = messages;
|
|
282
|
+
// parsed.instructions and parsed.prompt are set by buildMessages to the resolved/rendered content
|
|
283
|
+
// (after key resolution and Rendrix). Do not overwrite with raw request keys.
|
|
284
|
+
if (parsedSnapshot.context === undefined) {
|
|
285
|
+
parsedSnapshot.context = request.context;
|
|
286
|
+
}
|
|
287
|
+
// Attach parsedSnapshot to request for activity tracking
|
|
288
|
+
request._parsedRequest = parsedSnapshot;
|
|
289
|
+
// Merge config (modelConfig > request.config > gateway defaults)
|
|
290
|
+
const aiTools = await this.getAiTools();
|
|
291
|
+
const mergedConfig = await mergeConfig(request, this.config, this.logger, {
|
|
292
|
+
defaultModelConfig: this.defaultModelConfig,
|
|
293
|
+
catalog: aiTools?.catalog ?? null,
|
|
294
|
+
routingEnv: aiTools?.routingEnv,
|
|
295
|
+
});
|
|
296
|
+
request._mergedRouterConfig = mergedConfig;
|
|
297
|
+
applyOpenRouterInvokePolicy(mergedConfig, {
|
|
298
|
+
preferOpenRouter: this.preferOpenRouter,
|
|
299
|
+
openRouterApiKey: this.openRouterApiKey,
|
|
300
|
+
routingEnv: aiTools?.routingEnv,
|
|
301
|
+
resolution: request._modelResolution,
|
|
302
|
+
});
|
|
303
|
+
logResolvedModelRouting(this.logger, request, mergedConfig);
|
|
304
|
+
const diagnosticsMode = request.diagnostics?.mode;
|
|
305
|
+
const traceEnabled = diagnosticsMode === 'trace';
|
|
306
|
+
const includeRawProviderPayload = request.diagnostics?.includeRawProviderPayload === true;
|
|
307
|
+
// Lazy auto-register providers from env (OPENAI_API_KEY, etc.) so consumers don't have to call init
|
|
308
|
+
if (!this._autoRegisterDone) {
|
|
309
|
+
await autoRegisterProviders(this.router, this.logger);
|
|
310
|
+
this._autoRegisterDone = true;
|
|
311
|
+
}
|
|
312
|
+
const optimixerPrediction = await this.applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages);
|
|
313
|
+
// Start activity tracking if available
|
|
314
|
+
let activity = undefined;
|
|
315
|
+
if (this.activityManager) {
|
|
316
|
+
try {
|
|
317
|
+
activity = await this.activityManager.startActivity(request, startTime);
|
|
318
|
+
}
|
|
319
|
+
catch (activityError) {
|
|
320
|
+
// Log activity tracking error but don't fail the request
|
|
321
|
+
this.logger.warn('Failed to start activity tracking', {
|
|
322
|
+
aiRequestId: request.aiRequestId,
|
|
323
|
+
error: activityError instanceof Error ? activityError.message : String(activityError)
|
|
324
|
+
});
|
|
349
325
|
}
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
326
|
+
}
|
|
327
|
+
try {
|
|
328
|
+
let response;
|
|
329
|
+
let traceAttempts;
|
|
330
|
+
let traceRetryCount;
|
|
331
|
+
let traceFallbackCount;
|
|
332
|
+
let traceRequestIds;
|
|
333
|
+
let providerCallLatencyMs;
|
|
334
|
+
if (!traceEnabled) {
|
|
335
|
+
// Default minimal behavior (no extra allocations/payload).
|
|
336
|
+
response = await this.router.invoke({
|
|
337
|
+
request: {
|
|
338
|
+
messages,
|
|
339
|
+
config: mergedConfig,
|
|
340
|
+
identity: request.identity
|
|
341
|
+
},
|
|
342
|
+
mode: 'sync'
|
|
343
|
+
});
|
|
353
344
|
}
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
345
|
+
else {
|
|
346
|
+
const capString = (s, maxLen) => (s.length <= maxLen ? s : s.slice(0, maxLen) + '…');
|
|
347
|
+
const capErrorMessage = (s) => capString(s, 500);
|
|
348
|
+
const safeJsonStringify = (value) => {
|
|
349
|
+
try {
|
|
350
|
+
return JSON.stringify(value);
|
|
359
351
|
}
|
|
352
|
+
catch {
|
|
353
|
+
return '[Unserializable]';
|
|
354
|
+
}
|
|
355
|
+
};
|
|
356
|
+
const gatewayAiRequestId = request.aiRequestId;
|
|
357
|
+
const baseRequest = {
|
|
358
|
+
request: {
|
|
359
|
+
messages,
|
|
360
|
+
config: mergedConfig,
|
|
361
|
+
identity: request.identity
|
|
362
|
+
},
|
|
363
|
+
mode: 'sync'
|
|
364
|
+
};
|
|
365
|
+
// Build deterministic provider/model candidate chain.
|
|
366
|
+
const candidates = [];
|
|
367
|
+
const primaryProvider = mergedConfig?.provider;
|
|
368
|
+
const primaryModel = mergedConfig?.model;
|
|
369
|
+
if (typeof primaryProvider === 'string' && typeof primaryModel === 'string') {
|
|
370
|
+
candidates.push({ provider: primaryProvider, model: primaryModel });
|
|
360
371
|
}
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
const
|
|
366
|
-
if (
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
});
|
|
371
|
-
traceAttempts = [];
|
|
372
|
-
// Attempt execution across fallbacks (authoritative ordering).
|
|
373
|
-
let lastError;
|
|
374
|
-
for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
|
|
375
|
-
const candidate = deduped[fallbackIndex];
|
|
376
|
-
// Track per-retry attempt objects through retry hooks.
|
|
377
|
-
const attemptIndexByRetry = new Map();
|
|
378
|
-
try {
|
|
379
|
-
const result = await invokeWithRetry({
|
|
380
|
-
...baseRequest,
|
|
381
|
-
request: {
|
|
382
|
-
...baseRequest.request,
|
|
383
|
-
config: {
|
|
384
|
-
...mergedConfig,
|
|
385
|
-
provider: candidate.provider,
|
|
386
|
-
model: candidate.model
|
|
387
|
-
}
|
|
372
|
+
const defaultTarget = this.config?.defaultTarget;
|
|
373
|
+
if (defaultTarget?.engine && defaultTarget?.model) {
|
|
374
|
+
candidates.push({ provider: String(defaultTarget.engine), model: String(defaultTarget.model) });
|
|
375
|
+
}
|
|
376
|
+
const fallbackChain = this.config?.fallbackChain;
|
|
377
|
+
if (Array.isArray(fallbackChain)) {
|
|
378
|
+
for (const item of fallbackChain) {
|
|
379
|
+
if (item && typeof item === 'object' && 'engine' in item && 'model' in item) {
|
|
380
|
+
candidates.push({ provider: String(item.engine), model: String(item.model) });
|
|
388
381
|
}
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
// De-dup while preserving order.
|
|
385
|
+
const seen = new Set();
|
|
386
|
+
const deduped = candidates.filter(c => {
|
|
387
|
+
const key = `${c.provider}::${c.model}`;
|
|
388
|
+
if (seen.has(key))
|
|
389
|
+
return false;
|
|
390
|
+
seen.add(key);
|
|
391
|
+
return true;
|
|
392
|
+
});
|
|
393
|
+
traceAttempts = [];
|
|
394
|
+
// Attempt execution across fallbacks (authoritative ordering).
|
|
395
|
+
let lastError;
|
|
396
|
+
for (let fallbackIndex = 0; fallbackIndex < deduped.length; fallbackIndex++) {
|
|
397
|
+
const candidate = deduped[fallbackIndex];
|
|
398
|
+
// Track per-retry attempt objects through retry hooks.
|
|
399
|
+
const attemptIndexByRetry = new Map();
|
|
400
|
+
try {
|
|
401
|
+
const result = await invokeWithRetry({
|
|
402
|
+
...baseRequest,
|
|
403
|
+
request: {
|
|
404
|
+
...baseRequest.request,
|
|
405
|
+
config: {
|
|
406
|
+
...mergedConfig,
|
|
394
407
|
provider: candidate.provider,
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
const
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
408
|
+
model: candidate.model
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
}, (this.config.retry ?? {}), request.identity.jobId || request.aiRequestId, this.router, this.logger, {
|
|
412
|
+
onTryStart: ({ retryIndex, startedAt }) => {
|
|
413
|
+
const idx = traceAttempts.push({
|
|
414
|
+
timing: { startedAt, endedAt: startedAt, durationMs: 0 },
|
|
415
|
+
routing: {
|
|
416
|
+
provider: candidate.provider,
|
|
417
|
+
requestIds: { gatewayAiRequestId },
|
|
418
|
+
retryIndex,
|
|
419
|
+
fallbackIndex
|
|
420
|
+
},
|
|
421
|
+
usage: {
|
|
422
|
+
tokens: { prompt: 0, completion: 0, total: 0 },
|
|
423
|
+
maxTokensRequested: typeof mergedConfig?.maxTokens === 'number' ? mergedConfig.maxTokens : undefined
|
|
424
|
+
},
|
|
425
|
+
modelUsed: candidate.model,
|
|
426
|
+
ok: false
|
|
427
|
+
}) - 1;
|
|
428
|
+
attemptIndexByRetry.set(retryIndex, idx);
|
|
429
|
+
},
|
|
430
|
+
onTryEnd: ({ retryIndex, endedAt, ok, response: tryResp, error: tryErr }) => {
|
|
431
|
+
const idx = attemptIndexByRetry.get(retryIndex);
|
|
432
|
+
if (idx === undefined)
|
|
433
|
+
return;
|
|
434
|
+
const a = traceAttempts[idx];
|
|
435
|
+
a.timing.endedAt = endedAt;
|
|
436
|
+
a.timing.durationMs = Math.max(0, endedAt - a.timing.startedAt);
|
|
437
|
+
a.ok = ok;
|
|
438
|
+
const respAny = tryResp;
|
|
439
|
+
if (ok && respAny) {
|
|
440
|
+
const meta = respAny.metadata || {};
|
|
441
|
+
const tokenCounts = extractTokenUsageFromRouterResponse(respAny);
|
|
442
|
+
a.usage = {
|
|
443
|
+
tokens: tokenCounts,
|
|
444
|
+
maxTokensRequested: typeof meta?.maxTokensRequested === 'number'
|
|
445
|
+
? meta.maxTokensRequested
|
|
446
|
+
: typeof mergedConfig?.maxTokens === 'number'
|
|
447
|
+
? mergedConfig.maxTokens
|
|
448
|
+
: undefined
|
|
449
|
+
};
|
|
450
|
+
a.routing.provider = meta?.provider || respAny.provider || candidate.provider;
|
|
451
|
+
if (typeof meta?.region === 'string')
|
|
452
|
+
a.routing.region = meta.region;
|
|
453
|
+
const requestIds = {
|
|
454
|
+
gatewayAiRequestId,
|
|
455
|
+
routerRequestId: respAny.requestId || meta?.requestId
|
|
456
|
+
};
|
|
457
|
+
if (typeof meta?.providerRequestId === 'string')
|
|
458
|
+
requestIds.providerRequestId = meta.providerRequestId;
|
|
459
|
+
if (typeof meta?.openrouterRequestId === 'string')
|
|
460
|
+
requestIds.openrouterRequestId = meta.openrouterRequestId;
|
|
461
|
+
if (meta?.requestIds && typeof meta.requestIds === 'object') {
|
|
462
|
+
for (const [k, v] of Object.entries(meta.requestIds)) {
|
|
463
|
+
if (typeof v === 'string')
|
|
464
|
+
requestIds[k] = v;
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
a.routing.requestIds = requestIds;
|
|
468
|
+
a.modelUsed =
|
|
469
|
+
meta?.modelUsed || meta?.model || respAny.model || candidate.model;
|
|
470
|
+
const attemptCostUsd = extractCostUsdFromRouterResponse(respAny);
|
|
471
|
+
if (typeof attemptCostUsd === 'number')
|
|
472
|
+
a.costUsd = attemptCostUsd;
|
|
473
|
+
if (includeRawProviderPayload) {
|
|
474
|
+
// Size-capped preview only.
|
|
475
|
+
const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
|
|
476
|
+
const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
|
|
477
|
+
a.rawProviderPayload = capString(rawStr, 4000);
|
|
443
478
|
}
|
|
444
479
|
}
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
meta?.modelUsed || meta?.model || respAny.model || candidate.model;
|
|
448
|
-
const attemptCostUsd = extractCostUsdFromRouterResponse(respAny);
|
|
449
|
-
if (typeof attemptCostUsd === 'number')
|
|
450
|
-
a.costUsd = attemptCostUsd;
|
|
451
|
-
if (includeRawProviderPayload) {
|
|
452
|
-
// Size-capped preview only.
|
|
453
|
-
const raw = respAny.rawResponse ?? respAny.raw ?? respAny;
|
|
454
|
-
const rawStr = typeof raw === 'string' ? raw : safeJsonStringify(raw);
|
|
455
|
-
a.rawProviderPayload = capString(rawStr, 4000);
|
|
480
|
+
else if (tryErr) {
|
|
481
|
+
a.error = { name: tryErr.name || 'Error', message: capErrorMessage(tryErr.message || String(tryErr)) };
|
|
456
482
|
}
|
|
457
483
|
}
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
484
|
+
});
|
|
485
|
+
response = result.response;
|
|
486
|
+
lastError = undefined;
|
|
487
|
+
break; // success => stop fallback chain
|
|
488
|
+
}
|
|
489
|
+
catch (err) {
|
|
490
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
491
|
+
continue;
|
|
492
|
+
}
|
|
466
493
|
}
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
494
|
+
if (!response) {
|
|
495
|
+
const fallbackAttempts = buildGatewayFallbackAttemptsFromTrace(traceAttempts, deduped, lastError);
|
|
496
|
+
const providersTried = [...new Set(deduped.map((c) => c.provider))];
|
|
497
|
+
this.logger.error('Trace fallback chain exhausted', withActivityIdentity(request.identity, {
|
|
498
|
+
providersTried,
|
|
499
|
+
candidates: deduped,
|
|
500
|
+
fallbackAttempts,
|
|
501
|
+
debugKind: gatewayLogDebug.anomaly
|
|
502
|
+
}));
|
|
503
|
+
const exhausted = new FallbackExhaustedError(mapGatewayFallbackAttemptsToRouter(fallbackAttempts));
|
|
504
|
+
exhausted.message = formatFallbackExhaustionMessage(fallbackAttempts, deduped);
|
|
505
|
+
if (lastError) {
|
|
506
|
+
exhausted.cause = lastError;
|
|
507
|
+
}
|
|
508
|
+
throw exhausted;
|
|
470
509
|
}
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
const
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
if (
|
|
484
|
-
|
|
510
|
+
// Summary counts + final request ids.
|
|
511
|
+
traceRetryCount = traceAttempts.filter(a => a.routing.retryIndex > 0).length;
|
|
512
|
+
const fallbackIndices = new Set(traceAttempts.map(a => a.routing.fallbackIndex));
|
|
513
|
+
traceFallbackCount = Math.max(0, fallbackIndices.size - 1);
|
|
514
|
+
const finalResp = response;
|
|
515
|
+
const finalMeta = finalResp?.metadata || {};
|
|
516
|
+
traceRequestIds = {
|
|
517
|
+
gatewayAiRequestId,
|
|
518
|
+
routerRequestId: finalResp?.requestId || finalMeta?.requestId
|
|
519
|
+
};
|
|
520
|
+
if (typeof finalMeta?.providerRequestId === 'string')
|
|
521
|
+
traceRequestIds.providerRequestId = finalMeta.providerRequestId;
|
|
522
|
+
if (typeof finalMeta?.openrouterRequestId === 'string')
|
|
523
|
+
traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
|
|
524
|
+
if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
|
|
525
|
+
for (const [k, v] of Object.entries(finalMeta.requestIds)) {
|
|
526
|
+
if (typeof v === 'string')
|
|
527
|
+
traceRequestIds[k] = v;
|
|
528
|
+
}
|
|
485
529
|
}
|
|
486
|
-
|
|
530
|
+
const lastOk = [...traceAttempts].reverse().find(a => a.ok);
|
|
531
|
+
providerCallLatencyMs = lastOk?.timing?.durationMs;
|
|
487
532
|
}
|
|
488
|
-
//
|
|
489
|
-
|
|
490
|
-
const
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
routerRequestId: finalResp?.requestId || finalMeta?.requestId
|
|
497
|
-
};
|
|
498
|
-
if (typeof finalMeta?.providerRequestId === 'string')
|
|
499
|
-
traceRequestIds.providerRequestId = finalMeta.providerRequestId;
|
|
500
|
-
if (typeof finalMeta?.openrouterRequestId === 'string')
|
|
501
|
-
traceRequestIds.openrouterRequestId = finalMeta.openrouterRequestId;
|
|
502
|
-
if (finalMeta?.requestIds && typeof finalMeta.requestIds === 'object') {
|
|
503
|
-
for (const [k, v] of Object.entries(finalMeta.requestIds)) {
|
|
504
|
-
if (typeof v === 'string')
|
|
505
|
-
traceRequestIds[k] = v;
|
|
506
|
-
}
|
|
533
|
+
// Contract output processing removed - expectedSchema no longer supported
|
|
534
|
+
// Create enhanced response - extract content properly from router response
|
|
535
|
+
const routerResponse = response;
|
|
536
|
+
// Extract content from router response - router returns outputText, not content
|
|
537
|
+
let content = routerResponse.content || routerResponse.outputText || '';
|
|
538
|
+
// If content is still empty, try to extract from ai-activities metadata
|
|
539
|
+
if (!content && routerResponse.metadata?.['ai-activities-response']?.outputText) {
|
|
540
|
+
content = routerResponse.metadata['ai-activities-response'].outputText;
|
|
507
541
|
}
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
contentLength: content.length,
|
|
528
|
-
hasInstructions: !!resolvedRequest.instructions,
|
|
529
|
-
debugKind: gatewayLogDebug.intent
|
|
530
|
-
}));
|
|
531
|
-
// Let flex-md extract structured data from the response content
|
|
532
|
-
const extractionResult = await extractJsonFromFlexMd(content, this.logger);
|
|
533
|
-
this.logger.debug('Flex-md extraction result', withActivityIdentity(request.identity, {
|
|
534
|
-
hasResult: !!extractionResult,
|
|
535
|
-
hasJson: !!(extractionResult && extractionResult.json),
|
|
536
|
-
method: extractionResult?.method,
|
|
537
|
-
jsonType: extractionResult?.json ? typeof extractionResult.json : 'none',
|
|
538
|
-
debugKind: gatewayLogDebug.state
|
|
539
|
-
}));
|
|
540
|
-
if (extractionResult && extractionResult.json) {
|
|
541
|
-
// Successfully extracted structured data
|
|
542
|
-
parsedContent = extractionResult.json;
|
|
543
|
-
this.logger.info('Flex-md extraction successful - parsed into structured object', withActivityIdentity(request.identity, {
|
|
544
|
-
method: extractionResult.method,
|
|
545
|
-
extractedKeys: Object.keys(extractionResult.json),
|
|
546
|
-
debugKind: gatewayLogDebug.event
|
|
542
|
+
// Parse content using available parsers (flex-md, content normalizer, etc.)
|
|
543
|
+
let parsedContent = undefined;
|
|
544
|
+
let contentType = undefined;
|
|
545
|
+
let parsingMethod = undefined;
|
|
546
|
+
// Actually use flex-md parsing - extract structured data from markdown
|
|
547
|
+
try {
|
|
548
|
+
this.logger.debug('Attempting flex-md extraction', withActivityIdentity(request.identity, {
|
|
549
|
+
contentLength: content.length,
|
|
550
|
+
hasInstructions: !!resolvedRequest.instructions,
|
|
551
|
+
debugKind: gatewayLogDebug.intent
|
|
552
|
+
}));
|
|
553
|
+
// Let flex-md extract structured data from the response content
|
|
554
|
+
const extractionResult = await extractJsonFromFlexMd(content, this.logger);
|
|
555
|
+
this.logger.debug('Flex-md extraction result', withActivityIdentity(request.identity, {
|
|
556
|
+
hasResult: !!extractionResult,
|
|
557
|
+
hasJson: !!(extractionResult && extractionResult.json),
|
|
558
|
+
method: extractionResult?.method,
|
|
559
|
+
jsonType: extractionResult?.json ? typeof extractionResult.json : 'none',
|
|
560
|
+
debugKind: gatewayLogDebug.state
|
|
547
561
|
}));
|
|
562
|
+
if (extractionResult && extractionResult.json) {
|
|
563
|
+
// Successfully extracted structured data
|
|
564
|
+
parsedContent = extractionResult.json;
|
|
565
|
+
this.logger.info('Flex-md extraction successful - parsed into structured object', withActivityIdentity(request.identity, {
|
|
566
|
+
method: extractionResult.method,
|
|
567
|
+
extractedKeys: Object.keys(extractionResult.json),
|
|
568
|
+
debugKind: gatewayLogDebug.event
|
|
569
|
+
}));
|
|
570
|
+
}
|
|
571
|
+
else {
|
|
572
|
+
// Extraction failed, fall back to raw text wrapper
|
|
573
|
+
this.logger.warn('Flex-md extraction failed - no structured data extracted', withActivityIdentity(request.identity, {
|
|
574
|
+
hasResult: !!extractionResult,
|
|
575
|
+
method: extractionResult?.method || 'none',
|
|
576
|
+
debugKind: gatewayLogDebug.anomaly
|
|
577
|
+
}));
|
|
578
|
+
parsedContent = { rawText: content };
|
|
579
|
+
}
|
|
548
580
|
}
|
|
549
|
-
|
|
581
|
+
catch (extractionError) {
|
|
550
582
|
// Extraction failed, fall back to raw text wrapper
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
583
|
+
const errorMessage = extractionError instanceof Error ? extractionError.message : String(extractionError);
|
|
584
|
+
this.logger.warn('Flex-md extraction failed - flex-md library compatibility issue', withActivityIdentity(request.identity, {
|
|
585
|
+
error: errorMessage,
|
|
586
|
+
issue: 'flex-md uses require() in ES module context - needs fixing in flex-md-loader.ts',
|
|
587
|
+
fallback: 'using rawText wrapper',
|
|
554
588
|
debugKind: gatewayLogDebug.anomaly
|
|
555
589
|
}));
|
|
556
590
|
parsedContent = { rawText: content };
|
|
557
591
|
}
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
contentType = 'structured';
|
|
571
|
-
parsingMethod = 'flex-md';
|
|
572
|
-
const outputContractKeys = resolveOutputContractFieldKeys(request);
|
|
573
|
-
parsedContent = await enrichParsedContentForOutputContract(parsedContent, content, outputContractKeys, this.logger);
|
|
574
|
-
let tokens = extractTokenUsageFromRouterResponse(routerResponse);
|
|
575
|
-
if (!(tokens.prompt || tokens.completion || tokens.total)) {
|
|
576
|
-
const alt = routerResponse?.rawResponse ?? routerResponse?.raw;
|
|
577
|
-
if (alt != null && typeof alt === 'object' && alt !== routerResponse) {
|
|
578
|
-
const second = extractTokenUsageFromRouterResponse(alt);
|
|
579
|
-
if (second.prompt || second.completion || second.total)
|
|
580
|
-
tokens = second;
|
|
592
|
+
contentType = 'structured';
|
|
593
|
+
parsingMethod = 'flex-md';
|
|
594
|
+
const outputContractKeys = resolveOutputContractFieldKeys(request);
|
|
595
|
+
parsedContent = await enrichParsedContentForOutputContract(parsedContent, content, outputContractKeys, this.logger);
|
|
596
|
+
let tokens = extractTokenUsageFromRouterResponse(routerResponse);
|
|
597
|
+
if (!(tokens.prompt || tokens.completion || tokens.total)) {
|
|
598
|
+
const alt = routerResponse?.rawResponse ?? routerResponse?.raw;
|
|
599
|
+
if (alt != null && typeof alt === 'object' && alt !== routerResponse) {
|
|
600
|
+
const second = extractTokenUsageFromRouterResponse(alt);
|
|
601
|
+
if (second.prompt || second.completion || second.total)
|
|
602
|
+
tokens = second;
|
|
603
|
+
}
|
|
581
604
|
}
|
|
582
|
-
|
|
583
|
-
let costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
|
|
584
|
-
mergedConfig,
|
|
585
|
-
calculator: aiTools?.calculator ?? null,
|
|
586
|
-
calculateCost: this.config.aiTools?.calculateCost
|
|
587
|
-
});
|
|
588
|
-
if (!costCompletion.costStatus && hasNonZeroTokenUsage(tokens)) {
|
|
589
|
-
costCompletion = { ...costCompletion, costStatus: 'unpriced' };
|
|
590
|
-
}
|
|
591
|
-
const routerMetaForCost = routerResponse?.metadata || {};
|
|
592
|
-
const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
|
|
593
|
-
const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
|
|
594
|
-
const traceMergedRouterSnapshot = traceEnabled ? pickTraceMergedRouterConfig(mergedConfig) : undefined;
|
|
595
|
-
if (traceEnabled && traceAttempts) {
|
|
596
|
-
await enrichTraceAttemptsWithBilling(traceAttempts, costCompletion, {
|
|
605
|
+
let costCompletion = await resolveCostCompletionWithAiTools(routerResponse, tokens, {
|
|
597
606
|
mergedConfig,
|
|
598
607
|
calculator: aiTools?.calculator ?? null,
|
|
599
608
|
calculateCost: this.config.aiTools?.calculateCost
|
|
600
609
|
});
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
? buildTraceUsageSummary(tokens, costCompletion, routingMetadataSlice.maxTokensRequested)
|
|
604
|
-
: undefined;
|
|
605
|
-
const enhancedResponse = {
|
|
606
|
-
content: content,
|
|
607
|
-
parsedContent: parsedContent,
|
|
608
|
-
metadata: {
|
|
609
|
-
aiRequestId: request.aiRequestId,
|
|
610
|
-
identity: request.identity,
|
|
611
|
-
latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
|
|
612
|
-
tokens: tokens,
|
|
613
|
-
taskTypeId,
|
|
614
|
-
agentType: 'ai',
|
|
615
|
-
contentType,
|
|
616
|
-
parsingMethod,
|
|
617
|
-
...routingMetadataSlice,
|
|
618
|
-
...(effectiveModelConfig !== undefined ? { effectiveModelConfig } : {}),
|
|
619
|
-
...(costCompletion.costStatus === 'priced'
|
|
620
|
-
? {
|
|
621
|
-
costUsd: costCompletion.cost,
|
|
622
|
-
...(typeof routerMetaForCost.cost === 'number'
|
|
623
|
-
? { cost: routerMetaForCost.cost }
|
|
624
|
-
: { cost: costCompletion.cost })
|
|
625
|
-
}
|
|
626
|
-
: {}),
|
|
627
|
-
...(costCompletion.costStatus ? { costStatus: costCompletion.costStatus } : {}),
|
|
628
|
-
...(costCompletion.costBreakdown ? { costBreakdown: costCompletion.costBreakdown } : {}),
|
|
629
|
-
...(traceEnabled
|
|
630
|
-
? {
|
|
631
|
-
requestIds: traceRequestIds,
|
|
632
|
-
retryCount: traceRetryCount,
|
|
633
|
-
fallbackCount: traceFallbackCount,
|
|
634
|
-
attempts: traceAttempts,
|
|
635
|
-
...(traceUsageSummary !== undefined ? { usage: traceUsageSummary } : {}),
|
|
636
|
-
...(traceMergedRouterSnapshot !== undefined
|
|
637
|
-
? { mergedRouterConfig: traceMergedRouterSnapshot }
|
|
638
|
-
: {})
|
|
639
|
-
}
|
|
640
|
-
: {})
|
|
610
|
+
if (!costCompletion.costStatus && hasNonZeroTokenUsage(tokens)) {
|
|
611
|
+
costCompletion = { ...costCompletion, costStatus: 'unpriced' };
|
|
641
612
|
}
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
:
|
|
651
|
-
const rawFull = routerResponse.rawResponse || routerResponse;
|
|
652
|
-
const fullResponseForActivity = includeFullProviderBlob
|
|
653
|
-
? capActivityFullResponsePayload(rawFull, maxFullChars)
|
|
654
|
-
: undefined;
|
|
655
|
-
// Create activity response with proper structure for ActivityTracker
|
|
656
|
-
const activityResponse = {
|
|
657
|
-
content: {
|
|
658
|
-
rawContent: content, // Store the actual response content as rawContent
|
|
659
|
-
...(fullResponseForActivity !== undefined ? { fullResponse: fullResponseForActivity } : {})
|
|
660
|
-
},
|
|
661
|
-
parsed: parsedContent, // Include parsed content in activity record
|
|
662
|
-
metadata: enhancedResponse.metadata,
|
|
663
|
-
status: 'success',
|
|
664
|
-
error: null,
|
|
665
|
-
usage: tokens
|
|
666
|
-
};
|
|
667
|
-
await this.activityManager.logSuccess(activity, {
|
|
668
|
-
...costCompletion,
|
|
669
|
-
response: activityResponse,
|
|
670
|
-
endTime: Date.now(),
|
|
671
|
-
duration: Date.now() - startTime
|
|
613
|
+
const routerMetaForCost = routerResponse?.metadata || {};
|
|
614
|
+
const routingMetadataSlice = pickInvokeRoutingMetadataSlice(routerResponse, mergedConfig);
|
|
615
|
+
const effectiveModelConfig = pickEffectiveModelConfigForMetadata(mergedConfig);
|
|
616
|
+
const traceMergedRouterSnapshot = traceEnabled ? pickTraceMergedRouterConfig(mergedConfig) : undefined;
|
|
617
|
+
if (traceEnabled && traceAttempts) {
|
|
618
|
+
await enrichTraceAttemptsWithBilling(traceAttempts, costCompletion, {
|
|
619
|
+
mergedConfig,
|
|
620
|
+
calculator: aiTools?.calculator ?? null,
|
|
621
|
+
calculateCost: this.config.aiTools?.calculateCost
|
|
672
622
|
});
|
|
673
623
|
}
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
624
|
+
const traceUsageSummary = traceEnabled
|
|
625
|
+
? buildTraceUsageSummary(tokens, costCompletion, routingMetadataSlice.maxTokensRequested)
|
|
626
|
+
: undefined;
|
|
627
|
+
const enhancedResponse = {
|
|
628
|
+
content: content,
|
|
629
|
+
parsedContent: parsedContent,
|
|
630
|
+
metadata: {
|
|
677
631
|
aiRequestId: request.aiRequestId,
|
|
678
|
-
|
|
679
|
-
|
|
632
|
+
identity: request.identity,
|
|
633
|
+
latencyMs: traceEnabled && typeof providerCallLatencyMs === 'number' ? providerCallLatencyMs : (Date.now() - startTime),
|
|
634
|
+
tokens: tokens,
|
|
635
|
+
taskTypeId,
|
|
636
|
+
agentType: 'ai',
|
|
637
|
+
contentType,
|
|
638
|
+
parsingMethod,
|
|
639
|
+
...routingMetadataSlice,
|
|
640
|
+
...(effectiveModelConfig !== undefined ? { effectiveModelConfig } : {}),
|
|
641
|
+
...(costCompletion.costStatus === 'priced'
|
|
642
|
+
? {
|
|
643
|
+
costUsd: costCompletion.cost,
|
|
644
|
+
...(typeof routerMetaForCost.cost === 'number'
|
|
645
|
+
? { cost: routerMetaForCost.cost }
|
|
646
|
+
: { cost: costCompletion.cost })
|
|
647
|
+
}
|
|
648
|
+
: {}),
|
|
649
|
+
...(costCompletion.costStatus ? { costStatus: costCompletion.costStatus } : {}),
|
|
650
|
+
...(costCompletion.costBreakdown ? { costBreakdown: costCompletion.costBreakdown } : {}),
|
|
651
|
+
...(traceEnabled
|
|
652
|
+
? {
|
|
653
|
+
requestIds: traceRequestIds,
|
|
654
|
+
retryCount: traceRetryCount,
|
|
655
|
+
fallbackCount: traceFallbackCount,
|
|
656
|
+
attempts: traceAttempts,
|
|
657
|
+
...(traceUsageSummary !== undefined ? { usage: traceUsageSummary } : {}),
|
|
658
|
+
...(traceMergedRouterSnapshot !== undefined
|
|
659
|
+
? { mergedRouterConfig: traceMergedRouterSnapshot }
|
|
660
|
+
: {})
|
|
661
|
+
}
|
|
662
|
+
: {})
|
|
663
|
+
}
|
|
664
|
+
};
|
|
665
|
+
// Track activity success if activity was started
|
|
666
|
+
if (activity) {
|
|
667
|
+
try {
|
|
668
|
+
const diag = request.diagnostics;
|
|
669
|
+
const includeFullProviderBlob = diag?.includeFullProviderResponseInActivity !== false;
|
|
670
|
+
const maxFullChars = typeof diag?.activityFullResponseMaxChars === 'number' && diag.activityFullResponseMaxChars > 0
|
|
671
|
+
? diag.activityFullResponseMaxChars
|
|
672
|
+
: DEFAULT_ACTIVITY_FULL_RESPONSE_MAX_CHARS;
|
|
673
|
+
const rawFull = routerResponse.rawResponse || routerResponse;
|
|
674
|
+
const fullResponseForActivity = includeFullProviderBlob
|
|
675
|
+
? capActivityFullResponsePayload(rawFull, maxFullChars)
|
|
676
|
+
: undefined;
|
|
677
|
+
// Create activity response with proper structure for ActivityTracker
|
|
678
|
+
const activityResponse = {
|
|
679
|
+
content: {
|
|
680
|
+
rawContent: content, // Store the actual response content as rawContent
|
|
681
|
+
...(fullResponseForActivity !== undefined ? { fullResponse: fullResponseForActivity } : {})
|
|
682
|
+
},
|
|
683
|
+
parsed: parsedContent, // Include parsed content in activity record
|
|
684
|
+
metadata: enhancedResponse.metadata,
|
|
685
|
+
status: 'success',
|
|
686
|
+
error: null,
|
|
687
|
+
usage: tokens
|
|
688
|
+
};
|
|
689
|
+
await this.activityManager.logSuccess(activity, {
|
|
690
|
+
...costCompletion,
|
|
691
|
+
response: activityResponse,
|
|
692
|
+
endTime: Date.now(),
|
|
693
|
+
duration: Date.now() - startTime
|
|
694
|
+
});
|
|
695
|
+
}
|
|
696
|
+
catch (activityError) {
|
|
697
|
+
// Log activity tracking error but don't fail the request
|
|
698
|
+
this.logger.warn('Failed to track activity success', {
|
|
699
|
+
aiRequestId: request.aiRequestId,
|
|
700
|
+
error: activityError instanceof Error ? activityError.message : String(activityError)
|
|
701
|
+
});
|
|
702
|
+
}
|
|
680
703
|
}
|
|
704
|
+
if (optimixerPrediction) {
|
|
705
|
+
await this.optimixerManager?.completePrediction(optimixerPrediction.requestId, buildOptimixerActualUsage(tokens, routerResponse, Date.now() - startTime));
|
|
706
|
+
}
|
|
707
|
+
warnIfSuccessfulInvokeReportsZeroUsageOrCost(this.logger, request.identity, {
|
|
708
|
+
tokens: enhancedResponse.metadata.tokens,
|
|
709
|
+
costUsd: enhancedResponse.metadata.costUsd,
|
|
710
|
+
cost: enhancedResponse.metadata.cost
|
|
711
|
+
}, 'invoke');
|
|
712
|
+
this.logger.debug('gateway: enhancedResponse', withActivityIdentity(request.identity, {
|
|
713
|
+
latencyMs: enhancedResponse.metadata?.latencyMs,
|
|
714
|
+
contentType: enhancedResponse.metadata?.contentType,
|
|
715
|
+
debugKind: gatewayLogDebug.state
|
|
716
|
+
}));
|
|
717
|
+
return enhancedResponse;
|
|
681
718
|
}
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
taskTypeId,
|
|
703
|
-
startTime,
|
|
704
|
-
mergedConfig,
|
|
705
|
-
partialRouterPayload: partial,
|
|
706
|
-
gatewayAiRequestId: request.aiRequestId,
|
|
707
|
-
error: err
|
|
708
|
-
});
|
|
709
|
-
attachGatewayInvokeRejectionMetadata(err, rejectMeta);
|
|
710
|
-
if (err.message.includes(NO_PROVIDER_ERROR)) {
|
|
711
|
-
const wrapped = new Error(err.message + NO_PROVIDER_HINT);
|
|
712
|
-
wrapped.cause = err;
|
|
713
|
-
attachGatewayInvokeRejectionMetadata(wrapped, rejectMeta);
|
|
714
|
-
throw wrapped;
|
|
719
|
+
catch (error) {
|
|
720
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
721
|
+
const partial = tryExtractRouterLikePayloadFromErrorChain(err);
|
|
722
|
+
const rejectMeta = buildInvokeRejectionMetadata({
|
|
723
|
+
request,
|
|
724
|
+
taskTypeId,
|
|
725
|
+
startTime,
|
|
726
|
+
mergedConfig,
|
|
727
|
+
partialRouterPayload: partial,
|
|
728
|
+
gatewayAiRequestId: request.aiRequestId,
|
|
729
|
+
error: err
|
|
730
|
+
});
|
|
731
|
+
attachGatewayInvokeRejectionMetadata(err, rejectMeta);
|
|
732
|
+
if (err.message.includes(NO_PROVIDER_ERROR)) {
|
|
733
|
+
const wrapped = new Error(err.message + NO_PROVIDER_HINT);
|
|
734
|
+
wrapped.cause = err;
|
|
735
|
+
attachGatewayInvokeRejectionMetadata(wrapped, rejectMeta);
|
|
736
|
+
throw wrapped;
|
|
737
|
+
}
|
|
738
|
+
throw err;
|
|
715
739
|
}
|
|
716
|
-
|
|
717
|
-
}
|
|
740
|
+
});
|
|
718
741
|
}
|
|
719
742
|
async applyAdaptiveMaxTokensIfEnabled(request, mergedConfig, messages) {
|
|
720
743
|
if (!this.optimixerManager?.isEnabled() || isMaxTokensExplicitlySet(request, this.config)) {
|