lynkr 8.0.1 → 9.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.lynkr/telemetry.db +0 -0
- package/.lynkr/telemetry.db-shm +0 -0
- package/.lynkr/telemetry.db-wal +0 -0
- package/README.md +195 -321
- package/lynkr-skill.tar.gz +0 -0
- package/package.json +4 -3
- package/src/api/openai-router.js +30 -11
- package/src/api/providers-handler.js +171 -3
- package/src/api/router.js +9 -2
- package/src/clients/circuit-breaker.js +10 -247
- package/src/clients/codex-process.js +342 -0
- package/src/clients/codex-utils.js +143 -0
- package/src/clients/databricks.js +210 -63
- package/src/clients/resilience.js +540 -0
- package/src/clients/retry.js +22 -167
- package/src/config/index.js +57 -0
- package/src/context/compression.js +42 -9
- package/src/context/distill.js +492 -0
- package/src/orchestrator/index.js +46 -6
- package/src/routing/complexity-analyzer.js +258 -5
- package/src/routing/index.js +12 -2
- package/src/routing/latency-tracker.js +148 -0
- package/src/routing/model-tiers.js +2 -0
- package/src/routing/quality-scorer.js +113 -0
- package/src/routing/telemetry.js +464 -0
- package/src/server.js +11 -0
- package/src/tools/code-graph.js +538 -0
- package/src/tools/code-mode.js +304 -0
- package/src/tools/lazy-loader.js +11 -0
- package/src/tools/mcp-remote.js +7 -0
- package/src/tools/smart-selection.js +11 -0
- package/src/utils/payload.js +206 -0
- package/src/utils/perf-timer.js +80 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
const crypto = require("crypto");
|
|
1
2
|
const config = require("../config");
|
|
2
3
|
const http = require("http");
|
|
3
4
|
const https = require("https");
|
|
@@ -5,6 +6,7 @@ const { withRetry } = require("./retry");
|
|
|
5
6
|
const { getCircuitBreakerRegistry } = require("./circuit-breaker");
|
|
6
7
|
const { getMetricsCollector } = require("../observability/metrics");
|
|
7
8
|
const { getHealthTracker } = require("../observability/health-tracker");
|
|
9
|
+
const { createBulkhead } = require("./resilience");
|
|
8
10
|
const logger = require("../logger");
|
|
9
11
|
const { STANDARD_TOOLS, STANDARD_TOOL_NAMES } = require("./standard-tools");
|
|
10
12
|
const { convertAnthropicToolsToOpenRouter } = require("./openrouter-utils");
|
|
@@ -12,6 +14,9 @@ const {
|
|
|
12
14
|
detectModelFamily
|
|
13
15
|
} = require("./bedrock-utils");
|
|
14
16
|
const { getGPTSystemPromptAddendum } = require("./gpt-utils");
|
|
17
|
+
const telemetry = require("../routing/telemetry");
|
|
18
|
+
const { scoreResponseQuality } = require("../routing/quality-scorer");
|
|
19
|
+
const { getLatencyTracker } = require("../routing/latency-tracker");
|
|
15
20
|
|
|
16
21
|
|
|
17
22
|
|
|
@@ -20,53 +25,11 @@ if (typeof fetch !== "function") {
|
|
|
20
25
|
throw new Error("Node 18+ is required for the built-in fetch API.");
|
|
21
26
|
}
|
|
22
27
|
|
|
23
|
-
|
|
24
|
-
* Simple Semaphore for limiting concurrent requests
|
|
25
|
-
* Used to prevent Z.AI rate limiting from parallel Claude Code CLI calls
|
|
26
|
-
*/
|
|
27
|
-
class Semaphore {
|
|
28
|
-
constructor(maxConcurrent = 2) {
|
|
29
|
-
this.maxConcurrent = maxConcurrent;
|
|
30
|
-
this.current = 0;
|
|
31
|
-
this.queue = [];
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
async acquire() {
|
|
35
|
-
if (this.current < this.maxConcurrent) {
|
|
36
|
-
this.current++;
|
|
37
|
-
return;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// Wait in queue
|
|
41
|
-
return new Promise((resolve) => {
|
|
42
|
-
this.queue.push(resolve);
|
|
43
|
-
});
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
release() {
|
|
47
|
-
this.current--;
|
|
48
|
-
if (this.queue.length > 0 && this.current < this.maxConcurrent) {
|
|
49
|
-
this.current++;
|
|
50
|
-
const next = this.queue.shift();
|
|
51
|
-
next();
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
async run(fn) {
|
|
56
|
-
await this.acquire();
|
|
57
|
-
try {
|
|
58
|
-
return await fn();
|
|
59
|
-
} finally {
|
|
60
|
-
this.release();
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
// Z.AI request semaphore - limit concurrent requests to avoid rate limiting
|
|
28
|
+
// Z.AI request bulkhead - limit concurrent requests to avoid rate limiting
|
|
66
29
|
// Configurable via ZAI_MAX_CONCURRENT env var (default: 2)
|
|
67
30
|
const zaiMaxConcurrent = parseInt(process.env.ZAI_MAX_CONCURRENT || '2', 10);
|
|
68
|
-
const zaiSemaphore =
|
|
69
|
-
logger.info({ maxConcurrent: zaiMaxConcurrent }, "Z.AI
|
|
31
|
+
const zaiSemaphore = createBulkhead({ maxConcurrent: zaiMaxConcurrent, maxQueue: 50 });
|
|
32
|
+
logger.info({ maxConcurrent: zaiMaxConcurrent }, "Z.AI bulkhead initialized");
|
|
70
33
|
|
|
71
34
|
|
|
72
35
|
|
|
@@ -307,7 +270,7 @@ async function invokeOllama(body) {
|
|
|
307
270
|
const ollamaBody = {
|
|
308
271
|
model: modelName,
|
|
309
272
|
messages: body.messages,
|
|
310
|
-
max_tokens: body.max_tokens ||
|
|
273
|
+
max_tokens: body.max_tokens || 16384,
|
|
311
274
|
stream: false,
|
|
312
275
|
};
|
|
313
276
|
|
|
@@ -432,7 +395,7 @@ async function invokeOpenRouter(body) {
|
|
|
432
395
|
model: body._suggestionModeModel || body._tierModel || config.openrouter.model,
|
|
433
396
|
messages,
|
|
434
397
|
temperature: body.temperature ?? 0.7,
|
|
435
|
-
max_tokens: body.max_tokens ??
|
|
398
|
+
max_tokens: body.max_tokens ?? 16384,
|
|
436
399
|
top_p: body.top_p ?? 1.0,
|
|
437
400
|
stream: body.stream ?? false
|
|
438
401
|
};
|
|
@@ -515,7 +478,7 @@ async function invokeAzureOpenAI(body) {
|
|
|
515
478
|
const azureBody = {
|
|
516
479
|
messages,
|
|
517
480
|
temperature: body.temperature ?? 0.3, // Lower temperature for more deterministic, action-oriented behavior
|
|
518
|
-
max_tokens: Math.min(body.max_tokens ??
|
|
481
|
+
max_tokens: Math.min(body.max_tokens ?? 16384, 16384), // Cap at Azure OpenAI's limit
|
|
519
482
|
top_p: body.top_p ?? 1.0,
|
|
520
483
|
stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented
|
|
521
484
|
model: body._suggestionModeModel || body._tierModel || config.azureOpenAI.deployment
|
|
@@ -911,7 +874,7 @@ async function invokeOpenAI(body) {
|
|
|
911
874
|
model: body._suggestionModeModel || body._tierModel || config.openai.model || "gpt-4o",
|
|
912
875
|
messages,
|
|
913
876
|
temperature: body.temperature ?? 0.7,
|
|
914
|
-
max_tokens: body.max_tokens ??
|
|
877
|
+
max_tokens: body.max_tokens ?? 16384,
|
|
915
878
|
top_p: body.top_p ?? 1.0,
|
|
916
879
|
stream: body.stream ?? false
|
|
917
880
|
};
|
|
@@ -1012,7 +975,7 @@ async function invokeLlamaCpp(body) {
|
|
|
1012
975
|
const llamacppBody = {
|
|
1013
976
|
messages: deduplicated,
|
|
1014
977
|
temperature: body.temperature ?? 0.7,
|
|
1015
|
-
max_tokens: body.max_tokens ??
|
|
978
|
+
max_tokens: body.max_tokens ?? 16384,
|
|
1016
979
|
top_p: body.top_p ?? 1.0,
|
|
1017
980
|
stream: body.stream ?? false
|
|
1018
981
|
};
|
|
@@ -1096,7 +1059,7 @@ async function invokeLMStudio(body) {
|
|
|
1096
1059
|
const lmstudioBody = {
|
|
1097
1060
|
messages,
|
|
1098
1061
|
temperature: body.temperature ?? 0.7,
|
|
1099
|
-
max_tokens: body.max_tokens ??
|
|
1062
|
+
max_tokens: body.max_tokens ?? 16384,
|
|
1100
1063
|
top_p: body.top_p ?? 1.0,
|
|
1101
1064
|
stream: body.stream ?? false
|
|
1102
1065
|
};
|
|
@@ -1411,7 +1374,7 @@ async function invokeZai(body) {
|
|
|
1411
1374
|
zaiBody = {
|
|
1412
1375
|
model: mappedModel,
|
|
1413
1376
|
messages,
|
|
1414
|
-
max_tokens: body.max_tokens ||
|
|
1377
|
+
max_tokens: body.max_tokens || 16384,
|
|
1415
1378
|
temperature: body.temperature ?? 0.7,
|
|
1416
1379
|
stream: body.stream,
|
|
1417
1380
|
};
|
|
@@ -1473,12 +1436,9 @@ async function invokeZai(body) {
|
|
|
1473
1436
|
zaiBody: JSON.stringify(zaiBody).substring(0, 1000),
|
|
1474
1437
|
}, "Z.AI request body (truncated)");
|
|
1475
1438
|
|
|
1476
|
-
// Use
|
|
1477
|
-
return zaiSemaphore.
|
|
1478
|
-
logger.debug(
|
|
1479
|
-
queueLength: zaiSemaphore.queue.length,
|
|
1480
|
-
currentConcurrent: zaiSemaphore.current,
|
|
1481
|
-
}, "Z.AI semaphore status");
|
|
1439
|
+
// Use bulkhead to limit concurrent Z.AI requests (prevents rate limiting)
|
|
1440
|
+
return zaiSemaphore.execute(async () => {
|
|
1441
|
+
logger.debug("Z.AI bulkhead executing request");
|
|
1482
1442
|
|
|
1483
1443
|
const response = await performJsonRequest(endpoint, { headers, body: zaiBody }, "Z.AI");
|
|
1484
1444
|
|
|
@@ -1560,7 +1520,7 @@ async function invokeMoonshot(body) {
|
|
|
1560
1520
|
const moonshotBody = {
|
|
1561
1521
|
model: mappedModel,
|
|
1562
1522
|
messages,
|
|
1563
|
-
max_tokens: body.max_tokens ||
|
|
1523
|
+
max_tokens: body.max_tokens || 16384,
|
|
1564
1524
|
temperature: body.temperature ?? 0.7,
|
|
1565
1525
|
top_p: body.top_p ?? 1.0,
|
|
1566
1526
|
stream: false, // Force non-streaming - OpenAI SSE to Anthropic SSE conversion not implemented
|
|
@@ -1791,7 +1751,7 @@ async function invokeVertex(body) {
|
|
|
1791
1751
|
contents,
|
|
1792
1752
|
generationConfig: {
|
|
1793
1753
|
temperature: body.temperature ?? 0.7,
|
|
1794
|
-
maxOutputTokens: body.max_tokens ||
|
|
1754
|
+
maxOutputTokens: body.max_tokens || 16384,
|
|
1795
1755
|
topP: body.top_p ?? 1.0,
|
|
1796
1756
|
}
|
|
1797
1757
|
};
|
|
@@ -2000,6 +1960,54 @@ function convertGeminiToAnthropic(response, requestedModel) {
|
|
|
2000
1960
|
};
|
|
2001
1961
|
}
|
|
2002
1962
|
|
|
1963
|
+
async function invokeCodex(body) {
|
|
1964
|
+
const { getCodexProcess } = require("./codex-process");
|
|
1965
|
+
const { convertAnthropicToCodexPrompt, convertCodexResponseToAnthropic } = require("./codex-utils");
|
|
1966
|
+
|
|
1967
|
+
const codex = getCodexProcess();
|
|
1968
|
+
await codex.ensureRunning();
|
|
1969
|
+
|
|
1970
|
+
const model = body._tierModel || config.codex?.model || "gpt-5.3-codex";
|
|
1971
|
+
const { prompt, systemContext } = convertAnthropicToCodexPrompt(body);
|
|
1972
|
+
|
|
1973
|
+
if (!prompt) {
|
|
1974
|
+
throw new Error("Codex: no prompt content to send");
|
|
1975
|
+
}
|
|
1976
|
+
|
|
1977
|
+
// Start a new thread
|
|
1978
|
+
const threadParams = { model };
|
|
1979
|
+
if (systemContext) {
|
|
1980
|
+
threadParams.instructions = systemContext;
|
|
1981
|
+
}
|
|
1982
|
+
const threadResult = await codex.sendRequest("thread/start", threadParams);
|
|
1983
|
+
const threadId = threadResult?.threadId || threadResult?.id;
|
|
1984
|
+
|
|
1985
|
+
if (!threadId) {
|
|
1986
|
+
throw new Error("Codex: thread/start did not return a threadId");
|
|
1987
|
+
}
|
|
1988
|
+
|
|
1989
|
+
logger.debug({ threadId, model, promptLength: prompt.length }, "[Codex] Thread started");
|
|
1990
|
+
|
|
1991
|
+
// Send the turn and collect response
|
|
1992
|
+
const turnResult = await codex.sendTurn(threadId, prompt, model);
|
|
1993
|
+
|
|
1994
|
+
logger.debug({
|
|
1995
|
+
threadId,
|
|
1996
|
+
responseLength: turnResult.text?.length || 0,
|
|
1997
|
+
}, "[Codex] Turn completed");
|
|
1998
|
+
|
|
1999
|
+
// Convert to Anthropic format
|
|
2000
|
+
const anthropicJson = convertCodexResponseToAnthropic(turnResult, model);
|
|
2001
|
+
|
|
2002
|
+
return {
|
|
2003
|
+
ok: true,
|
|
2004
|
+
status: 200,
|
|
2005
|
+
json: anthropicJson,
|
|
2006
|
+
text: JSON.stringify(anthropicJson),
|
|
2007
|
+
contentType: "application/json",
|
|
2008
|
+
};
|
|
2009
|
+
}
|
|
2010
|
+
|
|
2003
2011
|
async function invokeModel(body, options = {}) {
|
|
2004
2012
|
const { determineProviderSmart, isFallbackEnabled, getFallbackProvider } = require("./routing");
|
|
2005
2013
|
const metricsCollector = getMetricsCollector();
|
|
@@ -2007,9 +2015,11 @@ async function invokeModel(body, options = {}) {
|
|
|
2007
2015
|
const healthTracker = getHealthTracker();
|
|
2008
2016
|
|
|
2009
2017
|
// Determine provider via async tier routing
|
|
2018
|
+
// Thread workspace for code-graph integration (from X-Lynkr-Workspace header or body._workspace)
|
|
2019
|
+
const workspace = body._workspace || options.workspace || null;
|
|
2010
2020
|
const routingResult = options.forceProvider
|
|
2011
2021
|
? { provider: options.forceProvider, model: null, method: 'forced' }
|
|
2012
|
-
: await determineProviderSmart(body);
|
|
2022
|
+
: await determineProviderSmart(body, { workspace });
|
|
2013
2023
|
const initialProvider = routingResult.provider;
|
|
2014
2024
|
const tierSelectedModel = routingResult.model;
|
|
2015
2025
|
|
|
@@ -2081,6 +2091,8 @@ async function invokeModel(body, options = {}) {
|
|
|
2081
2091
|
return await invokeVertex(body);
|
|
2082
2092
|
} else if (initialProvider === "moonshot") {
|
|
2083
2093
|
return await invokeMoonshot(body);
|
|
2094
|
+
} else if (initialProvider === "codex") {
|
|
2095
|
+
return await invokeCodex(body);
|
|
2084
2096
|
}
|
|
2085
2097
|
return await invokeDatabricks(body);
|
|
2086
2098
|
});
|
|
@@ -2091,10 +2103,13 @@ async function invokeModel(body, options = {}) {
|
|
|
2091
2103
|
metricsCollector.recordDatabricksRequest(true, retries);
|
|
2092
2104
|
healthTracker.recordSuccess(initialProvider, latency);
|
|
2093
2105
|
|
|
2106
|
+
// Record latency for routing intelligence
|
|
2107
|
+
getLatencyTracker().record(initialProvider, latency);
|
|
2108
|
+
|
|
2094
2109
|
// Record tokens and cost savings
|
|
2110
|
+
const outputTokens = result.json?.usage?.output_tokens || result.json?.usage?.completion_tokens || 0;
|
|
2111
|
+
const inputTokens = result.json?.usage?.input_tokens || result.json?.usage?.prompt_tokens || 0;
|
|
2095
2112
|
if (result.json?.usage) {
|
|
2096
|
-
const inputTokens = result.json.usage.input_tokens || result.json.usage.prompt_tokens || 0;
|
|
2097
|
-
const outputTokens = result.json.usage.output_tokens || result.json.usage.completion_tokens || 0;
|
|
2098
2113
|
metricsCollector.recordTokens(inputTokens, outputTokens);
|
|
2099
2114
|
|
|
2100
2115
|
// Estimate cost savings if Ollama was used
|
|
@@ -2104,6 +2119,53 @@ async function invokeModel(body, options = {}) {
|
|
|
2104
2119
|
}
|
|
2105
2120
|
}
|
|
2106
2121
|
|
|
2122
|
+
// Count tool calls in response
|
|
2123
|
+
const toolCallsMade = result.json?.content?.filter?.(
|
|
2124
|
+
(b) => b.type === "tool_use"
|
|
2125
|
+
)?.length || 0;
|
|
2126
|
+
|
|
2127
|
+
// Compute quality score
|
|
2128
|
+
const qualityScore = scoreResponseQuality(
|
|
2129
|
+
{ tier: routingDecision.tier, hasTools: Array.isArray(body?.tools) && body.tools.length > 0 },
|
|
2130
|
+
null,
|
|
2131
|
+
{
|
|
2132
|
+
status_code: 200,
|
|
2133
|
+
output_tokens: outputTokens,
|
|
2134
|
+
tool_calls_made: toolCallsMade,
|
|
2135
|
+
was_fallback: false,
|
|
2136
|
+
retry_count: retries,
|
|
2137
|
+
error_type: null,
|
|
2138
|
+
latency_ms: latency,
|
|
2139
|
+
}
|
|
2140
|
+
);
|
|
2141
|
+
|
|
2142
|
+
// Record routing telemetry (non-blocking)
|
|
2143
|
+
telemetry.record({
|
|
2144
|
+
request_id: crypto.randomUUID(),
|
|
2145
|
+
session_id: body._sessionId || null,
|
|
2146
|
+
timestamp: Date.now(),
|
|
2147
|
+
complexity_score: routingResult.score ?? null,
|
|
2148
|
+
tier: routingDecision.tier,
|
|
2149
|
+
agentic_type: routingResult.agenticResult?.agentType || null,
|
|
2150
|
+
tool_count: Array.isArray(body?.tools) ? body.tools.length : 0,
|
|
2151
|
+
input_tokens: inputTokens || null,
|
|
2152
|
+
message_count: Array.isArray(body?.messages) ? body.messages.length : 0,
|
|
2153
|
+
request_type: routingResult.analysis?.requestType || null,
|
|
2154
|
+
provider: initialProvider,
|
|
2155
|
+
model: routingDecision.model,
|
|
2156
|
+
routing_method: routingDecision.method,
|
|
2157
|
+
was_fallback: false,
|
|
2158
|
+
output_tokens: outputTokens || null,
|
|
2159
|
+
latency_ms: latency,
|
|
2160
|
+
status_code: 200,
|
|
2161
|
+
error_type: null,
|
|
2162
|
+
tool_calls_made: toolCallsMade,
|
|
2163
|
+
retry_count: retries,
|
|
2164
|
+
circuit_breaker_state: breaker.state,
|
|
2165
|
+
quality_score: qualityScore,
|
|
2166
|
+
tokens_per_second: outputTokens && latency > 0 ? outputTokens / (latency / 1000) : null,
|
|
2167
|
+
});
|
|
2168
|
+
|
|
2107
2169
|
// Return result with provider info and routing decision for headers
|
|
2108
2170
|
return {
|
|
2109
2171
|
...result,
|
|
@@ -2113,8 +2175,10 @@ async function invokeModel(body, options = {}) {
|
|
|
2113
2175
|
|
|
2114
2176
|
} catch (err) {
|
|
2115
2177
|
// Record failure
|
|
2178
|
+
const failLatency = Date.now() - startTime;
|
|
2116
2179
|
metricsCollector.recordProviderFailure(initialProvider);
|
|
2117
2180
|
healthTracker.recordFailure(initialProvider, err, err.status);
|
|
2181
|
+
getLatencyTracker().record(initialProvider, failLatency);
|
|
2118
2182
|
|
|
2119
2183
|
// Check if we should fallback (any provider can fall back, not just ollama)
|
|
2120
2184
|
const shouldFallback =
|
|
@@ -2124,6 +2188,33 @@ async function invokeModel(body, options = {}) {
|
|
|
2124
2188
|
|
|
2125
2189
|
if (!shouldFallback) {
|
|
2126
2190
|
metricsCollector.recordDatabricksRequest(false, retries);
|
|
2191
|
+
|
|
2192
|
+
// Record failed telemetry
|
|
2193
|
+
telemetry.record({
|
|
2194
|
+
request_id: crypto.randomUUID(),
|
|
2195
|
+
session_id: body._sessionId || null,
|
|
2196
|
+
timestamp: Date.now(),
|
|
2197
|
+
complexity_score: routingResult.score ?? null,
|
|
2198
|
+
tier: routingDecision.tier,
|
|
2199
|
+
agentic_type: routingResult.agenticResult?.agentType || null,
|
|
2200
|
+
tool_count: Array.isArray(body?.tools) ? body.tools.length : 0,
|
|
2201
|
+
input_tokens: null,
|
|
2202
|
+
message_count: Array.isArray(body?.messages) ? body.messages.length : 0,
|
|
2203
|
+
request_type: routingResult.analysis?.requestType || null,
|
|
2204
|
+
provider: initialProvider,
|
|
2205
|
+
model: routingDecision.model,
|
|
2206
|
+
routing_method: routingDecision.method,
|
|
2207
|
+
was_fallback: false,
|
|
2208
|
+
latency_ms: failLatency,
|
|
2209
|
+
status_code: err.status || null,
|
|
2210
|
+
error_type: err.code || err.name || "unknown",
|
|
2211
|
+
quality_score: scoreResponseQuality(
|
|
2212
|
+
{ tier: routingDecision.tier, hasTools: Array.isArray(body?.tools) && body.tools.length > 0 },
|
|
2213
|
+
null,
|
|
2214
|
+
{ error_type: err.code || err.name, was_fallback: false, retry_count: retries, latency_ms: failLatency }
|
|
2215
|
+
),
|
|
2216
|
+
});
|
|
2217
|
+
|
|
2127
2218
|
throw err;
|
|
2128
2219
|
}
|
|
2129
2220
|
|
|
@@ -2197,6 +2288,45 @@ async function invokeModel(body, options = {}) {
|
|
|
2197
2288
|
totalLatency: Date.now() - startTime,
|
|
2198
2289
|
}, "Fallback to cloud provider succeeded");
|
|
2199
2290
|
|
|
2291
|
+
// Record latency for fallback provider
|
|
2292
|
+
getLatencyTracker().record(fallbackProvider, fallbackLatency);
|
|
2293
|
+
|
|
2294
|
+
// Capture fallback telemetry
|
|
2295
|
+
const fbOutputTokens = fallbackResult.json?.usage?.output_tokens || fallbackResult.json?.usage?.completion_tokens || 0;
|
|
2296
|
+
const fbInputTokens = fallbackResult.json?.usage?.input_tokens || fallbackResult.json?.usage?.prompt_tokens || 0;
|
|
2297
|
+
const fbToolCalls = fallbackResult.json?.content?.filter?.(
|
|
2298
|
+
(b) => b.type === "tool_use"
|
|
2299
|
+
)?.length || 0;
|
|
2300
|
+
|
|
2301
|
+
telemetry.record({
|
|
2302
|
+
request_id: crypto.randomUUID(),
|
|
2303
|
+
session_id: body._sessionId || null,
|
|
2304
|
+
timestamp: Date.now(),
|
|
2305
|
+
complexity_score: routingResult.score ?? null,
|
|
2306
|
+
tier: routingDecision.tier,
|
|
2307
|
+
agentic_type: routingResult.agenticResult?.agentType || null,
|
|
2308
|
+
tool_count: Array.isArray(body?.tools) ? body.tools.length : 0,
|
|
2309
|
+
input_tokens: fbInputTokens || null,
|
|
2310
|
+
message_count: Array.isArray(body?.messages) ? body.messages.length : 0,
|
|
2311
|
+
request_type: routingResult.analysis?.requestType || null,
|
|
2312
|
+
provider: fallbackProvider,
|
|
2313
|
+
model: routingDecision.model,
|
|
2314
|
+
routing_method: "fallback",
|
|
2315
|
+
was_fallback: true,
|
|
2316
|
+
output_tokens: fbOutputTokens || null,
|
|
2317
|
+
latency_ms: Date.now() - startTime,
|
|
2318
|
+
status_code: 200,
|
|
2319
|
+
error_type: null,
|
|
2320
|
+
tool_calls_made: fbToolCalls,
|
|
2321
|
+
retry_count: 0,
|
|
2322
|
+
quality_score: scoreResponseQuality(
|
|
2323
|
+
{ tier: routingDecision.tier, hasTools: Array.isArray(body?.tools) && body.tools.length > 0 },
|
|
2324
|
+
null,
|
|
2325
|
+
{ status_code: 200, output_tokens: fbOutputTokens, tool_calls_made: fbToolCalls, was_fallback: true, retry_count: 0, latency_ms: Date.now() - startTime }
|
|
2326
|
+
),
|
|
2327
|
+
tokens_per_second: fbOutputTokens && fallbackLatency > 0 ? fbOutputTokens / (fallbackLatency / 1000) : null,
|
|
2328
|
+
});
|
|
2329
|
+
|
|
2200
2330
|
// Return result with actual provider used (fallback provider) and routing decision
|
|
2201
2331
|
return {
|
|
2202
2332
|
...fallbackResult,
|
|
@@ -2215,6 +2345,23 @@ async function invokeModel(body, options = {}) {
|
|
|
2215
2345
|
metricsCollector.recordDatabricksRequest(false, retries);
|
|
2216
2346
|
healthTracker.recordFailure(fallbackProvider, fallbackErr, fallbackErr.status);
|
|
2217
2347
|
|
|
2348
|
+
// Record double-failure telemetry
|
|
2349
|
+
telemetry.record({
|
|
2350
|
+
request_id: crypto.randomUUID(),
|
|
2351
|
+
session_id: body._sessionId || null,
|
|
2352
|
+
timestamp: Date.now(),
|
|
2353
|
+
complexity_score: routingResult.score ?? null,
|
|
2354
|
+
tier: routingDecision.tier,
|
|
2355
|
+
provider: fallbackProvider,
|
|
2356
|
+
model: routingDecision.model,
|
|
2357
|
+
routing_method: "fallback",
|
|
2358
|
+
was_fallback: true,
|
|
2359
|
+
latency_ms: Date.now() - startTime,
|
|
2360
|
+
status_code: fallbackErr.status || null,
|
|
2361
|
+
error_type: fallbackErr.code || fallbackErr.name || "double_failure",
|
|
2362
|
+
quality_score: 0,
|
|
2363
|
+
});
|
|
2364
|
+
|
|
2218
2365
|
logger.error({
|
|
2219
2366
|
originalProvider: initialProvider,
|
|
2220
2367
|
fallbackProvider,
|