lynkr 9.4.6 → 9.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,95 @@
1
+ /**
2
+ * MCP-aware Tool Dedup
3
+ *
4
+ * Strips built-in tool definitions when an equivalent MCP tool is present in
5
+ * the request. Sending both wastes tool-schema tokens and gives the model
6
+ * redundant choices. Rule-based and deterministic.
7
+ *
8
+ * Example: if the Exa or Tavily MCP search tools are present, the built-in
9
+ * WebSearch/WebFetch tools are redundant and dropped.
10
+ *
11
+ * Ported from 9router's toolDeduper. Always on — purely removes redundant
12
+ * tool definitions, never adds.
13
+ *
14
+ * @module context/tool-dedup
15
+ */
16
+
17
+ const logger = require("../logger");
18
+
19
// Dedup rule table. A rule fires when at least one tool in the request
// matches one of its `triggers`; every tool matching one of its `strip`
// patterns is then removed. A pattern is either an exact tool name (string)
// or a RegExp tested against the tool name.
const DEDUP_RULES = (() => {
  // Built-in web tools that an MCP search/fetch provider makes redundant.
  const builtinWebTools = ["WebSearch", "WebFetch", "web_search", "web_fetch", "mcp__workspace__web_fetch"];

  // Exa MCP present → drop built-in web tools (Exa is preferred).
  const exa = {
    triggers: ["mcp__exa__web_search_exa", "mcp__exa__web_fetch_exa"],
    strip: [...builtinWebTools],
  };

  // Tavily MCP present → drop built-in web tools.
  const tavily = {
    triggers: ["mcp__tavily__tavily_search", "mcp__tavily__tavily_extract"],
    strip: [...builtinWebTools],
  };

  // Browser MCP present → drop a duplicate Chrome-connector tool family.
  const browser = {
    triggers: [/^mcp__browsermcp__/],
    strip: [/^mcp__Claude_in_Chrome__/],
  };

  return [exa, tavily, browser];
})();
38
+
39
/**
 * Read a tool definition's name from either supported shape: a top-level
 * `name` or a nested `function.name`.
 *
 * @param {object} t - Tool definition (may be null/undefined).
 * @returns {string} the first truthy name found, or "" when there is none.
 */
function getToolName(t) {
  if (t == null) return "";
  if (t.name) return t.name;
  const nested = t.function;
  if (nested != null && nested.name) return nested.name;
  return "";
}
42
+
43
/**
 * Test a tool name against a single rule pattern.
 *
 * @param {string} name - Tool name to test.
 * @param {string|RegExp} pattern - Exact name, or a RegExp tested against it.
 * @returns {boolean} true on a match; false for any other pattern type.
 */
function matches(name, pattern) {
  if (typeof pattern === "string") return name === pattern;
  if (!(pattern instanceof RegExp)) return false;
  // A RegExp with the `g` or `y` flag remembers `lastIndex` between calls, so
  // reusing one rule pattern across many names would alternate between
  // true and false. Rewind it so every call starts from the beginning.
  if (pattern.global || pattern.sticky) pattern.lastIndex = 0;
  return pattern.test(name);
}
47
+
48
/**
 * Drop tools that a rule in DEDUP_RULES marks as redundant because one of
 * that rule's trigger tools is also in the list.
 *
 * The input is returned untouched (same array reference) when it is not a
 * non-empty array or when no rule strips anything.
 *
 * @param {Array} tools - Tool definitions (Anthropic or OpenAI shape).
 * @returns {{tools: Array, stripped: string[]}} filtered tools + names removed.
 */
function dedupeTools(tools) {
  const untouched = { tools, stripped: [] };
  if (!Array.isArray(tools) || tools.length === 0) return untouched;

  const names = tools.map(getToolName);
  const hits = (name, patterns) => patterns.some((p) => matches(name, p));

  const doomed = new Set();
  DEDUP_RULES.forEach(({ triggers, strip }) => {
    // A rule only applies when one of its trigger tools is in the request.
    if (!names.some((name) => hits(name, triggers))) return;
    names.forEach((name) => {
      // A tool that is itself a trigger is never removed.
      if (!hits(name, triggers) && hits(name, strip)) doomed.add(name);
    });
  });

  if (doomed.size === 0) return untouched;

  return {
    tools: tools.filter((tool) => !doomed.has(getToolName(tool))),
    stripped: [...doomed],
  };
}
75
+
76
/**
 * Run tool dedup against `payload.tools`, replacing the array on the payload
 * only when at least one tool was removed (and logging the removed names at
 * debug level).
 *
 * @param {object} payload - Request body with a `tools` array.
 * @returns {string[]} names of stripped tools; empty when nothing changed or
 *   the payload has no `tools` array.
 */
function applyToolDedup(payload) {
  const current = payload ? payload.tools : undefined;
  if (!Array.isArray(current)) return [];

  const result = dedupeTools(current);
  if (result.stripped.length === 0) return result.stripped;

  payload.tools = result.tools;
  logger.debug({ stripped: result.stripped }, "[ToolDedup] Stripped redundant built-in tools (MCP equivalents present)");
  return result.stripped;
}
91
+
92
+ module.exports = {
93
+ dedupeTools,
94
+ applyToolDedup,
95
+ };
@@ -455,6 +455,107 @@ function compressContainerOutput(text) {
455
455
  return `${header}\n${dataLines.slice(0, 10).join("\n")}\n... +${dataLines.length - 10} more (${dataLines.length} total)`;
456
456
  }
457
457
 
458
// 11. Grep / ripgrep output ("file:lineno:content"), per-file match cap.
// Ported from 9router RTK grep filter (rtk/src/cmds/system/pipe_cmd.rs).
const GREP_PER_FILE_MAX = 10;

/**
 * Compress grep-style output by grouping matches per file and keeping at most
 * GREP_PER_FILE_MAX matches for each file.
 *
 * Lines that do not parse as "file:lineno:content" are ignored. Lines that
 * start with a clock time ("12:34:56 ...", "2024-01-01T12:34:56Z ...",
 * "[12:34:56] ...") are also ignored: they split into the same three fields
 * but are log timestamps, and treating them as matches would cut a
 * timestamped log down to ten lines per "file".
 *
 * @param {string} text - Raw tool output.
 * @returns {string|null} grouped summary, or null when fewer than five grep
 *   matches were recognised.
 */
function compressGrep(text) {
  // A "file" field that is really the hour of a clock time: one or two digits
  // at the start of the line or right after whitespace, "T", "[" or "(".
  const clockHour = /(?:^|[\sT\[(])\d{1,2}$/;
  const byFile = new Map();
  let total = 0;

  for (const line of text.split("\n")) {
    // splitn(3, ':') — only split on the first two colons.
    const first = line.indexOf(":");
    if (first <= 0) continue; // no colon, or an empty file name
    const second = line.indexOf(":", first + 1);
    if (second === -1) continue;

    const file = line.slice(0, first);
    const lineNumStr = line.slice(first + 1, second);
    const content = line.slice(second + 1);
    if (!/^\d+$/.test(lineNumStr)) continue;

    // HH:MM:SS — hour, two-digit minutes, then two-digit seconds.
    const isClockTime =
      clockHour.test(file) && lineNumStr.length === 2 && /^\d{2}(?!\d)/.test(content);
    if (isClockTime) continue;

    total += 1;
    if (!byFile.has(file)) byFile.set(file, []);
    byFile.get(file).push([lineNumStr, content]);
  }

  // Require a meaningful number of matches so we don't mangle prose that
  // happens to contain a "word:123:..." line.
  if (total < 5) return null;

  const files = Array.from(byFile.keys()).sort();
  const parts = [`${total} matches in ${files.length}F:\n\n`];
  for (const file of files) {
    const hits = byFile.get(file);
    parts.push(`[file] ${file} (${hits.length}):\n`);
    for (const [lineNum, content] of hits.slice(0, GREP_PER_FILE_MAX)) {
      parts.push(` ${lineNum.padStart(4)}: ${content.trim()}\n`);
    }
    if (hits.length > GREP_PER_FILE_MAX) {
      parts.push(` +${hits.length - GREP_PER_FILE_MAX}\n`);
    }
    parts.push("\n");
  }
  return parts.join("");
}
499
+
500
// 12. Generic log de-duplication: collapse consecutive duplicate lines and
// runs of blank lines, with a hard line cap. Ported from 9router RTK dedupLog.
const DEDUP_LINE_MAX = 2000;

/**
 * Collapse runs of identical consecutive lines into the first line plus a
 * "... (N duplicate lines)" marker, squeeze runs of blank lines down to one,
 * and stop after DEDUP_LINE_MAX kept lines.
 *
 * @param {string} text - Raw tool output.
 * @returns {string|null} the collapsed text, or null when nothing was
 *   collapsed or truncated (the same "not applicable" signal the other
 *   compressors in this file return).
 */
function compressDedupLog(text) {
  const out = [];
  let prev = null;
  let runCount = 0;
  let blankStreak = 0;

  // Emit the marker for the run that just ended (if it had repeats) and
  // forget the run so it cannot be reported twice.
  const flushRun = () => {
    if (prev !== null && runCount > 1) {
      out.push(` ... (${runCount - 1} duplicate lines)`);
    }
    prev = null;
    runCount = 0;
  };

  for (const line of text.split("\n")) {
    if (line.trim() === "") {
      // Close the pending run BEFORE emitting the blank line so the marker
      // stays next to the line it describes.
      flushRun();
      if (blankStreak < 1) out.push(line);
      blankStreak += 1;
      continue;
    }
    blankStreak = 0;
    if (line === prev) {
      runCount += 1;
      continue;
    }
    flushRun();
    out.push(line);
    prev = line;
    runCount = 1;
    if (out.length >= DEDUP_LINE_MAX) {
      out.push(`... (truncated at ${DEDUP_LINE_MAX} lines)`);
      return out.join("\n");
    }
  }
  flushRun();

  const result = out.join("\n");
  // Nothing collapsed: report "not applicable" instead of echoing the input.
  return result === text ? null : result;
}
542
+
543
// 13. Last-resort generic truncation: keep head + tail lines, drop the middle.
// Only kicks in for very long output no specific compressor matched.
// Ported from 9router RTK smartTruncate.
const SMART_TRUNCATE_HEAD = 120;
const SMART_TRUNCATE_TAIL = 60;
const SMART_TRUNCATE_MIN_LINES = 250;

/**
 * Keep the first SMART_TRUNCATE_HEAD and last SMART_TRUNCATE_TAIL lines of a
 * long text, replacing everything in between with a one-line marker.
 *
 * @param {string} text - Raw tool output.
 * @returns {string|null} truncated text, or null when the input has fewer
 *   than SMART_TRUNCATE_MIN_LINES lines.
 */
function compressSmartTruncate(text) {
  const all = text.split("\n");
  const count = all.length;
  if (count < SMART_TRUNCATE_MIN_LINES) return null;

  const kept = [];
  for (let i = 0; i < SMART_TRUNCATE_HEAD; i += 1) {
    kept.push(all[i]);
  }
  kept.push(`... +${count - SMART_TRUNCATE_HEAD - SMART_TRUNCATE_TAIL} lines truncated`);
  for (let i = count - SMART_TRUNCATE_TAIL; i < count; i += 1) {
    kept.push(all[i]);
  }
  return kept.join("\n");
}
558
+
458
559
  // ── Compression Pipeline ─────────────────────────────────────────────
459
560
 
460
561
  const COMPRESSORS = [
@@ -466,8 +567,13 @@ const COMPRESSORS = [
466
567
  { name: "build_output", fn: compressBuildOutput },
467
568
  { name: "container_output", fn: compressContainerOutput },
468
569
  { name: "json_response", fn: compressJSON },
570
+ { name: "grep_output", fn: compressGrep },
469
571
  { name: "directory_listing", fn: compressDirectoryListing },
470
572
  { name: "large_file", fn: compressLargeFile },
573
+ // Generic fallbacks last: dedup exact-duplicate spam, then hard head/tail
574
+ // truncation only if nothing more specific applied.
575
+ { name: "dedup_log", fn: compressDedupLog },
576
+ { name: "smart_truncate", fn: compressSmartTruncate },
471
577
  ];
472
578
 
473
579
  // Compression levels tied to routing tiers
@@ -5,24 +5,74 @@ const metrics = require('../metrics');
5
5
  const { getMetricsCollector } = require('../observability/metrics');
6
6
  const { TIER_DEFINITIONS } = require('../routing/model-tiers');
7
7
 
8
- function getConfiguredProviders() {
8
// Builds a lookup keyed by provider name. Each entry carries the provider's
// type ('cloud' or 'local') and a `configured` flag that is true only when the
// config fields checked below (credentials and/or endpoint) are set.
function providerMeta() {
  const {
    databricks,
    azureAnthropic,
    bedrock,
    openrouter,
    openai,
    azureOpenAI,
    vertex,
    moonshot,
    ollama,
    llamacpp,
    lmstudio,
  } = config;

  const cloud = (present) => ({ type: 'cloud', configured: Boolean(present) });
  const local = (present) => ({ type: 'local', configured: Boolean(present) });

  return {
    databricks: cloud(databricks?.url && databricks?.apiKey),
    'azure-anthropic': cloud(azureAnthropic?.endpoint && azureAnthropic?.apiKey),
    bedrock: cloud(bedrock?.apiKey),
    openrouter: cloud(openrouter?.apiKey),
    openai: cloud(openai?.apiKey),
    'azure-openai': cloud(azureOpenAI?.endpoint && azureOpenAI?.apiKey),
    vertex: cloud(vertex?.projectId),
    moonshot: cloud(moonshot?.apiKey),
    ollama: local(ollama?.endpoint),
    llamacpp: local(llamacpp?.endpoint),
    lmstudio: local(lmstudio?.endpoint),
  };
}
25
+
26
// Collects the providers named by the routing config: the text before the
// first ':' of every non-blank string in `config.modelTiers` (values look like
// `provider:model[:variant]`), plus `config.modelProvider.type` under the
// label 'default'. Names are trimmed and lowercased.
// Returns Map<providerName, tierLabels[]>.
function getReferencedProviders() {
  const byProvider = new Map();

  const record = (provider, label) => {
    const name = String(provider || '').trim().toLowerCase();
    if (name === '') return;
    let labels = byProvider.get(name);
    if (labels === undefined) {
      labels = [];
      byProvider.set(name, labels);
    }
    // Each label is listed at most once per provider.
    if (label && !labels.includes(label)) labels.push(label);
  };

  Object.entries(config.modelTiers || {}).forEach(([tier, value]) => {
    if (typeof value !== 'string' || !value.trim()) return;
    const [prefix] = value.split(':');
    record(prefix, tier);
  });
  record(config.modelProvider?.type, 'default');

  return byProvider;
}
48
+
49
// Lists the providers referenced by the routing config that are usable:
// known providers whose `configured` flag is set, plus providers with no
// metadata entry at all (kept optimistically with type 'cloud', since there
// is nothing to check them against).
function getConfiguredProviders() {
  const known = providerMeta();
  return Array.from(getReferencedProviders())
    .filter(([name]) => !known[name] || known[name].configured)
    .map(([name, tiers]) => ({ name, type: known[name]?.type || 'cloud', tiers }));
}
63
+
64
// Lists referenced providers that have a metadata entry but are not
// configured, together with the tier labels that point at them, so that a
// misconfigured tier can be surfaced as a warning.
function getProviderWarnings() {
  const known = providerMeta();
  const warnings = [];
  getReferencedProviders().forEach((tiers, name) => {
    const entry = known[name];
    // Unknown providers and correctly configured ones raise no warning.
    if (!entry || entry.configured) return;
    warnings.push({ name, type: entry.type, tiers });
  });
  return warnings;
}
27
77
 
28
78
  // Noise provider names injected by unit tests — filter them out of UI
@@ -92,7 +142,8 @@ function overview(req, res) {
92
142
  port: config.port,
93
143
  version: process.env.npm_package_version || '9.0.2',
94
144
  modelProvider: config.modelProvider?.type || 'unknown',
95
- providers: getConfiguredProviders(),
145
+ providers: getConfiguredProviders(),
146
+ providerWarnings: getProviderWarnings(),
96
147
  statsWindow: win.label,
97
148
  metrics: {
98
149
  requestsTotal: snap.requestsTotal,
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Request Bypass
3
+ *
4
+ * Short-circuits Claude Code CLI housekeeping requests that don't need a real
5
+ * model call:
6
+ * - "Warmup" pings the CLI sends to prime a connection
7
+ * - Topic/title extraction (the CLI asks for {"isNewTopic":..,"title":..})
8
+ * - Single-word "count" / "Warmup" probes
9
+ *
10
+ * Returning a canned response here saves a full provider round-trip (latency
11
+ * and tokens) on every session. Inspired by 9router's bypassHandler.
12
+ *
13
+ * Always on — only ever returns a canned response for unambiguous Claude CLI
14
+ * housekeeping traffic, never for real work.
15
+ *
16
+ * @module orchestrator/bypass
17
+ */
18
+
19
+ const logger = require("../logger");
20
+
21
/**
 * Flatten Anthropic content (string | block[]) into plain text.
 * Only blocks of type "text" with a string `text` contribute; they are joined
 * with a single space. Any other input yields "".
 */
function getText(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  return content
    .filter((b) => b && b.type === "text" && typeof b.text === "string")
    .map((b) => b.text)
    .join(" ");
}

/**
 * Flatten the top-level Anthropic `system` field (string | block[]).
 * It is flattened exactly like message content, so this delegates to getText.
 */
function getSystemText(system) {
  return getText(system);
}

/**
 * Decide whether a request is a bypassable Claude CLI housekeeping call.
 *
 * Malformed input (null message entries, a text block without a string
 * `text`, null headers) never throws; it simply fails to match a pattern.
 *
 * @param {object} args
 * @param {object} args.payload - The Anthropic request body.
 * @param {object} [args.headers] - Lowercased request headers.
 * @returns {{kind: string, text: string}|null} bypass descriptor or null.
 */
function detectBypass({ payload, headers = {} } = {}) {
  if (!payload || !Array.isArray(payload.messages) || payload.messages.length === 0) {
    return null;
  }

  // Only bypass Claude CLI traffic — other clients use these endpoints for
  // real work and must never receive a canned response.
  const ua = String(headers?.["user-agent"] || "").toLowerCase();
  if (!ua.includes("claude-cli")) return null;

  const messages = payload.messages;
  const lastMsg = messages[messages.length - 1];

  // Pattern 1: Title prefill — the CLI seeds an assistant turn with just "{"
  // to coax a JSON object out of the model.
  if (lastMsg?.role === "assistant") {
    const { content } = lastMsg;
    let prefill = "";
    if (Array.isArray(content)) {
      const firstBlock = content[0];
      // Only a string `text` is usable; a block without one must not crash.
      if (firstBlock?.type === "text" && typeof firstBlock.text === "string") {
        prefill = firstBlock.text;
      }
    } else if (typeof content === "string") {
      prefill = content;
    }
    if (prefill.trim() === "{") {
      return { kind: "title_prefill", text: "{}" };
    }
  }

  // Pattern 2: Topic/title extraction — system prompt asks for isNewTopic.
  // Synthesize a title from the first user message instead of calling a model.
  const systemText = getSystemText(payload.system);
  if (systemText.includes("isNewTopic")) {
    const userMsg = messages.find((m) => m?.role === "user");
    const userText = getText(userMsg?.content).trim();
    const title = userText.split(/\s+/).filter(Boolean).slice(0, 3).join(" ");
    return {
      kind: "title_extraction",
      text: JSON.stringify({ isNewTopic: true, title }),
    };
  }

  // Pattern 3: Warmup / count probes — a single short user message.
  if (messages.length === 1 && messages[0]?.role === "user") {
    const firstText = getText(messages[0].content).trim();
    if (firstText === "Warmup" || firstText === "count") {
      return { kind: firstText.toLowerCase(), text: "OK" };
    }
  }

  return null;
}
103
+
104
/**
 * Wrap a bypass descriptor in the `{ status, body, terminationReason }`
 * object the router consumes. The body is an Anthropic-style assistant
 * message holding the canned text, tagged with `lynkr_bypass` so the
 * short-circuit is recognisable. Logs the bypass kind at info level.
 *
 * @param {{kind: string, text: string}} bypass
 * @param {string} model - Model id to echo back; a placeholder is used when
 *   it is falsy.
 * @returns {{status: number, body: object, terminationReason: string}}
 */
function buildBypassResponse(bypass, model) {
  const { kind, text } = bypass;
  logger.info({ kind }, "[Bypass] Short-circuiting CLI housekeeping request");

  const body = {
    id: `msg_bypass_${Date.now()}`,
    type: "message",
    role: "assistant",
    content: [{ type: "text", text }],
    model: model || "claude-3-unknown",
    stop_reason: "end_turn",
    stop_sequence: null,
    usage: { input_tokens: 1, output_tokens: 1 },
    lynkr_bypass: { kind },
  };

  return { status: 200, body, terminationReason: `bypass_${kind}` };
}
131
+
132
+ module.exports = {
133
+ detectBypass,
134
+ buildBypassResponse,
135
+ };
@@ -18,6 +18,7 @@ const { createAuditLogger } = require("../logger/audit-logger");
18
18
  const { getResolvedIp, runWithDnsContext } = require("../clients/dns-logger");
19
19
  const { getShuttingDown } = require("../api/health");
20
20
  const { tryPreflight, buildSatisfiedResponse: buildPreflightResponse } = require("./preflight");
21
+ const { detectBypass, buildBypassResponse } = require("./bypass");
21
22
  const crypto = require("crypto");
22
23
  const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers");
23
24
  const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic");
@@ -1362,8 +1363,12 @@ function sanitizePayload(payload) {
1362
1363
  delete clean.tool_choice;
1363
1364
  }
1364
1365
 
1365
- // Smart tool selection (universal, applies to all providers)
1366
- if (config.smartToolSelection?.enabled && Array.isArray(clean.tools) && clean.tools.length > 0) {
1366
+ // Smart tool selection (server mode only). In client/passthrough mode the
1367
+ // client (e.g. Claude Code) owns tool execution, so stripping its tools would
1368
+ // make the model emit calls for tools we removed — they then get dropped as
1369
+ // "hallucinated" and the session makes no progress. Pass tools through intact.
1370
+ const inClientMode = config.toolExecutionMode === "client" || config.toolExecutionMode === "passthrough";
1371
+ if (!inClientMode && config.smartToolSelection?.enabled && Array.isArray(clean.tools) && clean.tools.length > 0) {
1367
1372
  const classification = classifyRequestType(clean);
1368
1373
  const selectedTools = selectToolsSmartly(clean.tools, classification, {
1369
1374
  provider: providerType,
@@ -1977,6 +1982,12 @@ IMPORTANT TOOL USAGE RULES:
1977
1982
  cleanPayload._tenantPolicy = options.tenantPolicy;
1978
1983
  }
1979
1984
 
1985
+ // Thread session id for provider affinity — keeps a tool-bearing
1986
+ // conversation on one provider so tool_call_id linkage doesn't break.
1987
+ if (session?.id) {
1988
+ cleanPayload._sessionId = session.id;
1989
+ }
1990
+
1980
1991
  // RTK-inspired tool result compression: compress large tool_results
1981
1992
  // before they reach the model (saves 60-90% on test/git/lint output)
1982
1993
  if (config.toolResultCompression?.enabled !== false) {
@@ -1985,6 +1996,18 @@ IMPORTANT TOOL USAGE RULES:
1985
1996
  compressToolResults(cleanPayload.messages, { tier });
1986
1997
  }
1987
1998
 
1999
+ // MCP-aware tool dedup: drop built-in tools superseded by present MCP tools
2000
+ // (e.g. WebSearch/WebFetch when Exa/Tavily MCP is available). Always on.
2001
+ const { applyToolDedup } = require("../context/tool-dedup");
2002
+ applyToolDedup(cleanPayload);
2003
+
2004
+ // Caveman terse-output injection (opt-in): nudge the model toward shorter
2005
+ // responses to reduce output tokens.
2006
+ if (config.caveman?.enabled === true) {
2007
+ const { injectCaveman } = require("../context/caveman");
2008
+ cleanPayload.system = injectCaveman(cleanPayload.system);
2009
+ }
2010
+
1988
2011
  if (agentTimer) agentTimer.mark("preInvokeModel");
1989
2012
  let databricksResponse;
1990
2013
  try {
@@ -3735,6 +3758,14 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3735
3758
  };
3736
3759
  }
3737
3760
 
3761
+ // === REQUEST BYPASS ===
3762
+ // Claude CLI housekeeping (Warmup pings, topic/title extraction) doesn't
3763
+ // need a model call — return a canned response and skip the provider.
3764
+ const bypass = detectBypass({ payload, headers });
3765
+ if (bypass) {
3766
+ return buildBypassResponse(bypass, requestedModel);
3767
+ }
3768
+
3738
3769
  // === PREFLIGHT CHECK ===
3739
3770
  // If the request supplied preflight_commands and they all pass in
3740
3771
  // the workspace, the work is already done — short-circuit with a
@@ -138,7 +138,46 @@ function getBestLocalProvider() {
138
138
  * @param {Object} options - Routing options
139
139
  * @returns {Object} Routing decision with provider and metadata
140
140
  */
141
+ const sessionAffinity = require('./session-affinity');
142
+
143
/**
 * Provider routing with session affinity.
 *
 * If the payload carries a `_sessionId`, no provider is forced, and the
 * payload has tool history, the decision previously pinned for that session
 * (when one exists) is returned without re-running routing. In every other
 * case routing is delegated to `_determineProviderSmartInner`, and the
 * resulting decision is pinned for the session unless a provider was forced.
 */
async function determineProviderSmart(payload, options = {}) {
  const sessionId = payload?._sessionId || null;
  // Affinity is only read or written for a known session with no forced provider.
  const trackable = () => Boolean(sessionId) && !options.forceProvider;

  // Enforce affinity only for in-flight tool exchanges — the turns that 400
  // if the provider changes. Fresh turns keep full per-turn tier routing.
  if (trackable() && sessionAffinity.payloadHasToolHistory(payload)) {
    const pinned = sessionAffinity.getPinned(sessionId);
    if (pinned) {
      const { provider, model, tier } = pinned;
      logger.debug({ sessionId, provider, tier },
        '[Routing] Session affinity — reusing provider for tool-bearing turn');
      return {
        provider,
        model,
        tier,
        method: 'session_affinity',
        reason: 'tool_history_provider_pin',
      };
    }
  }

  const decision = await _determineProviderSmartInner(payload, options);

  // Remember the chosen provider so later tool-bearing turns stay consistent.
  if (decision?.provider && trackable()) {
    sessionAffinity.setPinned(sessionId, decision);
  }

  return decision;
}
179
+
180
+ async function _determineProviderSmartInner(payload, options = {}) {
142
181
  const primaryProvider = config.modelProvider?.type ?? 'databricks';
143
182
 
144
183
  // Risk analysis runs orthogonally to complexity. We compute it once