lynkr 9.0.1 → 9.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +70 -21
  2. package/bin/cli.js +34 -4
  3. package/bin/lynkr-trajectory.js +136 -0
  4. package/bin/lynkr-usage.js +219 -0
  5. package/funding.json +110 -0
  6. package/index.js +7 -3
  7. package/install.sh +3 -3
  8. package/lynkr-skill.tar.gz +0 -0
  9. package/native/Cargo.toml +26 -0
  10. package/native/index.js +29 -0
  11. package/native/lynkr-native.node +0 -0
  12. package/native/src/lib.rs +321 -0
  13. package/package.json +6 -5
  14. package/public/dashboard.html +665 -0
  15. package/src/api/files-multipart.js +30 -0
  16. package/src/api/files-router.js +81 -0
  17. package/src/api/middleware/budget.js +19 -1
  18. package/src/api/middleware/load-shedding.js +17 -0
  19. package/src/api/openai-router.js +353 -301
  20. package/src/api/router.js +275 -40
  21. package/src/cache/prompt.js +13 -0
  22. package/src/clients/databricks.js +42 -18
  23. package/src/clients/ollama-utils.js +21 -17
  24. package/src/clients/openai-format.js +50 -10
  25. package/src/clients/openrouter-utils.js +42 -37
  26. package/src/clients/prompt-cache-injection.js +140 -0
  27. package/src/clients/provider-capabilities.js +41 -0
  28. package/src/clients/responses-format.js +8 -7
  29. package/src/clients/standard-tools.js +1 -1
  30. package/src/clients/xml-tool-extractor.js +307 -0
  31. package/src/cluster.js +82 -0
  32. package/src/config/index.js +16 -0
  33. package/src/context/distill.js +15 -0
  34. package/src/context/tool-result-compressor.js +563 -0
  35. package/src/dashboard/api.js +170 -0
  36. package/src/dashboard/router.js +13 -0
  37. package/src/headroom/client.js +3 -109
  38. package/src/headroom/index.js +0 -14
  39. package/src/memory/extractor.js +22 -0
  40. package/src/memory/search.js +0 -50
  41. package/src/orchestrator/index.js +163 -204
  42. package/src/orchestrator/preflight.js +188 -0
  43. package/src/routing/index.js +64 -32
  44. package/src/routing/interaction.js +183 -0
  45. package/src/routing/risk-analyzer.js +194 -0
  46. package/src/routing/telemetry.js +47 -2
  47. package/src/server.js +15 -0
  48. package/src/stores/file-store.js +104 -0
  49. package/src/stores/response-store.js +25 -0
  50. package/src/tools/index.js +1 -1
  51. package/src/tools/smart-selection.js +11 -2
  52. package/src/tools/web.js +1 -1
  53. package/src/training/trajectory-compressor.js +266 -0
  54. package/src/usage/aggregator.js +206 -0
  55. package/src/utils/markdown-ansi.js +146 -0
  56. package/.lynkr/telemetry.db +0 -0
  57. package/.lynkr/telemetry.db-shm +0 -0
  58. package/.lynkr/telemetry.db-wal +0 -0
@@ -17,6 +17,7 @@ const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = req
17
17
  const { createAuditLogger } = require("../logger/audit-logger");
18
18
  const { getResolvedIp, runWithDnsContext } = require("../clients/dns-logger");
19
19
  const { getShuttingDown } = require("../api/health");
20
+ const { tryPreflight, buildSatisfiedResponse: buildPreflightResponse } = require("./preflight");
20
21
  const crypto = require("crypto");
21
22
  const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers");
22
23
  const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic");
@@ -68,7 +69,6 @@ const DROP_KEYS = new Set([
68
69
  "beta",
69
70
  "context_management",
70
71
  "stream",
71
- "thinking",
72
72
  "max_steps",
73
73
  "max_duration_ms",
74
74
  ]);
@@ -187,7 +187,14 @@ function normaliseMessages(payload, options = {}) {
187
187
  const rawContent = message.content;
188
188
  let content;
189
189
  if (Array.isArray(rawContent)) {
190
- content = flattenContent ? flattenBlocks(rawContent) : rawContent.slice();
190
+ const hasToolBlocks = rawContent.some(
191
+ (b) => b && (b.type === "tool_use" || b.type === "tool_result" || b.type === "document" || b.type === "image" || b.type === "thinking")
192
+ );
193
+ if (hasToolBlocks) {
194
+ content = rawContent.slice();
195
+ } else {
196
+ content = flattenContent ? flattenBlocks(rawContent) : rawContent.slice();
197
+ }
191
198
  } else if (rawContent === undefined || rawContent === null) {
192
199
  content = flattenContent ? "" : rawContent;
193
200
  } else if (typeof rawContent === "string") {
@@ -197,7 +204,11 @@ function normaliseMessages(payload, options = {}) {
197
204
  } else {
198
205
  content = rawContent;
199
206
  }
200
- normalised.push({ role, content });
207
+ const entry = { role, content };
208
+ if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
209
+ entry.tool_calls = message.tool_calls;
210
+ }
211
+ normalised.push(entry);
201
212
  }
202
213
  }
203
214
  return normalised;
@@ -470,8 +481,8 @@ function injectToolLoopStopInstruction(messages, threshold = 5) {
470
481
  // requests escape it.
471
482
 
472
483
  const DEDUP_MAX_SIGNATURES = 50;
473
- const DEDUP_WARN_THRESHOLD = 2;
474
- const DEDUP_TERMINATE_THRESHOLD = 3;
484
+ const DEDUP_WARN_THRESHOLD = 5;
485
+ const DEDUP_TERMINATE_THRESHOLD = 8;
475
486
 
476
487
  /**
477
488
  * Initialise session.metadata.toolCallDedup if missing.
@@ -1021,10 +1032,14 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
1021
1032
  const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
1022
1033
  const contentItems = [];
1023
1034
 
1024
- if (wantsThinking) {
1035
+ // Pass through real reasoning_content as a thinking block
1036
+ const reasoningContent = typeof message.reasoning_content === "string" ? message.reasoning_content : "";
1037
+ if (reasoningContent && wantsThinking) {
1038
+ contentItems.push({ type: "thinking", thinking: reasoningContent });
1039
+ } else if (wantsThinking) {
1025
1040
  contentItems.push({
1026
1041
  type: "thinking",
1027
- thinking: "Reasoning not available from the backing Databricks model.",
1042
+ thinking: "Reasoning not available from the backing model.",
1028
1043
  });
1029
1044
  }
1030
1045
 
@@ -1220,6 +1235,13 @@ function sanitizePayload(payload) {
1220
1235
  }
1221
1236
  DROP_KEYS.forEach((key) => delete clean[key]);
1222
1237
 
1238
+ // Conditionally keep or strip the `thinking` parameter based on provider
1239
+ const { getThinkingBehavior } = require("../clients/provider-capabilities");
1240
+ const thinkingBehavior = getThinkingBehavior(providerType, clean.model);
1241
+ if (clean.thinking && thinkingBehavior !== "native") {
1242
+ delete clean.thinking;
1243
+ }
1244
+
1223
1245
  if (Array.isArray(clean.tools) && clean.tools.length === 0) {
1224
1246
  delete clean.tools;
1225
1247
  } else if (providerType === "databricks") {
@@ -1362,7 +1384,9 @@ function sanitizePayload(payload) {
1362
1384
  clean.tools = selectedTools.length > 0 ? selectedTools : undefined;
1363
1385
  }
1364
1386
 
1365
- clean.stream = payload?.stream ?? false;
1387
+ // Always false: the agent loop needs buffered JSON to parse tool calls.
1388
+ // Lynkr synthesises SSE back to the client from the buffered response.
1389
+ clean.stream = false;
1366
1390
 
1367
1391
  if (
1368
1392
  config.modelProvider?.type === "azure-anthropic" &&
@@ -1397,47 +1421,37 @@ function sanitizePayload(payload) {
1397
1421
  applyToonCompression(clean, config.toon, { logger });
1398
1422
 
1399
1423
  // FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
1400
- // Strategy: Merge all consecutive messages, add instruction to focus on last request
1424
+ // Strategy: Merge consecutive same-role messages, but NEVER merge messages
1425
+ // that contain tool_use or tool_result blocks — they must stay intact for
1426
+ // the provider's tool-call protocol.
1401
1427
  if (Array.isArray(clean.messages) && clean.messages.length > 0) {
1402
1428
  const merged = [];
1403
1429
  const messages = clean.messages;
1404
1430
 
1431
+ const hasToolContent = (msg) => {
1432
+ if (Array.isArray(msg?.content)) {
1433
+ return msg.content.some(b => b && (b.type === 'tool_use' || b.type === 'tool_result'));
1434
+ }
1435
+ return Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0;
1436
+ };
1437
+
1405
1438
  for (let i = 0; i < messages.length; i++) {
1406
1439
  const msg = messages[i];
1440
+ const prev = merged.length > 0 ? merged[merged.length - 1] : null;
1407
1441
 
1408
- if (merged.length > 0 && msg.role === merged[merged.length - 1].role) {
1409
- // Merge content with the previous message of the same role
1410
- const prevMsg = merged[merged.length - 1];
1411
- const prevContent = typeof prevMsg.content === 'string' ? prevMsg.content : JSON.stringify(prevMsg.content);
1442
+ if (prev && msg.role === prev.role && !hasToolContent(msg) && !hasToolContent(prev)) {
1443
+ const prevContent = typeof prev.content === 'string' ? prev.content : JSON.stringify(prev.content);
1412
1444
  const currContent = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
1413
- prevMsg.content = prevContent + '\n\n' + currContent;
1414
-
1415
- logger.debug({
1416
- mergedRole: msg.role,
1417
- addedContentPreview: currContent.substring(0, 50)
1418
- }, 'Merged consecutive message with same role');
1445
+ prev.content = prevContent + '\n\n' + currContent;
1419
1446
  } else {
1420
1447
  merged.push({ ...msg });
1421
1448
  }
1422
1449
  }
1423
1450
 
1424
- // If the last message is from user, add instruction to focus on the actual request
1425
- if (merged.length > 0 && merged[merged.length - 1].role === 'user') {
1426
- const lastMsg = merged[merged.length - 1];
1427
- const content = typeof lastMsg.content === 'string' ? lastMsg.content : JSON.stringify(lastMsg.content);
1428
-
1429
- // Find the last actual user request (after all the context/instructions)
1430
- // Add a clear separator to help the model focus
1431
- if (content.length > 500) {
1432
- lastMsg.content = content + '\n\n---\nIMPORTANT: Focus on and respond ONLY to my most recent request above. Do not summarize or acknowledge previous instructions.';
1433
- }
1434
- }
1435
-
1436
1451
  if (merged.length !== clean.messages.length) {
1437
1452
  logger.debug({
1438
1453
  originalCount: clean.messages.length,
1439
1454
  mergedCount: merged.length,
1440
- reduced: clean.messages.length - merged.length
1441
1455
  }, 'Merged consecutive messages with same role');
1442
1456
  }
1443
1457
 
@@ -1788,9 +1802,11 @@ async function runAgentLoop({
1788
1802
  }
1789
1803
  }
1790
1804
 
1791
- // Inject tool termination instructions for non-Claude models
1792
- // This helps models know when to stop calling tools and provide a text response
1793
- if (steps === 1 && providerType !== 'databricks' && providerType !== 'azure-anthropic') {
1805
+ const hasRequestTools = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0;
1806
+ // Inject tool termination instructions for non-Claude models only when tools
1807
+ // are actually in the request. Injecting when there are no tools confuses models
1808
+ // like MiniMax into hallucinating tool_use blocks spontaneously.
1809
+ if (steps === 1 && hasRequestTools && providerType !== 'databricks' && providerType !== 'azure-anthropic') {
1794
1810
  const toolTerminationInstruction = `
1795
1811
 
1796
1812
  IMPORTANT TOOL USAGE RULES:
@@ -1804,6 +1820,13 @@ IMPORTANT TOOL USAGE RULES:
1804
1820
  logger.debug({ sessionId: session?.id ?? null }, 'Tool termination instructions injected for non-Claude model');
1805
1821
  }
1806
1822
 
1823
+ // When no tools are in the request, explicitly forbid tool_use output for
1824
+ // Ollama models that have been trained on Claude Code data and tend to emit
1825
+ // tool_use blocks spontaneously (e.g. minimax-m2.5:cloud calling Write).
1826
+ if (steps === 1 && !hasRequestTools && providerType === 'ollama') {
1827
+ cleanPayload.system = (cleanPayload.system || '') + '\n\nCRITICAL: You have NO tools available. Do NOT generate tool_use, function_call, or code_execution blocks. Output ONLY text content directly.';
1828
+ }
1829
+
1807
1830
  // Compute model-aware token budget thresholds
1808
1831
  const registry = getModelRegistrySync();
1809
1832
  const modelInfo = registry.getCost(requestedModel);
@@ -1882,7 +1905,17 @@ IMPORTANT TOOL USAGE RULES:
1882
1905
  cleanPayload.tools || [],
1883
1906
  {
1884
1907
  mode: config.headroom?.mode,
1885
- queryContext: cleanPayload.messages[cleanPayload.messages.length - 1]?.content,
1908
+ queryContext: (() => {
1909
+ const last = cleanPayload.messages[cleanPayload.messages.length - 1]?.content;
1910
+ if (typeof last === 'string') return last;
1911
+ if (Array.isArray(last)) {
1912
+ return last
1913
+ .map(b => (b?.type === 'text' ? b.text : b?.type === 'tool_result' ? String(b.content ?? '') : ''))
1914
+ .filter(Boolean)
1915
+ .join('\n') || null;
1916
+ }
1917
+ return null;
1918
+ })(),
1886
1919
  model: requestedModel,
1887
1920
  modelLimit: modelContextWindow,
1888
1921
  tokenBudget: effectiveMax,
@@ -1933,6 +1966,14 @@ IMPORTANT TOOL USAGE RULES:
1933
1966
  cleanPayload._workspace = headers["x-lynkr-workspace"];
1934
1967
  }
1935
1968
 
1969
+ // RTK-inspired tool result compression: compress large tool_results
1970
+ // before they reach the model (saves 60-90% on test/git/lint output)
1971
+ if (config.toolResultCompression?.enabled !== false) {
1972
+ const { compressToolResults } = require("../context/tool-result-compressor");
1973
+ const tier = cleanPayload._routingTier || "MEDIUM";
1974
+ compressToolResults(cleanPayload.messages, { tier });
1975
+ }
1976
+
1936
1977
  if (agentTimer) agentTimer.mark("preInvokeModel");
1937
1978
  let databricksResponse;
1938
1979
  try {
@@ -2127,6 +2168,21 @@ IMPORTANT TOOL USAGE RULES:
2127
2168
  _anthropic_block: block,
2128
2169
  }));
2129
2170
 
2171
+ // Extract tool calls from text blocks that contain XML (some Ollama models)
2172
+ if (toolCalls.length === 0) {
2173
+ const { extractToolCallsFromText } = require("../clients/xml-tool-extractor");
2174
+ for (const block of contentArray) {
2175
+ if (block?.type === "text" && block?.text) {
2176
+ const extracted = extractToolCallsFromText(block.text);
2177
+ if (extracted.toolCalls.length > 0) {
2178
+ toolCalls = extracted.toolCalls;
2179
+ block.text = extracted.cleanedText || "";
2180
+ break;
2181
+ }
2182
+ }
2183
+ }
2184
+ }
2185
+
2130
2186
  logger.debug(
2131
2187
  {
2132
2188
  sessionId: session?.id ?? null,
@@ -2141,6 +2197,17 @@ IMPORTANT TOOL USAGE RULES:
2141
2197
  const choice = databricksResponse.json?.choices?.[0];
2142
2198
  message = choice?.message ?? {};
2143
2199
  toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
2200
+
2201
+ // Extract tool calls embedded as XML/text in content (Minimax, Qwen, GLM, Llama, etc.)
2202
+ if (toolCalls.length === 0 && typeof message.content === "string" && message.content.trim()) {
2203
+ const { extractToolCallsFromText } = require("../clients/xml-tool-extractor");
2204
+ const extracted = extractToolCallsFromText(message.content);
2205
+ if (extracted.toolCalls.length > 0) {
2206
+ toolCalls = extracted.toolCalls;
2207
+ message.tool_calls = toolCalls;
2208
+ message.content = extracted.cleanedText;
2209
+ }
2210
+ }
2144
2211
  }
2145
2212
 
2146
2213
  // Guard: drop hallucinated tool calls when no tools were sent to the model.
@@ -2155,7 +2222,30 @@ IMPORTANT TOOL USAGE RULES:
2155
2222
  noToolInjection: !!cleanPayload._noToolInjection,
2156
2223
  }, "Dropped hallucinated tool calls (no tools were sent to model)");
2157
2224
  toolCalls = [];
2158
- // If there's also no text content, treat as empty response (handled below)
2225
+
2226
+ // Check if there is any text content alongside the hallucinated tool calls.
2227
+ // If not, the response is effectively empty. Inject a redirect message so the
2228
+ // model outputs the artifact directly instead of looping tool-call attempts.
2229
+ const hasTextContent = isAnthropicFormat
2230
+ ? (databricksResponse.json?.content ?? []).some(b => b?.type === "text" && String(b.text || "").trim().length > 0)
2231
+ : (typeof message.content === "string" && message.content.trim().length > 0);
2232
+
2233
+ if (!hasTextContent && steps < settings.maxSteps - 1) {
2234
+ logger.info({
2235
+ sessionId: session?.id ?? null,
2236
+ step: steps,
2237
+ }, "Hallucinated tool calls with no text content — injecting redirect to force direct output");
2238
+
2239
+ // Push a user redirect message (no assistant turn is added here) so the
2240
+ // model outputs the artifact directly.
2241
+ const redirectUser = {
2242
+ role: "user",
2243
+ content: "You don't have any tools available in this context. Please output the result directly as an <artifact identifier=\"design.html\" type=\"text/html\" title=\"Design\"> block containing complete HTML. Do not attempt to call any tools.",
2244
+ };
2245
+ cleanPayload.messages.push(redirectUser);
2246
+ steps++;
2247
+ continue;
2248
+ }
2159
2249
  }
2160
2250
 
2161
2251
  if (toolCalls.length > 0) {
@@ -2167,6 +2257,7 @@ IMPORTANT TOOL USAGE RULES:
2167
2257
  } else {
2168
2258
  // Convert OpenAI/OpenRouter format to Anthropic content blocks
2169
2259
  const contentBlocks = [];
2260
+ let toolCallIdx = 0;
2170
2261
 
2171
2262
  // Add text content if present
2172
2263
  if (message.content && typeof message.content === 'string' && message.content.trim()) {
@@ -2198,7 +2289,7 @@ IMPORTANT TOOL USAGE RULES:
2198
2289
 
2199
2290
  contentBlocks.push({
2200
2291
  type: "tool_use",
2201
- id: toolCall.id || `toolu_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
2292
+ id: toolCall.id || `toolu_${Date.now()}_${(toolCallIdx++).toString(36)}_${Math.random().toString(36).substr(2, 6)}`,
2202
2293
  name: func.name || toolCall.name || "unknown",
2203
2294
  input
2204
2295
  });
@@ -2262,7 +2353,7 @@ IMPORTANT TOOL USAGE RULES:
2262
2353
  const serverSideToolCalls = [];
2263
2354
  const clientSideToolCalls = [];
2264
2355
 
2265
- const SERVER_SIDE_TOOLS = new Set(["task", "web_search", "web_fetch", "websearch", "webfetch", "web_agent"]);
2356
+ const SERVER_SIDE_TOOLS = new Set(["task", "Task", "web_search", "web_fetch", "websearch", "webfetch", "web_agent", "WebSearch", "WebFetch", "WebAgent"]);
2266
2357
 
2267
2358
  for (const call of toolCalls) {
2268
2359
  const toolName = (call.function?.name ?? call.name ?? "").toLowerCase();
@@ -2285,7 +2376,9 @@ IMPORTANT TOOL USAGE RULES:
2285
2376
  executionMode,
2286
2377
  clientTools: clientSideToolCalls.map((c) => c.function?.name ?? c.name),
2287
2378
  },
2288
- "Hybrid mode: returning non-Task tools to client, executing Task tools on server"
2379
+ clientSideToolCalls.length > 1
2380
+ ? `Parallel tool passthrough: ${clientSideToolCalls.length} tools → client`
2381
+ : "Hybrid mode: returning non-Task tools to client, executing Task tools on server"
2289
2382
  );
2290
2383
 
2291
2384
  // Filter sessionContent to only include client-side tool_use blocks
@@ -2322,26 +2415,11 @@ IMPORTANT TOOL USAGE RULES:
2322
2415
  // then continue the conversation loop. For now, let's fall through to execute server-side tools.
2323
2416
  if (serverSideToolCalls.length === 0) {
2324
2417
  // No server-side tools - pure passthrough
2325
- // Record outbound client-side tool calls into cross-request dedup tracker
2326
- if (session && clientSideToolCalls.length > 0) {
2327
- ensureDedupStructure(session);
2328
- for (const call of clientSideToolCalls) {
2329
- recordCrossRequestToolCall(session, call);
2330
- }
2331
- // Persist dedup state (non-ephemeral sessions only)
2332
- if (session.id && !session._ephemeral) {
2333
- try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
2334
- logger.debug({ err: e.message }, "Failed to persist outbound dedup state");
2335
- }
2336
- }
2337
- const { maxCount, toolName: dedupTool } = getMaxDedupCount(session);
2338
- logger.debug({
2339
- sessionId: session?.id ?? null,
2340
- clientToolCount: clientSideToolCalls.length,
2341
- maxDedupCount: maxCount,
2342
- maxDedupTool: dedupTool,
2343
- }, "Cross-request tool dedup: recorded outbound tool calls");
2344
- }
2418
+ // Do NOT record outbound tool calls here: the inbound recording
2419
+ // on the next request (when the client sends results back) is
2420
+ // enough to detect real loops. Recording both outbound + inbound
2421
+ // for the same call double-counts and triggers the dedup warning
2422
+ // on the very first normal tool round-trip.
2345
2423
 
2346
2424
  return {
2347
2425
  response: {
@@ -3646,6 +3724,28 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3646
3724
  };
3647
3725
  }
3648
3726
 
3727
+ // === PREFLIGHT CHECK ===
3728
+ // If the request supplied preflight_commands and they all pass in
3729
+ // the workspace, the work is already done — short-circuit with a
3730
+ // synthetic response and never touch the model. No-op when the
3731
+ // feature is disabled or the request didn't opt in.
3732
+ const preflightResult = tryPreflight({ payload, cwd });
3733
+ if (preflightResult?.satisfied) {
3734
+ logger.info({
3735
+ commands: preflightResult.results.length,
3736
+ reason: preflightResult.reason,
3737
+ }, '[Preflight] Satisfied — skipping model call');
3738
+ return buildPreflightResponse({
3739
+ model: requestedModel,
3740
+ preflightResult,
3741
+ });
3742
+ }
3743
+ if (preflightResult && !preflightResult.satisfied) {
3744
+ logger.debug({
3745
+ failedCommand: preflightResult.failedCommand,
3746
+ }, '[Preflight] Not satisfied — proceeding with model call');
3747
+ }
3748
+
3649
3749
  // === TOOL LOOP GUARD (EARLY CHECK) ===
3650
3750
  // Check BEFORE sanitization since sanitizePayload removes conversation history
3651
3751
  // All providers use threshold 2 to catch loops early
@@ -3787,150 +3887,9 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3787
3887
  }
3788
3888
  }
3789
3889
 
3790
- // Client mode still uses the relaxed per-request threshold for the count-based guard
3791
- const effectiveThreshold = 10;
3792
- if (toolResultCount >= effectiveThreshold) {
3793
- logger.error({
3794
- toolResultCount,
3795
- toolUseCount,
3796
- threshold: effectiveThreshold,
3797
- sessionId: session?.id ?? null,
3798
- }, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
3799
-
3800
- let toolResultsSummary = "";
3801
- const messages = payload?.messages || [];
3802
- let lastUserTextIndex = -1;
3803
- for (let i = messages.length - 1; i >= 0; i--) {
3804
- const msg = messages[i];
3805
- if (msg?.role !== 'user') continue;
3806
- if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
3807
- lastUserTextIndex = i;
3808
- break;
3809
- }
3810
- if (Array.isArray(msg.content)) {
3811
- const hasText = msg.content.some(block =>
3812
- (block?.type === 'text' && block?.text?.trim?.().length > 0) ||
3813
- (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
3814
- );
3815
- if (hasText) {
3816
- lastUserTextIndex = i;
3817
- break;
3818
- }
3819
- }
3820
- }
3821
- const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
3822
- for (let i = startIndex; i < messages.length; i++) {
3823
- const msg = messages[i];
3824
- if (!msg || !Array.isArray(msg.content)) continue;
3825
- for (const block of msg.content) {
3826
- if (block?.type === 'tool_result' && block?.content) {
3827
- const content = typeof block.content === 'string'
3828
- ? block.content
3829
- : JSON.stringify(block.content);
3830
- if (content && !content.includes('Found 0')) {
3831
- toolResultsSummary += content + "\n";
3832
- }
3833
- }
3834
- }
3835
- }
3836
-
3837
- let responseText = `Based on the tool results, here's what I found:\n\n`;
3838
- if (toolResultsSummary.trim()) {
3839
- responseText += toolResultsSummary.trim();
3840
- } else {
3841
- responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
3842
- }
3843
-
3844
- const forcedResponse = {
3845
- id: `msg_forced_${Date.now()}`,
3846
- type: "message",
3847
- role: "assistant",
3848
- content: [{ type: "text", text: responseText }],
3849
- model: requestedModel || "unknown",
3850
- stop_reason: "end_turn",
3851
- stop_sequence: null,
3852
- usage: { input_tokens: 0, output_tokens: 100 },
3853
- };
3854
-
3855
- return {
3856
- status: 200,
3857
- body: forcedResponse,
3858
- terminationReason: "tool_loop_guard",
3859
- };
3860
- }
3861
- } else {
3862
- // Server mode: use existing threshold 2 with countToolCallsInHistory
3863
- const effectiveThreshold = toolLoopThreshold;
3864
-
3865
- if (toolResultCount >= effectiveThreshold) {
3866
- logger.error({
3867
- toolResultCount,
3868
- toolUseCount,
3869
- threshold: effectiveThreshold,
3870
- sessionId: session?.id ?? null,
3871
- }, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
3872
-
3873
- let toolResultsSummary = "";
3874
- const messages = payload?.messages || [];
3875
- let lastUserTextIndex = -1;
3876
- for (let i = messages.length - 1; i >= 0; i--) {
3877
- const msg = messages[i];
3878
- if (msg?.role !== 'user') continue;
3879
- if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
3880
- lastUserTextIndex = i;
3881
- break;
3882
- }
3883
- if (Array.isArray(msg.content)) {
3884
- const hasText = msg.content.some(block =>
3885
- (block?.type === 'text' && block?.text?.trim?.().length > 0) ||
3886
- (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
3887
- );
3888
- if (hasText) {
3889
- lastUserTextIndex = i;
3890
- break;
3891
- }
3892
- }
3893
- }
3894
- const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
3895
- for (let i = startIndex; i < messages.length; i++) {
3896
- const msg = messages[i];
3897
- if (!msg || !Array.isArray(msg.content)) continue;
3898
- for (const block of msg.content) {
3899
- if (block?.type === 'tool_result' && block?.content) {
3900
- const content = typeof block.content === 'string'
3901
- ? block.content
3902
- : JSON.stringify(block.content);
3903
- if (content && !content.includes('Found 0')) {
3904
- toolResultsSummary += content + "\n";
3905
- }
3906
- }
3907
- }
3908
- }
3909
-
3910
- let responseText = `Based on the tool results, here's what I found:\n\n`;
3911
- if (toolResultsSummary.trim()) {
3912
- responseText += toolResultsSummary.trim();
3913
- } else {
3914
- responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
3915
- }
3916
-
3917
- const forcedResponse = {
3918
- id: `msg_forced_${Date.now()}`,
3919
- type: "message",
3920
- role: "assistant",
3921
- content: [{ type: "text", text: responseText }],
3922
- model: requestedModel || "unknown",
3923
- stop_reason: "end_turn",
3924
- stop_sequence: null,
3925
- usage: { input_tokens: 0, output_tokens: 100 },
3926
- };
3927
-
3928
- return {
3929
- status: 200,
3930
- body: forcedResponse,
3931
- terminationReason: "tool_loop_guard",
3932
- };
3933
- }
3890
+ // No count-based tool_loop_guard. Natural limits (maxSteps, maxDurationMs,
3891
+ // provider token/rate limits, client-side loop detection, and the
3892
+ // cross-request dedup above) are sufficient protection.
3934
3893
  }
3935
3894
 
3936
3895
  const { createTimer } = require("../utils/perf-timer");