lynkr 8.0.1 → 9.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +238 -315
  2. package/bin/cli.js +16 -3
  3. package/index.js +7 -3
  4. package/install.sh +3 -3
  5. package/lynkr-skill.tar.gz +0 -0
  6. package/native/Cargo.toml +26 -0
  7. package/native/index.js +29 -0
  8. package/native/lynkr-native.node +0 -0
  9. package/native/src/lib.rs +321 -0
  10. package/package.json +8 -6
  11. package/src/api/files-multipart.js +30 -0
  12. package/src/api/files-router.js +81 -0
  13. package/src/api/openai-router.js +379 -308
  14. package/src/api/providers-handler.js +171 -3
  15. package/src/api/router.js +109 -5
  16. package/src/cache/prompt.js +13 -0
  17. package/src/clients/circuit-breaker.js +10 -247
  18. package/src/clients/codex-process.js +342 -0
  19. package/src/clients/codex-utils.js +143 -0
  20. package/src/clients/databricks.js +243 -76
  21. package/src/clients/ollama-utils.js +21 -17
  22. package/src/clients/openai-format.js +20 -6
  23. package/src/clients/openrouter-utils.js +42 -37
  24. package/src/clients/prompt-cache-injection.js +140 -0
  25. package/src/clients/provider-capabilities.js +41 -0
  26. package/src/clients/resilience.js +540 -0
  27. package/src/clients/responses-format.js +8 -7
  28. package/src/clients/retry.js +22 -167
  29. package/src/clients/standard-tools.js +1 -1
  30. package/src/clients/xml-tool-extractor.js +307 -0
  31. package/src/cluster.js +82 -0
  32. package/src/config/index.js +66 -0
  33. package/src/context/compression.js +42 -9
  34. package/src/context/distill.js +507 -0
  35. package/src/context/tool-result-compressor.js +563 -0
  36. package/src/memory/extractor.js +22 -0
  37. package/src/orchestrator/index.js +147 -205
  38. package/src/routing/complexity-analyzer.js +258 -5
  39. package/src/routing/index.js +15 -34
  40. package/src/routing/latency-tracker.js +148 -0
  41. package/src/routing/model-tiers.js +2 -0
  42. package/src/routing/quality-scorer.js +113 -0
  43. package/src/routing/telemetry.js +502 -0
  44. package/src/server.js +23 -0
  45. package/src/stores/file-store.js +69 -0
  46. package/src/stores/response-store.js +25 -0
  47. package/src/tools/code-graph.js +538 -0
  48. package/src/tools/code-mode.js +304 -0
  49. package/src/tools/index.js +1 -1
  50. package/src/tools/lazy-loader.js +11 -0
  51. package/src/tools/mcp-remote.js +7 -0
  52. package/src/tools/smart-selection.js +11 -0
  53. package/src/tools/web.js +1 -1
  54. package/src/utils/payload.js +206 -0
  55. package/src/utils/perf-timer.js +80 -0
@@ -55,6 +55,8 @@ function getDestinationUrl(providerType) {
55
55
  return config.vertex?.endpoint ?? 'unknown';
56
56
  case 'moonshot':
57
57
  return config.moonshot?.endpoint ?? 'unknown';
58
+ case 'codex':
59
+ return 'codex://app-server (local process)';
58
60
  default:
59
61
  return 'unknown';
60
62
  }
@@ -66,7 +68,6 @@ const DROP_KEYS = new Set([
66
68
  "beta",
67
69
  "context_management",
68
70
  "stream",
69
- "thinking",
70
71
  "max_steps",
71
72
  "max_duration_ms",
72
73
  ]);
@@ -185,7 +186,14 @@ function normaliseMessages(payload, options = {}) {
185
186
  const rawContent = message.content;
186
187
  let content;
187
188
  if (Array.isArray(rawContent)) {
188
- content = flattenContent ? flattenBlocks(rawContent) : rawContent.slice();
189
+ const hasToolBlocks = rawContent.some(
190
+ (b) => b && (b.type === "tool_use" || b.type === "tool_result" || b.type === "document" || b.type === "image" || b.type === "thinking")
191
+ );
192
+ if (hasToolBlocks) {
193
+ content = rawContent.slice();
194
+ } else {
195
+ content = flattenContent ? flattenBlocks(rawContent) : rawContent.slice();
196
+ }
189
197
  } else if (rawContent === undefined || rawContent === null) {
190
198
  content = flattenContent ? "" : rawContent;
191
199
  } else if (typeof rawContent === "string") {
@@ -195,7 +203,11 @@ function normaliseMessages(payload, options = {}) {
195
203
  } else {
196
204
  content = rawContent;
197
205
  }
198
- normalised.push({ role, content });
206
+ const entry = { role, content };
207
+ if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
208
+ entry.tool_calls = message.tool_calls;
209
+ }
210
+ normalised.push(entry);
199
211
  }
200
212
  }
201
213
  return normalised;
@@ -468,8 +480,8 @@ function injectToolLoopStopInstruction(messages, threshold = 5) {
468
480
  // requests escape it.
469
481
 
470
482
  const DEDUP_MAX_SIGNATURES = 50;
471
- const DEDUP_WARN_THRESHOLD = 2;
472
- const DEDUP_TERMINATE_THRESHOLD = 3;
483
+ const DEDUP_WARN_THRESHOLD = 5;
484
+ const DEDUP_TERMINATE_THRESHOLD = 8;
473
485
 
474
486
  /**
475
487
  * Initialise session.metadata.toolCallDedup if missing.
@@ -1019,10 +1031,14 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
1019
1031
  const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
1020
1032
  const contentItems = [];
1021
1033
 
1022
- if (wantsThinking) {
1034
+ // Pass through real reasoning_content as a thinking block
1035
+ const reasoningContent = typeof message.reasoning_content === "string" ? message.reasoning_content : "";
1036
+ if (reasoningContent && wantsThinking) {
1037
+ contentItems.push({ type: "thinking", thinking: reasoningContent });
1038
+ } else if (wantsThinking) {
1023
1039
  contentItems.push({
1024
1040
  type: "thinking",
1025
- thinking: "Reasoning not available from the backing Databricks model.",
1041
+ thinking: "Reasoning not available from the backing model.",
1026
1042
  });
1027
1043
  }
1028
1044
 
@@ -1085,7 +1101,10 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
1085
1101
  }
1086
1102
 
1087
1103
  function sanitizePayload(payload) {
1088
- const clean = JSON.parse(JSON.stringify(payload ?? {}));
1104
+ const { clonePayloadSmart } = require("../utils/payload");
1105
+ const providerType = config.modelProvider?.type ?? "databricks";
1106
+ const willFlatten = providerType !== "azure-anthropic";
1107
+ const clean = clonePayloadSmart(payload ?? {}, { willFlatten });
1089
1108
  const requestedModel =
1090
1109
  (typeof payload?.model === "string" && payload.model.trim().length > 0
1091
1110
  ? payload.model.trim()
@@ -1093,11 +1112,10 @@ function sanitizePayload(payload) {
1093
1112
  config.modelProvider?.defaultModel ??
1094
1113
  "databricks-claude-sonnet-4-5";
1095
1114
  clean.model = requestedModel;
1096
- if (!clean.max_tokens) {
1097
- clean.max_tokens = 16384;
1098
- }
1099
- const providerType = config.modelProvider?.type ?? "databricks";
1100
- const flattenContent = providerType !== "azure-anthropic";
1115
+ if (!clean.max_tokens) {
1116
+ clean.max_tokens = 16384;
1117
+ }
1118
+ const flattenContent = willFlatten;
1101
1119
  clean.messages = normaliseMessages(clean, { flattenContent }).filter((msg) => {
1102
1120
  const hasToolCalls =
1103
1121
  Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0;
@@ -1216,6 +1234,13 @@ function sanitizePayload(payload) {
1216
1234
  }
1217
1235
  DROP_KEYS.forEach((key) => delete clean[key]);
1218
1236
 
1237
+ // Conditionally keep or strip the `thinking` parameter based on provider
1238
+ const { getThinkingBehavior } = require("../clients/provider-capabilities");
1239
+ const thinkingBehavior = getThinkingBehavior(providerType, clean.model);
1240
+ if (clean.thinking && thinkingBehavior !== "native") {
1241
+ delete clean.thinking;
1242
+ }
1243
+
1219
1244
  if (Array.isArray(clean.tools) && clean.tools.length === 0) {
1220
1245
  delete clean.tools;
1221
1246
  } else if (providerType === "databricks") {
@@ -1393,47 +1418,37 @@ function sanitizePayload(payload) {
1393
1418
  applyToonCompression(clean, config.toon, { logger });
1394
1419
 
1395
1420
  // FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
1396
- // Strategy: Merge all consecutive messages, add instruction to focus on last request
1421
+ // Strategy: Merge consecutive same-role messages, but NEVER merge messages
1422
+ // that contain tool_use or tool_result blocks — they must stay intact for
1423
+ // the provider's tool-call protocol.
1397
1424
  if (Array.isArray(clean.messages) && clean.messages.length > 0) {
1398
1425
  const merged = [];
1399
1426
  const messages = clean.messages;
1400
1427
 
1428
+ const hasToolContent = (msg) => {
1429
+ if (Array.isArray(msg?.content)) {
1430
+ return msg.content.some(b => b && (b.type === 'tool_use' || b.type === 'tool_result'));
1431
+ }
1432
+ return Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0;
1433
+ };
1434
+
1401
1435
  for (let i = 0; i < messages.length; i++) {
1402
1436
  const msg = messages[i];
1437
+ const prev = merged.length > 0 ? merged[merged.length - 1] : null;
1403
1438
 
1404
- if (merged.length > 0 && msg.role === merged[merged.length - 1].role) {
1405
- // Merge content with the previous message of the same role
1406
- const prevMsg = merged[merged.length - 1];
1407
- const prevContent = typeof prevMsg.content === 'string' ? prevMsg.content : JSON.stringify(prevMsg.content);
1439
+ if (prev && msg.role === prev.role && !hasToolContent(msg) && !hasToolContent(prev)) {
1440
+ const prevContent = typeof prev.content === 'string' ? prev.content : JSON.stringify(prev.content);
1408
1441
  const currContent = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
1409
- prevMsg.content = prevContent + '\n\n' + currContent;
1410
-
1411
- logger.debug({
1412
- mergedRole: msg.role,
1413
- addedContentPreview: currContent.substring(0, 50)
1414
- }, 'Merged consecutive message with same role');
1442
+ prev.content = prevContent + '\n\n' + currContent;
1415
1443
  } else {
1416
1444
  merged.push({ ...msg });
1417
1445
  }
1418
1446
  }
1419
1447
 
1420
- // If the last message is from user, add instruction to focus on the actual request
1421
- if (merged.length > 0 && merged[merged.length - 1].role === 'user') {
1422
- const lastMsg = merged[merged.length - 1];
1423
- const content = typeof lastMsg.content === 'string' ? lastMsg.content : JSON.stringify(lastMsg.content);
1424
-
1425
- // Find the last actual user request (after all the context/instructions)
1426
- // Add a clear separator to help the model focus
1427
- if (content.length > 500) {
1428
- lastMsg.content = content + '\n\n---\nIMPORTANT: Focus on and respond ONLY to my most recent request above. Do not summarize or acknowledge previous instructions.';
1429
- }
1430
- }
1431
-
1432
1448
  if (merged.length !== clean.messages.length) {
1433
1449
  logger.debug({
1434
1450
  originalCount: clean.messages.length,
1435
1451
  mergedCount: merged.length,
1436
- reduced: clean.messages.length - merged.length
1437
1452
  }, 'Merged consecutive messages with same role');
1438
1453
  }
1439
1454
 
@@ -1552,6 +1567,8 @@ async function runAgentLoop({
1552
1567
  headers,
1553
1568
  }) {
1554
1569
  logger.debug({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop entered');
1570
+ const { createTimer } = require("../utils/perf-timer");
1571
+ const agentTimer = createTimer("agentLoop");
1555
1572
  const settings = resolveLoopOptions(options);
1556
1573
  // Initialize audit logger (no-op if disabled)
1557
1574
  const auditLogger = createAuditLogger(config.audit);
@@ -1634,6 +1651,7 @@ async function runAgentLoop({
1634
1651
  }
1635
1652
 
1636
1653
 
1654
+ if (steps === 1 && agentTimer) agentTimer.mark("preCompression");
1637
1655
  if (steps === 1 && config.historyCompression?.enabled !== false) {
1638
1656
  try {
1639
1657
  if (historyCompression.needsCompression(cleanPayload.messages)) {
@@ -1875,7 +1893,17 @@ IMPORTANT TOOL USAGE RULES:
1875
1893
  cleanPayload.tools || [],
1876
1894
  {
1877
1895
  mode: config.headroom?.mode,
1878
- queryContext: cleanPayload.messages[cleanPayload.messages.length - 1]?.content,
1896
+ queryContext: (() => {
1897
+ const last = cleanPayload.messages[cleanPayload.messages.length - 1]?.content;
1898
+ if (typeof last === 'string') return last;
1899
+ if (Array.isArray(last)) {
1900
+ return last
1901
+ .map(b => (b?.type === 'text' ? b.text : b?.type === 'tool_result' ? String(b.content ?? '') : ''))
1902
+ .filter(Boolean)
1903
+ .join('\n') || null;
1904
+ }
1905
+ return null;
1906
+ })(),
1879
1907
  model: requestedModel,
1880
1908
  modelLimit: modelContextWindow,
1881
1909
  tokenBudget: effectiveMax,
@@ -1921,9 +1949,24 @@ IMPORTANT TOOL USAGE RULES:
1921
1949
  });
1922
1950
  }
1923
1951
 
1952
+ // Thread workspace for code-graph integration (auto-detected or from header)
1953
+ if (headers?.["x-lynkr-workspace"]) {
1954
+ cleanPayload._workspace = headers["x-lynkr-workspace"];
1955
+ }
1956
+
1957
+ // RTK-inspired tool result compression: compress large tool_results
1958
+ // before they reach the model (saves 60-90% on test/git/lint output)
1959
+ if (config.toolResultCompression?.enabled !== false) {
1960
+ const { compressToolResults } = require("../context/tool-result-compressor");
1961
+ const tier = cleanPayload._routingTier || "MEDIUM";
1962
+ compressToolResults(cleanPayload.messages, { tier });
1963
+ }
1964
+
1965
+ if (agentTimer) agentTimer.mark("preInvokeModel");
1924
1966
  let databricksResponse;
1925
1967
  try {
1926
1968
  databricksResponse = await invokeModel(cleanPayload);
1969
+ if (agentTimer) agentTimer.mark("invokeModel");
1927
1970
  } catch (modelError) {
1928
1971
  const isConnectionError = modelError.cause?.code === 'ECONNREFUSED'
1929
1972
  || modelError.message?.includes('fetch failed')
@@ -2113,6 +2156,21 @@ IMPORTANT TOOL USAGE RULES:
2113
2156
  _anthropic_block: block,
2114
2157
  }));
2115
2158
 
2159
+ // Extract tool calls from text blocks that contain XML (some Ollama models)
2160
+ if (toolCalls.length === 0) {
2161
+ const { extractToolCallsFromText } = require("../clients/xml-tool-extractor");
2162
+ for (const block of contentArray) {
2163
+ if (block?.type === "text" && block?.text) {
2164
+ const extracted = extractToolCallsFromText(block.text);
2165
+ if (extracted.toolCalls.length > 0) {
2166
+ toolCalls = extracted.toolCalls;
2167
+ block.text = extracted.cleanedText || "";
2168
+ break;
2169
+ }
2170
+ }
2171
+ }
2172
+ }
2173
+
2116
2174
  logger.debug(
2117
2175
  {
2118
2176
  sessionId: session?.id ?? null,
@@ -2127,6 +2185,17 @@ IMPORTANT TOOL USAGE RULES:
2127
2185
  const choice = databricksResponse.json?.choices?.[0];
2128
2186
  message = choice?.message ?? {};
2129
2187
  toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
2188
+
2189
+ // Extract tool calls embedded as XML/text in content (Minimax, Qwen, GLM, Llama, etc.)
2190
+ if (toolCalls.length === 0 && typeof message.content === "string" && message.content.trim()) {
2191
+ const { extractToolCallsFromText } = require("../clients/xml-tool-extractor");
2192
+ const extracted = extractToolCallsFromText(message.content);
2193
+ if (extracted.toolCalls.length > 0) {
2194
+ toolCalls = extracted.toolCalls;
2195
+ message.tool_calls = toolCalls;
2196
+ message.content = extracted.cleanedText;
2197
+ }
2198
+ }
2130
2199
  }
2131
2200
 
2132
2201
  // Guard: drop hallucinated tool calls when no tools were sent to the model.
@@ -2153,6 +2222,7 @@ IMPORTANT TOOL USAGE RULES:
2153
2222
  } else {
2154
2223
  // Convert OpenAI/OpenRouter format to Anthropic content blocks
2155
2224
  const contentBlocks = [];
2225
+ let toolCallIdx = 0;
2156
2226
 
2157
2227
  // Add text content if present
2158
2228
  if (message.content && typeof message.content === 'string' && message.content.trim()) {
@@ -2184,7 +2254,7 @@ IMPORTANT TOOL USAGE RULES:
2184
2254
 
2185
2255
  contentBlocks.push({
2186
2256
  type: "tool_use",
2187
- id: toolCall.id || `toolu_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
2257
+ id: toolCall.id || `toolu_${Date.now()}_${(toolCallIdx++).toString(36)}_${Math.random().toString(36).substr(2, 6)}`,
2188
2258
  name: func.name || toolCall.name || "unknown",
2189
2259
  input
2190
2260
  });
@@ -2248,7 +2318,7 @@ IMPORTANT TOOL USAGE RULES:
2248
2318
  const serverSideToolCalls = [];
2249
2319
  const clientSideToolCalls = [];
2250
2320
 
2251
- const SERVER_SIDE_TOOLS = new Set(["task", "web_search", "web_fetch", "websearch", "webfetch", "web_agent"]);
2321
+ const SERVER_SIDE_TOOLS = new Set(["task", "Task", "web_search", "web_fetch", "websearch", "webfetch", "web_agent", "WebSearch", "WebFetch", "WebAgent"]);
2252
2322
 
2253
2323
  for (const call of toolCalls) {
2254
2324
  const toolName = (call.function?.name ?? call.name ?? "").toLowerCase();
@@ -2271,7 +2341,9 @@ IMPORTANT TOOL USAGE RULES:
2271
2341
  executionMode,
2272
2342
  clientTools: clientSideToolCalls.map((c) => c.function?.name ?? c.name),
2273
2343
  },
2274
- "Hybrid mode: returning non-Task tools to client, executing Task tools on server"
2344
+ clientSideToolCalls.length > 1
2345
+ ? `Parallel tool passthrough: ${clientSideToolCalls.length} tools → client`
2346
+ : "Hybrid mode: returning non-Task tools to client, executing Task tools on server"
2275
2347
  );
2276
2348
 
2277
2349
  // Filter sessionContent to only include client-side tool_use blocks
@@ -2308,26 +2380,11 @@ IMPORTANT TOOL USAGE RULES:
2308
2380
  // then continue the conversation loop. For now, let's fall through to execute server-side tools.
2309
2381
  if (serverSideToolCalls.length === 0) {
2310
2382
  // No server-side tools - pure passthrough
2311
- // Record outbound client-side tool calls into cross-request dedup tracker
2312
- if (session && clientSideToolCalls.length > 0) {
2313
- ensureDedupStructure(session);
2314
- for (const call of clientSideToolCalls) {
2315
- recordCrossRequestToolCall(session, call);
2316
- }
2317
- // Persist dedup state (non-ephemeral sessions only)
2318
- if (session.id && !session._ephemeral) {
2319
- try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
2320
- logger.debug({ err: e.message }, "Failed to persist outbound dedup state");
2321
- }
2322
- }
2323
- const { maxCount, toolName: dedupTool } = getMaxDedupCount(session);
2324
- logger.debug({
2325
- sessionId: session?.id ?? null,
2326
- clientToolCount: clientSideToolCalls.length,
2327
- maxDedupCount: maxCount,
2328
- maxDedupTool: dedupTool,
2329
- }, "Cross-request tool dedup: recorded outbound tool calls");
2330
- }
2383
+ // Do NOT record outbound tool calls here: the inbound recording
2384
+ // on the next request (when the client sends results back) is
2385
+ // enough to detect real loops. Recording both outbound + inbound
2386
+ // for the same call double-counts and triggers the dedup warning
2387
+ // on the very first normal tool round-trip.
2331
2388
 
2332
2389
  return {
2333
2390
  response: {
@@ -3150,6 +3207,12 @@ IMPORTANT TOOL USAGE RULES:
3150
3207
  if (Array.isArray(anthropicPayload?.content)) {
3151
3208
  anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
3152
3209
  }
3210
+ } else if (actualProvider === "codex") {
3211
+ // Codex responses are already in Anthropic format from invokeCodex
3212
+ anthropicPayload = databricksResponse.json;
3213
+ if (Array.isArray(anthropicPayload?.content)) {
3214
+ anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
3215
+ }
3153
3216
  } else {
3154
3217
  anthropicPayload = toAnthropicResponse(
3155
3218
  databricksResponse.json,
@@ -3434,6 +3497,15 @@ IMPORTANT TOOL USAGE RULES:
3434
3497
  }
3435
3498
  }
3436
3499
 
3500
+ // Attach routing metadata for OpenClaw model name rewriting
3501
+ if (databricksResponse.routingDecision) {
3502
+ anthropicPayload._routingMeta = {
3503
+ provider: databricksResponse.routingDecision.provider,
3504
+ model: databricksResponse.routingDecision.model,
3505
+ tier: databricksResponse.routingDecision.tier,
3506
+ };
3507
+ }
3508
+
3437
3509
  appendTurnToSession(session, {
3438
3510
  role: "assistant",
3439
3511
  type: "message",
@@ -3487,6 +3559,7 @@ IMPORTANT TOOL USAGE RULES:
3487
3559
  },
3488
3560
  "Agent loop completed successfully",
3489
3561
  );
3562
+ if (agentTimer) { agentTimer.mark("responseReady"); agentTimer.done(); }
3490
3563
  return {
3491
3564
  response: {
3492
3565
  status: 200,
@@ -3757,153 +3830,16 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3757
3830
  }
3758
3831
  }
3759
3832
 
3760
- // Client mode still uses the relaxed per-request threshold for the count-based guard
3761
- const effectiveThreshold = 10;
3762
- if (toolResultCount >= effectiveThreshold) {
3763
- logger.error({
3764
- toolResultCount,
3765
- toolUseCount,
3766
- threshold: effectiveThreshold,
3767
- sessionId: session?.id ?? null,
3768
- }, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
3769
-
3770
- let toolResultsSummary = "";
3771
- const messages = payload?.messages || [];
3772
- let lastUserTextIndex = -1;
3773
- for (let i = messages.length - 1; i >= 0; i--) {
3774
- const msg = messages[i];
3775
- if (msg?.role !== 'user') continue;
3776
- if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
3777
- lastUserTextIndex = i;
3778
- break;
3779
- }
3780
- if (Array.isArray(msg.content)) {
3781
- const hasText = msg.content.some(block =>
3782
- (block?.type === 'text' && block?.text?.trim?.().length > 0) ||
3783
- (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
3784
- );
3785
- if (hasText) {
3786
- lastUserTextIndex = i;
3787
- break;
3788
- }
3789
- }
3790
- }
3791
- const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
3792
- for (let i = startIndex; i < messages.length; i++) {
3793
- const msg = messages[i];
3794
- if (!msg || !Array.isArray(msg.content)) continue;
3795
- for (const block of msg.content) {
3796
- if (block?.type === 'tool_result' && block?.content) {
3797
- const content = typeof block.content === 'string'
3798
- ? block.content
3799
- : JSON.stringify(block.content);
3800
- if (content && !content.includes('Found 0')) {
3801
- toolResultsSummary += content + "\n";
3802
- }
3803
- }
3804
- }
3805
- }
3806
-
3807
- let responseText = `Based on the tool results, here's what I found:\n\n`;
3808
- if (toolResultsSummary.trim()) {
3809
- responseText += toolResultsSummary.trim();
3810
- } else {
3811
- responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
3812
- }
3813
-
3814
- const forcedResponse = {
3815
- id: `msg_forced_${Date.now()}`,
3816
- type: "message",
3817
- role: "assistant",
3818
- content: [{ type: "text", text: responseText }],
3819
- model: requestedModel || "unknown",
3820
- stop_reason: "end_turn",
3821
- stop_sequence: null,
3822
- usage: { input_tokens: 0, output_tokens: 100 },
3823
- };
3824
-
3825
- return {
3826
- status: 200,
3827
- body: forcedResponse,
3828
- terminationReason: "tool_loop_guard",
3829
- };
3830
- }
3831
- } else {
3832
- // Server mode: use existing threshold 2 with countToolCallsInHistory
3833
- const effectiveThreshold = toolLoopThreshold;
3834
-
3835
- if (toolResultCount >= effectiveThreshold) {
3836
- logger.error({
3837
- toolResultCount,
3838
- toolUseCount,
3839
- threshold: effectiveThreshold,
3840
- sessionId: session?.id ?? null,
3841
- }, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
3842
-
3843
- let toolResultsSummary = "";
3844
- const messages = payload?.messages || [];
3845
- let lastUserTextIndex = -1;
3846
- for (let i = messages.length - 1; i >= 0; i--) {
3847
- const msg = messages[i];
3848
- if (msg?.role !== 'user') continue;
3849
- if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
3850
- lastUserTextIndex = i;
3851
- break;
3852
- }
3853
- if (Array.isArray(msg.content)) {
3854
- const hasText = msg.content.some(block =>
3855
- (block?.type === 'text' && block?.text?.trim?.().length > 0) ||
3856
- (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
3857
- );
3858
- if (hasText) {
3859
- lastUserTextIndex = i;
3860
- break;
3861
- }
3862
- }
3863
- }
3864
- const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
3865
- for (let i = startIndex; i < messages.length; i++) {
3866
- const msg = messages[i];
3867
- if (!msg || !Array.isArray(msg.content)) continue;
3868
- for (const block of msg.content) {
3869
- if (block?.type === 'tool_result' && block?.content) {
3870
- const content = typeof block.content === 'string'
3871
- ? block.content
3872
- : JSON.stringify(block.content);
3873
- if (content && !content.includes('Found 0')) {
3874
- toolResultsSummary += content + "\n";
3875
- }
3876
- }
3877
- }
3878
- }
3879
-
3880
- let responseText = `Based on the tool results, here's what I found:\n\n`;
3881
- if (toolResultsSummary.trim()) {
3882
- responseText += toolResultsSummary.trim();
3883
- } else {
3884
- responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
3885
- }
3886
-
3887
- const forcedResponse = {
3888
- id: `msg_forced_${Date.now()}`,
3889
- type: "message",
3890
- role: "assistant",
3891
- content: [{ type: "text", text: responseText }],
3892
- model: requestedModel || "unknown",
3893
- stop_reason: "end_turn",
3894
- stop_sequence: null,
3895
- usage: { input_tokens: 0, output_tokens: 100 },
3896
- };
3897
-
3898
- return {
3899
- status: 200,
3900
- body: forcedResponse,
3901
- terminationReason: "tool_loop_guard",
3902
- };
3903
- }
3833
+ // No count-based tool_loop_guard. Natural limits (maxSteps, maxDurationMs,
3834
+ // provider token/rate limits, client-side loop detection, and the
3835
+ // cross-request dedup above) are sufficient protection.
3904
3836
  }
3905
3837
 
3838
+ const { createTimer } = require("../utils/perf-timer");
3839
+ const pTimer = createTimer("processMessage");
3840
+
3906
3841
  const cleanPayload = sanitizePayload(payload);
3842
+ pTimer.mark("sanitizePayload");
3907
3843
 
3908
3844
  // Proactively load tools based on prompt content (lazy loading)
3909
3845
  try {
@@ -3914,6 +3850,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3914
3850
  } catch (err) {
3915
3851
  logger.debug({ error: err.message }, "Lazy tool loading check failed");
3916
3852
  }
3853
+ pTimer.mark("lazyToolLoad");
3917
3854
 
3918
3855
  appendTurnToSession(session, {
3919
3856
  role: "user",
@@ -3923,12 +3860,14 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3923
3860
  },
3924
3861
  type: "message",
3925
3862
  });
3863
+ pTimer.mark("sessionAppend");
3926
3864
 
3927
3865
  let cacheKey = null;
3928
3866
  let cachedResponse = null;
3929
3867
  if (promptCache.isEnabled()) {
3930
3868
  // cleanPayload is already a deep clone from sanitizePayload, no need to clone again
3931
3869
  const { key, entry } = promptCache.lookup(cleanPayload);
3870
+ pTimer.mark("cacheCheck");
3932
3871
  cacheKey = key;
3933
3872
  if (entry?.value) {
3934
3873
  try {
@@ -4018,6 +3957,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
4018
3957
  // NOTE: Tool loop guard moved to BEFORE sanitizePayload() since sanitization
4019
3958
  // removes conversation history (consecutive same-role messages)
4020
3959
 
3960
+ pTimer.mark("preAgentLoop");
4021
3961
  const loopResult = await runAgentLoop({
4022
3962
  cleanPayload,
4023
3963
  requestedModel,
@@ -4029,6 +3969,8 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
4029
3969
  providerType: config.modelProvider?.type ?? "databricks",
4030
3970
  headers,
4031
3971
  });
3972
+ pTimer.mark("agentLoopDone");
3973
+ pTimer.done();
4032
3974
 
4033
3975
  // Store successful responses in semantic cache for future fuzzy matching
4034
3976
  if (semanticCache.isEnabled() && semanticLookupResult && !semanticLookupResult.hit) {