npm - lynkr - Versions diffs - 8.0.1 → 9.0.2 - Mend

lynkr 8.0.1 → 9.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/README.md +238 -315
package/bin/cli.js +16 -3
package/index.js +7 -3
package/install.sh +3 -3
package/lynkr-skill.tar.gz +0 -0
package/native/Cargo.toml +26 -0
package/native/index.js +29 -0
package/native/lynkr-native.node +0 -0
package/native/src/lib.rs +321 -0
package/package.json +8 -6
package/src/api/files-multipart.js +30 -0
package/src/api/files-router.js +81 -0
package/src/api/openai-router.js +379 -308
package/src/api/providers-handler.js +171 -3
package/src/api/router.js +109 -5
package/src/cache/prompt.js +13 -0
package/src/clients/circuit-breaker.js +10 -247
package/src/clients/codex-process.js +342 -0
package/src/clients/codex-utils.js +143 -0
package/src/clients/databricks.js +243 -76
package/src/clients/ollama-utils.js +21 -17
package/src/clients/openai-format.js +20 -6
package/src/clients/openrouter-utils.js +42 -37
package/src/clients/prompt-cache-injection.js +140 -0
package/src/clients/provider-capabilities.js +41 -0
package/src/clients/resilience.js +540 -0
package/src/clients/responses-format.js +8 -7
package/src/clients/retry.js +22 -167
package/src/clients/standard-tools.js +1 -1
package/src/clients/xml-tool-extractor.js +307 -0
package/src/cluster.js +82 -0
package/src/config/index.js +66 -0
package/src/context/compression.js +42 -9
package/src/context/distill.js +507 -0
package/src/context/tool-result-compressor.js +563 -0
package/src/memory/extractor.js +22 -0
package/src/orchestrator/index.js +147 -205
package/src/routing/complexity-analyzer.js +258 -5
package/src/routing/index.js +15 -34
package/src/routing/latency-tracker.js +148 -0
package/src/routing/model-tiers.js +2 -0
package/src/routing/quality-scorer.js +113 -0
package/src/routing/telemetry.js +502 -0
package/src/server.js +23 -0
package/src/stores/file-store.js +69 -0
package/src/stores/response-store.js +25 -0
package/src/tools/code-graph.js +538 -0
package/src/tools/code-mode.js +304 -0
package/src/tools/index.js +1 -1
package/src/tools/lazy-loader.js +11 -0
package/src/tools/mcp-remote.js +7 -0
package/src/tools/smart-selection.js +11 -0
package/src/tools/web.js +1 -1
package/src/utils/payload.js +206 -0
package/src/utils/perf-timer.js +80 -0

package/src/orchestrator/index.js CHANGED Viewed

@@ -55,6 +55,8 @@ function getDestinationUrl(providerType) {
       return config.vertex?.endpoint ?? 'unknown';
     case 'moonshot':
       return config.moonshot?.endpoint ?? 'unknown';
+    case 'codex':
+      return 'codex://app-server (local process)';
     default:
       return 'unknown';
   }
@@ -66,7 +68,6 @@ const DROP_KEYS = new Set([
   "beta",
   "context_management",
   "stream",
-  "thinking",
   "max_steps",
   "max_duration_ms",
 ]);
@@ -185,7 +186,14 @@ function normaliseMessages(payload, options = {}) {
       const rawContent = message.content;
       let content;
       if (Array.isArray(rawContent)) {
-        content = flattenContent ? flattenBlocks(rawContent) : rawContent.slice();
+        const hasToolBlocks = rawContent.some(
+          (b) => b && (b.type === "tool_use" || b.type === "tool_result" || b.type === "document" || b.type === "image" || b.type === "thinking")
+        );
+        if (hasToolBlocks) {
+          content = rawContent.slice();
+        } else {
+          content = flattenContent ? flattenBlocks(rawContent) : rawContent.slice();
+        }
       } else if (rawContent === undefined || rawContent === null) {
         content = flattenContent ? "" : rawContent;
       } else if (typeof rawContent === "string") {
@@ -195,7 +203,11 @@ function normaliseMessages(payload, options = {}) {
       } else {
         content = rawContent;
       }
-      normalised.push({ role, content });
+      const entry = { role, content };
+      if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
+        entry.tool_calls = message.tool_calls;
+      }
+      normalised.push(entry);
     }
   }
   return normalised;
@@ -468,8 +480,8 @@ function injectToolLoopStopInstruction(messages, threshold = 5) {
 // requests escape it.
 const DEDUP_MAX_SIGNATURES = 50;
-const DEDUP_WARN_THRESHOLD = 2;
-const DEDUP_TERMINATE_THRESHOLD = 3;
+const DEDUP_WARN_THRESHOLD = 5;
+const DEDUP_TERMINATE_THRESHOLD = 8;
 /**
  * Initialise session.metadata.toolCallDedup if missing.
@@ -1019,10 +1031,14 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
   const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
   const contentItems = [];
-  if (wantsThinking) {
+  // Pass through real reasoning_content as a thinking block
+  const reasoningContent = typeof message.reasoning_content === "string" ? message.reasoning_content : "";
+  if (reasoningContent && wantsThinking) {
+    contentItems.push({ type: "thinking", thinking: reasoningContent });
+  } else if (wantsThinking) {
     contentItems.push({
       type: "thinking",
-      thinking: "Reasoning not available from the backing Databricks model.",
+      thinking: "Reasoning not available from the backing model.",
     });
   }
@@ -1085,7 +1101,10 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
 }
 function sanitizePayload(payload) {
-  const clean = JSON.parse(JSON.stringify(payload ?? {}));
+  const { clonePayloadSmart } = require("../utils/payload");
+  const providerType = config.modelProvider?.type ?? "databricks";
+  const willFlatten = providerType !== "azure-anthropic";
+  const clean = clonePayloadSmart(payload ?? {}, { willFlatten });
   const requestedModel =
     (typeof payload?.model === "string" && payload.model.trim().length > 0
       ? payload.model.trim()
@@ -1093,11 +1112,10 @@ function sanitizePayload(payload) {
     config.modelProvider?.defaultModel ??
     "databricks-claude-sonnet-4-5";
   clean.model = requestedModel;
-  if (!clean.max_tokens) {
-    clean.max_tokens = 16384;
-  }
-  const providerType = config.modelProvider?.type ?? "databricks";
-  const flattenContent = providerType !== "azure-anthropic";
+  if (!clean.max_tokens) {
+    clean.max_tokens = 16384;
+  }
+  const flattenContent = willFlatten;
   clean.messages = normaliseMessages(clean, { flattenContent }).filter((msg) => {
     const hasToolCalls =
       Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0;
@@ -1216,6 +1234,13 @@ function sanitizePayload(payload) {
   }
   DROP_KEYS.forEach((key) => delete clean[key]);
+  // Conditionally keep or strip the `thinking` parameter based on provider
+  const { getThinkingBehavior } = require("../clients/provider-capabilities");
+  const thinkingBehavior = getThinkingBehavior(providerType, clean.model);
+  if (clean.thinking && thinkingBehavior !== "native") {
+    delete clean.thinking;
+  }
   if (Array.isArray(clean.tools) && clean.tools.length === 0) {
     delete clean.tools;
   } else if (providerType === "databricks") {
@@ -1393,47 +1418,37 @@ function sanitizePayload(payload) {
   applyToonCompression(clean, config.toon, { logger });
   // FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
-  // Strategy: Merge all consecutive messages, add instruction to focus on last request
+  // Strategy: Merge consecutive same-role messages, but NEVER merge messages
+  // that contain tool_use or tool_result blocks — they must stay intact for
+  // the provider's tool-call protocol.
   if (Array.isArray(clean.messages) && clean.messages.length > 0) {
     const merged = [];
     const messages = clean.messages;
+    const hasToolContent = (msg) => {
+      if (Array.isArray(msg?.content)) {
+        return msg.content.some(b => b && (b.type === 'tool_use' || b.type === 'tool_result'));
+      }
+      return Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0;
+    };
     for (let i = 0; i < messages.length; i++) {
       const msg = messages[i];
+      const prev = merged.length > 0 ? merged[merged.length - 1] : null;
-      if (merged.length > 0 && msg.role === merged[merged.length - 1].role) {
-        // Merge content with the previous message of the same role
-        const prevMsg = merged[merged.length - 1];
-        const prevContent = typeof prevMsg.content === 'string' ? prevMsg.content : JSON.stringify(prevMsg.content);
+      if (prev && msg.role === prev.role && !hasToolContent(msg) && !hasToolContent(prev)) {
+        const prevContent = typeof prev.content === 'string' ? prev.content : JSON.stringify(prev.content);
         const currContent = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
-        prevMsg.content = prevContent + '\n\n' + currContent;
-        logger.debug({
-          mergedRole: msg.role,
-          addedContentPreview: currContent.substring(0, 50)
-        }, 'Merged consecutive message with same role');
+        prev.content = prevContent + '\n\n' + currContent;
       } else {
         merged.push({ ...msg });
       }
     }
-    // If the last message is from user, add instruction to focus on the actual request
-    if (merged.length > 0 && merged[merged.length - 1].role === 'user') {
-      const lastMsg = merged[merged.length - 1];
-      const content = typeof lastMsg.content === 'string' ? lastMsg.content : JSON.stringify(lastMsg.content);
-      // Find the last actual user request (after all the context/instructions)
-      // Add a clear separator to help the model focus
-      if (content.length > 500) {
-        lastMsg.content = content + '\n\n---\nIMPORTANT: Focus on and respond ONLY to my most recent request above. Do not summarize or acknowledge previous instructions.';
-      }
-    }
     if (merged.length !== clean.messages.length) {
       logger.debug({
         originalCount: clean.messages.length,
         mergedCount: merged.length,
-        reduced: clean.messages.length - merged.length
       }, 'Merged consecutive messages with same role');
     }
@@ -1552,6 +1567,8 @@ async function runAgentLoop({
   headers,
 }) {
   logger.debug({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop entered');
+  const { createTimer } = require("../utils/perf-timer");
+  const agentTimer = createTimer("agentLoop");
   const settings = resolveLoopOptions(options);
   // Initialize audit logger (no-op if disabled)
   const auditLogger = createAuditLogger(config.audit);
@@ -1634,6 +1651,7 @@ async function runAgentLoop({
     }
+    if (steps === 1 && agentTimer) agentTimer.mark("preCompression");
     if (steps === 1 && config.historyCompression?.enabled !== false) {
       try {
         if (historyCompression.needsCompression(cleanPayload.messages)) {
@@ -1875,7 +1893,17 @@ IMPORTANT TOOL USAGE RULES:
         cleanPayload.tools || [],
         {
           mode: config.headroom?.mode,
-          queryContext: cleanPayload.messages[cleanPayload.messages.length - 1]?.content,
+          queryContext: (() => {
+            const last = cleanPayload.messages[cleanPayload.messages.length - 1]?.content;
+            if (typeof last === 'string') return last;
+            if (Array.isArray(last)) {
+              return last
+                .map(b => (b?.type === 'text' ? b.text : b?.type === 'tool_result' ? String(b.content ?? '') : ''))
+                .filter(Boolean)
+                .join('\n') || null;
+            }
+            return null;
+          })(),
           model: requestedModel,
           modelLimit: modelContextWindow,
           tokenBudget: effectiveMax,
@@ -1921,9 +1949,24 @@ IMPORTANT TOOL USAGE RULES:
     });
   }
+  // Thread workspace for code-graph integration (auto-detected or from header)
+  if (headers?.["x-lynkr-workspace"]) {
+    cleanPayload._workspace = headers["x-lynkr-workspace"];
+  }
+  // RTK-inspired tool result compression: compress large tool_results
+  // before they reach the model (saves 60-90% on test/git/lint output)
+  if (config.toolResultCompression?.enabled !== false) {
+    const { compressToolResults } = require("../context/tool-result-compressor");
+    const tier = cleanPayload._routingTier || "MEDIUM";
+    compressToolResults(cleanPayload.messages, { tier });
+  }
+  if (agentTimer) agentTimer.mark("preInvokeModel");
   let databricksResponse;
   try {
     databricksResponse = await invokeModel(cleanPayload);
+    if (agentTimer) agentTimer.mark("invokeModel");
   } catch (modelError) {
     const isConnectionError = modelError.cause?.code === 'ECONNREFUSED'
       || modelError.message?.includes('fetch failed')
@@ -2113,6 +2156,21 @@ IMPORTANT TOOL USAGE RULES:
           _anthropic_block: block,
         }));
+      // Extract tool calls from text blocks that contain XML (some Ollama models)
+      if (toolCalls.length === 0) {
+        const { extractToolCallsFromText } = require("../clients/xml-tool-extractor");
+        for (const block of contentArray) {
+          if (block?.type === "text" && block?.text) {
+            const extracted = extractToolCallsFromText(block.text);
+            if (extracted.toolCalls.length > 0) {
+              toolCalls = extracted.toolCalls;
+              block.text = extracted.cleanedText || "";
+              break;
+            }
+          }
+        }
+      }
       logger.debug(
         {
           sessionId: session?.id ?? null,
@@ -2127,6 +2185,17 @@ IMPORTANT TOOL USAGE RULES:
       const choice = databricksResponse.json?.choices?.[0];
       message = choice?.message ?? {};
       toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
+      // Extract tool calls embedded as XML/text in content (Minimax, Qwen, GLM, Llama, etc.)
+      if (toolCalls.length === 0 && typeof message.content === "string" && message.content.trim()) {
+        const { extractToolCallsFromText } = require("../clients/xml-tool-extractor");
+        const extracted = extractToolCallsFromText(message.content);
+        if (extracted.toolCalls.length > 0) {
+          toolCalls = extracted.toolCalls;
+          message.tool_calls = toolCalls;
+          message.content = extracted.cleanedText;
+        }
+      }
     }
     // Guard: drop hallucinated tool calls when no tools were sent to the model.
@@ -2153,6 +2222,7 @@ IMPORTANT TOOL USAGE RULES:
       } else {
         // Convert OpenAI/OpenRouter format to Anthropic content blocks
         const contentBlocks = [];
+        let toolCallIdx = 0;
         // Add text content if present
         if (message.content && typeof message.content === 'string' && message.content.trim()) {
@@ -2184,7 +2254,7 @@ IMPORTANT TOOL USAGE RULES:
           contentBlocks.push({
             type: "tool_use",
-            id: toolCall.id || `toolu_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
+            id: toolCall.id || `toolu_${Date.now()}_${(toolCallIdx++).toString(36)}_${Math.random().toString(36).substr(2, 6)}`,
             name: func.name || toolCall.name || "unknown",
             input
           });
@@ -2248,7 +2318,7 @@ IMPORTANT TOOL USAGE RULES:
       const serverSideToolCalls = [];
       const clientSideToolCalls = [];
-      const SERVER_SIDE_TOOLS = new Set(["task", "web_search", "web_fetch", "websearch", "webfetch", "web_agent"]);
+      const SERVER_SIDE_TOOLS = new Set(["task", "Task", "web_search", "web_fetch", "websearch", "webfetch", "web_agent", "WebSearch", "WebFetch", "WebAgent"]);
       for (const call of toolCalls) {
         const toolName = (call.function?.name ?? call.name ?? "").toLowerCase();
@@ -2271,7 +2341,9 @@ IMPORTANT TOOL USAGE RULES:
             executionMode,
             clientTools: clientSideToolCalls.map((c) => c.function?.name ?? c.name),
           },
-          "Hybrid mode: returning non-Task tools to client, executing Task tools on server"
+          clientSideToolCalls.length > 1
+            ? `Parallel tool passthrough: ${clientSideToolCalls.length} tools → client`
+            : "Hybrid mode: returning non-Task tools to client, executing Task tools on server"
         );
         // Filter sessionContent to only include client-side tool_use blocks
@@ -2308,26 +2380,11 @@ IMPORTANT TOOL USAGE RULES:
         // then continue the conversation loop. For now, let's fall through to execute server-side tools.
         if (serverSideToolCalls.length === 0) {
           // No server-side tools - pure passthrough
-          // Record outbound client-side tool calls into cross-request dedup tracker
-          if (session && clientSideToolCalls.length > 0) {
-            ensureDedupStructure(session);
-            for (const call of clientSideToolCalls) {
-              recordCrossRequestToolCall(session, call);
-            }
-            // Persist dedup state (non-ephemeral sessions only)
-            if (session.id && !session._ephemeral) {
-              try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
-                logger.debug({ err: e.message }, "Failed to persist outbound dedup state");
-              }
-            }
-            const { maxCount, toolName: dedupTool } = getMaxDedupCount(session);
-            logger.debug({
-              sessionId: session?.id ?? null,
-              clientToolCount: clientSideToolCalls.length,
-              maxDedupCount: maxCount,
-              maxDedupTool: dedupTool,
-            }, "Cross-request tool dedup: recorded outbound tool calls");
-          }
+          // Do NOT record outbound tool calls here — the inbound recording
+          // on the next request (when the client sends results back) is
+          // enough to detect real loops.  Recording both outbound + inbound
+          // for the same call double-counts and triggers the dedup warning
+          // on the very first normal tool round-trip.
           return {
             response: {
@@ -3150,6 +3207,12 @@ IMPORTANT TOOL USAGE RULES:
       if (Array.isArray(anthropicPayload?.content)) {
         anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
       }
+    } else if (actualProvider === "codex") {
+      // Codex responses are already in Anthropic format from invokeCodex
+      anthropicPayload = databricksResponse.json;
+      if (Array.isArray(anthropicPayload?.content)) {
+        anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
+      }
     } else {
       anthropicPayload = toAnthropicResponse(
         databricksResponse.json,
@@ -3434,6 +3497,15 @@ IMPORTANT TOOL USAGE RULES:
       }
     }
+    // Attach routing metadata for OpenClaw model name rewriting
+    if (databricksResponse.routingDecision) {
+      anthropicPayload._routingMeta = {
+        provider: databricksResponse.routingDecision.provider,
+        model: databricksResponse.routingDecision.model,
+        tier: databricksResponse.routingDecision.tier,
+      };
+    }
     appendTurnToSession(session, {
       role: "assistant",
       type: "message",
@@ -3487,6 +3559,7 @@ IMPORTANT TOOL USAGE RULES:
       },
       "Agent loop completed successfully",
     );
+    if (agentTimer) { agentTimer.mark("responseReady"); agentTimer.done(); }
     return {
       response: {
         status: 200,
@@ -3757,153 +3830,16 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
       }
     }
-    // Client mode still uses the relaxed per-request threshold for the count-based guard
-    const effectiveThreshold = 10;
-    if (toolResultCount >= effectiveThreshold) {
-      logger.error({
-        toolResultCount,
-        toolUseCount,
-        threshold: effectiveThreshold,
-        sessionId: session?.id ?? null,
-      }, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
-      let toolResultsSummary = "";
-      const messages = payload?.messages || [];
-      let lastUserTextIndex = -1;
-      for (let i = messages.length - 1; i >= 0; i--) {
-        const msg = messages[i];
-        if (msg?.role !== 'user') continue;
-        if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
-          lastUserTextIndex = i;
-          break;
-        }
-        if (Array.isArray(msg.content)) {
-          const hasText = msg.content.some(block =>
-            (block?.type === 'text' && block?.text?.trim?.().length > 0) ||
-            (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
-          );
-          if (hasText) {
-            lastUserTextIndex = i;
-            break;
-          }
-        }
-      }
-      const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
-      for (let i = startIndex; i < messages.length; i++) {
-        const msg = messages[i];
-        if (!msg || !Array.isArray(msg.content)) continue;
-        for (const block of msg.content) {
-          if (block?.type === 'tool_result' && block?.content) {
-            const content = typeof block.content === 'string'
-              ? block.content
-              : JSON.stringify(block.content);
-            if (content && !content.includes('Found 0')) {
-              toolResultsSummary += content + "\n";
-            }
-          }
-        }
-      }
-      let responseText = `Based on the tool results, here's what I found:\n\n`;
-      if (toolResultsSummary.trim()) {
-        responseText += toolResultsSummary.trim();
-      } else {
-        responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
-      }
-      const forcedResponse = {
-        id: `msg_forced_${Date.now()}`,
-        type: "message",
-        role: "assistant",
-        content: [{ type: "text", text: responseText }],
-        model: requestedModel || "unknown",
-        stop_reason: "end_turn",
-        stop_sequence: null,
-        usage: { input_tokens: 0, output_tokens: 100 },
-      };
-      return {
-        status: 200,
-        body: forcedResponse,
-        terminationReason: "tool_loop_guard",
-      };
-    }
-  } else {
-    // Server mode: use existing threshold 2 with countToolCallsInHistory
-    const effectiveThreshold = toolLoopThreshold;
-    if (toolResultCount >= effectiveThreshold) {
-      logger.error({
-        toolResultCount,
-        toolUseCount,
-        threshold: effectiveThreshold,
-        sessionId: session?.id ?? null,
-      }, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
-      let toolResultsSummary = "";
-      const messages = payload?.messages || [];
-      let lastUserTextIndex = -1;
-      for (let i = messages.length - 1; i >= 0; i--) {
-        const msg = messages[i];
-        if (msg?.role !== 'user') continue;
-        if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
-          lastUserTextIndex = i;
-          break;
-        }
-        if (Array.isArray(msg.content)) {
-          const hasText = msg.content.some(block =>
-            (block?.type === 'text' && block?.text?.trim?.().length > 0) ||
-            (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
-          );
-          if (hasText) {
-            lastUserTextIndex = i;
-            break;
-          }
-        }
-      }
-      const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
-      for (let i = startIndex; i < messages.length; i++) {
-        const msg = messages[i];
-        if (!msg || !Array.isArray(msg.content)) continue;
-        for (const block of msg.content) {
-          if (block?.type === 'tool_result' && block?.content) {
-            const content = typeof block.content === 'string'
-              ? block.content
-              : JSON.stringify(block.content);
-            if (content && !content.includes('Found 0')) {
-              toolResultsSummary += content + "\n";
-            }
-          }
-        }
-      }
-      let responseText = `Based on the tool results, here's what I found:\n\n`;
-      if (toolResultsSummary.trim()) {
-        responseText += toolResultsSummary.trim();
-      } else {
-        responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
-      }
-      const forcedResponse = {
-        id: `msg_forced_${Date.now()}`,
-        type: "message",
-        role: "assistant",
-        content: [{ type: "text", text: responseText }],
-        model: requestedModel || "unknown",
-        stop_reason: "end_turn",
-        stop_sequence: null,
-        usage: { input_tokens: 0, output_tokens: 100 },
-      };
-      return {
-        status: 200,
-        body: forcedResponse,
-        terminationReason: "tool_loop_guard",
-      };
-    }
+    // No count-based tool_loop_guard. Natural limits (maxSteps, maxDurationMs,
+    // provider token/rate limits, client-side loop detection, and the
+    // cross-request dedup above) are sufficient protection.
   }
+  const { createTimer } = require("../utils/perf-timer");
+  const pTimer = createTimer("processMessage");
   const cleanPayload = sanitizePayload(payload);
+  pTimer.mark("sanitizePayload");
   // Proactively load tools based on prompt content (lazy loading)
   try {
@@ -3914,6 +3850,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
   } catch (err) {
     logger.debug({ error: err.message }, "Lazy tool loading check failed");
   }
+  pTimer.mark("lazyToolLoad");
   appendTurnToSession(session, {
     role: "user",
@@ -3923,12 +3860,14 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
     },
     type: "message",
   });
+  pTimer.mark("sessionAppend");
   let cacheKey = null;
   let cachedResponse = null;
   if (promptCache.isEnabled()) {
     // cleanPayload is already a deep clone from sanitizePayload, no need to clone again
     const { key, entry } = promptCache.lookup(cleanPayload);
+    pTimer.mark("cacheCheck");
     cacheKey = key;
     if (entry?.value) {
       try {
@@ -4018,6 +3957,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
   // NOTE: Tool loop guard moved to BEFORE sanitizePayload() since sanitization
   // removes conversation history (consecutive same-role messages)
+  pTimer.mark("preAgentLoop");
   const loopResult = await runAgentLoop({
     cleanPayload,
     requestedModel,
@@ -4029,6 +3969,8 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
     providerType: config.modelProvider?.type ?? "databricks",
     headers,
   });
+  pTimer.mark("agentLoopDone");
+  pTimer.done();
   // Store successful responses in semantic cache for future fuzzy matching
   if (semanticCache.isEnabled() && semanticLookupResult && !semanticLookupResult.hit) {