npm - opencode-acp - Versions diffs - 1.4.2 → 1.5.1 - Mend

opencode-acp 1.4.2 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/dist/index.js +151 -59
package/dist/index.js.map +1 -1
package/dist/lib/config-validation.d.ts.map +1 -1
package/dist/lib/config.d.ts +1 -0
package/dist/lib/config.d.ts.map +1 -1
package/dist/lib/gc/merge.d.ts.map +1 -1
package/dist/lib/messages/inject/inject.d.ts.map +1 -1
package/dist/lib/messages/inject/utils.d.ts +1 -1
package/dist/lib/messages/inject/utils.d.ts.map +1 -1
package/dist/lib/messages/utils.d.ts.map +1 -1
package/dist/lib/prompts/context-limit-nudge.d.ts +1 -1
package/dist/lib/prompts/context-limit-nudge.d.ts.map +1 -1
package/dist/lib/prompts/extensions/nudge.d.ts +1 -0
package/dist/lib/prompts/extensions/nudge.d.ts.map +1 -1
package/dist/lib/prompts/system.d.ts +1 -1
package/dist/lib/prompts/system.d.ts.map +1 -1
package/dist/lib/prompts/turn-nudge.d.ts +1 -1
package/dist/lib/prompts/turn-nudge.d.ts.map +1 -1
package/dist/lib/state/persistence.d.ts +2 -0
package/dist/lib/state/persistence.d.ts.map +1 -1
package/dist/lib/state/state.d.ts.map +1 -1
package/dist/lib/state/types.d.ts +2 -0
package/dist/lib/state/types.d.ts.map +1 -1
package/dist/lib/state/utils.d.ts.map +1 -1
package/package.json +1 -1

package/dist/index.js CHANGED

@@ -894,6 +894,7 @@ var VALID_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "compress.modelMaxLimits",
   "compress.modelMinLimits",
   "compress.nudgeFrequency",
+  "compress.perMessageNudgeGrowthPercent",
   "compress.iterationNudgeThreshold",
   "compress.nudgeForce",
   "compress.protectedTools",
@@ -1117,6 +1118,13 @@ function validateConfigTypes(config) {
           actual: `${compress.nudgeFrequency} (will be clamped to 1)`
         });
       }
+      if (compress.perMessageNudgeGrowthPercent !== void 0 && typeof compress.perMessageNudgeGrowthPercent !== "number") {
+        errors.push({
+          key: "compress.perMessageNudgeGrowthPercent",
+          expected: "number",
+          actual: typeof compress.perMessageNudgeGrowthPercent
+        });
+      }
       if (compress.iterationNudgeThreshold !== void 0 && typeof compress.iterationNudgeThreshold !== "number") {
         errors.push({
           key: "compress.iterationNudgeThreshold",
@@ -1461,6 +1469,7 @@ var defaultConfig = {
     maxContextLimit: "55%",
     minContextLimit: "45%",
     nudgeFrequency: 5,
+    perMessageNudgeGrowthPercent: 3,
     iterationNudgeThreshold: 15,
     nudgeForce: "soft",
     protectedTools: [...COMPRESS_DEFAULT_PROTECTED_TOOLS],
@@ -1485,7 +1494,7 @@ var defaultConfig = {
     maxOldGenSummaryLength: 3e3,
     majorGcThresholdPercent: "100%",
     batchCleanup: {
-      lowThreshold: "60%",
+      lowThreshold: "55%",
       highThreshold: "75%",
       forceThreshold: "90%"
     }
@@ -1612,6 +1621,7 @@ function mergeCompress(base, override) {
     modelMaxLimits: override.modelMaxLimits ?? base.modelMaxLimits,
     modelMinLimits: override.modelMinLimits ?? base.modelMinLimits,
     nudgeFrequency: override.nudgeFrequency ?? base.nudgeFrequency,
+    perMessageNudgeGrowthPercent: override.perMessageNudgeGrowthPercent ?? base.perMessageNudgeGrowthPercent,
     iterationNudgeThreshold: override.iterationNudgeThreshold ?? base.iterationNudgeThreshold,
     nudgeForce: override.nudgeForce ?? base.nudgeForce,
     protectedTools: [.../* @__PURE__ */ new Set([...base.protectedTools, ...override.protectedTools ?? []])],
@@ -3019,7 +3029,9 @@ function resetOnCompaction(state) {
   state.nudges = {
     contextLimitAnchors: /* @__PURE__ */ new Set(),
     turnNudgeAnchors: /* @__PURE__ */ new Set(),
-    iterationNudgeAnchors: /* @__PURE__ */ new Set()
+    iterationNudgeAnchors: /* @__PURE__ */ new Set(),
+    lastPerMessageNudgeTurn: 0,
+    lastPerMessageNudgeTokens: 0
   };
   state.messageIds = {
     byRawId: /* @__PURE__ */ new Map(),
@@ -3085,7 +3097,9 @@ async function saveSessionState(sessionState, logger, sessionName) {
     nudges: {
       contextLimitAnchors: Array.from(sessionState.nudges.contextLimitAnchors),
       turnNudgeAnchors: Array.from(sessionState.nudges.turnNudgeAnchors),
-      iterationNudgeAnchors: Array.from(sessionState.nudges.iterationNudgeAnchors)
+      iterationNudgeAnchors: Array.from(sessionState.nudges.iterationNudgeAnchors),
+      lastPerMessageNudgeTurn: sessionState.nudges.lastPerMessageNudgeTurn ?? 0,
+      lastPerMessageNudgeTokens: sessionState.nudges.lastPerMessageNudgeTokens ?? 0
     },
     stats: sessionState.stats,
     lastUpdated: (/* @__PURE__ */ new Date()).toISOString(),
@@ -3299,7 +3313,9 @@ function createSessionState() {
     nudges: {
       contextLimitAnchors: /* @__PURE__ */ new Set(),
       turnNudgeAnchors: /* @__PURE__ */ new Set(),
-      iterationNudgeAnchors: /* @__PURE__ */ new Set()
+      iterationNudgeAnchors: /* @__PURE__ */ new Set(),
+      lastPerMessageNudgeTurn: 0,
+      lastPerMessageNudgeTokens: 0
     },
     stats: {
       pruneTokenCounter: 0,
@@ -3336,7 +3352,9 @@ function resetSessionState(state) {
   state.nudges = {
     contextLimitAnchors: /* @__PURE__ */ new Set(),
     turnNudgeAnchors: /* @__PURE__ */ new Set(),
-    iterationNudgeAnchors: /* @__PURE__ */ new Set()
+    iterationNudgeAnchors: /* @__PURE__ */ new Set(),
+    lastPerMessageNudgeTurn: 0,
+    lastPerMessageNudgeTokens: 0
   };
   state.stats = {
     pruneTokenCounter: 0,
@@ -3381,6 +3399,8 @@ async function ensureSessionInitialized(client, state, sessionId, logger, messag
   state.nudges.iterationNudgeAnchors = new Set(
     persisted.nudges.iterationNudgeAnchors || []
   );
+  state.nudges.lastPerMessageNudgeTurn = persisted.nudges.lastPerMessageNudgeTurn ?? 0;
+  state.nudges.lastPerMessageNudgeTokens = persisted.nudges.lastPerMessageNudgeTokens ?? 0;
   state.stats = {
     pruneTokenCounter: persisted.stats?.pruneTokenCounter || 0,
     totalPruneTokens: persisted.stats?.totalPruneTokens || 0
@@ -4815,7 +4835,8 @@ var createSyntheticUserMessage = (baseMessage, content, stableSeed) => {
         sessionID: userInfo.sessionID,
         messageID: messageId,
         type: "text",
-        text: content
+        text: content,
+        synthetic: true
       }
     ]
   };
@@ -5218,12 +5239,25 @@ function buildCompressedBlockGuidance(state, gcConfig, context) {
   const activeBlockIds = Array.from(state.prune.messages.activeBlockIds).filter((id) => Number.isInteger(id) && id > 0).sort((a, b) => a - b);
   const refs = activeBlockIds.map((id) => `b${id}`);
   const blockCount = refs.length;
-  const blockList = blockCount > 0 ? refs.join(", ") : "none";
+  let blockList;
+  if (blockCount <= 20) {
+    blockList = blockCount > 0 ? refs.join(", ") : "none";
+  } else {
+    const recent = refs.slice(-20).join(", ");
+    blockList = `${recent} (+${blockCount - 20} older, use decompress to access by ID)`;
+  }
+  const includeHint = context?.includeHint ?? true;
   const lines = [
     "Compressed block context:",
     `- Active compressed blocks: ${blockCount} (${blockList})`,
     "- If your selected compression range includes any listed block, include each required placeholder exactly once in the summary using `(bN)`."
   ];
+  if (includeHint) {
+    lines.push("- \u{1F4A1} When you've finished using tool outputs, compress them \u2014 you can decompress later if needed. Lean context improves accuracy.");
+  }
+  if (blockCount > 50) {
+    lines.push(`- \u{1F500} You have ${blockCount} blocks \u2014 consider merging adjacent same-topic blocks instead of finding new content to compress. This permanently reduces per-turn overhead.`);
+  }
   const usageRatio = context?.currentTokens && context?.modelContextLimit ? context.currentTokens / context.modelContextLimit : 0;
   if (gcConfig && usageRatio > 0.5) {
     const promotionThreshold = gcConfig.promotionThreshold;
@@ -5566,7 +5600,7 @@ function resolveThresholdPercent(threshold, modelContextLimit) {
   const parsed = parseFloat(threshold);
   return isNaN(parsed) ? void 0 : parsed;
 }
-function buildContextUsageGuidance(config, currentTokens, modelContextLimit) {
+function buildContextUsageGuidance(config, currentTokens, modelContextLimit, minimal = false) {
   if (currentTokens === void 0 || modelContextLimit === void 0 || modelContextLimit === 0) {
     return "";
   }
@@ -5575,14 +5609,19 @@ function buildContextUsageGuidance(config, currentTokens, modelContextLimit) {
   const formatK = (n) => n >= 1e3 ? `${(n / 1e3).toFixed(1)}K` : String(n);
   const minPct = resolveThresholdPercent(config.compress.minContextLimit, modelContextLimit) ?? 45;
   const maxPct = resolveThresholdPercent(config.compress.maxContextLimit, modelContextLimit) ?? 55;
-  const base = `Context usage: ${formatK(currentTokens)} / ${formatK(modelContextLimit)} tokens (${percentage}%). ACP threshold: ${maxPct.toFixed(0)}%.`;
+  const base = `Context usage: ${formatK(currentTokens)} / ${formatK(modelContextLimit)} tokens (${percentage}%).`;
+  if (minimal) {
+    return `
+${base}`;
+  }
   let guidance;
   if (pct < minPct) {
-    guidance = " Context is ample \u2014 focus on your task. Only compress obvious waste (large terminal outputs, duplicated content).";
+    guidance = " \u{1F4A1} Be frugal with context \u2014 if you see large completed outputs (>2000 tokens), compress them into summaries. If everything is already compressed, skip this nudge. You can decompress later if needed. Extract and keep what matters: user intent, key decisions, file paths, and important findings. Compress everything else.";
   } else if (pct < maxPct) {
-    guidance = " Context is moderate \u2014 compress completed sections and high-token waste. Preserve key details.";
+    guidance = " \u26A0\uFE0F Context is growing \u2014 compress completed sections and high-token waste now. Preserve key details.";
   } else {
-    guidance = " Context is high \u2014 compress aggressively but selectively. Preserve only what is essential.";
+    guidance = " \u{1F525} Context is high \u2014 compress aggressively but selectively. Preserve only what is essential.";
   }
   return `
@@ -5680,6 +5719,18 @@ function createSuffixMessage(messages) {
   messages.push(synthetic);
   return synthetic;
 }
+function shouldInjectPerMessageNudge(state, config, currentTokens, modelContextLimit) {
+  const turn = state.currentTurn ?? 0;
+  const lastTurn = state.nudges.lastPerMessageNudgeTurn ?? 0;
+  const turnsSinceLast = turn - lastTurn;
+  const tokens = currentTokens ?? 0;
+  const lastTokens = state.nudges.lastPerMessageNudgeTokens ?? 0;
+  const tokenGrowth = tokens - lastTokens;
+  const tokenGrowthPercent = modelContextLimit ? tokenGrowth / modelContextLimit * 100 : 0;
+  const frequency = config.compress.nudgeFrequency ?? 5;
+  const growthThreshold = config.compress.perMessageNudgeGrowthPercent ?? 3;
+  return turnsSinceLast >= frequency || tokenGrowthPercent >= growthThreshold;
+}
 var injectCompressNudges = (state, config, logger, messages, prompts, compressionPriorities) => {
   if (compressPermission(state, config) === "deny") {
     return;
@@ -5764,21 +5815,26 @@ var injectCompressNudges = (state, config, logger, messages, prompts, compressio
   }
   const suffixMessage = createSuffixMessage(messages);
   applyAnchoredNudges(state, config, messages, prompts, compressionPriorities, currentTokens, modelContextLimit, suffixMessage);
-  injectContextUsage(suffixMessage, config, currentTokens, modelContextLimit);
+  const shouldNudge = shouldInjectPerMessageNudge(state, config, currentTokens, modelContextLimit);
+  injectContextUsage(suffixMessage, config, currentTokens, modelContextLimit, !shouldNudge);
   if (config.compress.mode !== "message") {
-    const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { currentTokens, modelContextLimit });
+    const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { currentTokens, modelContextLimit, includeHint: shouldNudge });
     if (blockGuidance.trim() && suffixMessage) {
       appendToLastTextPart(suffixMessage, "\n\n" + blockGuidance);
     }
   }
+  if (shouldNudge) {
+    state.nudges.lastPerMessageNudgeTurn = state.currentTurn ?? 0;
+    state.nudges.lastPerMessageNudgeTokens = currentTokens ?? 0;
+  }
   injectVisibleIdRange(state, messages, suffixMessage);
   if (anchorsChanged) {
     void saveSessionState(state, logger);
   }
 };
-function injectContextUsage(target, config, currentTokens, modelContextLimit) {
+function injectContextUsage(target, config, currentTokens, modelContextLimit, minimal = false) {
   if (!target) return;
-  const usageTag = buildContextUsageGuidance(config, currentTokens, modelContextLimit);
+  const usageTag = buildContextUsageGuidance(config, currentTokens, modelContextLimit, minimal);
   if (!usageTag) return;
   for (const part of target.parts) {
     if (part.type === "text") {
@@ -6628,15 +6684,15 @@ COMPRESSION PHILOSOPHY
 Compression replaces raw conversation content with dense summaries. When used correctly, it keeps your context sharp and focused. When used carelessly, it destroys information you need.
-The key principle: compress based on context pressure, not habit. When context is ample, compress rarely or not at all. When context is tight, compress aggressively but selectively. The runtime context usage indicator tells you the current pressure level.
+The key principle: compress proactively to keep context lean, but selectively. Large tool outputs (shell, diffs, logs) can be compressed into summaries at any time \u2014 you can decompress later if needed. Extract and keep what matters: user intent, key decisions, file paths, and important findings \u2014 even if buried in large messages. Compress everything else, including verbose parts of user messages, large code dumps, and long discussions.
 Target the largest UNCOMPRESSED content first. Savings scale with original size \u2014 compressing a 5000-token tool output frees far more than re-shrinking an already-summarized 300-token block.
 CONTEXT PRESSURE LEVELS
-- Ample: Context is well below the threshold. Do NOT compress unless there is obvious waste (huge terminal dumps, duplicated content). Focus entirely on your task.
-- Moderate: Context is approaching the threshold. Compress completed sections proactively. Prioritize high-token waste over minor cleanup.
-- High: Context has exceeded the threshold. Compress aggressively. Every compression should free meaningful tokens. Preserve only what is essential for the current task.
+- Normal: Be frugal \u2014 compress tool outputs you've finished using into summaries. You can decompress later. Extract and keep what matters from any message; compress verbose parts \u2014 including large logs in user messages or generated code.
+- Elevated: Context is growing. Compress completed sections and high-token waste more urgently.
+- Critical: Compress aggressively now. Every compression should free meaningful tokens. Preserve only what is essential for the current task.
 WHAT TO COMPRESS FIRST (high value, low risk)
@@ -6798,9 +6854,9 @@ General cleanup should be done periodically between other normal compression too
 // lib/prompts/context-limit-nudge.ts
 var CONTEXT_LIMIT_NUDGE = `
 <system-reminder>
-\u26A0\uFE0F CRITICAL: Context limit reached. You MUST use the \`compress\` tool NOW.
+\u26A0\uFE0F Context limit reached \u2014 time to compress the largest ranges you no longer need. Prioritize completed tool outputs and resolved work. You can decompress specific blocks later if you need details. Keeping context lean helps you stay accurate.
-If mid-atomic-operation, finish that step first, then compress immediately.
+If mid-atomic-operation, finish that step first, then compress.
 HOW TO CALL COMPRESS:
 {
@@ -6815,7 +6871,7 @@ HOW TO CALL COMPRESS:
 }
 \u26A0\uFE0F ID RULES \u2014 MOST COMMON CAUSE OF ERRORS:
-- ONLY use IDs you can see in <dcp-message-id> tags in the messages ABOVE.
+- ONLY use IDs you can see in  tags in the messages ABOVE.
 - Do NOT copy IDs from this example. Do NOT invent IDs.
 - Do NOT use IDs from compressed block summaries \u2014 they are stale.
 - startId must appear BEFORE endId in the conversation.
@@ -6831,14 +6887,14 @@ SUMMARY RULES:
 // lib/prompts/turn-nudge.ts
 var TURN_NUDGE = `
 <system-reminder>
-Context is getting full. Compress closed/older conversation ranges now.
+Context is getting full. If you've finished reading tool outputs or exploration results, compress them \u2014 you can decompress later if needed. This keeps your focus on the current task and improves accuracy.
 {
   "topic": "Short Label",
   "content": [{ "startId": "<visible message ID>", "endId": "<visible message ID>", "summary": "..." }]
 }
-\u26A0\uFE0F ONLY use IDs from <dcp-message-id> tags visible above. Do NOT invent or copy example IDs.
+\u26A0\uFE0F ONLY use IDs from  tags visible above. Do NOT invent or copy example IDs.
 </system-reminder>
 `;
@@ -8212,10 +8268,12 @@ function parseGcThreshold(limit, modelContextLimit) {
 // lib/gc/merge.ts
 var DEFAULT_BATCH_CLEANUP = {
-  lowThreshold: "60%",
+  lowThreshold: "55%",
   highThreshold: "75%",
   forceThreshold: "90%"
 };
+var ESCALATE_MIN_MARKED = 3;
+var ESCALATE_MIN_RATIO = 0.4;
 function resolveBatchCleanup(gc) {
   return gc.batchCleanup ?? DEFAULT_BATCH_CLEANUP;
 }
@@ -8239,11 +8297,15 @@ function collectActiveOldGenBlocks(state, maxOldGenSummaryLength) {
   return blocks;
 }
 function collectActiveMarkedBlocks(state) {
-  const ids = Array.from(state.prune.messages.markedForCleanup).sort((a, b) => a - b);
+  const messagesState = state.prune.messages;
+  const ids = Array.from(messagesState.markedForCleanup).sort((a, b) => a - b);
   const blocks = [];
   for (const id of ids) {
-    const block = state.prune.messages.blocksById.get(id);
-    if (!block || !block.active) continue;
+    const block = messagesState.blocksById.get(id);
+    if (!block || !block.active) {
+      messagesState.markedForCleanup.delete(id);
+      continue;
+    }
     blocks.push(block);
   }
   return blocks;
@@ -8368,21 +8430,53 @@ function mergeMarkedBlocks(state, markedIds, maxMergedLength) {
   const savedTokens = Math.max(0, sourceTokens - newSummaryTokens);
   return { mergedCount: sourceBlocks.length, savedTokens };
 }
-function buildNudgeText(state, maxMergedLength) {
-  const blocks = collectActiveMarkedBlocks(state);
-  if (blocks.length < 1) return void 0;
-  const refs = blocks.map((b) => formatBlockRef(b.blockId)).join(", ");
-  const sourceTokens = blocks.reduce(
+function estimateTokens(blocks) {
+  return blocks.reduce(
     (sum, block) => sum + (block.summaryTokens || Math.round(block.summary.length / 4)),
     0
   );
-  const estimatedMergedTokens = Math.round(maxMergedLength / 4);
-  const estimatedSavings = Math.max(0, sourceTokens - estimatedMergedTokens);
+}
+function buildNudgeText(state, maxMergedLength) {
+  const marked = collectActiveMarkedBlocks(state);
+  const oldGen = collectActiveOldGenBlocks(state, maxMergedLength);
+  if (oldGen.length === 0) return void 0;
+  const oldGenIds = new Set(oldGen.map((b) => b.blockId));
+  const markedOldGen = marked.filter((b) => oldGenIds.has(b.blockId));
+  const markedOldGenCount = markedOldGen.length;
+  const oldGenCount = oldGen.length;
+  const ratio = markedOldGenCount / oldGenCount;
+  const ratioPct = Math.round(ratio * 100);
+  const escalateMinPct = Math.round(ESCALATE_MIN_RATIO * 100);
+  if (markedOldGenCount >= ESCALATE_MIN_MARKED && ratio >= ESCALATE_MIN_RATIO) {
+    const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ");
+    const firstRef = formatBlockRef(marked[0].blockId);
+    const lastRef = formatBlockRef(marked[marked.length - 1].blockId);
+    const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4));
+    return [
+      `\u{1F525} ${markedOldGenCount}/${oldGenCount} old-gen blocks marked (${ratioPct}%) \u2014 ready for batch cleanup.`,
+      `Compressing ${refs} (range ${firstRef}\u2013${lastRef}) would free ~${estimatedSavings} tokens in one cache break.`,
+      `Call compress with this range now to consolidate them.`
+    ].join(" ");
+  }
+  if (marked.length >= 1) {
+    const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ");
+    const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4));
+    return [
+      `\u26A0\uFE0F ${marked.length} block(s) marked for batch cleanup (${refs}).`,
+      `Merge-compressing them would free ~${estimatedSavings} tokens.`,
+      marked.length >= 2 ? "They will auto-merge when context pressure reaches the high threshold." : "A single marked block won't auto-merge on its own \u2014 use compress to consolidate it, or unmark_block if no longer needed.",
+      `Mark more old-gen blocks (need \u2265${ESCALATE_MIN_MARKED} at \u2265${escalateMinPct}%) to trigger batch cleanup sooner.`,
+      "To act now, use compress with a range covering these blocks."
+    ].join(" ");
+  }
+  const shown = oldGen.slice(0, 5);
+  const oldGenRefs = shown.map((b) => formatBlockRef(b.blockId)).join(", ");
+  const more = oldGenCount > 5 ? ` (+${oldGenCount - 5} more)` : "";
   return [
-    `\u26A0\uFE0F ${blocks.length} block(s) marked for batch cleanup (${refs}).`,
-    `Merge-compressing them would free ~${estimatedSavings} tokens.`,
-    blocks.length >= 2 ? "They will auto-merge when context pressure reaches the high threshold." : "A single marked block won't auto-merge on its own \u2014 use compress to consolidate it, or unmark_block if no longer needed.",
-    "To act now, use compress with a range covering these blocks."
+    `\u{1F4CB} Context pressure rising \u2014 ${oldGenCount} old-gen compressed block(s) occupy ~${estimateTokens(oldGen)} tokens (${oldGenRefs}${more}).`,
+    `Review which blocks contain information you no longer need, and use mark_block to flag them.`,
+    `Once enough are marked (\u2265${ESCALATE_MIN_MARKED} at \u2265${escalateMinPct}% of old-gen), they'll be batch-merged in one cache break to preserve cache hit rate.`,
+    `Do NOT mark blocks you may still need.`
   ].join(" ");
 }
 function runBatchCleanup(state, config, logger, messages) {
@@ -8427,26 +8521,24 @@ function runBatchCleanup(state, config, logger, messages) {
   }
   if (currentTokens >= highTokens) {
     const marked = collectActiveMarkedBlocks(state);
-    if (marked.length < 2) {
-      return noop;
-    }
-    const ids = marked.map((b) => b.blockId);
-    const result = mergeMarkedBlocks(state, ids, maxMergedLength);
-    if (result.mergedCount === 0) {
-      return noop;
+    if (marked.length >= 2) {
+      const ids = marked.map((b) => b.blockId);
+      const result = mergeMarkedBlocks(state, ids, maxMergedLength);
+      if (result.mergedCount > 0) {
+        logger.info("Batch cleanup tier 2 (high): merged marked blocks", {
+          mergedCount: result.mergedCount,
+          savedTokens: result.savedTokens,
+          currentTokens,
+          highThreshold: batchCleanup.highThreshold
+        });
+        return {
+          tier: 2,
+          action: "merge",
+          mergedCount: result.mergedCount,
+          savedTokens: result.savedTokens
+        };
+      }
     }
-    logger.info("Batch cleanup tier 2 (high): merged marked blocks", {
-      mergedCount: result.mergedCount,
-      savedTokens: result.savedTokens,
-      currentTokens,
-      highThreshold: batchCleanup.highThreshold
-    });
-    return {
-      tier: 2,
-      action: "merge",
-      mergedCount: result.mergedCount,
-      savedTokens: result.savedTokens
-    };
   }
   if (currentTokens >= lowTokens) {
     const nudgeText = buildNudgeText(state, maxMergedLength);