npm - opencode-acp - Versions diffs - 1.4.1 → 1.5.0 - Mend

opencode-acp 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +2 -1
package/dist/index.js +93 -131
package/dist/index.js.map +1 -1
package/dist/lib/config-validation.d.ts +0 -1
package/dist/lib/config-validation.d.ts.map +1 -1
package/dist/lib/gc/merge.d.ts.map +1 -1
package/dist/lib/message-ids.d.ts +0 -1
package/dist/lib/message-ids.d.ts.map +1 -1
package/dist/lib/messages/prune.d.ts.map +1 -1
package/dist/lib/messages/utils.d.ts +0 -4
package/dist/lib/messages/utils.d.ts.map +1 -1
package/dist/lib/prompts/context-limit-nudge.d.ts +1 -1
package/dist/lib/prompts/context-limit-nudge.d.ts.map +1 -1
package/dist/lib/prompts/extensions/nudge.d.ts.map +1 -1
package/dist/lib/prompts/system.d.ts +1 -1
package/dist/lib/prompts/system.d.ts.map +1 -1
package/dist/lib/prompts/turn-nudge.d.ts +1 -1
package/dist/lib/prompts/turn-nudge.d.ts.map +1 -1
package/dist/lib/state/state.d.ts.map +1 -1
package/dist/lib/ui/notification.d.ts +0 -2
package/dist/lib/ui/notification.d.ts.map +1 -1
package/dist/lib/ui/utils.d.ts +0 -3
package/dist/lib/ui/utils.d.ts.map +1 -1
package/dist/lib/update.d.ts +0 -16
package/dist/lib/update.d.ts.map +1 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -389,7 +389,7 @@ ACP auto-migrates config from `dcp.jsonc` to `acp.jsonc` and prompts from `dcp-p
 ---
 <details>
-<summary><strong>Bug Fixes (37 total)</strong> -- applied on top of DCP v3.1.11</summary>
+<summary><strong>Bug Fixes (38 total)</strong> -- applied on top of DCP v3.1.11</summary>
 | # | Severity | Summary |
 |---|----------|---------|
@@ -419,6 +419,7 @@ ACP auto-migrates config from `dcp.jsonc` to `acp.jsonc` and prompts from `dcp-p
 | 35 | HIGH | Aging warnings shown at low context usage (<50%) -- triggers unnecessary compress, wastes tokens |
 | 36 | HIGH | Compression summary emitted as a standalone user message before the user's real turn -- model reads its own prior assistant output as user input, causing dialog role confusion / self-Q&A loops |
 | 37 | HIGH | Message-transform pipeline runs on OpenCode's hidden title/summary/compaction agent requests -- corrupts the request and shared session state, breaking session title generation |
+| 38 | CRITICAL | pruneToolOutputs/pruneToolInputs/pruneToolErrors mutate existing messages in-place -- invalidates LLM prefix cache, causing 89% of fresh input tokens to be wasted on cache-invalidating re-sends |
 For the complete list with root cause analysis, see the [bug tracker](https://github.com/ranxianglei/opencode-acp/issues).

package/dist/index.js CHANGED

@@ -1485,7 +1485,7 @@ var defaultConfig = {
     maxOldGenSummaryLength: 3e3,
     majorGcThresholdPercent: "100%",
     batchCleanup: {
-      lowThreshold: "60%",
+      lowThreshold: "55%",
       highThreshold: "75%",
       forceThreshold: "90%"
     }
@@ -4958,89 +4958,8 @@ var stripHallucinations = (messages) => {
 };
 // lib/messages/prune.ts
-var PRUNED_TOOL_OUTPUT_REPLACEMENT = "[Output removed to save context - information superseded or no longer needed]";
-var PRUNED_TOOL_ERROR_INPUT_REPLACEMENT = "[input removed due to failed tool call]";
-var PRUNED_QUESTION_INPUT_REPLACEMENT = "[questions removed - see output for user's answers]";
 var prune = (state, logger, config, messages) => {
   filterCompressedRanges(state, logger, config, messages);
-  pruneToolOutputs(state, logger, messages);
-  pruneToolInputs(state, logger, messages);
-  pruneToolErrors(state, logger, messages);
-};
-var pruneToolOutputs = (state, logger, messages) => {
-  for (const msg of messages) {
-    if (isMessageCompacted(state, msg)) {
-      continue;
-    }
-    const parts = Array.isArray(msg.parts) ? msg.parts : [];
-    for (const part of parts) {
-      if (part.type !== "tool") {
-        continue;
-      }
-      if (!state.prune.tools.has(part.callID)) {
-        continue;
-      }
-      if (part.state.status !== "completed") {
-        continue;
-      }
-      if (part.tool === "question" || part.tool === "edit" || part.tool === "write") {
-        continue;
-      }
-      part.state.output = PRUNED_TOOL_OUTPUT_REPLACEMENT;
-    }
-  }
-};
-var pruneToolInputs = (state, logger, messages) => {
-  for (const msg of messages) {
-    if (isMessageCompacted(state, msg)) {
-      continue;
-    }
-    const parts = Array.isArray(msg.parts) ? msg.parts : [];
-    for (const part of parts) {
-      if (part.type !== "tool") {
-        continue;
-      }
-      if (!state.prune.tools.has(part.callID)) {
-        continue;
-      }
-      if (part.state.status !== "completed") {
-        continue;
-      }
-      if (part.tool !== "question") {
-        continue;
-      }
-      if (part.state.input?.questions !== void 0) {
-        part.state.input.questions = PRUNED_QUESTION_INPUT_REPLACEMENT;
-      }
-    }
-  }
-};
-var pruneToolErrors = (state, logger, messages) => {
-  for (const msg of messages) {
-    if (isMessageCompacted(state, msg)) {
-      continue;
-    }
-    const parts = Array.isArray(msg.parts) ? msg.parts : [];
-    for (const part of parts) {
-      if (part.type !== "tool") {
-        continue;
-      }
-      if (!state.prune.tools.has(part.callID)) {
-        continue;
-      }
-      if (part.state.status !== "error") {
-        continue;
-      }
-      const input = part.state.input;
-      if (input && typeof input === "object") {
-        for (const key of Object.keys(input)) {
-          if (typeof input[key] === "string") {
-            input[key] = PRUNED_TOOL_ERROR_INPUT_REPLACEMENT;
-          }
-        }
-      }
-    }
-  }
 };
 var filterCompressedRanges = (state, logger, config, messages) => {
   if (state.prune.messages.byMessageId.size === 0 && state.prune.messages.activeByAnchorMessageId.size === 0) {
@@ -5299,11 +5218,18 @@ function buildCompressedBlockGuidance(state, gcConfig, context) {
   const activeBlockIds = Array.from(state.prune.messages.activeBlockIds).filter((id) => Number.isInteger(id) && id > 0).sort((a, b) => a - b);
   const refs = activeBlockIds.map((id) => `b${id}`);
   const blockCount = refs.length;
-  const blockList = blockCount > 0 ? refs.join(", ") : "none";
+  let blockList;
+  if (blockCount <= 20) {
+    blockList = blockCount > 0 ? refs.join(", ") : "none";
+  } else {
+    const recent = refs.slice(-20).join(", ");
+    blockList = `${recent} (+${blockCount - 20} older, use decompress to access by ID)`;
+  }
   const lines = [
     "Compressed block context:",
     `- Active compressed blocks: ${blockCount} (${blockList})`,
-    "- If your selected compression range includes any listed block, include each required placeholder exactly once in the summary using `(bN)`."
+    "- If your selected compression range includes any listed block, include each required placeholder exactly once in the summary using `(bN)`.",
+    "- \u{1F4A1} When you've finished using tool outputs, compress them \u2014 you can decompress later if needed. Lean context improves accuracy."
   ];
   const usageRatio = context?.currentTokens && context?.modelContextLimit ? context.currentTokens / context.modelContextLimit : 0;
   if (gcConfig && usageRatio > 0.5) {
@@ -5656,14 +5582,14 @@ function buildContextUsageGuidance(config, currentTokens, modelContextLimit) {
   const formatK = (n) => n >= 1e3 ? `${(n / 1e3).toFixed(1)}K` : String(n);
   const minPct = resolveThresholdPercent(config.compress.minContextLimit, modelContextLimit) ?? 45;
   const maxPct = resolveThresholdPercent(config.compress.maxContextLimit, modelContextLimit) ?? 55;
-  const base = `Context usage: ${formatK(currentTokens)} / ${formatK(modelContextLimit)} tokens (${percentage}%). ACP threshold: ${maxPct.toFixed(0)}%.`;
+  const base = `Context usage: ${formatK(currentTokens)} / ${formatK(modelContextLimit)} tokens (${percentage}%).`;
   let guidance;
   if (pct < minPct) {
-    guidance = " Context is ample \u2014 focus on your task. Only compress obvious waste (large terminal outputs, duplicated content).";
+    guidance = " \u{1F4A1} Be frugal with context \u2014 compress tool outputs you've finished using into summaries. You can decompress later; nothing is permanently lost. Lean context means better accuracy. Extract and keep what matters: user intent, key decisions, file paths, and important findings \u2014 even if buried in large messages. Compress everything else, including verbose parts of any message.";
   } else if (pct < maxPct) {
-    guidance = " Context is moderate \u2014 compress completed sections and high-token waste. Preserve key details.";
+    guidance = " \u26A0\uFE0F Context is growing \u2014 compress completed sections and high-token waste now. Preserve key details.";
   } else {
-    guidance = " Context is high \u2014 compress aggressively but selectively. Preserve only what is essential.";
+    guidance = " \u{1F525} Context is high \u2014 compress aggressively but selectively. Preserve only what is essential.";
   }
   return `
@@ -6709,15 +6635,15 @@ COMPRESSION PHILOSOPHY
 Compression replaces raw conversation content with dense summaries. When used correctly, it keeps your context sharp and focused. When used carelessly, it destroys information you need.
-The key principle: compress based on context pressure, not habit. When context is ample, compress rarely or not at all. When context is tight, compress aggressively but selectively. The runtime context usage indicator tells you the current pressure level.
+The key principle: compress proactively to keep context lean, but selectively. Large tool outputs (shell, diffs, logs) can be compressed into summaries at any time \u2014 you can decompress later if needed. Extract and keep what matters: user intent, key decisions, file paths, and important findings \u2014 even if buried in large messages. Compress everything else, including verbose parts of user messages, large code dumps, and long discussions.
 Target the largest UNCOMPRESSED content first. Savings scale with original size \u2014 compressing a 5000-token tool output frees far more than re-shrinking an already-summarized 300-token block.
 CONTEXT PRESSURE LEVELS
-- Ample: Context is well below the threshold. Do NOT compress unless there is obvious waste (huge terminal dumps, duplicated content). Focus entirely on your task.
-- Moderate: Context is approaching the threshold. Compress completed sections proactively. Prioritize high-token waste over minor cleanup.
-- High: Context has exceeded the threshold. Compress aggressively. Every compression should free meaningful tokens. Preserve only what is essential for the current task.
+- Normal: Be frugal \u2014 compress tool outputs you've finished using into summaries. You can decompress later. Extract and keep what matters from any message; compress verbose parts \u2014 including large logs in user messages or generated code.
+- Elevated: Context is growing. Compress completed sections and high-token waste more urgently.
+- Critical: Compress aggressively now. Every compression should free meaningful tokens. Preserve only what is essential for the current task.
 WHAT TO COMPRESS FIRST (high value, low risk)
@@ -6879,9 +6805,9 @@ General cleanup should be done periodically between other normal compression too
 // lib/prompts/context-limit-nudge.ts
 var CONTEXT_LIMIT_NUDGE = `
 <system-reminder>
-\u26A0\uFE0F CRITICAL: Context limit reached. You MUST use the \`compress\` tool NOW.
+\u26A0\uFE0F Context limit reached \u2014 time to compress the largest ranges you no longer need. Prioritize completed tool outputs and resolved work. You can decompress specific blocks later if you need details. Keeping context lean helps you stay accurate.
-If mid-atomic-operation, finish that step first, then compress immediately.
+If mid-atomic-operation, finish that step first, then compress.
 HOW TO CALL COMPRESS:
 {
@@ -6896,7 +6822,7 @@ HOW TO CALL COMPRESS:
 }
 \u26A0\uFE0F ID RULES \u2014 MOST COMMON CAUSE OF ERRORS:
-- ONLY use IDs you can see in <dcp-message-id> tags in the messages ABOVE.
+- ONLY use IDs you can see in  tags in the messages ABOVE.
 - Do NOT copy IDs from this example. Do NOT invent IDs.
 - Do NOT use IDs from compressed block summaries \u2014 they are stale.
 - startId must appear BEFORE endId in the conversation.
@@ -6912,14 +6838,14 @@ SUMMARY RULES:
 // lib/prompts/turn-nudge.ts
 var TURN_NUDGE = `
 <system-reminder>
-Context is getting full. Compress closed/older conversation ranges now.
+Context is getting full. If you've finished reading tool outputs or exploration results, compress them \u2014 you can decompress later if needed. This keeps your focus on the current task and improves accuracy.
 {
   "topic": "Short Label",
   "content": [{ "startId": "<visible message ID>", "endId": "<visible message ID>", "summary": "..." }]
 }
-\u26A0\uFE0F ONLY use IDs from <dcp-message-id> tags visible above. Do NOT invent or copy example IDs.
+\u26A0\uFE0F ONLY use IDs from  tags visible above. Do NOT invent or copy example IDs.
 </system-reminder>
 `;
@@ -8293,10 +8219,12 @@ function parseGcThreshold(limit, modelContextLimit) {
 // lib/gc/merge.ts
 var DEFAULT_BATCH_CLEANUP = {
-  lowThreshold: "60%",
+  lowThreshold: "55%",
   highThreshold: "75%",
   forceThreshold: "90%"
 };
+var ESCALATE_MIN_MARKED = 3;
+var ESCALATE_MIN_RATIO = 0.4;
 function resolveBatchCleanup(gc) {
   return gc.batchCleanup ?? DEFAULT_BATCH_CLEANUP;
 }
@@ -8320,11 +8248,15 @@ function collectActiveOldGenBlocks(state, maxOldGenSummaryLength) {
   return blocks;
 }
 function collectActiveMarkedBlocks(state) {
-  const ids = Array.from(state.prune.messages.markedForCleanup).sort((a, b) => a - b);
+  const messagesState = state.prune.messages;
+  const ids = Array.from(messagesState.markedForCleanup).sort((a, b) => a - b);
   const blocks = [];
   for (const id of ids) {
-    const block = state.prune.messages.blocksById.get(id);
-    if (!block || !block.active) continue;
+    const block = messagesState.blocksById.get(id);
+    if (!block || !block.active) {
+      messagesState.markedForCleanup.delete(id);
+      continue;
+    }
     blocks.push(block);
   }
   return blocks;
@@ -8449,21 +8381,53 @@ function mergeMarkedBlocks(state, markedIds, maxMergedLength) {
   const savedTokens = Math.max(0, sourceTokens - newSummaryTokens);
   return { mergedCount: sourceBlocks.length, savedTokens };
 }
-function buildNudgeText(state, maxMergedLength) {
-  const blocks = collectActiveMarkedBlocks(state);
-  if (blocks.length < 1) return void 0;
-  const refs = blocks.map((b) => formatBlockRef(b.blockId)).join(", ");
-  const sourceTokens = blocks.reduce(
+function estimateTokens(blocks) {
+  return blocks.reduce(
     (sum, block) => sum + (block.summaryTokens || Math.round(block.summary.length / 4)),
     0
   );
-  const estimatedMergedTokens = Math.round(maxMergedLength / 4);
-  const estimatedSavings = Math.max(0, sourceTokens - estimatedMergedTokens);
+}
+function buildNudgeText(state, maxMergedLength) {
+  const marked = collectActiveMarkedBlocks(state);
+  const oldGen = collectActiveOldGenBlocks(state, maxMergedLength);
+  if (oldGen.length === 0) return void 0;
+  const oldGenIds = new Set(oldGen.map((b) => b.blockId));
+  const markedOldGen = marked.filter((b) => oldGenIds.has(b.blockId));
+  const markedOldGenCount = markedOldGen.length;
+  const oldGenCount = oldGen.length;
+  const ratio = markedOldGenCount / oldGenCount;
+  const ratioPct = Math.round(ratio * 100);
+  const escalateMinPct = Math.round(ESCALATE_MIN_RATIO * 100);
+  if (markedOldGenCount >= ESCALATE_MIN_MARKED && ratio >= ESCALATE_MIN_RATIO) {
+    const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ");
+    const firstRef = formatBlockRef(marked[0].blockId);
+    const lastRef = formatBlockRef(marked[marked.length - 1].blockId);
+    const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4));
+    return [
+      `\u{1F525} ${markedOldGenCount}/${oldGenCount} old-gen blocks marked (${ratioPct}%) \u2014 ready for batch cleanup.`,
+      `Compressing ${refs} (range ${firstRef}\u2013${lastRef}) would free ~${estimatedSavings} tokens in one cache break.`,
+      `Call compress with this range now to consolidate them.`
+    ].join(" ");
+  }
+  if (marked.length >= 1) {
+    const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ");
+    const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4));
+    return [
+      `\u26A0\uFE0F ${marked.length} block(s) marked for batch cleanup (${refs}).`,
+      `Merge-compressing them would free ~${estimatedSavings} tokens.`,
+      marked.length >= 2 ? "They will auto-merge when context pressure reaches the high threshold." : "A single marked block won't auto-merge on its own \u2014 use compress to consolidate it, or unmark_block if no longer needed.",
+      `Mark more old-gen blocks (need \u2265${ESCALATE_MIN_MARKED} at \u2265${escalateMinPct}%) to trigger batch cleanup sooner.`,
+      "To act now, use compress with a range covering these blocks."
+    ].join(" ");
+  }
+  const shown = oldGen.slice(0, 5);
+  const oldGenRefs = shown.map((b) => formatBlockRef(b.blockId)).join(", ");
+  const more = oldGenCount > 5 ? ` (+${oldGenCount - 5} more)` : "";
   return [
-    `\u26A0\uFE0F ${blocks.length} block(s) marked for batch cleanup (${refs}).`,
-    `Merge-compressing them would free ~${estimatedSavings} tokens.`,
-    blocks.length >= 2 ? "They will auto-merge when context pressure reaches the high threshold." : "A single marked block won't auto-merge on its own \u2014 use compress to consolidate it, or unmark_block if no longer needed.",
-    "To act now, use compress with a range covering these blocks."
+    `\u{1F4CB} Context pressure rising \u2014 ${oldGenCount} old-gen compressed block(s) occupy ~${estimateTokens(oldGen)} tokens (${oldGenRefs}${more}).`,
+    `Review which blocks contain information you no longer need, and use mark_block to flag them.`,
+    `Once enough are marked (\u2265${ESCALATE_MIN_MARKED} at \u2265${escalateMinPct}% of old-gen), they'll be batch-merged in one cache break to preserve cache hit rate.`,
+    `Do NOT mark blocks you may still need.`
   ].join(" ");
 }
 function runBatchCleanup(state, config, logger, messages) {
@@ -8508,26 +8472,24 @@ function runBatchCleanup(state, config, logger, messages) {
   }
   if (currentTokens >= highTokens) {
     const marked = collectActiveMarkedBlocks(state);
-    if (marked.length < 2) {
-      return noop;
-    }
-    const ids = marked.map((b) => b.blockId);
-    const result = mergeMarkedBlocks(state, ids, maxMergedLength);
-    if (result.mergedCount === 0) {
-      return noop;
+    if (marked.length >= 2) {
+      const ids = marked.map((b) => b.blockId);
+      const result = mergeMarkedBlocks(state, ids, maxMergedLength);
+      if (result.mergedCount > 0) {
+        logger.info("Batch cleanup tier 2 (high): merged marked blocks", {
+          mergedCount: result.mergedCount,
+          savedTokens: result.savedTokens,
+          currentTokens,
+          highThreshold: batchCleanup.highThreshold
+        });
+        return {
+          tier: 2,
+          action: "merge",
+          mergedCount: result.mergedCount,
+          savedTokens: result.savedTokens
+        };
+      }
     }
-    logger.info("Batch cleanup tier 2 (high): merged marked blocks", {
-      mergedCount: result.mergedCount,
-      savedTokens: result.savedTokens,
-      currentTokens,
-      highThreshold: batchCleanup.highThreshold
-    });
-    return {
-      tier: 2,
-      action: "merge",
-      mergedCount: result.mergedCount,
-      savedTokens: result.savedTokens
-    };
   }
   if (currentTokens >= lowTokens) {
     const nudgeText = buildNudgeText(state, maxMergedLength);