opencode-acp 1.4.2 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -894,6 +894,7 @@ var VALID_CONFIG_KEYS = /* @__PURE__ */ new Set([
894
894
  "compress.modelMaxLimits",
895
895
  "compress.modelMinLimits",
896
896
  "compress.nudgeFrequency",
897
+ "compress.perMessageNudgeGrowthPercent",
897
898
  "compress.iterationNudgeThreshold",
898
899
  "compress.nudgeForce",
899
900
  "compress.protectedTools",
@@ -1117,6 +1118,13 @@ function validateConfigTypes(config) {
1117
1118
  actual: `${compress.nudgeFrequency} (will be clamped to 1)`
1118
1119
  });
1119
1120
  }
1121
+ if (compress.perMessageNudgeGrowthPercent !== void 0 && typeof compress.perMessageNudgeGrowthPercent !== "number") {
1122
+ errors.push({
1123
+ key: "compress.perMessageNudgeGrowthPercent",
1124
+ expected: "number",
1125
+ actual: typeof compress.perMessageNudgeGrowthPercent
1126
+ });
1127
+ }
1120
1128
  if (compress.iterationNudgeThreshold !== void 0 && typeof compress.iterationNudgeThreshold !== "number") {
1121
1129
  errors.push({
1122
1130
  key: "compress.iterationNudgeThreshold",
@@ -1461,6 +1469,7 @@ var defaultConfig = {
1461
1469
  maxContextLimit: "55%",
1462
1470
  minContextLimit: "45%",
1463
1471
  nudgeFrequency: 5,
1472
+ perMessageNudgeGrowthPercent: 3,
1464
1473
  iterationNudgeThreshold: 15,
1465
1474
  nudgeForce: "soft",
1466
1475
  protectedTools: [...COMPRESS_DEFAULT_PROTECTED_TOOLS],
@@ -1485,7 +1494,7 @@ var defaultConfig = {
1485
1494
  maxOldGenSummaryLength: 3e3,
1486
1495
  majorGcThresholdPercent: "100%",
1487
1496
  batchCleanup: {
1488
- lowThreshold: "60%",
1497
+ lowThreshold: "55%",
1489
1498
  highThreshold: "75%",
1490
1499
  forceThreshold: "90%"
1491
1500
  }
@@ -1612,6 +1621,7 @@ function mergeCompress(base, override) {
1612
1621
  modelMaxLimits: override.modelMaxLimits ?? base.modelMaxLimits,
1613
1622
  modelMinLimits: override.modelMinLimits ?? base.modelMinLimits,
1614
1623
  nudgeFrequency: override.nudgeFrequency ?? base.nudgeFrequency,
1624
+ perMessageNudgeGrowthPercent: override.perMessageNudgeGrowthPercent ?? base.perMessageNudgeGrowthPercent,
1615
1625
  iterationNudgeThreshold: override.iterationNudgeThreshold ?? base.iterationNudgeThreshold,
1616
1626
  nudgeForce: override.nudgeForce ?? base.nudgeForce,
1617
1627
  protectedTools: [.../* @__PURE__ */ new Set([...base.protectedTools, ...override.protectedTools ?? []])],
@@ -3019,7 +3029,9 @@ function resetOnCompaction(state) {
3019
3029
  state.nudges = {
3020
3030
  contextLimitAnchors: /* @__PURE__ */ new Set(),
3021
3031
  turnNudgeAnchors: /* @__PURE__ */ new Set(),
3022
- iterationNudgeAnchors: /* @__PURE__ */ new Set()
3032
+ iterationNudgeAnchors: /* @__PURE__ */ new Set(),
3033
+ lastPerMessageNudgeTurn: 0,
3034
+ lastPerMessageNudgeTokens: 0
3023
3035
  };
3024
3036
  state.messageIds = {
3025
3037
  byRawId: /* @__PURE__ */ new Map(),
@@ -3085,7 +3097,9 @@ async function saveSessionState(sessionState, logger, sessionName) {
3085
3097
  nudges: {
3086
3098
  contextLimitAnchors: Array.from(sessionState.nudges.contextLimitAnchors),
3087
3099
  turnNudgeAnchors: Array.from(sessionState.nudges.turnNudgeAnchors),
3088
- iterationNudgeAnchors: Array.from(sessionState.nudges.iterationNudgeAnchors)
3100
+ iterationNudgeAnchors: Array.from(sessionState.nudges.iterationNudgeAnchors),
3101
+ lastPerMessageNudgeTurn: sessionState.nudges.lastPerMessageNudgeTurn ?? 0,
3102
+ lastPerMessageNudgeTokens: sessionState.nudges.lastPerMessageNudgeTokens ?? 0
3089
3103
  },
3090
3104
  stats: sessionState.stats,
3091
3105
  lastUpdated: (/* @__PURE__ */ new Date()).toISOString(),
@@ -3299,7 +3313,9 @@ function createSessionState() {
3299
3313
  nudges: {
3300
3314
  contextLimitAnchors: /* @__PURE__ */ new Set(),
3301
3315
  turnNudgeAnchors: /* @__PURE__ */ new Set(),
3302
- iterationNudgeAnchors: /* @__PURE__ */ new Set()
3316
+ iterationNudgeAnchors: /* @__PURE__ */ new Set(),
3317
+ lastPerMessageNudgeTurn: 0,
3318
+ lastPerMessageNudgeTokens: 0
3303
3319
  },
3304
3320
  stats: {
3305
3321
  pruneTokenCounter: 0,
@@ -3336,7 +3352,9 @@ function resetSessionState(state) {
3336
3352
  state.nudges = {
3337
3353
  contextLimitAnchors: /* @__PURE__ */ new Set(),
3338
3354
  turnNudgeAnchors: /* @__PURE__ */ new Set(),
3339
- iterationNudgeAnchors: /* @__PURE__ */ new Set()
3355
+ iterationNudgeAnchors: /* @__PURE__ */ new Set(),
3356
+ lastPerMessageNudgeTurn: 0,
3357
+ lastPerMessageNudgeTokens: 0
3340
3358
  };
3341
3359
  state.stats = {
3342
3360
  pruneTokenCounter: 0,
@@ -3381,6 +3399,8 @@ async function ensureSessionInitialized(client, state, sessionId, logger, messag
3381
3399
  state.nudges.iterationNudgeAnchors = new Set(
3382
3400
  persisted.nudges.iterationNudgeAnchors || []
3383
3401
  );
3402
+ state.nudges.lastPerMessageNudgeTurn = persisted.nudges.lastPerMessageNudgeTurn ?? 0;
3403
+ state.nudges.lastPerMessageNudgeTokens = persisted.nudges.lastPerMessageNudgeTokens ?? 0;
3384
3404
  state.stats = {
3385
3405
  pruneTokenCounter: persisted.stats?.pruneTokenCounter || 0,
3386
3406
  totalPruneTokens: persisted.stats?.totalPruneTokens || 0
@@ -4815,7 +4835,8 @@ var createSyntheticUserMessage = (baseMessage, content, stableSeed) => {
4815
4835
  sessionID: userInfo.sessionID,
4816
4836
  messageID: messageId,
4817
4837
  type: "text",
4818
- text: content
4838
+ text: content,
4839
+ synthetic: true
4819
4840
  }
4820
4841
  ]
4821
4842
  };
@@ -5218,12 +5239,25 @@ function buildCompressedBlockGuidance(state, gcConfig, context) {
5218
5239
  const activeBlockIds = Array.from(state.prune.messages.activeBlockIds).filter((id) => Number.isInteger(id) && id > 0).sort((a, b) => a - b);
5219
5240
  const refs = activeBlockIds.map((id) => `b${id}`);
5220
5241
  const blockCount = refs.length;
5221
- const blockList = blockCount > 0 ? refs.join(", ") : "none";
5242
+ let blockList;
5243
+ if (blockCount <= 20) {
5244
+ blockList = blockCount > 0 ? refs.join(", ") : "none";
5245
+ } else {
5246
+ const recent = refs.slice(-20).join(", ");
5247
+ blockList = `${recent} (+${blockCount - 20} older, use decompress to access by ID)`;
5248
+ }
5249
+ const includeHint = context?.includeHint ?? true;
5222
5250
  const lines = [
5223
5251
  "Compressed block context:",
5224
5252
  `- Active compressed blocks: ${blockCount} (${blockList})`,
5225
5253
  "- If your selected compression range includes any listed block, include each required placeholder exactly once in the summary using `(bN)`."
5226
5254
  ];
5255
+ if (includeHint) {
5256
+ lines.push("- \u{1F4A1} When you've finished using tool outputs, compress them \u2014 you can decompress later if needed. Lean context improves accuracy.");
5257
+ }
5258
+ if (blockCount > 50) {
5259
+ lines.push(`- \u{1F500} You have ${blockCount} blocks \u2014 consider merging adjacent same-topic blocks instead of finding new content to compress. This permanently reduces per-turn overhead.`);
5260
+ }
5227
5261
  const usageRatio = context?.currentTokens && context?.modelContextLimit ? context.currentTokens / context.modelContextLimit : 0;
5228
5262
  if (gcConfig && usageRatio > 0.5) {
5229
5263
  const promotionThreshold = gcConfig.promotionThreshold;
@@ -5566,7 +5600,7 @@ function resolveThresholdPercent(threshold, modelContextLimit) {
5566
5600
  const parsed = parseFloat(threshold);
5567
5601
  return isNaN(parsed) ? void 0 : parsed;
5568
5602
  }
5569
- function buildContextUsageGuidance(config, currentTokens, modelContextLimit) {
5603
+ function buildContextUsageGuidance(config, currentTokens, modelContextLimit, minimal = false) {
5570
5604
  if (currentTokens === void 0 || modelContextLimit === void 0 || modelContextLimit === 0) {
5571
5605
  return "";
5572
5606
  }
@@ -5575,14 +5609,19 @@ function buildContextUsageGuidance(config, currentTokens, modelContextLimit) {
5575
5609
  const formatK = (n) => n >= 1e3 ? `${(n / 1e3).toFixed(1)}K` : String(n);
5576
5610
  const minPct = resolveThresholdPercent(config.compress.minContextLimit, modelContextLimit) ?? 45;
5577
5611
  const maxPct = resolveThresholdPercent(config.compress.maxContextLimit, modelContextLimit) ?? 55;
5578
- const base = `Context usage: ${formatK(currentTokens)} / ${formatK(modelContextLimit)} tokens (${percentage}%). ACP threshold: ${maxPct.toFixed(0)}%.`;
5612
+ const base = `Context usage: ${formatK(currentTokens)} / ${formatK(modelContextLimit)} tokens (${percentage}%).`;
5613
+ if (minimal) {
5614
+ return `
5615
+
5616
+ ${base}`;
5617
+ }
5579
5618
  let guidance;
5580
5619
  if (pct < minPct) {
5581
- guidance = " Context is ample \u2014 focus on your task. Only compress obvious waste (large terminal outputs, duplicated content).";
5620
+ guidance = " \u{1F4A1} Be frugal with context \u2014 if you see large completed outputs (>2000 tokens), compress them into summaries. If everything is already compressed, skip this nudge. You can decompress later if needed. Extract and keep what matters: user intent, key decisions, file paths, and important findings. Compress everything else.";
5582
5621
  } else if (pct < maxPct) {
5583
- guidance = " Context is moderate \u2014 compress completed sections and high-token waste. Preserve key details.";
5622
+ guidance = " \u26A0\uFE0F Context is growing \u2014 compress completed sections and high-token waste now. Preserve key details.";
5584
5623
  } else {
5585
- guidance = " Context is high \u2014 compress aggressively but selectively. Preserve only what is essential.";
5624
+ guidance = " \u{1F525} Context is high \u2014 compress aggressively but selectively. Preserve only what is essential.";
5586
5625
  }
5587
5626
  return `
5588
5627
 
@@ -5680,6 +5719,18 @@ function createSuffixMessage(messages) {
5680
5719
  messages.push(synthetic);
5681
5720
  return synthetic;
5682
5721
  }
5722
/**
 * Decide whether the full per-message compression nudge should be injected
 * for the current message. The caller (injectCompressNudges) uses the result
 * to choose between the full and the minimal context-usage text, and updates
 * the `lastPerMessageNudge*` baselines when the result is true.
 *
 * The nudge is due when EITHER
 *   - at least `compress.nudgeFrequency` turns have passed since the last
 *     per-message nudge, OR
 *   - the context grew by at least `compress.perMessageNudgeGrowthPercent`
 *     percent of the model context limit since the last per-message nudge.
 *
 * This function is a pure predicate: it does not modify `state`.
 *
 * @param {object} state - Session state; reads `currentTurn`,
 *   `nudges.lastPerMessageNudgeTurn` and `nudges.lastPerMessageNudgeTokens`
 *   (each treated as 0 when null/undefined).
 * @param {object} config - Plugin config; reads `compress.nudgeFrequency`
 *   (default 5) and `compress.perMessageNudgeGrowthPercent` (default 3).
 * @param {number} [currentTokens] - Current context size in tokens (0 when missing).
 * @param {number} [modelContextLimit] - Model context window in tokens; when
 *   missing or 0 the growth trigger is evaluated with 0% growth.
 * @returns {boolean} True when the full nudge should be injected.
 */
function shouldInjectPerMessageNudge(state, config, currentTokens, modelContextLimit) {
  const DEFAULT_FREQUENCY = 5;
  const DEFAULT_GROWTH_PERCENT = 3;
  // Config validation only reports type problems, so a NaN / non-numeric value
  // can still reach this point. Coerce like the comparison operators would,
  // and fall back to the default when the result is not a finite number.
  const finiteOr = (value, fallback) => {
    const numeric = Number(value ?? fallback);
    return Number.isFinite(numeric) ? numeric : fallback;
  };

  const turnsSinceLast = (state.currentTurn ?? 0) - (state.nudges.lastPerMessageNudgeTurn ?? 0);
  const tokenGrowth = (currentTokens ?? 0) - (state.nudges.lastPerMessageNudgeTokens ?? 0);
  const tokenGrowthPercent = modelContextLimit ? tokenGrowth / modelContextLimit * 100 : 0;

  // Config validation reports nudgeFrequency values below 1 as "will be clamped
  // to 1"; honour that here so 0 or a negative value cannot make the turn
  // trigger fire on every single message.
  const frequency = Math.max(1, finiteOr(config.compress.nudgeFrequency, DEFAULT_FREQUENCY));
  const growthThreshold = finiteOr(config.compress.perMessageNudgeGrowthPercent, DEFAULT_GROWTH_PERCENT);

  return turnsSinceLast >= frequency || tokenGrowthPercent >= growthThreshold;
}
5683
5734
  var injectCompressNudges = (state, config, logger, messages, prompts, compressionPriorities) => {
5684
5735
  if (compressPermission(state, config) === "deny") {
5685
5736
  return;
@@ -5764,21 +5815,26 @@ var injectCompressNudges = (state, config, logger, messages, prompts, compressio
5764
5815
  }
5765
5816
  const suffixMessage = createSuffixMessage(messages);
5766
5817
  applyAnchoredNudges(state, config, messages, prompts, compressionPriorities, currentTokens, modelContextLimit, suffixMessage);
5767
- injectContextUsage(suffixMessage, config, currentTokens, modelContextLimit);
5818
+ const shouldNudge = shouldInjectPerMessageNudge(state, config, currentTokens, modelContextLimit);
5819
+ injectContextUsage(suffixMessage, config, currentTokens, modelContextLimit, !shouldNudge);
5768
5820
  if (config.compress.mode !== "message") {
5769
- const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { currentTokens, modelContextLimit });
5821
+ const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { currentTokens, modelContextLimit, includeHint: shouldNudge });
5770
5822
  if (blockGuidance.trim() && suffixMessage) {
5771
5823
  appendToLastTextPart(suffixMessage, "\n\n" + blockGuidance);
5772
5824
  }
5773
5825
  }
5826
+ if (shouldNudge) {
5827
+ state.nudges.lastPerMessageNudgeTurn = state.currentTurn ?? 0;
5828
+ state.nudges.lastPerMessageNudgeTokens = currentTokens ?? 0;
5829
+ }
5774
5830
  injectVisibleIdRange(state, messages, suffixMessage);
5775
5831
  if (anchorsChanged) {
5776
5832
  void saveSessionState(state, logger);
5777
5833
  }
5778
5834
  };
5779
- function injectContextUsage(target, config, currentTokens, modelContextLimit) {
5835
+ function injectContextUsage(target, config, currentTokens, modelContextLimit, minimal = false) {
5780
5836
  if (!target) return;
5781
- const usageTag = buildContextUsageGuidance(config, currentTokens, modelContextLimit);
5837
+ const usageTag = buildContextUsageGuidance(config, currentTokens, modelContextLimit, minimal);
5782
5838
  if (!usageTag) return;
5783
5839
  for (const part of target.parts) {
5784
5840
  if (part.type === "text") {
@@ -6628,15 +6684,15 @@ COMPRESSION PHILOSOPHY
6628
6684
 
6629
6685
  Compression replaces raw conversation content with dense summaries. When used correctly, it keeps your context sharp and focused. When used carelessly, it destroys information you need.
6630
6686
 
6631
- The key principle: compress based on context pressure, not habit. When context is ample, compress rarely or not at all. When context is tight, compress aggressively but selectively. The runtime context usage indicator tells you the current pressure level.
6687
+ The key principle: compress proactively to keep context lean, but selectively. Large tool outputs (shell, diffs, logs) can be compressed into summaries at any time \u2014 you can decompress later if needed. Extract and keep what matters: user intent, key decisions, file paths, and important findings \u2014 even if buried in large messages. Compress everything else, including verbose parts of user messages, large code dumps, and long discussions.
6632
6688
 
6633
6689
  Target the largest UNCOMPRESSED content first. Savings scale with original size \u2014 compressing a 5000-token tool output frees far more than re-shrinking an already-summarized 300-token block.
6634
6690
 
6635
6691
  CONTEXT PRESSURE LEVELS
6636
6692
 
6637
- - Ample: Context is well below the threshold. Do NOT compress unless there is obvious waste (huge terminal dumps, duplicated content). Focus entirely on your task.
6638
- - Moderate: Context is approaching the threshold. Compress completed sections proactively. Prioritize high-token waste over minor cleanup.
6639
- - High: Context has exceeded the threshold. Compress aggressively. Every compression should free meaningful tokens. Preserve only what is essential for the current task.
6693
+ - Normal: Be frugal \u2014 compress tool outputs you've finished using into summaries. You can decompress later. Extract and keep what matters from any message; compress verbose parts \u2014 including large logs in user messages or generated code.
6694
+ - Elevated: Context is growing. Compress completed sections and high-token waste more urgently.
6695
+ - Critical: Compress aggressively now. Every compression should free meaningful tokens. Preserve only what is essential for the current task.
6640
6696
 
6641
6697
  WHAT TO COMPRESS FIRST (high value, low risk)
6642
6698
 
@@ -6798,9 +6854,9 @@ General cleanup should be done periodically between other normal compression too
6798
6854
  // lib/prompts/context-limit-nudge.ts
6799
6855
  var CONTEXT_LIMIT_NUDGE = `
6800
6856
  <system-reminder>
6801
- \u26A0\uFE0F CRITICAL: Context limit reached. You MUST use the \`compress\` tool NOW.
6857
+ \u26A0\uFE0F Context limit reached \u2014 time to compress the largest ranges you no longer need. Prioritize completed tool outputs and resolved work. You can decompress specific blocks later if you need details. Keeping context lean helps you stay accurate.
6802
6858
 
6803
- If mid-atomic-operation, finish that step first, then compress immediately.
6859
+ If mid-atomic-operation, finish that step first, then compress.
6804
6860
 
6805
6861
  HOW TO CALL COMPRESS:
6806
6862
  {
@@ -6815,7 +6871,7 @@ HOW TO CALL COMPRESS:
6815
6871
  }
6816
6872
 
6817
6873
  \u26A0\uFE0F ID RULES \u2014 MOST COMMON CAUSE OF ERRORS:
6818
- - ONLY use IDs you can see in <dcp-message-id> tags in the messages ABOVE.
6874
+ - ONLY use IDs you can see in tags in the messages ABOVE.
6819
6875
  - Do NOT copy IDs from this example. Do NOT invent IDs.
6820
6876
  - Do NOT use IDs from compressed block summaries \u2014 they are stale.
6821
6877
  - startId must appear BEFORE endId in the conversation.
@@ -6831,14 +6887,14 @@ SUMMARY RULES:
6831
6887
  // lib/prompts/turn-nudge.ts
6832
6888
  var TURN_NUDGE = `
6833
6889
  <system-reminder>
6834
- Context is getting full. Compress closed/older conversation ranges now.
6890
+ Context is getting full. If you've finished reading tool outputs or exploration results, compress them \u2014 you can decompress later if needed. This keeps your focus on the current task and improves accuracy.
6835
6891
 
6836
6892
  {
6837
6893
  "topic": "Short Label",
6838
6894
  "content": [{ "startId": "<visible message ID>", "endId": "<visible message ID>", "summary": "..." }]
6839
6895
  }
6840
6896
 
6841
- \u26A0\uFE0F ONLY use IDs from <dcp-message-id> tags visible above. Do NOT invent or copy example IDs.
6897
+ \u26A0\uFE0F ONLY use IDs from tags visible above. Do NOT invent or copy example IDs.
6842
6898
  </system-reminder>
6843
6899
  `;
6844
6900
 
@@ -8212,10 +8268,12 @@ function parseGcThreshold(limit, modelContextLimit) {
8212
8268
 
8213
8269
  // lib/gc/merge.ts
8214
8270
  // Fallback batch-cleanup thresholds: resolveBatchCleanup returns this object
  // when gc.batchCleanup is null or undefined. The values match the
  // batchCleanup entries listed in defaultConfig.
  // NOTE(review): presumably each value is a percentage of the model context
  // limit — confirm against the threshold parser.
  var DEFAULT_BATCH_CLEANUP = {
8215
- lowThreshold: "60%",
8271
+ lowThreshold: "55%",
8216
8272
  highThreshold: "75%",
8217
8273
  forceThreshold: "90%"
8218
8274
  };
8275
// Minimum number of marked old-gen blocks required before buildNudgeText emits
// its escalated "ready for batch cleanup" message.
+ var ESCALATE_MIN_MARKED = 3;
8276
// Minimum fraction (0-1) of active old-gen blocks that must be marked for that
// same escalation; buildNudgeText requires both conditions to hold.
+ var ESCALATE_MIN_RATIO = 0.4;
8219
8277
  function resolveBatchCleanup(gc) {
8220
8278
  return gc.batchCleanup ?? DEFAULT_BATCH_CLEANUP;
8221
8279
  }
@@ -8239,11 +8297,15 @@ function collectActiveOldGenBlocks(state, maxOldGenSummaryLength) {
8239
8297
  return blocks;
8240
8298
  }
8241
8299
  function collectActiveMarkedBlocks(state) {
8242
- const ids = Array.from(state.prune.messages.markedForCleanup).sort((a, b) => a - b);
8300
+ const messagesState = state.prune.messages;
8301
+ const ids = Array.from(messagesState.markedForCleanup).sort((a, b) => a - b);
8243
8302
  const blocks = [];
8244
8303
  for (const id of ids) {
8245
- const block = state.prune.messages.blocksById.get(id);
8246
- if (!block || !block.active) continue;
8304
+ const block = messagesState.blocksById.get(id);
8305
+ if (!block || !block.active) {
8306
+ messagesState.markedForCleanup.delete(id);
8307
+ continue;
8308
+ }
8247
8309
  blocks.push(block);
8248
8310
  }
8249
8311
  return blocks;
@@ -8368,21 +8430,53 @@ function mergeMarkedBlocks(state, markedIds, maxMergedLength) {
8368
8430
  const savedTokens = Math.max(0, sourceTokens - newSummaryTokens);
8369
8431
  return { mergedCount: sourceBlocks.length, savedTokens };
8370
8432
  }
8371
- function buildNudgeText(state, maxMergedLength) {
8372
- const blocks = collectActiveMarkedBlocks(state);
8373
- if (blocks.length < 1) return void 0;
8374
- const refs = blocks.map((b) => formatBlockRef(b.blockId)).join(", ");
8375
- const sourceTokens = blocks.reduce(
8433
/**
 * Sum a token estimate over a list of compressed blocks.
 *
 * For each block the stored `summaryTokens` is used when it is truthy;
 * otherwise the estimate is the summary's character length divided by 4,
 * rounded to the nearest integer.
 *
 * NOTE(review): a block with a falsy `summaryTokens` and no `summary` string
 * would throw on `block.summary.length` — confirm blocks always carry a summary.
 *
 * @param {Array<object>} blocks - Blocks exposing `summaryTokens` and/or `summary`.
 * @returns {number} Total estimated tokens (0 for an empty list).
 */
+ function estimateTokens(blocks) {
8434
+ return blocks.reduce(
8376
8435
  (sum, block) => sum + (block.summaryTokens || Math.round(block.summary.length / 4)),
8377
8436
  0
8378
8437
  );
8379
- const estimatedMergedTokens = Math.round(maxMergedLength / 4);
8380
- const estimatedSavings = Math.max(0, sourceTokens - estimatedMergedTokens);
8438
+ }
8439
/**
 * Build the batch-cleanup nudge text, or return undefined when there are no
 * active old-gen blocks.
 *
 * Three message tiers, checked in order:
 *   1. Escalated: at least ESCALATE_MIN_MARKED old-gen blocks are marked AND
 *      they make up at least ESCALATE_MIN_RATIO of all active old-gen blocks.
 *   2. Some blocks are marked, but the escalation bar is not met.
 *   3. Nothing is marked: list up to five old-gen blocks and ask for review.
 *
 * Estimated savings are the marked blocks' estimated tokens minus
 * `maxMergedLength / 4` (rounded), floored at 0.
 *
 * Side effect: collectActiveMarkedBlocks removes ids of missing/inactive
 * blocks from `state.prune.messages.markedForCleanup`.
 *
 * NOTE(review): collectActiveOldGenBlocks declares its second parameter as
 * `maxOldGenSummaryLength`, but `maxMergedLength` is passed here — confirm
 * the two limits are meant to be the same value.
 *
 * @param {object} state - Session state holding the compressed blocks.
 * @param {number} maxMergedLength - Character budget of a merged summary.
 * @returns {string|undefined} Space-joined nudge sentences, or undefined.
 */
+ function buildNudgeText(state, maxMergedLength) {
8440
+ const marked = collectActiveMarkedBlocks(state);
8441
+ const oldGen = collectActiveOldGenBlocks(state, maxMergedLength);
8442
// Without active old-gen blocks no nudge is produced, even if blocks are marked.
+ if (oldGen.length === 0) return void 0;
8443
+ const oldGenIds = new Set(oldGen.map((b) => b.blockId));
8444
+ const markedOldGen = marked.filter((b) => oldGenIds.has(b.blockId));
8445
+ const markedOldGenCount = markedOldGen.length;
8446
+ const oldGenCount = oldGen.length;
8447
+ const ratio = markedOldGenCount / oldGenCount;
8448
+ const ratioPct = Math.round(ratio * 100);
8449
+ const escalateMinPct = Math.round(ESCALATE_MIN_RATIO * 100);
8450
// Tier 1 (escalated). The count/ratio use marked old-gen blocks only, while the
// refs, range and savings below cover every marked block. marked[0] is safe
// because markedOldGen is a subset of marked and is non-empty in this branch.
+ if (markedOldGenCount >= ESCALATE_MIN_MARKED && ratio >= ESCALATE_MIN_RATIO) {
8451
+ const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ");
8452
+ const firstRef = formatBlockRef(marked[0].blockId);
8453
+ const lastRef = formatBlockRef(marked[marked.length - 1].blockId);
8454
+ const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4));
8455
+ return [
8456
+ `\u{1F525} ${markedOldGenCount}/${oldGenCount} old-gen blocks marked (${ratioPct}%) \u2014 ready for batch cleanup.`,
8457
+ `Compressing ${refs} (range ${firstRef}\u2013${lastRef}) would free ~${estimatedSavings} tokens in one cache break.`,
8458
+ `Call compress with this range now to consolidate them.`
8459
+ ].join(" ");
8460
+ }
8461
// Tier 2: at least one block is marked but the escalation bar is not met.
+ if (marked.length >= 1) {
8462
+ const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ");
8463
+ const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4));
8464
+ return [
8465
+ `\u26A0\uFE0F ${marked.length} block(s) marked for batch cleanup (${refs}).`,
8466
+ `Merge-compressing them would free ~${estimatedSavings} tokens.`,
8467
+ marked.length >= 2 ? "They will auto-merge when context pressure reaches the high threshold." : "A single marked block won't auto-merge on its own \u2014 use compress to consolidate it, or unmark_block if no longer needed.",
8468
+ `Mark more old-gen blocks (need \u2265${ESCALATE_MIN_MARKED} at \u2265${escalateMinPct}%) to trigger batch cleanup sooner.`,
8469
+ "To act now, use compress with a range covering these blocks."
8470
+ ].join(" ");
8471
+ }
8472
// Tier 3: nothing is marked — show at most five old-gen refs plus a "+N more" suffix.
+ const shown = oldGen.slice(0, 5);
8473
+ const oldGenRefs = shown.map((b) => formatBlockRef(b.blockId)).join(", ");
8474
+ const more = oldGenCount > 5 ? ` (+${oldGenCount - 5} more)` : "";
8381
8475
  return [
8382
- `\u26A0\uFE0F ${blocks.length} block(s) marked for batch cleanup (${refs}).`,
8383
- `Merge-compressing them would free ~${estimatedSavings} tokens.`,
8384
- blocks.length >= 2 ? "They will auto-merge when context pressure reaches the high threshold." : "A single marked block won't auto-merge on its own \u2014 use compress to consolidate it, or unmark_block if no longer needed.",
8385
- "To act now, use compress with a range covering these blocks."
8476
+ `\u{1F4CB} Context pressure rising \u2014 ${oldGenCount} old-gen compressed block(s) occupy ~${estimateTokens(oldGen)} tokens (${oldGenRefs}${more}).`,
8477
+ `Review which blocks contain information you no longer need, and use mark_block to flag them.`,
8478
+ `Once enough are marked (\u2265${ESCALATE_MIN_MARKED} at \u2265${escalateMinPct}% of old-gen), they'll be batch-merged in one cache break to preserve cache hit rate.`,
8479
+ `Do NOT mark blocks you may still need.`
8386
8480
  ].join(" ");
8387
8481
  }
8388
8482
  function runBatchCleanup(state, config, logger, messages) {
@@ -8427,26 +8521,24 @@ function runBatchCleanup(state, config, logger, messages) {
8427
8521
  }
8428
8522
  if (currentTokens >= highTokens) {
8429
8523
  const marked = collectActiveMarkedBlocks(state);
8430
- if (marked.length < 2) {
8431
- return noop;
8432
- }
8433
- const ids = marked.map((b) => b.blockId);
8434
- const result = mergeMarkedBlocks(state, ids, maxMergedLength);
8435
- if (result.mergedCount === 0) {
8436
- return noop;
8524
+ if (marked.length >= 2) {
8525
+ const ids = marked.map((b) => b.blockId);
8526
+ const result = mergeMarkedBlocks(state, ids, maxMergedLength);
8527
+ if (result.mergedCount > 0) {
8528
+ logger.info("Batch cleanup tier 2 (high): merged marked blocks", {
8529
+ mergedCount: result.mergedCount,
8530
+ savedTokens: result.savedTokens,
8531
+ currentTokens,
8532
+ highThreshold: batchCleanup.highThreshold
8533
+ });
8534
+ return {
8535
+ tier: 2,
8536
+ action: "merge",
8537
+ mergedCount: result.mergedCount,
8538
+ savedTokens: result.savedTokens
8539
+ };
8540
+ }
8437
8541
  }
8438
- logger.info("Batch cleanup tier 2 (high): merged marked blocks", {
8439
- mergedCount: result.mergedCount,
8440
- savedTokens: result.savedTokens,
8441
- currentTokens,
8442
- highThreshold: batchCleanup.highThreshold
8443
- });
8444
- return {
8445
- tier: 2,
8446
- action: "merge",
8447
- mergedCount: result.mergedCount,
8448
- savedTokens: result.savedTokens
8449
- };
8450
8542
  }
8451
8543
  if (currentTokens >= lowTokens) {
8452
8544
  const nudgeText = buildNudgeText(state, maxMergedLength);