npm - @usabledev/usable-chat - Versions diffs - 1.152.0 → 1.153.0 - Mend

@usabledev/usable-chat 1.152.0 → 1.153.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/cli.js +330 -128
package/package.json +1 -1

package/cli.js CHANGED Viewed

@@ -35469,10 +35469,13 @@ function contentText(content) {
     return content.map((p28) => {
       if (typeof p28.text === "string") return p28.text;
       if (p28.type === "tool-call") return `[tool-call: ${String(p28.toolName ?? "")}]`;
-      if (p28.type === "tool-result") {
-        const out = p28.result ?? p28.output;
+      if (p28.type === "tool-result" || p28.type === "tool_result") {
+        const out = p28.result ?? p28.output ?? p28.content;
         const s17 = typeof out === "string" ? out : JSON.stringify(out ?? "");
-        return `[tool-result: ${s17.slice(0, 2e3)}]`;
+        const pointers = s17.match(/\[⚠️ FULL RESULT TOO LARGE[^\]]*\]/g) ?? [];
+        const tail2 = pointers.length ? `
+${pointers.join("\n")}` : "";
+        return `[tool-result: ${s17.slice(0, 2e3)}${tail2}]`;
       }
       return "";
     }).filter(Boolean).join("\n");
@@ -35485,6 +35488,64 @@ function buildCompactionTranscript(messages4) {
     return text3.trim() ? `${m33.role.toUpperCase()}: ${text3}` : "";
   }).filter(Boolean).join("\n\n");
 }
+function fingerprintMessages(messages4) { // Cheap fingerprint of a message list: a rolling hash over each role plus a 200-char content prefix, suffixed with the message count.
+  let h25 = 5381; // seed for the multiply-by-33 rolling hash below
+  for (const m33 of messages4) {
+    const s17 = m33.role + ":" + (typeof m33.content === "string" ? m33.content : JSON.stringify(m33.content)).slice(0, 200); // only the first 200 chars of the content are hashed, so changes past that prefix do not alter the fingerprint. NOTE(review): JSON.stringify(undefined) returns undefined, so a message whose content is undefined would throw on .slice - confirm callers never pass one
+    for (let i18 = 0; i18 < s17.length; i18++) h25 = (h25 << 5) + h25 + s17.charCodeAt(i18) | 0; // h = h * 33 + char code, truncated to a signed 32-bit integer by the trailing | 0
+  }
+  return `${h25 >>> 0}:${messages4.length}`; // hash rendered as unsigned 32-bit, then a colon and the number of messages
+}
+function getCachedCompaction(conversationId) { // Looks up the cached compaction entry for a conversation id; yields undefined when nothing is stored.
+  return compactionCache.get(conversationId); // compactionCache is the module-level Map created in the module init
+}
+function setCachedCompaction(conversationId, entry) { // Stores a compaction entry for the conversation, evicting one older entry first when the cache is full.
+  if (compactionCache.size >= COMPACTION_CACHE_MAX && !compactionCache.has(conversationId)) { // evict only when a new key would be added at capacity (COMPACTION_CACHE_MAX is set to 200 in the module init)
+    const oldest = compactionCache.keys().next().value; // first key in Map iteration order, i.e. the earliest-inserted conversation
+    if (oldest !== void 0) compactionCache.delete(oldest);
+  }
+  compactionCache.set(conversationId, entry); // NOTE(review): setting an existing key keeps its original position in the Map, so eviction is first-inserted-first-out rather than least-recently-used
+}
+async function generateCompactionSummaryCached(conversationId, dropped, opts) { // Summarizes the dropped messages, reusing the per-conversation cached summary when its fingerprinted prefix still matches; resolves to an object with summary and cacheHit (exact, rolling or miss).
+  const cached2 = conversationId ? getCachedCompaction(conversationId) : void 0; // no conversation id means no cache lookup
+  if (cached2 && dropped.length >= cached2.count) { // the cache can only apply when the dropped list is at least as long as the one that was summarized
+    const prefixFp = fingerprintMessages(dropped.slice(0, cached2.count)); // re-fingerprint just the prefix the cached summary covered
+    if (prefixFp === cached2.fingerprint) {
+      if (dropped.length === cached2.count) { // same dropped set as last time: return the cached summary without calling the summarizer
+        return { summary: cached2.summary, cacheHit: "exact" };
+      }
+      const delta = dropped.slice(cached2.count); // messages dropped since the cached summary was produced
+      const summary2 = await generateCompactionSummary( // rolling update: summarize the prior summary (wrapped as a synthetic user message) plus only the new messages
+        [
+          {
+            role: "user",
+            content: `[prior checkpoint summary of the earlier conversation]
+${cached2.summary}`
+          },
+          ...delta
+        ],
+        opts
+      );
+      if (summary2 && conversationId) { // refresh the cache only when a summary was actually produced
+        setCachedCompaction(conversationId, {
+          fingerprint: fingerprintMessages(dropped),
+          count: dropped.length,
+          summary: summary2
+        });
+      }
+      return { summary: summary2, cacheHit: "rolling" }; // NOTE(review): summary2 can be falsy here; the older cached summary is then not returned and the caller falls back to its non-LLM marker - confirm that is intended
+    }
+  }
+  const summary = await generateCompactionSummary(dropped, opts); // no usable cache entry (absent, longer than dropped, or fingerprint mismatch): summarize everything
+  if (summary && conversationId) { // cache only when there is both a summary and a conversation id to key it by
+    setCachedCompaction(conversationId, {
+      fingerprint: fingerprintMessages(dropped),
+      count: dropped.length,
+      summary
+    });
+  }
+  return { summary, cacheHit: "miss" };
+}
 async function viaOpenRouter(system, user, opts) {
   const apiKey = opts.provider === "usable" ? opts.authToken ?? "" : opts.openRouterApiKey ?? "";
   const provider = createOpenRouter({
@@ -35561,7 +35622,7 @@ async function generateCompactionSummary(messages4, opts) {
     return null;
   }
 }
-var COMPACTION_SYSTEM_PROMPT, DEFAULT_CALLERS;
+var COMPACTION_SYSTEM_PROMPT, compactionCache, COMPACTION_CACHE_MAX, DEFAULT_CALLERS;
 var init_compaction_summary = __esm({
   "src/core/orchestrator/compaction-summary.ts"() {
     "use strict";
@@ -35580,11 +35641,15 @@ var init_compaction_summary = __esm({
       "- PROGRESS: what is DONE, what is IN-PROGRESS, what is BLOCKED.",
       "- DECISIONS: key choices, constraints, and user preferences to honor.",
       "- FILES: files read and files modified so far (paths).",
+      '- DATA: where large tool results are preserved \u2014 copy any "[\u26A0\uFE0F FULL RESULT TOO LARGE \u2014 \u2026]"',
+      "  spill paths VERBATIM so the work can re-read them instead of re-calling the tools.",
       "- NEXT: the concrete next steps to continue from here.",
       "",
       "Be factual and specific (keep ids, paths, names verbatim \u2014 never shorten UUIDs). Do not invent",
       "progress that did not happen. Output the summary only \u2014 no preamble, no sign-off."
     ].join("\n");
+    compactionCache = /* @__PURE__ */ new Map();
+    COMPACTION_CACHE_MAX = 200;
     DEFAULT_CALLERS = {
       openrouter: viaOpenRouter,
       anthropic: viaAnthropic,
@@ -35641,6 +35706,17 @@ function extractFileOps(messages4) {
   }
   return { read: [...read], modified: [...modified] };
 }
+function extractSpillPointers(messages4) { // Collects the unique spill-file paths mentioned in the messages: anything starting with /data/tool-results/ or containing usable-tool-results/, with trailing punctuation trimmed; returns them as an array.
+  const paths = /* @__PURE__ */ new Set(); // the Set dedupes paths that appear in several messages. NOTE(review): non-string content is JSON.stringify-ed below, and JSON.stringify(undefined) is undefined, so a message with undefined content would throw at text3.match - confirm callers never pass one
+  const re10 = /(?:\/data\/tool-results\/|[^\s"'\\]*usable-tool-results\/)[^\s"'\\\]]+/g;
+  for (const msg of messages4) {
+    const text3 = typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content);
+    for (const m33 of text3.match(re10) ?? []) {
+      paths.add(m33.replace(/[.,;:!?]+$/, ""));
+    }
+  }
+  return [...paths];
+}
 function isStepStart(messages4, i18) {
   if (i18 <= 0) return false;
   return messages4[i18].role === "assistant" && messages4[i18 - 1].role !== "assistant";
@@ -164395,6 +164471,7 @@ var init_agents2 = __esm({
 // src/core/utils/tool-result-summarizer.ts
 var tool_result_summarizer_exports = {};
 __export(tool_result_summarizer_exports, {
+  dedupeFragmentContentFields: () => dedupeFragmentContentFields,
   sanitizeImageToolResult: () => sanitizeImageToolResult,
   summarizeToolResultForHistory: () => summarizeToolResultForHistory,
   truncateToolResultWithNotice: () => truncateToolResultWithNotice
@@ -164443,12 +164520,52 @@ function extractFragmentReferences(result) {
 }
 function truncateToolResultWithNotice(serialized, maxChars) { // Caps a serialized tool result at maxChars and appends a notice saying how much was kept and omitted.
   if (serialized.length <= maxChars) return serialized; // already within the limit: returned unchanged, no notice
-  const head = serialized.slice(0, maxChars);
-  const keptKb = Math.round(maxChars / 1024);
-  const omittedKb = Math.round((serialized.length - maxChars) / 1024);
+  let head = serialized.slice(0, maxChars); // reassignable because the head may be trimmed back to a boundary below
+  const boundary = Math.max( // index of the last object, array or string separator, or newline, found inside the head (-1 when none)
+    head.lastIndexOf("},"),
+    head.lastIndexOf("],"),
+    head.lastIndexOf('",'),
+    head.lastIndexOf("\n")
+  );
+  if (boundary > maxChars * 0.8) head = head.slice(0, boundary + 1); // cut at the boundary only when it lies past 80% of maxChars; boundary + 1 keeps the first character of the matched separator
+  const keptKb = Math.round(head.length / 1024); // sizes are measured from the possibly trimmed head
+  const omittedKb = Math.max(1, Math.round((serialized.length - head.length) / 1024)); // reported as at least 1KB so the notice never claims 0KB omitted
   return `${head}
-[tool-result truncated \u2014 kept the first ${keptKb}KB, ${omittedKb}KB omitted. The full result is preserved (see the spill path noted below) \u2014 re-read it if you need the omitted part.]`;
+[tool-result truncated \u2014 kept the first ${keptKb}KB, ~${omittedKb}KB omitted. The omitted tail is NOT in this conversation unless a spill-file path is noted below. NEVER guess or reconstruct omitted values (ids, uuids, names) \u2014 re-run the tool with narrower filters/pagination to get the part you need, or read the spill file if a path is given.]`;
+}
+function dedupeFragmentContentFields(result) { // Replaces a large contentWithoutFrontmatter string with a short marker when a content string is also present, also looking inside content arrays and JSON text parts; returns the very same reference when nothing was replaced.
+  if (!result || typeof result !== "object") return result; // null, undefined and primitives pass through untouched
+  if (Array.isArray(result)) { // arrays are handled as a list of content parts
+    let changed = false;
+    const mapped = result.map((item) => {
+      if (item && typeof item === "object" && item.type === "text" && typeof item.text === "string") { // only text parts carrying a string payload are inspected
+        const text3 = item.text;
+        if (text3.includes('"contentWithoutFrontmatter"')) { // cheap substring test before paying for JSON.parse
+          try {
+            const parsed = JSON.parse(text3);
+            const deduped = dedupeFragmentContentFields(parsed);
+            if (deduped !== parsed) { // identity check: a different object means a field was replaced
+              changed = true;
+              return { ...item, text: JSON.stringify(deduped) };
+            }
+          } catch { // the text was not valid JSON: leave this part as it is
+          }
+        }
+      }
+      return item;
+    });
+    return changed ? mapped : result; // keep the original array identity when no part changed, so callers can compare by reference
+  }
+  const r17 = result;
+  if (typeof r17.content === "string" && typeof r17.contentWithoutFrontmatter === "string" && r17.contentWithoutFrontmatter.length > 1e3) { // both fields are strings and the second is longer than 1000 chars. NOTE(review): the two strings are never compared, so the duplicate assumption rests on the field names alone
+    return { ...r17, contentWithoutFrontmatter: FRAGMENT_DUP_MARKER }; // shallow copy with the field swapped for the marker; the input object is not mutated
+  }
+  if (Array.isArray(r17.content)) { // content is a list of parts: recurse into it. NOTE(review): other nested objects are not walked, so a fragment wrapped under another key is left as is - confirm that matches the tool result shapes
+    const deduped = dedupeFragmentContentFields(r17.content);
+    if (deduped !== r17.content) return { ...r17, content: deduped };
+  }
+  return result;
 }
 function sanitizeImageToolResult(result) {
   if (!result || typeof result !== "object") {
@@ -164491,7 +164608,7 @@ function sanitizeImageToolResult(result) {
   return sanitized;
 }
 async function summarizeToolResultForHistory(toolName, result, _conversationContext) {
-  const sanitizedResult = sanitizeImageToolResult(result);
+  const sanitizedResult = dedupeFragmentContentFields(sanitizeImageToolResult(result));
   const serialized = JSON.stringify(sanitizedResult);
   const originalSize = countTokens(JSON.stringify(result));
   const references = extractFragmentReferences(result);
@@ -164506,12 +164623,13 @@ async function summarizeToolResultForHistory(toolName, result, _conversationCont
     usedQwen: false
   };
 }
-var MAX_TOOL_RESULT_CHARS;
+var MAX_TOOL_RESULT_CHARS, FRAGMENT_DUP_MARKER;
 var init_tool_result_summarizer = __esm({
   "src/core/utils/tool-result-summarizer.ts"() {
     "use strict";
     init_token_counter();
     MAX_TOOL_RESULT_CHARS = 5e4;
+    FRAGMENT_DUP_MARKER = "[omitted \u2014 duplicate of `content` minus the YAML frontmatter; read `content`]";
   }
 });
@@ -175858,6 +175976,33 @@ data: ${JSON.stringify(envelope)}
 });
 // src/core/tools/spawn-subagent.ts
+async function maybeStartBackgroundRelay(redis, context, conversationId, taskId, success2) { // Tries to hand a finished subagent task to the background relay hook on the context; resolves to true when the caller should skip its ping-prompt fallback.
+  if (!redis || !context.startBackgroundSubagentRelay) return false; // no redis client or no relay hook: the caller falls back to the ping prompt
+  try {
+    const record2 = await spawnedTaskStore.get(redis, taskId);
+    if (record2?.notifiedTurnAt) { // the task record already carries notifiedTurnAt: treated as handled
+      return true;
+    }
+    const lockKey = `subagents:background-relay-lock:${conversationId}`; // one lock per conversation, not per task
+    const acquired = await redis.set(lockKey, taskId, "NX", "EX", 180).catch(() => null); // set-if-absent with a 180 second expiry; a rejected SET is mapped to null, i.e. not acquired
+    if (!acquired) { // NOTE(review): this also covers a redis error on SET, in which case true is returned and the ping-prompt fallback is suppressed - confirm that is intended
+      return true;
+    }
+    const started = await context.startBackgroundSubagentRelay({ // NOTE(review): the lock is not released if the hook resolves falsy or throws, so it stays held until the 180 second expiry
+      conversationId,
+      taskId,
+      reason: success2 ? "completed" : "failed"
+    });
+    return started; // whatever the hook resolved to is passed straight back to the caller
+  } catch (err) { // best effort: log at debug level and report not relayed so the caller emits the ping prompt
+    logger.debug("SpawnSubagent", "Failed to start background relay job", {
+      conversationId,
+      taskId,
+      error: err instanceof Error ? err.message : String(err)
+    });
+    return false;
+  }
+}
 async function maybeEmitPingPrompt(redis, conversationId, taskId) {
   if (!redis) return;
   try {
@@ -176158,7 +176303,10 @@ HOW TO USE THIS:
                 tokenUsage: response.tokenUsage
               }
             });
-            await maybeEmitPingPrompt(redis, conversationId, taskId);
+            const relayed = await maybeStartBackgroundRelay(redis, context, conversationId, taskId, success2);
+            if (!relayed) {
+              await maybeEmitPingPrompt(redis, conversationId, taskId);
+            }
           } catch (err) {
             const errorMessage = err instanceof Error ? err.message : String(err);
             const status = abort.signal.aborted ? "cancelled" : "failed";
@@ -176173,7 +176321,10 @@ HOW TO USE THIS:
               taskId,
               data: { error: errorMessage }
             });
-            await maybeEmitPingPrompt(redis, conversationId, taskId);
+            const relayed = await maybeStartBackgroundRelay(redis, context, conversationId, taskId, false);
+            if (!relayed) {
+              await maybeEmitPingPrompt(redis, conversationId, taskId);
+            }
           } finally {
             spawnedTaskStore.unregisterLocalAbort(taskId);
           }
@@ -248154,6 +248305,65 @@ function pruneOversizedToolResultBodies(messages4, maxCharsPerResult) {
   });
   return { messages: pruned, prunedCount, bytesReclaimed };
 }
+async function buildCollectionContextParts(opts) { // Builds the XML-like context lines for a collection: its name and description plus up to 50 fragment references, or a metadata-only fallback block; resolves to an array of strings.
+  const { usableApiUrl, accessToken, contextId, workspaceId } = opts;
+  const parts = [];
+  const fallback = () => { // fallback: a collection tag built only from the caller-supplied metadata
+    parts.length = 0; // reset the shared array so the fallback never mixes with partial output
+    parts.push(`<collection id="${contextId}" name="${opts.metadataName || "Unnamed"}">`);
+    if (opts.metadataSummary) parts.push(`<description>${opts.metadataSummary}</description>`);
+    parts.push("</collection>");
+    return parts;
+  };
+  if (!workspaceId) return fallback(); // the endpoints below are workspace-scoped, so no request is made without a workspace id
+  const headers = { // NOTE(review): the visible call sites pass accessToken through an optional chain, so this can send the literal text Bearer undefined - confirm the API rejects it cleanly
+    Authorization: `Bearer ${accessToken}`,
+    "Content-Type": "application/json"
+  };
+  const collectionResponse = await fetch( // NOTE(review): this fetch and the json() call after it sit outside the try below, so a network or parse error rejects to the caller
+    `${usableApiUrl}/workspaces/${workspaceId}/collections/${contextId}`,
+    { headers }
+  );
+  if (!collectionResponse.ok) return fallback(); // non-ok status: use the metadata-only block
+  const collectionData = await collectionResponse.json();
+  const collection = collectionData.collection ?? collectionData; // accept either a payload wrapped under collection or the bare object
+  let fragments = [];
+  let totalCount = 0;
+  try { // the fragment listing is best effort: any failure leaves the list empty and the count at 0
+    const fragmentsResponse = await fetch(
+      `${usableApiUrl}/workspaces/${workspaceId}/collections/${contextId}/fragments?limit=50`,
+      { headers }
+    );
+    if (fragmentsResponse.ok) {
+      const fragmentsData = await fragmentsResponse.json();
+      fragments = fragmentsData.fragments || fragmentsData.data || [];
+      totalCount = fragmentsData.total ?? fragments.length; // when the API reports no total, use the number of fragments returned
+    }
+  } catch {
+  }
+  parts.push(
+    `<collection id="${contextId}" name="${collection.name || opts.metadataName || "Unnamed"}" workspace-id="${collection.workspaceId || workspaceId}">`
+  );
+  if (collection.description) parts.push(`<description>${collection.description}</description>`);
+  parts.push(`<fragments count="${totalCount}">`);
+  for (const frag of fragments.slice(0, 50)) { // NOTE(review): id, title and summary are interpolated into attribute values without escaping, so quote marks or angle brackets in fragment data would break the markup - consider escaping
+    const fragId = frag.id || frag.fragmentId;
+    const fragTitle = frag.title || "Untitled";
+    const fragSummary = frag.summary || "";
+    parts.push(`<fragment-ref id="${fragId}" title="${fragTitle}" summary="${fragSummary}" />`);
+  }
+  parts.push("</fragments>");
+  if (totalCount > 50) { // more fragments exist than were listed: add a note pointing at the listing tool
+    parts.push(
+      `<note>Collection has ${totalCount} fragments total. Only showing first 50. Use list-memory-fragments with collectionId to see more.</note>`
+    );
+  }
+  parts.push(
+    "<instructions>Use get-memory-fragment-content to read full content of specific fragments. Don't add all fragments to context.</instructions>"
+  );
+  parts.push("</collection>");
+  return parts;
+}
 function enforceContextLimits(messages4, systemMessage, contextTokens, toCountableMessage, modelId, toolDefinitionTokens = 0) {
   const model = getModelById(modelId);
   const contextLength = model?.capabilities.contextLength || 128e3;
@@ -249145,7 +249355,8 @@ async function orchestrate(request) {
     chatMode,
     imageGenModel: context.metadata?.imageGenModel,
     imageGenThinking: context.metadata?.imageGenThinking,
-    registeredParentToolSchemas: context.registeredParentToolSchemas
+    registeredParentToolSchemas: context.registeredParentToolSchemas,
+    startBackgroundSubagentRelay: context.startBackgroundSubagentRelay
   });
   if (!config3.localFilesystem) {
     aiTools["spawn_subagent"] = {
@@ -249613,54 +249824,16 @@ Folder behavior:
                 }
                 contextParts.push("</tool-result>");
               } else if (item.contextType === "collection") {
-                const usableApiUrl = config4.mcp.url.replace("/mcp", "");
-                const collectionResponse = await fetch(
-                  `${usableApiUrl}/collections/${item.contextId}?includeFragments=true&fragmentLimit=50`,
-                  {
-                    headers: {
-                      Authorization: `Bearer ${context.session.user?.accessToken}`,
-                      "Content-Type": "application/json"
-                    }
-                  }
+                contextParts.push(
+                  ...await buildCollectionContextParts({
+                    usableApiUrl: config4.mcp.url.replace("/mcp", ""),
+                    accessToken: context.session.user?.accessToken,
+                    contextId: item.contextId,
+                    workspaceId: item.metadata?.workspaceId,
+                    metadataName: item.metadata?.name,
+                    metadataSummary: item.metadata?.summary
+                  })
                 );
-                if (collectionResponse.ok) {
-                  const collection = await collectionResponse.json();
-                  const fragments = collection.fragments || collection.items || [];
-                  const totalCount = collection.fragmentCount || collection.totalFragments || fragments.length;
-                  contextParts.push(
-                    `<collection id="${item.contextId}" name="${collection.name || item.metadata?.name || "Unnamed"}" workspace-id="${collection.workspaceId || item.metadata?.workspaceId || ""}">`
-                  );
-                  if (collection.description) {
-                    contextParts.push(`<description>${collection.description}</description>`);
-                  }
-                  contextParts.push(`<fragments count="${totalCount}">`);
-                  for (const frag of fragments.slice(0, 50)) {
-                    const fragId = frag.id || frag.fragmentId;
-                    const fragTitle = frag.title || "Untitled";
-                    const fragSummary = frag.summary || "";
-                    contextParts.push(
-                      `<fragment-ref id="${fragId}" title="${fragTitle}" summary="${fragSummary}" />`
-                    );
-                  }
-                  contextParts.push("</fragments>");
-                  if (totalCount > 50) {
-                    contextParts.push(
-                      `<note>Collection has ${totalCount} fragments total. Only showing first 50. Use list-memory-fragments with collectionId to see more.</note>`
-                    );
-                  }
-                  contextParts.push(
-                    "<instructions>Use get-memory-fragment-content to read full content of specific fragments. Don't add all fragments to context.</instructions>"
-                  );
-                  contextParts.push("</collection>");
-                } else {
-                  contextParts.push(
-                    `<collection id="${item.contextId}" name="${item.metadata?.name || "Unnamed"}">`
-                  );
-                  if (item.metadata?.summary) {
-                    contextParts.push(`<description>${item.metadata.summary}</description>`);
-                  }
-                  contextParts.push("</collection>");
-                }
               }
             } catch (error41) {
               orchestrationLogger.warn("Failed to fetch context item content", {
@@ -249942,54 +250115,16 @@ Folder behavior:
               }
               contextParts.push("</tool-result>");
             } else if (item.contextType === "collection") {
-              const usableApiUrl = config4.mcp.url.replace("/mcp", "");
-              const collectionResponse = await fetch(
-                `${usableApiUrl}/collections/${item.contextId}?includeFragments=true&fragmentLimit=50`,
-                {
-                  headers: {
-                    Authorization: `Bearer ${context.session.user?.accessToken}`,
-                    "Content-Type": "application/json"
-                  }
-                }
+              contextParts.push(
+                ...await buildCollectionContextParts({
+                  usableApiUrl: config4.mcp.url.replace("/mcp", ""),
+                  accessToken: context.session.user?.accessToken,
+                  contextId: item.contextId,
+                  workspaceId: item.metadata?.workspaceId,
+                  metadataName: item.metadata?.name,
+                  metadataSummary: item.metadata?.summary
+                })
               );
-              if (collectionResponse.ok) {
-                const collection = await collectionResponse.json();
-                const fragments = collection.fragments || collection.items || [];
-                const totalCount = collection.fragmentCount || collection.totalFragments || fragments.length;
-                contextParts.push(
-                  `<collection id="${item.contextId}" name="${collection.name || item.metadata?.name || "Unnamed"}" workspace-id="${collection.workspaceId || item.metadata?.workspaceId || ""}">`
-                );
-                if (collection.description) {
-                  contextParts.push(`<description>${collection.description}</description>`);
-                }
-                contextParts.push(`<fragments count="${totalCount}">`);
-                for (const frag of fragments.slice(0, 50)) {
-                  const fragId = frag.id || frag.fragmentId;
-                  const fragTitle = frag.title || "Untitled";
-                  const fragSummary = frag.summary || "";
-                  contextParts.push(
-                    `<fragment-ref id="${fragId}" title="${fragTitle}" summary="${fragSummary}" />`
-                  );
-                }
-                contextParts.push("</fragments>");
-                if (totalCount > 50) {
-                  contextParts.push(
-                    `<note>Collection has ${totalCount} fragments total. Only showing first 50. Use list-memory-fragments with collectionId to see more.</note>`
-                  );
-                }
-                contextParts.push(
-                  "<instructions>Use get-memory-fragment-content to read full content of specific fragments. Don't add all fragments to context.</instructions>"
-                );
-                contextParts.push("</collection>");
-              } else {
-                contextParts.push(
-                  `<collection id="${item.contextId}" name="${item.metadata?.name || "Unnamed"}">`
-                );
-                if (item.metadata?.summary) {
-                  contextParts.push(`<description>${item.metadata.summary}</description>`);
-                }
-                contextParts.push("</collection>");
-              }
             }
           } catch (error41) {
             orchestrationLogger.warn("Failed to fetch context item content", {
@@ -251287,44 +251422,101 @@ ${combinedSystemMessage}` : combinedSystemMessage;
           const ctxLen = getModelById(selectedModelId)?.capabilities.contextLength || 128e3;
           const envRatio = Number(process.env.USABLE_HARNESS_COMPACT_RATIO);
           const compactRatio = envRatio > 0 && envRatio <= 1 ? envRatio : HARNESS_COMPACT_RATIO;
-          const tokensOf = (m33) => countTokens(typeof m33.content === "string" ? m33.content : JSON.stringify(m33.content));
-          const estTokens = conversationMessages.reduce((s17, m33) => s17 + tokensOf(m33), 0);
+          const tokensOf = (m33) => countMessageTokens(toCountableMessage(m33));
+          const compactToolDefTokens = estimateToolDefinitionTokens(rawTools);
+          const systemTokens = systemMessage ? countTokens(systemMessage) : 0;
+          const baseTokens = systemTokens + contextTokens + compactToolDefTokens;
+          const messageTokens = conversationMessages.reduce((s17, m33) => s17 + tokensOf(m33), 0);
+          const estTokens = baseTokens + messageTokens;
+          const threshold = Math.floor(ctxLen * compactRatio);
           const alreadyCompacted = isCompactionCheckpoint(conversationMessages[0]);
-          if (!alreadyCompacted && estTokens > ctxLen * compactRatio) {
+          orchestrationLogger.debug("\u{1F9F9} compaction check", {
+            estTokens,
+            baseTokens,
+            systemTokens,
+            contextTokens,
+            toolDefTokens: compactToolDefTokens,
+            messageTokens,
+            threshold,
+            ctxLen,
+            compactRatio,
+            messages: conversationMessages.length,
+            alreadyCompacted,
+            path: config3.localFilesystem ? "cli" : "web"
+          });
+          if (!alreadyCompacted && estTokens > threshold) {
             const beforeLen = conversationMessages.length;
             const envKeep = Number(process.env.USABLE_HARNESS_KEEP_TOKENS);
-            const keepRecentTokens = envKeep > 0 ? envKeep : Math.floor(ctxLen * HARNESS_KEEP_RECENT_RATIO);
+            const desiredKeep = envKeep > 0 ? envKeep : Math.floor(ctxLen * HARNESS_KEEP_RECENT_RATIO);
+            const roomForMessages = Math.floor(ctxLen * 0.85) - baseTokens;
+            const keepRecentTokens = Math.max(2e3, Math.min(desiredKeep, roomForMessages));
             const cut = findCutIndex(conversationMessages, keepRecentTokens, tokensOf);
             if (cut >= 3) {
+              const compactionStartEvent = emitter.emit("compaction-needed", {
+                contextUsagePercent: Math.round(estTokens / ctxLen * 100),
+                inputTokens: estTokens,
+                contextLength: ctxLen,
+                model: selectedModelId,
+                serverCompacted: true
+              });
+              multiplexer.send(compactionStartEvent);
+              const compactingPlan = emitter.emit("plan", {
+                plan: "\u{1F9F9} Summarizing older conversation to free context\u2026",
+                steps: config3.maxSteps
+              });
+              multiplexer.send(compactingPlan);
               const dropped = conversationMessages.slice(0, cut);
               const tail2 = conversationMessages.slice(cut);
               const fileOps = extractFileOps(dropped);
-              const summary = await generateCompactionSummary(dropped, {
-                provider: providerRouting?.provider,
-                model: selectedModelId,
-                openRouterApiKey: apiKey,
-                anthropicApiKey,
-                baseUrl: request.providerBaseUrl,
-                authToken: request.providerAuthToken,
-                codexAuth: request.codexAuth,
-                abortSignal: context.abortSignal
-              });
-              const checkpoint = summary ? buildSummaryCheckpoint(summary, dropped.length, fileOps) : buildCompactionMarker(goalText(conversationMessages), dropped.length, fileOps);
+              const compactionConversationId = context.metadata?.conversationId;
+              const { summary, cacheHit } = await generateCompactionSummaryCached(
+                compactionConversationId,
+                dropped,
+                {
+                  provider: providerRouting?.provider,
+                  model: selectedModelId,
+                  openRouterApiKey: apiKey,
+                  anthropicApiKey,
+                  baseUrl: request.providerBaseUrl,
+                  authToken: request.providerAuthToken,
+                  codexAuth: request.codexAuth,
+                  abortSignal: context.abortSignal
+                }
+              );
+              let checkpoint = summary ? buildSummaryCheckpoint(summary, dropped.length, fileOps) : buildCompactionMarker(goalText(conversationMessages), dropped.length, fileOps);
+              const spillPaths = extractSpillPointers(dropped);
+              if (spillPaths.length > 0) {
+                checkpoint = {
+                  role: "user",
+                  content: `${checkpoint.content}
+Large tool results from the compacted work are PRESERVED at:
+` + spillPaths.map((p28) => `- ${p28}`).join("\n") + `
+Re-read them (working_memory on web, bash on the CLI \u2014 grep/sed projection, not a full cat) instead of re-calling the tools that produced them.`
+                };
+              }
               conversationMessages = ensureToolCallIntegrity([checkpoint, ...tail2]);
               orchestrationLogger.warn("\u{1F9F9} Proactive conversation compaction", {
                 beforeLen,
                 afterLen: conversationMessages.length,
                 droppedCount: dropped.length,
                 estTokens,
+                baseTokens,
+                messageTokens,
+                keepRecentTokens,
+                threshold,
                 contextLength: ctxLen,
                 usedLlmSummary: !!summary,
+                cacheHit,
                 path: config3.localFilesystem ? "cli" : "web"
               });
-              const compactPlan = emitter.emit("plan", {
-                plan: `\u{1F9F9} Compacted ${dropped.length} older messages into a summary to free context \u2014 continuing.`,
-                steps: config3.maxSteps
+              const compactionDoneEvent = emitter.emit("compaction", {
+                summary: summary ?? (typeof checkpoint.content === "string" ? checkpoint.content : ""),
+                droppedCount: dropped.length,
+                usedLlmSummary: !!summary,
+                cacheHit
               });
-              multiplexer.send(compactPlan);
+              multiplexer.send(compactionDoneEvent);
               if (request.onContextCompacted) {
                 try {
                   await request.onContextCompacted(conversationMessages);
@@ -251911,7 +252103,7 @@ ${combinedSystemMessage}` : combinedSystemMessage;
                       await bash.exec(`echo "${b64}" | base64 -d > ${filename}`);
                       summary.content = `${summary.content}
-[\u26A0\uFE0F FULL RESULT TOO LARGE \u2014 stored at: ${filename}. Use working_memory tool with "cat ${filename}" to read the complete content on demand.]`;
+[\u26A0\uFE0F FULL RESULT TOO LARGE \u2014 full data stored at ${filename}. Read it with the working_memory tool, but do NOT \`cat\` the whole file (it re-truncates). Project just the part you need, e.g. \`grep -n PATTERN ${filename}\` or \`sed -n '100,200p' ${filename}\` \u2014 do NOT re-call the original tool to recover the omitted part.]`;
                     } catch {
                     }
                   }
@@ -252242,6 +252434,7 @@ function createOrchestratorRequest(messages4, context, config3, persona, apiKey,
       metadata: context.metadata,
       allowedWorkspaceIds: context.allowedWorkspaceIds,
       registeredParentToolSchemas: context.registeredParentToolSchemas,
+      startBackgroundSubagentRelay: context.startBackgroundSubagentRelay,
       extensions: context.extensions
       // Plugin hook seam (CLI only; undefined on web)
     },
@@ -269922,6 +270115,15 @@ Edit it, then /verify-extension ${arg.trim()} to check it loads, /trust (project
           push({ kind: "system", text: plan, tone: "info" });
           ui2.requestRender();
         }
+      } else if (e14.type === "compaction") {
+        const data2 = e14.data;
+        finalize();
+        const head = `\u{1F9F9} Compacted ${data2?.droppedCount ?? 0} older messages into a checkpoint`;
+        const body = data2?.summary ? `${head}:
+${data2.summary}` : `${head}.`;
+        push({ kind: "system", text: body, tone: "info" });
+        ui2.requestRender();
       }
     };
     try {
@@ -270781,7 +270983,7 @@ init_tui_select();
 init_model_registry();
 // package.json
-var version2 = "1.152.0";
+var version2 = "1.153.0";
 // src/adapters/cli/model-catalog.ts
 init_codex_auth();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@usabledev/usable-chat",
-  "version": "1.152.0",
+  "version": "1.153.0",
   "description": "usable-chat — terminal harness for usable-chat (headless + TUI)",
   "type": "module",
   "bin": {