npm - @inceptionstack/roundhouse - Versions diffs - 0.5.27 → 0.5.29 - Mend

@inceptionstack/roundhouse 0.5.27 → 0.5.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/CHANGELOG.md +12 -0
package/package.json +1 -1
package/src/agents/pi/pi-adapter.ts +71 -1
package/src/agents/shared/session-repair.test.ts +174 -0
package/src/agents/shared/session-repair.ts +175 -0
package/src/memory/lifecycle.ts +121 -29
package/src/memory/policy.ts +26 -5
package/src/memory/types.ts +1 -1
package/src/types.ts +11 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,18 @@
 All notable changes to `@inceptionstack/roundhouse` are documented here.
+## [0.5.29] — 2026-05-14
+### Added
+- **Soft-reset recovery for already-overflowed sessions.** When a session has grown past the model's context window, normal compact cannot recover — the summarizer prompt itself overflows and `compact()` throws `prompt is too long: N > max`. v0.5.28's threshold tuning prevents *new* sessions from hitting this; this release adds graceful recovery for sessions that already crossed the line. On context-overflow detection, the memory lifecycle calls a new `agent.softReset(threadId)` capability that trims the on-disk session jsonl to its most-recent N user turns (default 8, byte-capped at 250k), reloads the session, and queues a memory re-injection on the next turn. The agent loses verbatim message history for older turns but retains its durable context (MEMORY.md, daily front-page, soul.md). No more manual surgery on stuck sessions.
+- New module exports: `softResetSessionFile()` and `isContextOverflowError()` in `src/agents/shared/session-repair.ts`. New optional `softReset?(threadId)` method on `AgentAdapter` interface (no-op when not implemented — backward-compatible). PiAdapter implements it via the existing `reloadSession` path.
+- 20 new tests across `session-repair.test.ts` (file-level cut/preserve/repair semantics, error classifier) and `memory.test.ts` (lifecycle wiring — success/no-op/missing-capability/non-overflow-error/throws-during-recovery). 527 tests total.
+## [0.5.28] — 2026-05-14
+### Fixed
+- **PR #126 actually shipped this time.** v0.5.26's CHANGELOG advertised the emergency-compact-loop fix, but the underlying PR (`fix/compact-loop-thresholds-and-thinking`) was still OPEN — only the version bump and self-update patch went out. Users on v0.5.26/v0.5.27 still hit `Summarization failed: prompt is too long: 212776 tokens > 200000 maximum` on overflowed sessions because `DEFAULT_HARD_TOKENS` was still 200k with no headroom clamp. This release contains the actual code change: `DEFAULT_HARD_TOKENS=150_000`, `DEFAULT_SOFT_TOKENS=130_000`, `COMPACT_HEADROOM_TOKENS=50_000`, plus `thinkingLevel='off'` forced inside `compactWithModel`. (#126)
 ## [0.5.27] — 2026-05-14
 ### Fixed

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@inceptionstack/roundhouse",
-  "version": "0.5.27",
+  "version": "0.5.29",
   "type": "module",
   "description": "Multi-platform chat gateway that routes messages through a configured AI agent",
   "license": "MIT",

package/src/agents/pi/pi-adapter.ts CHANGED Viewed

@@ -28,7 +28,7 @@ import {
 import type { AgentAdapter, AgentAdapterFactory, AgentMessage, AgentResponse, AgentStreamEvent, MessageContext } from "../../types";
 import { formatMessage, extractCustomMessage, customContentToText } from "./message-format";
-import { isToolPairingError, repairSessionFile } from "../shared/session-repair";
+import { isToolPairingError, repairSessionFile, softResetSessionFile, type SoftResetReport } from "../shared/session-repair";
 import { SESSIONS_DIR } from "../../config";
 import { DEBUG_STREAM, threadIdToDir } from "../../util";
@@ -608,7 +608,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
         const agentState = (entry.session as any).agent?.state;
         let currentModel: any;
+        let currentThinkingLevel: any;
         let modelSwapped = false;
+        let thinkingSwapped = false;
         // Resolve and swap model for compact
         if (!agentState) {
@@ -627,6 +629,19 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
             modelSwapped = true;
             console.log(`[pi-agent] compact using model (in-memory): ${modelId}`);
           }
+          // Force thinking off for compact regardless of agent's default.
+          // Summarization doesn't benefit from reasoning, costs more tokens,
+          // and complicates the maxTokens math (adjustMaxTokensForThinking adds
+          // up to 16k thinking budget). Direct state mutation matches the model
+          // swap above and avoids setThinkingLevel(), which would persist to
+          // settings.json.
+          if (agentState.thinkingLevel && agentState.thinkingLevel !== "off") {
+            currentThinkingLevel = agentState.thinkingLevel;
+            agentState.thinkingLevel = "off";
+            thinkingSwapped = true;
+            console.log(`[pi-agent] compact forcing thinkingLevel=off (was ${currentThinkingLevel})`);
+          }
         }
         try {
@@ -640,7 +655,62 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
           if (modelSwapped) {
             agentState.model = currentModel;
           }
+          if (thinkingSwapped) {
+            agentState.thinkingLevel = currentThinkingLevel;
+          }
+        }
+      });
+    },
+    /**
+     * Soft-reset an overflowed session: trim the on-disk jsonl to its most
+     * recent N user turns, then reload the session in place. Used by the
+     * memory-lifecycle layer when compact fails with "prompt is too long"
+     * — the session has grown past the model's context window and the
+     * summarizer prompt itself can no longer fit.
+     *
+     * Returns the soft-reset report (or null if no session for threadId).
+     * Behavior:
+     *   - In-memory session: returns null (nothing to trim on disk).
+     *   - Already-trimmed session: report.reset === false, no reload.
+     *   - Otherwise: trims file, reloads session, returns report.
+     *
+     * On reload failure, the SessionEntry is dropped from the cache so the
+     * next prompt() recreates it cleanly.
+     *
+     * The whole operation runs inside enqueue(threadId, ...); presumably that
+     * serializes work per thread, but enqueue is defined outside this hunk —
+     * TODO confirm.
+     *
+     * NOTE(review): softResetSessionFile() is synchronous and is called
+     * outside the try/catch below, so anything it throws rejects the promise
+     * returned by this method instead of being handled here.
+     *
+     * NOTE(review): when the reload fails, the report (reset === true) is
+     * still returned — the file on disk was trimmed even though the cached
+     * session was dropped.
+     *
+     * @param threadId - key used to look up the cached session entry.
+     * @returns the SoftResetReport, or null when there is no cached entry or
+     *          the session has no sessionFile.
+     */
+    async softReset(threadId: string): Promise<SoftResetReport | null> {
+      return enqueue(threadId, async () => {
+        const entry = sessions.get(threadId);
+        if (!entry) return null; // no cached session for this thread
+        const sessionFile = entry.session.sessionFile;
+        if (!sessionFile) {
+          console.warn(`[pi-agent] softReset: ${threadId} has no on-disk session file, skipping`);
+          return null;
+        }
+        console.warn(`[pi-agent] softReset: trimming overflowed session ${sessionFile}`);
+        const report = softResetSessionFile(sessionFile); // default options (no overrides passed)
+        if (!report.reset) {
+          console.log(`[pi-agent] softReset: nothing to trim (${report.reason})`);
+          return report; // file untouched, so the in-memory session is left as-is
+        }
+        console.warn(
+          `[pi-agent] softReset: ${report.entriesBefore} → ${report.entriesAfter} entries, ` +
+          `${report.bytesBefore} → ${report.bytesAfter} bytes (${report.reason}). Backup: ${report.backupPath}`
+        );
+        // Reload the session so pi-ai re-reads the trimmed file. Drop the
+        // cache entry on failure so the next prompt() recreates from scratch
+        // rather than running against the disposed session.
+        try {
+          const reloaded = await reloadSession(entry, sessionFile); // new session is built before the old one is disposed
+          await entry.session.dispose(); // NOTE(review): if dispose() throws, `reloaded` is never stored and the entry is deleted below — confirm intended
+          entry.session = reloaded.session;
+        } catch (err) {
+          console.error(`[pi-agent] softReset reload failed for ${threadId}:`, (err as Error).message);
+          sessions.delete(threadId);
         }
+        return report;
       });
     },

package/src/agents/shared/session-repair.test.ts CHANGED Viewed

@@ -11,6 +11,8 @@ import {
   inspectSessionFile,
   repairSessionFile,
   isToolPairingError,
+  softResetSessionFile,
+  isContextOverflowError,
 } from './session-repair';
 // ---------- fixtures ----------
@@ -376,3 +378,175 @@ describe('session-repair', () => {
     });
   });
 });
+// ============================================================
+// softResetSessionFile — file-level tests: cut position, no-op cases, post-cut repair, byte cap
+// ============================================================
+describe('softResetSessionFile', () => {
+  function userTurn(idPrefix: string, parentId: string | null) {
+    // A user turn = user msg + assistant text reply (no tool calls, so cuts
+    // are clean; tool-pairing edge cases are covered by repair tests).
+    return [
+      userMsg(`${idPrefix}u`, parentId, `text-${idPrefix}`), // id "<prefix>u"; userMsg is a fixture helper defined outside this hunk
+      {
+        type: 'message',
+        id: `${idPrefix}a`, // assistant reply id "<prefix>a", parented to the user msg
+        parentId: `${idPrefix}u`,
+        timestamp: '2026-05-01T00:00:04Z',
+        message: {
+          role: 'assistant',
+          content: [{ type: 'text', text: `reply-${idPrefix}` }],
+          api: 'bedrock-converse-stream',
+          provider: 'amazon-bedrock',
+          model: 'claude',
+          usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
+          stopReason: 'endTurn',
+          timestamp: 4,
+        },
+      },
+    ];
+  }
+  it('softResetSessionFile_OnSessionWithMoreTurnsThanTarget_KeepsHeaderAndRecentTurns', () => {
+    // Arrange: 10 user turns, target keepRecentUserTurns=3.
+    const entries: object[] = [HEADER, MODEL_CHANGE];
+    let parent: string | null = 'mc-1';
+    for (let i = 1; i <= 10; i++) {
+      const turn = userTurn(`t${i}`, parent);
+      entries.push(...turn);
+      parent = `t${i}a`; // chain each turn onto the previous assistant reply
+    }
+    const path = tmpJsonl(entries);
+    // Act
+    const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });
+    // Assert: report indicates reset, file shrunk, header preserved, last 3 user msgs present.
+    expect(report.reset).toBe(true);
+    expect(report.entriesAfter).toBeLessThan(report.entriesBefore);
+    expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
+    expect(report.backupPath).toBeDefined();
+    expect(existsSync(report.backupPath!)).toBe(true);
+    const trimmed = parseSessionFile(path);
+    // Header always preserved.
+    expect(trimmed[0].type).toBe('session');
+    // Last 3 user turns present.
+    const userIds = trimmed.filter(e => e.message?.role === 'user').map(e => e.id);
+    expect(userIds).toEqual(['t8u', 't9u', 't10u']);
+    // First kept entry's parentId reset to null (no dangling pointer).
+    const firstAfterHeader = trimmed[1]; // MODEL_CHANGE sat before the cut, so index 1 is now the first kept tail entry
+    expect(firstAfterHeader.parentId).toBeNull();
+  });
+  it('softResetSessionFile_OnSessionSmallerThanTarget_ReturnsResetFalseAndDoesNotMutate', () => {
+    // Arrange: 2 user turns, target keepRecentUserTurns=8.
+    const entries: object[] = [HEADER, MODEL_CHANGE, ...userTurn('a', 'mc-1'), ...userTurn('b', 'aa')];
+    const path = tmpJsonl(entries);
+    const before = readFileSync(path, 'utf8');
+    // Act
+    const report = softResetSessionFile(path, { keepRecentUserTurns: 8 });
+    // Assert: no reset, file untouched, no backup.
+    expect(report.reset).toBe(false);
+    expect(report.backupPath).toBeUndefined();
+    expect(readFileSync(path, 'utf8')).toBe(before);
+  });
+  it('softResetSessionFile_OnTinySession_ReturnsResetFalseWithReason', () => {
+    // Arrange: only header.
+    const path = tmpJsonl([HEADER]);
+    // Act
+    const report = softResetSessionFile(path);
+    // Assert
+    expect(report.reset).toBe(false);
+    expect(report.reason).toContain('too-small'); // matches the 'session-too-small' reason string
+  });
+  it('softResetSessionFile_OnSessionWithOrphanedToolPairsAfterCut_AlsoRunsRepair', () => {
+    // Arrange: a session where the tail contains a toolResult whose toolCall
+    // sits in the older (dropped) section. After the cut the toolResult is
+    // orphaned — soft-reset must clean it up via the post-cut repair.
+    const oldToolCall = assistantToolCall('a-old', 'mc-1', 'call-X');
+    const orphanedResult = {
+      type: 'message',
+      id: 'tr-1',
+      parentId: 'a-old',
+      timestamp: '2026-05-01T00:00:05Z',
+      message: { role: 'toolResult', toolCallId: 'call-X', content: 'ok', timestamp: 5 },
+    };
+    const entries: object[] = [HEADER, MODEL_CHANGE, userMsg('u-old', 'mc-1', 'old'), oldToolCall];
+    let parent: string | null = 'a-old';
+    // Push 5 fresh turns so the cut leaves us in tail.
+    for (let i = 1; i <= 5; i++) {
+      entries.push(...userTurn(`f${i}`, parent));
+      parent = `f${i}a`;
+    }
+    // Insert the orphaned result mid-tail (kept by cut, but call is dropped).
+    entries.splice(6, 0, orphanedResult); // NOTE(review): index 6 lands between f1a and f2u; with keepRecentUserTurns=3 the cut falls at f3u (index 9 after this splice), so the result looks to be dropped by the cut itself rather than kept and repaired — assumes userMsg() emits role 'user' entries; confirm
+    const path = tmpJsonl(entries);
+    // Act
+    const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });
+    // Assert: reset succeeded AND post-cut repair fired.
+    expect(report.reset).toBe(true);
+    expect(report.postRepair).toBeDefined(); // NOTE(review): postRepair is set on every reset:true report, so this alone does not show that an orphan was removed
+    // Final file is internally consistent (no orphans).
+    expect(inspectSessionFile(path).hasOrphans).toBe(false);
+  });
+  it('softResetSessionFile_OnNonexistentFile_Throws', () => {
+    // Arrange/Act/Assert: documents the precondition.
+    expect(() => softResetSessionFile('/nonexistent/path.jsonl')).toThrow(/not found/);
+  });
+  it('softResetSessionFile_BytesCapHonored_StopsCutAtCap', () => {
+    // Arrange: each turn is small but we set a tiny byte cap so we cut early.
+    const entries: object[] = [HEADER, MODEL_CHANGE];
+    let parent: string | null = 'mc-1';
+    for (let i = 1; i <= 20; i++) {
+      entries.push(...userTurn(`t${i}`, parent));
+      parent = `t${i}a`;
+    }
+    const path = tmpJsonl(entries);
+    // Act
+    const report = softResetSessionFile(path, { keepRecentUserTurns: 100, maxBytes: 800 });
+    // Assert: reset triggered by byte cap (we asked for 100 turns we don't have,
+    // but byte cap kicks in first).
+    expect(report.reset).toBe(true);
+    expect(report.reason).toMatch(/byte-cap|fewer-turns/); // NOTE(review): the fewer-turns reason only comes back with cutIdx 1, which is reported as reset:false — that alternative looks unreachable here
+    expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
+  });
+});
+// ============================================================
+// isContextOverflowError — table-driven checks of the message classifier
+// ============================================================
+describe('isContextOverflowError', () => {
+  it.each([
+    ['prompt is too long: 212776 tokens > 200000 maximum', true],
+    ['Validation error: input is too long', true],
+    ['context length exceeded for this model', true],
+    ['maximum context length reached', true],
+    ['tokens > 200000 maximum', true], // exercises the "tokens > N maximum" pattern on its own
+    ['toolUse without toolResult', false], // pairing error — different recovery
+    ['random network failure', false],
+    ['', false], // empty message: no pattern matches
+  ])('classifies %p as overflow=%p', (msg, expected) => {
+    expect(isContextOverflowError(new Error(msg))).toBe(expected);
+  });
+  it('returns false for null/undefined/non-Error inputs', () => {
+    expect(isContextOverflowError(null)).toBe(false);
+    expect(isContextOverflowError(undefined)).toBe(false);
+    expect(isContextOverflowError({})).toBe(false); // no .message, so the classifier falls back to String(err)
+  });
+});

package/src/agents/shared/session-repair.ts CHANGED Viewed

@@ -286,6 +286,181 @@ export function repairSessionFile(path: string): SessionRepairReport {
   };
 }
+// ── Soft reset (recovery from already-overflowed sessions) ──────────────
+/**
+ * When a session has grown past the model's context window, normal compact
+ * cannot recover — the summarizer prompt itself overflows. Soft reset trims
+ * the session jsonl on disk to its most-recent N user turns, drops everything
+ * older, and re-runs the tool-pairing repair so what's left is internally
+ * consistent.
+ *
+ * Trade-off: loses fidelity for older turns. The roundhouse memory layer
+ * (MEMORY.md, daily front-page) re-injects on the next turn, so the agent
+ * still has its durable context — just not the verbatim message history.
+ *
+ * Conservative defaults aim for ~30–40% of a 200k window so the next compact
+ * has ample room to summarize.
+ *
+ * Options accepted by softResetSessionFile(). Both fields are optional; the
+ * defaults are applied with `??` inside softResetSessionFile.
+ */
+export interface SoftResetOptions {
+  /** Keep at most this many user turns from the tail (default: 8). */
+  keepRecentUserTurns?: number;
+  /** Hard cap on jsonl bytes after trim (default: 250_000 ≈ 60–80k tokens). */
+  maxBytes?: number; // NOTE(review): the cut search sizes entries with JSON.stringify(e).length (UTF-16 code units, not encoded bytes) and returns on the turn target before checking this cap, so the trimmed file can exceed it — "hard cap" is approximate
+}
+export interface SoftResetReport { // result object returned by softResetSessionFile()
+  reset: boolean; // true only when the file was rewritten
+  reason: string; // 'session-too-small', 'cut-at-start (...)', or the reason produced by the cut search
+  entriesBefore: number; // parsed entry count before trimming
+  entriesAfter: number; // header + kept tail, minus entries dropped by the post-cut repair; equals entriesBefore on a no-op
+  bytesBefore: number; // file size read from disk before trimming
+  bytesAfter: number; // file size re-read after trim + repair; equals bytesBefore on a no-op
+  backupPath?: string; // set only when reset is true (backup is taken before the rewrite)
+  /** Tool-pairing repair report on the trimmed file (orphans created by the cut). */
+  postRepair?: SessionRepairReport; // set only when reset is true
+}
+/**
+ * Find a safe cut index in the entries array. Walk backwards from the end
+ * looking for user message entries; the cut sits *just before* the Nth
+ * most-recent user message we encounter. Returns the index of the first
+ * entry to KEEP (i.e. all entries[0..cutIdx) are dropped).
+ *
+ * If we can't find enough user messages, returns 1 to keep everything except
+ * the session header (which we preserve separately).
+ *
+ * @param entries - parsed session entries, header expected at index 0.
+ * @param keepRecentUserTurns - number of user messages (counted from the
+ *        tail) after which the scan stops.
+ * @param maxBytes - size budget for the kept tail, compared against the
+ *        running sum of JSON.stringify(entry).length + 1.
+ * @returns cutIdx plus a reason string: `kept-N-user-turns`,
+ *          `byte-cap-<size>b`, or `fewer-turns-than-target`.
+ *
+ * NOTE(review): sizes are string lengths (UTF-16 code units), not encoded
+ * bytes, so non-ASCII content is undercounted relative to the file size.
+ *
+ * NOTE(review): the turn-count check runs before the byte-cap check, so
+ * reaching the turn target returns even when the accumulated size is
+ * already above maxBytes.
+ *
+ * NOTE(review): userTurnsSeen is incremented before the byte-cap guard on
+ * the same iteration. If the cap is first exceeded on the iteration that
+ * sees the first user message, the function returns i + 1 and that user
+ * message is excluded — the kept tail then holds no user message (and is
+ * empty when that message is the last entry). Confirm whether intended.
+ */
+function findSoftResetCutIndex(
+  entries: SessionFileEntry[],
+  keepRecentUserTurns: number,
+  maxBytes: number,
+): { cutIdx: number; reason: string } {
+  let userTurnsSeen = 0;
+  let bytesAccumulated = 0;
+  // Scan tail-to-head, stop when we've collected enough user turns OR exceeded byte budget.
+  for (let i = entries.length - 1; i >= 0; i--) { // the scan runs down to index 0, so the header is visited if nothing returns earlier
+    const e = entries[i];
+    bytesAccumulated += JSON.stringify(e).length + 1; // +1 for newline
+    if (e.type === 'message' && e.message?.role === 'user') {
+      userTurnsSeen++;
+      if (userTurnsSeen >= keepRecentUserTurns) {
+        return { cutIdx: i, reason: `kept-${userTurnsSeen}-user-turns` }; // cut lands ON the user message, so it is kept
+      }
+    }
+    // Byte cap is a safety net for sessions where a single turn is enormous
+    // (e.g. one turn dumped a 200k file). Stop once we'd exceed the cap.
+    if (bytesAccumulated > maxBytes && userTurnsSeen > 0) {
+      return { cutIdx: i + 1, reason: `byte-cap-${bytesAccumulated}b` }; // entry i is the one that crossed the cap, so it is excluded
+    }
+  }
+  // Not enough user turns in the file — keep everything except header.
+  // (Header is always at index 0 and is preserved by the writer separately.)
+  return { cutIdx: 1, reason: 'fewer-turns-than-target' };
+}
+/**
+ * Soft-reset a pi-ai session jsonl: keep the most-recent N user turns + their
+ * surrounding messages, drop everything older. Always preserves the session
+ * header (entries[0]). Re-parents the first kept entry to null so the tree
+ * remains valid. Re-runs tool-pairing repair on the trimmed file because
+ * the cut likely orphaned some toolCall/toolResult pairs.
+ *
+ * Atomic + backup: same safety pattern as repairSessionFile.
+ *
+ * Order of operations when a reset happens: back up the file, write
+ * header + tail via atomicWrite, run repairSessionFile on the result, then
+ * re-read the file size for the report.
+ *
+ * No-op cases (file is not touched, no backup is made):
+ *   - fewer than 4 parsed entries → reason 'session-too-small'
+ *   - cut index <= 1 → reason 'cut-at-start (<search reason>)'
+ *
+ * Every entry between the header and the cut is dropped regardless of its
+ * type; only entries[0] and entries[cutIdx..] survive.
+ *
+ * @param path - session jsonl file to trim.
+ * @param options - keepRecentUserTurns (default 8) and maxBytes
+ *        (default 250_000).
+ * @returns report describing what was reset, or `{reset:false}` if nothing to do.
+ * @throws Error `Session file not found: <path>` when path does not exist.
+ */
+export function softResetSessionFile(
+  path: string,
+  options: SoftResetOptions = {},
+): SoftResetReport {
+  if (!existsSync(path)) {
+    throw new Error(`Session file not found: ${path}`);
+  }
+  const keepRecentUserTurns = options.keepRecentUserTurns ?? 8;
+  const maxBytes = options.maxBytes ?? 250_000;
+  const entries = parseSessionFile(path);
+  const bytesBefore = readFileSync(path).length; // Buffer length, i.e. size in bytes on disk
+  // Need at least header + a couple of messages to be worth resetting.
+  if (entries.length < 4) {
+    return {
+      reset: false,
+      reason: 'session-too-small',
+      entriesBefore: entries.length,
+      entriesAfter: entries.length,
+      bytesBefore,
+      bytesAfter: bytesBefore,
+    };
+  }
+  const { cutIdx, reason } = findSoftResetCutIndex(entries, keepRecentUserTurns, maxBytes);
+  // No-op if cut is already at the start (nothing to drop besides header).
+  if (cutIdx <= 1) {
+    return {
+      reset: false,
+      reason: `cut-at-start (${reason})`,
+      entriesBefore: entries.length,
+      entriesAfter: entries.length,
+      bytesBefore,
+      bytesAfter: bytesBefore,
+    };
+  }
+  // Build trimmed entries: header + tail.
+  // Re-parent the first kept tail entry to null so the tree root is intact.
+  const header = entries[0];
+  const tail = entries.slice(cutIdx);
+  if (tail.length > 0 && tail[0].parentId !== undefined) { // entries without a parentId field are left untouched
+    tail[0] = { ...tail[0], parentId: null }; // copy rather than mutate the parsed entry
+  }
+  const trimmed = [header, ...tail];
+  const backupPath = backupFile(path); // backup is taken before the file is overwritten
+  const newContent = trimmed.map(e => JSON.stringify(e)).join('\n') + '\n';
+  atomicWrite(path, newContent);
+  // The cut may have orphaned tool pairs (e.g. toolResult kept but its
+  // toolCall is now in the dropped section). Run repair to clean those up.
+  const postRepair = repairSessionFile(path);
+  const bytesAfter = readFileSync(path).length; // measured after the repair pass, so it reflects the final file
+  return {
+    reset: true,
+    reason,
+    entriesBefore: entries.length,
+    entriesAfter: trimmed.length - postRepair.droppedEntryIds.length, // derived from the repair report rather than re-parsing the file
+    bytesBefore,
+    bytesAfter,
+    backupPath,
+    postRepair,
+  };
+}
+// ── Error classifiers ────────────────────────────────────────────────────
+/**
+ * Detect whether an error from pi-ai / the model provider indicates the
+ * session has grown past the model's context window (input > max).
+ *
+ * Triggers soft-reset recovery in the memory lifecycle. Intentionally narrow:
+ * only matches the well-known overflow phrasings, not generic 4xx errors.
+ *
+ * The text tested is `err.message` when that property is neither null nor
+ * undefined (an empty string is used as-is), otherwise `String(err)`.
+ * All patterns are case-insensitive.
+ *
+ * @param err - any thrown value; falsy values return false immediately.
+ * @returns true when the text matches one of the overflow patterns.
+ */
+export function isContextOverflowError(err: unknown): boolean {
+  if (!err) return false;
+  const msg = (err as { message?: string }).message ?? String(err);
+  const patterns = [
+    /prompt is too long/i,
+    /tokens?\s*[>>]\s*\d+\s*maximum/i, // NOTE(review): the character class lists '>' twice as shown here — possibly a second, different character was lost in rendering; confirm against the published file
+    /input is too long/i,
+    /context length exceeded/i,
+    /maximum context length/i,
+  ];
+  return patterns.some(p => p.test(msg));
+}
 /**
  * Detect whether an error from pi-ai / the model provider indicates a
  * tool-pairing mismatch that can be recovered by session repair.

package/src/memory/lifecycle.ts CHANGED Viewed

@@ -16,10 +16,41 @@ import { shouldInjectMemory, classifyContextPressure, isSoftFlushOnCooldown } fr
 import { buildMemoryInjection, injectMemoryIntoMessage } from "./inject";
 import { buildFlushPrompt } from "./prompts";
 import { bootstrapMemoryFiles } from "./bootstrap";
+import { isContextOverflowError } from "../agents/shared/session-repair";
 import { appendFile, mkdir } from "node:fs/promises";
 import { join } from "node:path";
 import { homedir } from "node:os";
+// ── Telemetry helper ─────────────────────────────────
+interface CompactLogEntry { // one record of the compact timing log; same field set for success and failure
+  threadId: string;
+  level: string; // level as requested by the caller
+  effectiveLevel: string; // level actually used for the run
+  flushSkipped: boolean; // true when the flush step was skipped
+  tokensBefore: number | null; // null on failed runs
+  tokensAfter: number | null; // null on failed runs or when the compact result has no value
+  flushMs: number; // 0 when the flush was skipped or did not complete
+  compactMs: number; // 0 when the failure happened before compact finished
+  totalMs: number;
+  model: string; // model id used for compact, or "default"
+  status: "ok" | "failed"; // discriminator for success vs failure entries
+  error: string | null; // null on success; on failure the error text, truncated by the caller
+}
+/**
+ * Append a compact telemetry entry. Fire-and-forget.
+ * Schema is uniform across success/failure (status discriminator) so
+ * downstream parsers don't have to handle missing fields.
+ *
+ * Writes one JSON line (the entry plus an ISO `ts` timestamp) to
+ * `<home>/.roundhouse/logs/compact-timing.jsonl`, creating the directory
+ * if needed. The promise chain is not awaited; a write failure is only
+ * reported through console.warn and never thrown to the caller.
+ *
+ * @param entry - telemetry record to serialize.
+ */
+function appendCompactLog(entry: CompactLogEntry): void {
+  const logDir = join(homedir(), ".roundhouse", "logs");
+  const line = JSON.stringify({ ts: new Date().toISOString(), ...entry }) + "\n"; // timestamp is taken when this is called, not when the write lands
+  mkdir(logDir, { recursive: true })
+    .then(() => appendFile(join(logDir, "compact-timing.jsonl"), line))
+    .catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
+}
 // ── Memory mode detection ────────────────────────────
 /**
@@ -246,11 +277,16 @@ export async function flushMemoryThenCompact(
   // "manual" level, attempting the flush in that condition will hit the same
   // 200k rejection. Deferring flush to a later (successful) turn is the safe
   // recovery path.
-  const stuckInEmergency = (await loadThreadMemoryState(threadId)).pendingCompact === "emergency";
+  const stateBeforeCompact = await loadThreadMemoryState(threadId);
+  const stuckInEmergency = stateBeforeCompact.pendingCompact === "emergency";
   const skipFlush = effectiveLevel === "emergency" || stuckInEmergency;
+  // Hoisted so the catch block can report accurate flush vs compact timing
+  // (a failure during compact() would otherwise conflate the two phases).
+  let flushMs = 0;
+  let compactMs = 0;
   try {
-    let flushMs = 0;
     if (!skipFlush) {
       // Step 1: flush
       const flushText = buildFlushPrompt(mode === "unknown" ? "full" : mode, effectiveLevel);
@@ -276,16 +312,18 @@ export async function flushMemoryThenCompact(
     const result = usedCompactModel
       ? await agent.compactWithModel!(threadId, flushModel!)
       : await agent.compact!(threadId);
-    const compactMs = Date.now() - t1;
+    compactMs = Date.now() - t1;
     if (!result) return null;
-    // Step 3: mark force re-inject (Full mode only)
+    // Step 3: mark force re-inject (Full mode only). Reuse the state we
+    // already loaded above; the compact step doesn't mutate memory-state
+    // (it mutates the pi session, a separate file), so the in-memory copy
+    // is still authoritative for our fields.
     if (mode !== "complement") {
-      const state = await loadThreadMemoryState(threadId);
-      state.forceInjectReason = "after-compact";
-      state.lastCompactAt = new Date().toISOString();
-      state.pendingCompact = undefined;
-      await saveThreadMemoryState(threadId, state);
+      stateBeforeCompact.forceInjectReason = "after-compact";
+      stateBeforeCompact.lastCompactAt = new Date().toISOString();
+      stateBeforeCompact.pendingCompact = undefined;
+      await saveThreadMemoryState(threadId, stateBeforeCompact);
     }
     const totalMs = Date.now() - t0;
@@ -302,30 +340,84 @@ export async function flushMemoryThenCompact(
     const timing = { flushMs, compactMs, totalMs, model: usedCompactModel ? flushModel! : "default" };
     console.log(`[memory] flush+compact done for ${threadId}: ${result.tokensBefore} → ${result.tokensAfter ?? "?"} tokens | flush=${flushMs}ms compact=${compactMs}ms total=${totalMs}ms model=${timing.model}`);
-    // Persist timing log for debugging (async, fire-and-forget)
-    const logDir = join(homedir(), ".roundhouse", "logs");
-    mkdir(logDir, { recursive: true })
-      .then(() => {
-        const entry = JSON.stringify({
-          ts: new Date().toISOString(),
-          threadId,
-          level,
-          tokensBefore: result.tokensBefore,
-          tokensAfter: result.tokensAfter,
-          ...timing,
-        });
-        return appendFile(join(logDir, "compact-timing.jsonl"), entry + "\n");
-      })
-      .catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
+    // Persist timing log for debugging (async, fire-and-forget).
+    // Schema is intentionally uniform across success and failure entries
+    // (status discriminator + same field set) so jsonl parsers don't have
+    // to special-case missing fields.
+    appendCompactLog({
+      threadId,
+      level,
+      effectiveLevel,
+      flushSkipped: skipFlush,
+      tokensBefore: result.tokensBefore,
+      tokensAfter: result.tokensAfter ?? null,
+      ...timing,
+      status: "ok",
+      error: null,
+    });
     return { ...result, timing };
   } catch (err) {
-    console.error(`[memory] flush+compact failed for ${threadId}:`, (err as Error).message);
-    // Mark pending so we retry on next turn
+    const errMsg = (err as Error).message;
+    console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
+    // Recovery path: when the session has grown past the model's context
+    // window, the summarizer prompt itself overflows and compact() throws
+    // "prompt is too long". Threshold tuning prevents *new* sessions from
+    // hitting this, but does nothing for sessions already past the line.
+    // Trim the on-disk session jsonl to its most recent N user turns and
+    // mark the next turn for a fresh memory injection. We do NOT retry
+    // compact inline — that would extend the thread lock for another long
+    // operation. The trimmed session is small enough that the next user
+    // turn proceeds normally; any soft pressure from injected memory will
+    // trigger a regular compact later.
+    let softResetAttempted = false;
+    let softResetSucceeded = false;
+    if (isContextOverflowError(err) && agent.softReset) {
+      softResetAttempted = true;
+      try {
+        await onProgress?.("♻️ Session overflowed — soft-resetting to recent turns...");
+        const report = await agent.softReset(threadId);
+        if (report?.reset) {
+          softResetSucceeded = true;
+          console.warn(`[memory] soft-reset recovered ${threadId} from overflow`);
+        } else {
+          console.warn(`[memory] soft-reset returned no-op for ${threadId} (${(report as { reason?: string } | null)?.reason ?? "unknown"})`);
+        }
+      } catch (resetErr) {
+        console.error(`[memory] soft-reset failed for ${threadId}:`, (resetErr as Error).message);
+      }
+    }
+    appendCompactLog({
+      threadId,
+      level,
+      effectiveLevel,
+      flushSkipped: skipFlush,
+      tokensBefore: null,
+      tokensAfter: null,
+      flushMs,    // accurate: 0 if skipped or failed before flush completed
+      compactMs,  // accurate: 0 if failed before/during compact
+      totalMs: Date.now() - t0,
+      model: flushModel ?? "default",
+      status: "failed",
+      error: (softResetAttempted
+        ? `${softResetSucceeded ? "soft-reset-recovered" : "soft-reset-failed"}: ${errMsg}`
+        : errMsg).slice(0, 500),
+    });
     try {
-      const state = await loadThreadMemoryState(threadId);
-      state.pendingCompact = effectiveLevel;
-      await saveThreadMemoryState(threadId, state);
+      if (softResetSucceeded) {
+        // Soft reset cleared the overflow. Mark the next turn for memory
+        // re-injection so the agent has its durable context, and clear the
+        // pendingCompact flag — there's nothing left to compact now.
+        stateBeforeCompact.forceInjectReason = "after-soft-reset";
+        stateBeforeCompact.pendingCompact = undefined;
+      } else {
+        // Re-arm pendingCompact so the next turn retries.
+        stateBeforeCompact.pendingCompact = effectiveLevel;
+      }
+      await saveThreadMemoryState(threadId, stateBeforeCompact);
     } catch {}
     return null;
   }

package/src/memory/policy.ts CHANGED Viewed

@@ -10,12 +10,26 @@ import { formatDate } from "./files";
 // ── Defaults ─────────────────────────────────────────
 const DEFAULT_SOFT_PERCENT = 0.45;
-const DEFAULT_SOFT_TOKENS = 180_000;
+const DEFAULT_SOFT_TOKENS = 130_000;
 const DEFAULT_HARD_PERCENT = 0.50;
-const DEFAULT_HARD_TOKENS = 200_000;
+const DEFAULT_HARD_TOKENS = 150_000;
 const DEFAULT_EMERGENCY_THRESHOLD = 32_768;
 const DEFAULT_COOLDOWN_MS = 10 * 60_000; // 10 minutes
+// Headroom reserved for the summarization payload itself when compact runs.
+// The summarizer prompt serializes ALL discarded history (everything older
+// than ~20k of recent tokens) plus scaffolding plus previous summary, then
+// asks the model to summarize. If the prompt itself overflows the model
+// context, compact() throws. 50k is the empirical headroom that fits a
+// typical summarization prompt on Claude family.
+const COMPACT_HEADROOM_TOKENS = 50_000;
+// Why 130k/150k as the default absolute thresholds against a 200k window:
+// see COMPACT_HEADROOM_TOKENS above and
+// ~/.roundhouse/workspace/compaction-loop-diagnosis.md (Bug B).
+// For smaller-window models, classifyContextPressure() clamps the absolute
+// thresholds to `window - HEADROOM` so they never exceed the window.
 // ── Injection policy ─────────────────────────────────
 export interface InjectionDecision {
@@ -87,14 +101,21 @@ export function classifyContextPressure(
   const pctDecimal = percent != null ? percent / 100 : tokens / window;
+  // Clamp absolute thresholds so they never exceed `window - HEADROOM`.
+  // Defends against future smaller-window models where the configured
+  // 150k/130k absolute thresholds would otherwise sit above the window.
+  // The percent thresholds already scale with window naturally.
+  const headroom = COMPACT_HEADROOM_TOKENS;
+  const ceiling = Math.max(0, window - headroom);
   // Hard threshold
   const hardPct = config?.hardPercent ?? DEFAULT_HARD_PERCENT;
-  const hardTok = config?.hardTokens ?? DEFAULT_HARD_TOKENS;
+  const hardTok = Math.min(config?.hardTokens ?? DEFAULT_HARD_TOKENS, ceiling);
   if (pctDecimal >= hardPct || tokens >= hardTok) return "hard";
-  // Soft threshold
+  // Soft threshold (clamped one step below hard so soft fires first).
   const softPct = config?.softPercent ?? DEFAULT_SOFT_PERCENT;
-  const softTok = config?.softTokens ?? DEFAULT_SOFT_TOKENS;
+  const softTok = Math.min(config?.softTokens ?? DEFAULT_SOFT_TOKENS, Math.max(0, hardTok - 1));
   if (pctDecimal >= softPct || tokens >= softTok) return "soft";
   return "none";

package/src/memory/types.ts CHANGED Viewed

@@ -56,7 +56,7 @@ export interface ThreadMemoryState {
   /** Local date when memory was last injected (detects day boundary) */
   lastSeenLocalDate?: string;
   /** Force re-injection on next turn */
-  forceInjectReason?: "new-session" | "after-compact" | "manual";
+  forceInjectReason?: "new-session" | "after-compact" | "after-soft-reset" | "manual";
   /** When last compaction happened */
   lastCompactAt?: string;
   /** Pending compaction level (from interrupted flush) */

package/src/types.ts CHANGED Viewed

@@ -122,6 +122,17 @@ export interface AgentAdapter {
   /** Compact with a specific model. */
   compactWithModel?(threadId: string, modelId: string): Promise<{ tokensBefore: number; tokensAfter: number | null } | null>;
+  /**
+   * Soft-reset an overflowed session by trimming on-disk history to the
+   * most-recent few turns. Called by memory lifecycle when compact() fails
+   * because the session itself is too large for the model's context window.
+   *
+   * Returns a report describing what was trimmed (shape is adapter-specific
+   * but always has `reset: boolean`), or null if not applicable.
+   * Adapters without on-disk sessions (in-memory only) should return null.
+   */
+  softReset?(threadId: string): Promise<{ reset: boolean } | null>;
   /** Abort the current agent run for a thread. */
   abort?(threadId: string): Promise<void>;