@inceptionstack/roundhouse 0.5.27 → 0.5.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  All notable changes to `@inceptionstack/roundhouse` are documented here.
4
4
 
5
+ ## [0.5.29] — 2026-05-14
6
+
7
+ ### Added
8
+ - **Soft-reset recovery for already-overflowed sessions.** When a session has grown past the model's context window, normal compact cannot recover — the summarizer prompt itself overflows and `compact()` throws `prompt is too long: N > max`. v0.5.28's threshold tuning prevents *new* sessions from hitting this; this release adds graceful recovery for sessions that already crossed the line. On context-overflow detection, the memory lifecycle calls a new `agent.softReset(threadId)` capability that trims the on-disk session jsonl to its most-recent N user turns (default 8, byte-capped at 250k), reloads the session, and queues a memory re-injection on the next turn. The agent loses verbatim message history for older turns but retains its durable context (MEMORY.md, daily front-page, soul.md). No more manual surgery on stuck sessions.
9
+ - New module exports: `softResetSessionFile()` and `isContextOverflowError()` in `src/agents/shared/session-repair.ts`. New optional `softReset?(threadId)` method on `AgentAdapter` interface (no-op when not implemented — backward-compatible). PiAdapter implements it via the existing `reloadSession` path.
10
+ - 20 new tests across `session-repair.test.ts` (file-level cut/preserve/repair semantics, error classifier) and `memory.test.ts` (lifecycle wiring — success/no-op/missing-capability/non-overflow-error/throws-during-recovery). 527 tests total.
11
+
12
+ ## [0.5.28] — 2026-05-14
13
+
14
+ ### Fixed
15
+ - **PR #126 actually shipped this time.** v0.5.26's CHANGELOG advertised the emergency-compact-loop fix, but the underlying PR (`fix/compact-loop-thresholds-and-thinking`) was still OPEN — only the version bump and self-update patch went out. Users on v0.5.26/v0.5.27 still hit `Summarization failed: prompt is too long: 212776 tokens > 200000 maximum` on overflowed sessions because `DEFAULT_HARD_TOKENS` was still 200k with no headroom clamp. This release contains the actual code change: `DEFAULT_HARD_TOKENS=150_000`, `DEFAULT_SOFT_TOKENS=130_000`, `COMPACT_HEADROOM_TOKENS=50_000`, plus `thinkingLevel='off'` forced inside `compactWithModel`. (#126)
16
+
5
17
  ## [0.5.27] — 2026-05-14
6
18
 
7
19
  ### Fixed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inceptionstack/roundhouse",
3
- "version": "0.5.27",
3
+ "version": "0.5.29",
4
4
  "type": "module",
5
5
  "description": "Multi-platform chat gateway that routes messages through a configured AI agent",
6
6
  "license": "MIT",
@@ -28,7 +28,7 @@ import {
28
28
 
29
29
  import type { AgentAdapter, AgentAdapterFactory, AgentMessage, AgentResponse, AgentStreamEvent, MessageContext } from "../../types";
30
30
  import { formatMessage, extractCustomMessage, customContentToText } from "./message-format";
31
- import { isToolPairingError, repairSessionFile } from "../shared/session-repair";
31
+ import { isToolPairingError, repairSessionFile, softResetSessionFile, type SoftResetReport } from "../shared/session-repair";
32
32
  import { SESSIONS_DIR } from "../../config";
33
33
  import { DEBUG_STREAM, threadIdToDir } from "../../util";
34
34
 
@@ -608,7 +608,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
608
608
 
609
609
  const agentState = (entry.session as any).agent?.state;
610
610
  let currentModel: any;
611
+ let currentThinkingLevel: any;
611
612
  let modelSwapped = false;
613
+ let thinkingSwapped = false;
612
614
 
613
615
  // Resolve and swap model for compact
614
616
  if (!agentState) {
@@ -627,6 +629,19 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
627
629
  modelSwapped = true;
628
630
  console.log(`[pi-agent] compact using model (in-memory): ${modelId}`);
629
631
  }
632
+
633
+ // Force thinking off for compact regardless of agent's default.
634
+ // Summarization doesn't benefit from reasoning, costs more tokens,
635
+ // and complicates the maxTokens math (adjustMaxTokensForThinking adds
636
+ // up to 16k thinking budget). Direct state mutation matches the model
637
+ // swap above and avoids setThinkingLevel(), which would persist to
638
+ // settings.json.
639
+ if (agentState.thinkingLevel && agentState.thinkingLevel !== "off") {
640
+ currentThinkingLevel = agentState.thinkingLevel;
641
+ agentState.thinkingLevel = "off";
642
+ thinkingSwapped = true;
643
+ console.log(`[pi-agent] compact forcing thinkingLevel=off (was ${currentThinkingLevel})`);
644
+ }
630
645
  }
631
646
 
632
647
  try {
@@ -640,7 +655,62 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
640
655
  if (modelSwapped) {
641
656
  agentState.model = currentModel;
642
657
  }
658
+ if (thinkingSwapped) {
659
+ agentState.thinkingLevel = currentThinkingLevel;
660
+ }
661
+ }
662
+ });
663
+ },
664
+
/**
 * Recover an overflowed session by trimming its on-disk jsonl to the most
 * recent user turns and reloading the session in place. Invoked by the
 * memory-lifecycle layer when compact fails with "prompt is too long",
 * i.e. the summarizer prompt itself no longer fits the context window.
 *
 * The work is serialized through `enqueue(threadId, …)`.
 *
 * @returns
 *  - `null` when no session is cached for `threadId`, or the session has
 *    no on-disk file (in-memory session: nothing to trim);
 *  - a report with `reset === false` when there was nothing to trim
 *    (no reload is performed);
 *  - otherwise the report of the trim, after the session was reloaded.
 *
 * If the reload (or disposing the previous session) throws, the cache
 * entry is removed so the next prompt() recreates the session; the report
 * is still returned.
 */
async softReset(threadId: string): Promise<SoftResetReport | null> {
  return enqueue(threadId, async () => {
    const cached = sessions.get(threadId);
    if (!cached) {
      return null;
    }

    const { sessionFile } = cached.session;
    if (!sessionFile) {
      console.warn(`[pi-agent] softReset: ${threadId} has no on-disk session file, skipping`);
      return null;
    }

    console.warn(`[pi-agent] softReset: trimming overflowed session ${sessionFile}`);
    const report = softResetSessionFile(sessionFile);

    if (report.reset) {
      console.warn(
        `[pi-agent] softReset: ${report.entriesBefore} → ${report.entriesAfter} entries, ${report.bytesBefore} → ${report.bytesAfter} bytes (${report.reason}). Backup: ${report.backupPath}`
      );

      // Reload so the trimmed file is re-read. On any failure drop the
      // cache entry rather than keep running against a disposed session.
      try {
        const reloaded = await reloadSession(cached, sessionFile);
        await cached.session.dispose();
        cached.session = reloaded.session;
      } catch (err) {
        console.error(`[pi-agent] softReset reload failed for ${threadId}:`, (err as Error).message);
        sessions.delete(threadId);
      }
    } else {
      console.log(`[pi-agent] softReset: nothing to trim (${report.reason})`);
    }

    return report;
  });
},
646
716
 
@@ -11,6 +11,8 @@ import {
11
11
  inspectSessionFile,
12
12
  repairSessionFile,
13
13
  isToolPairingError,
14
+ softResetSessionFile,
15
+ isContextOverflowError,
14
16
  } from './session-repair';
15
17
 
16
18
  // ---------- fixtures ----------
@@ -376,3 +378,175 @@ describe('session-repair', () => {
376
378
  });
377
379
  });
378
380
  });
381
+
382
+ // ============================================================
383
+ // softResetSessionFile
384
+ // ============================================================
385
+
describe('softResetSessionFile', () => {
  /**
   * One conversational turn: a user message followed by a plain-text
   * assistant reply. No tool calls, so cuts between turns are clean;
   * tool-pairing edge cases are covered by the repair tests.
   */
  function userTurn(idPrefix: string, parentId: string | null) {
    return [
      userMsg(`${idPrefix}u`, parentId, `text-${idPrefix}`),
      {
        type: 'message',
        id: `${idPrefix}a`,
        parentId: `${idPrefix}u`,
        timestamp: '2026-05-01T00:00:04Z',
        message: {
          role: 'assistant',
          content: [{ type: 'text', text: `reply-${idPrefix}` }],
          api: 'bedrock-converse-stream',
          provider: 'amazon-bedrock',
          model: 'claude',
          usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
          stopReason: 'endTurn',
          timestamp: 4,
        },
      },
    ];
  }

  /**
   * Append `count` chained turns (`${prefix}1` … `${prefix}${count}`) to
   * `entries`, the first one hanging off `firstParent`.
   */
  function appendTurns(entries: object[], prefix: string, count: number, firstParent: string | null): void {
    let parent = firstParent;
    for (let i = 1; i <= count; i++) {
      entries.push(...userTurn(`${prefix}${i}`, parent));
      parent = `${prefix}${i}a`;
    }
  }

  it('softResetSessionFile_OnSessionWithMoreTurnsThanTarget_KeepsHeaderAndRecentTurns', () => {
    // Arrange: 10 user turns, target keepRecentUserTurns=3.
    const entries: object[] = [HEADER, MODEL_CHANGE];
    appendTurns(entries, 't', 10, 'mc-1');
    const path = tmpJsonl(entries);

    // Act
    const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });

    // Assert: report indicates reset, file shrunk, backup written.
    expect(report.reset).toBe(true);
    expect(report.entriesAfter).toBeLessThan(report.entriesBefore);
    expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
    expect(report.backupPath).toBeDefined();
    expect(existsSync(report.backupPath!)).toBe(true);

    const trimmed = parseSessionFile(path);
    // Header always preserved.
    expect(trimmed[0].type).toBe('session');
    // Last 3 user turns present.
    const userIds = trimmed.filter(e => e.message?.role === 'user').map(e => e.id);
    expect(userIds).toEqual(['t8u', 't9u', 't10u']);
    // First kept entry's parentId reset to null (no dangling pointer).
    const firstAfterHeader = trimmed[1];
    expect(firstAfterHeader.parentId).toBeNull();
  });

  it('softResetSessionFile_OnSessionSmallerThanTarget_ReturnsResetFalseAndDoesNotMutate', () => {
    // Arrange: 2 user turns, target keepRecentUserTurns=8.
    const entries: object[] = [HEADER, MODEL_CHANGE, ...userTurn('a', 'mc-1'), ...userTurn('b', 'aa')];
    const path = tmpJsonl(entries);
    const before = readFileSync(path, 'utf8');

    // Act
    const report = softResetSessionFile(path, { keepRecentUserTurns: 8 });

    // Assert: no reset, file untouched, no backup.
    expect(report.reset).toBe(false);
    expect(report.backupPath).toBeUndefined();
    expect(readFileSync(path, 'utf8')).toBe(before);
  });

  it('softResetSessionFile_OnTinySession_ReturnsResetFalseWithReason', () => {
    // Arrange: only header.
    const path = tmpJsonl([HEADER]);

    // Act
    const report = softResetSessionFile(path);

    // Assert
    expect(report.reset).toBe(false);
    expect(report.reason).toContain('too-small');
  });

  it('softResetSessionFile_OnSessionWithOrphanedToolPairsAfterCut_AlsoRunsRepair', () => {
    // Arrange: a session where the kept tail contains a toolResult whose
    // toolCall sits in the older (dropped) section. After the cut the
    // toolResult is orphaned — soft-reset must clean it up via the
    // post-cut repair.
    const oldToolCall = assistantToolCall('a-old', 'mc-1', 'call-X');
    const orphanedResult = {
      type: 'message',
      id: 'tr-1',
      parentId: 'a-old',
      timestamp: '2026-05-01T00:00:05Z',
      message: { role: 'toolResult', toolCallId: 'call-X', content: 'ok', timestamp: 5 },
    };
    const entries: object[] = [HEADER, MODEL_CHANGE, userMsg('u-old', 'mc-1', 'old'), oldToolCall];
    // Five fresh turns: with keepRecentUserTurns=3 the cut lands on f3u,
    // so the kept tail is f3u … f5a.
    appendTurns(entries, 'f', 5, 'a-old');
    // Insert the orphaned result INSIDE the kept tail (right before f4u).
    // Inserting it before f2u would place it in the dropped section and
    // the test would never exercise the post-cut repair.
    const insertAt = entries.findIndex(e => (e as { id?: string }).id === 'f4u');
    entries.splice(insertAt, 0, orphanedResult);
    const path = tmpJsonl(entries);

    // Act
    const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });

    // Assert: reset succeeded AND post-cut repair fired.
    expect(report.reset).toBe(true);
    expect(report.postRepair).toBeDefined();
    // Final file is internally consistent (no orphans).
    expect(inspectSessionFile(path).hasOrphans).toBe(false);
  });

  it('softResetSessionFile_OnNonexistentFile_Throws', () => {
    // Arrange/Act/Assert: documents the precondition.
    expect(() => softResetSessionFile('/nonexistent/path.jsonl')).toThrow(/not found/);
  });

  it('softResetSessionFile_BytesCapHonored_StopsCutAtCap', () => {
    // Arrange: each turn is small but we set a tiny byte cap so we cut early.
    const entries: object[] = [HEADER, MODEL_CHANGE];
    appendTurns(entries, 't', 20, 'mc-1');
    const path = tmpJsonl(entries);

    // Act
    const report = softResetSessionFile(path, { keepRecentUserTurns: 100, maxBytes: 800 });

    // Assert: reset triggered by the byte cap. We asked for 100 turns we
    // don't have; a "fewer-turns" outcome would report reset=false, so the
    // only reason compatible with reset=true here is the byte cap.
    expect(report.reset).toBe(true);
    expect(report.reason).toMatch(/^byte-cap-/);
    expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
  });
});
528
+
529
+ // ============================================================
530
+ // isContextOverflowError
531
+ // ============================================================
532
+
describe('isContextOverflowError', () => {
  // [error message, expected classification]
  const messageCases: Array<[string, boolean]> = [
    ['prompt is too long: 212776 tokens > 200000 maximum', true],
    ['Validation error: input is too long', true],
    ['context length exceeded for this model', true],
    ['maximum context length reached', true],
    ['tokens > 200000 maximum', true],
    // Pairing error — handled by a different recovery path.
    ['toolUse without toolResult', false],
    ['random network failure', false],
    ['', false],
  ];

  it.each(messageCases)('classifies %p as overflow=%p', (msg, expected) => {
    const classified = isContextOverflowError(new Error(msg));
    expect(classified).toBe(expected);
  });

  it('returns false for null/undefined/non-Error inputs', () => {
    for (const input of [null, undefined, {}]) {
      expect(isContextOverflowError(input)).toBe(false);
    }
  });
});
@@ -286,6 +286,181 @@ export function repairSessionFile(path: string): SessionRepairReport {
286
286
  };
287
287
  }
288
288
 
289
+ // ── Soft reset (recovery from already-overflowed sessions) ──────────────
290
+
/**
 * When a session has grown past the model's context window, normal compact
 * cannot recover — the summarizer prompt itself overflows. Soft reset trims
 * the session jsonl on disk to its most-recent N user turns, drops everything
 * older, and re-runs the tool-pairing repair so what's left is internally
 * consistent.
 *
 * Trade-off: loses fidelity for older turns. The roundhouse memory layer
 * (MEMORY.md, daily front-page) re-injects on the next turn, so the agent
 * still has its durable context — just not the verbatim message history.
 *
 * Conservative defaults aim for ~30–40% of a 200k window so the next compact
 * has ample room to summarize.
 */
export interface SoftResetOptions {
  /** Keep at most this many user turns from the tail (default: 8). */
  keepRecentUserTurns?: number;
  /**
   * Byte budget for the kept tail, measured as UTF-8 bytes of the serialized
   * jsonl lines (default: 250_000 ≈ 60–80k tokens). The newest user turn is
   * always kept, even if it alone exceeds the budget.
   */
  maxBytes?: number;
}

export interface SoftResetReport {
  /** True when the file was rewritten; false for a no-op. */
  reset: boolean;
  /** Short machine-readable explanation of the outcome. */
  reason: string;
  entriesBefore: number;
  entriesAfter: number;
  bytesBefore: number;
  bytesAfter: number;
  /** Path of the pre-trim backup; only set when the file was rewritten. */
  backupPath?: string;
  /** Tool-pairing repair report on the trimmed file (orphans created by the cut). */
  postRepair?: SessionRepairReport;
}

/**
 * Find a safe cut index in the entries array. Walks backwards from the end
 * and returns the index of the first entry to KEEP (all entries in
 * [1, cutIdx) are dropped; the header at index 0 is preserved separately by
 * the writer and is not scanned here).
 *
 * The cut always sits on a user message, so the kept tail starts with a
 * complete turn:
 *  - turn limit: the cut is the Nth most-recent user message;
 *  - byte budget: once the accumulated UTF-8 size exceeds `maxBytes`, the
 *    cut is the oldest user message that still fit. If even the newest turn
 *    exceeds the budget, that single turn is kept anyway so the tail is
 *    never left without a user message.
 *
 * If the scan ends without hitting either limit, returns 1 (keep everything
 * except the header).
 *
 * @param entries             Parsed session entries, header at index 0.
 * @param keepRecentUserTurns Turn limit; values below 1 (or NaN) are treated as 1.
 * @param maxBytes            Byte budget for the kept tail.
 */
function findSoftResetCutIndex(
  entries: SessionFileEntry[],
  keepRecentUserTurns: number,
  maxBytes: number,
): { cutIdx: number; reason: string } {
  // ceil keeps the historical ">= target" semantics for fractional targets.
  const targetTurns = Math.max(1, Math.ceil(keepRecentUserTurns) || 1);

  let userTurnsSeen = 0;
  let bytesAccumulated = 0;
  // Index of the oldest user message accepted so far (-1: none yet).
  let oldestKeptTurnStart = -1;

  for (let i = entries.length - 1; i >= 1; i--) {
    const entry = entries[i];
    // Measure what actually lands on disk: UTF-8 bytes plus the newline.
    bytesAccumulated += Buffer.byteLength(JSON.stringify(entry), 'utf8') + 1;
    const overBudget = bytesAccumulated > maxBytes;

    // Budget exhausted and at least one whole turn already fits: cut at
    // that turn's user message instead of somewhere mid-turn.
    if (overBudget && oldestKeptTurnStart !== -1) {
      return { cutIdx: oldestKeptTurnStart, reason: `byte-cap-${bytesAccumulated}b` };
    }

    if (entry.type === 'message' && entry.message?.role === 'user') {
      userTurnsSeen++;
      oldestKeptTurnStart = i;
      if (overBudget) {
        // The newest turn alone is over budget (e.g. one turn dumped a
        // huge file). Keep it regardless.
        return { cutIdx: i, reason: `byte-cap-${bytesAccumulated}b` };
      }
      if (userTurnsSeen >= targetTurns) {
        return { cutIdx: i, reason: `kept-${userTurnsSeen}-user-turns` };
      }
    }
  }

  // Neither limit was reached — keep everything except the header.
  return { cutIdx: 1, reason: 'fewer-turns-than-target' };
}
360
+
/**
 * Soft-reset a pi-ai session jsonl file in place.
 *
 * Keeps the session header (entries[0]) plus the tail selected by
 * findSoftResetCutIndex() and drops everything in between. The first kept
 * tail entry is re-parented to `null` (when it carries a `parentId`) so it
 * does not point at a dropped entry. The original file is backed up with
 * backupFile() before the trimmed content is written with atomicWrite();
 * afterwards repairSessionFile() runs on the result because the cut may
 * have separated toolCall/toolResult pairs.
 *
 * @param path    Session jsonl path.
 * @param options Turn and byte limits (defaults: 8 user turns, 250_000 bytes).
 * @returns A report; `reset` is false (and the file untouched) when the
 *          session has fewer than 4 entries or there is nothing to drop.
 * @throws Error when `path` does not exist.
 */
export function softResetSessionFile(
  path: string,
  options: SoftResetOptions = {},
): SoftResetReport {
  if (!existsSync(path)) {
    throw new Error(`Session file not found: ${path}`);
  }

  const turnLimit = options.keepRecentUserTurns ?? 8;
  const byteLimit = options.maxBytes ?? 250_000;

  const allEntries = parseSessionFile(path);
  const bytesBefore = readFileSync(path).length;

  // Shared shape for every "nothing changed" outcome.
  const untouched = (why: string): SoftResetReport => ({
    reset: false,
    reason: why,
    entriesBefore: allEntries.length,
    entriesAfter: allEntries.length,
    bytesBefore,
    bytesAfter: bytesBefore,
  });

  // Header plus a couple of messages is the minimum worth resetting.
  if (allEntries.length < 4) {
    return untouched('session-too-small');
  }

  const cut = findSoftResetCutIndex(allEntries, turnLimit, byteLimit);

  // A cut at index 1 would drop nothing (index 0 is the header).
  if (cut.cutIdx <= 1) {
    return untouched(`cut-at-start (${cut.reason})`);
  }

  // Header + tail, with the tail's first entry turned into the new root.
  const kept = allEntries.slice(cut.cutIdx);
  if (kept.length > 0 && kept[0].parentId !== undefined) {
    kept[0] = { ...kept[0], parentId: null };
  }
  const trimmed = [allEntries[0], ...kept];

  const backupPath = backupFile(path);
  const serialized = trimmed.map(e => JSON.stringify(e)).join('\n') + '\n';
  atomicWrite(path, serialized);

  // A kept toolResult may now lack its toolCall (or vice versa); let the
  // regular repair pass clean that up.
  const postRepair = repairSessionFile(path);
  const bytesAfter = readFileSync(path).length;

  return {
    reset: true,
    reason: cut.reason,
    entriesBefore: allEntries.length,
    entriesAfter: trimmed.length - postRepair.droppedEntryIds.length,
    bytesBefore,
    bytesAfter,
    backupPath,
    postRepair,
  };
}
441
+
442
+ // ── Error classifiers ────────────────────────────────────────────────────
443
+
/**
 * Provider phrasings that mean "input exceeds the model's context window".
 * Compiled once at module load; none use the `g`/`y` flags, so sharing them
 * across calls carries no `lastIndex` state.
 */
const CONTEXT_OVERFLOW_PATTERNS: readonly RegExp[] = [
  /prompt is too long/i,
  /tokens?\s*>\s*\d+\s*maximum/i,
  /input is too long/i,
  /context length exceeded/i,
  /maximum context length/i,
];

/**
 * Detect whether an error from pi-ai / the model provider indicates the
 * session has grown past the model's context window (input > max).
 *
 * Triggers soft-reset recovery in the memory lifecycle. Intentionally narrow:
 * only matches the well-known overflow phrasings, not generic 4xx errors.
 *
 * @param err Anything caught from a `catch` clause. An Error's `message` is
 *            inspected when it is a string; other values are stringified.
 * @returns true when the text matches a known overflow phrasing. Never
 *          throws: values that cannot be stringified classify as false.
 */
export function isContextOverflowError(err: unknown): boolean {
  if (!err) return false;

  let text: string;
  try {
    const message = (err as { message?: unknown }).message;
    // String(err) throws for null-prototype objects; a classifier that
    // runs inside a catch block must not raise on its own.
    text = typeof message === 'string' ? message : String(err);
  } catch {
    return false;
  }

  return CONTEXT_OVERFLOW_PATTERNS.some(pattern => pattern.test(text));
}
463
+
289
464
  /**
290
465
  * Detect whether an error from pi-ai / the model provider indicates a
291
466
  * tool-pairing mismatch that can be recovered by session repair.
@@ -16,10 +16,41 @@ import { shouldInjectMemory, classifyContextPressure, isSoftFlushOnCooldown } fr
16
16
  import { buildMemoryInjection, injectMemoryIntoMessage } from "./inject";
17
17
  import { buildFlushPrompt } from "./prompts";
18
18
  import { bootstrapMemoryFiles } from "./bootstrap";
19
+ import { isContextOverflowError } from "../agents/shared/session-repair";
19
20
  import { appendFile, mkdir } from "node:fs/promises";
20
21
  import { join } from "node:path";
21
22
  import { homedir } from "node:os";
22
23
 
24
+ // ── Telemetry helper ─────────────────────────────────
25
+
/** One line of `compact-timing.jsonl`; the same field set for success and failure. */
interface CompactLogEntry {
  threadId: string;
  /** Compaction level as requested by the caller. */
  level: string;
  /** Compaction level the run actually used. */
  effectiveLevel: string;
  /** True when the flush step was skipped. */
  flushSkipped: boolean;
  /** Token counts reported by compact; null when unavailable (e.g. on failure). */
  tokensBefore: number | null;
  tokensAfter: number | null;
  /** Phase durations in milliseconds. */
  flushMs: number;
  compactMs: number;
  totalMs: number;
  /** Model id used for the run, or "default". */
  model: string;
  /** Discriminator between success and failure entries. */
  status: "ok" | "failed";
  /** Failure description; null on success. */
  error: string | null;
}

/**
 * Write one telemetry line to `~/.roundhouse/logs/compact-timing.jsonl`,
 * creating the directory if needed. Fire-and-forget: the caller is never
 * blocked, and a write failure is only reported via console.warn.
 *
 * Every entry carries the same fields (plus a `ts` timestamp added here),
 * with `status` as discriminator, so jsonl consumers never have to deal
 * with missing keys.
 */
function appendCompactLog(entry: CompactLogEntry): void {
  const logDir = join(homedir(), ".roundhouse", "logs");
  const line = JSON.stringify({ ts: new Date().toISOString(), ...entry }) + "\n";

  const persist = async (): Promise<void> => {
    try {
      await mkdir(logDir, { recursive: true });
      await appendFile(join(logDir, "compact-timing.jsonl"), line);
    } catch (err) {
      console.warn(`[memory] timing log write failed:`, (err as Error).message);
    }
  };

  // Deliberately not awaited.
  void persist();
}
53
+
23
54
  // ── Memory mode detection ────────────────────────────
24
55
 
25
56
  /**
@@ -246,11 +277,16 @@ export async function flushMemoryThenCompact(
246
277
  // "manual" level, attempting the flush in that condition will hit the same
247
278
  // 200k rejection. Deferring flush to a later (successful) turn is the safe
248
279
  // recovery path.
249
- const stuckInEmergency = (await loadThreadMemoryState(threadId)).pendingCompact === "emergency";
280
+ const stateBeforeCompact = await loadThreadMemoryState(threadId);
281
+ const stuckInEmergency = stateBeforeCompact.pendingCompact === "emergency";
250
282
  const skipFlush = effectiveLevel === "emergency" || stuckInEmergency;
251
283
 
284
+ // Hoisted so the catch block can report accurate flush vs compact timing
285
+ // (a failure during compact() would otherwise conflate the two phases).
286
+ let flushMs = 0;
287
+ let compactMs = 0;
288
+
252
289
  try {
253
- let flushMs = 0;
254
290
  if (!skipFlush) {
255
291
  // Step 1: flush
256
292
  const flushText = buildFlushPrompt(mode === "unknown" ? "full" : mode, effectiveLevel);
@@ -276,16 +312,18 @@ export async function flushMemoryThenCompact(
276
312
  const result = usedCompactModel
277
313
  ? await agent.compactWithModel!(threadId, flushModel!)
278
314
  : await agent.compact!(threadId);
279
- const compactMs = Date.now() - t1;
315
+ compactMs = Date.now() - t1;
280
316
  if (!result) return null;
281
317
 
282
- // Step 3: mark force re-inject (Full mode only)
318
+ // Step 3: mark force re-inject (Full mode only). Reuse the state we
319
+ // already loaded above; the compact step doesn't mutate memory-state
320
+ // (it mutates the pi session, a separate file), so the in-memory copy
321
+ // is still authoritative for our fields.
283
322
  if (mode !== "complement") {
284
- const state = await loadThreadMemoryState(threadId);
285
- state.forceInjectReason = "after-compact";
286
- state.lastCompactAt = new Date().toISOString();
287
- state.pendingCompact = undefined;
288
- await saveThreadMemoryState(threadId, state);
323
+ stateBeforeCompact.forceInjectReason = "after-compact";
324
+ stateBeforeCompact.lastCompactAt = new Date().toISOString();
325
+ stateBeforeCompact.pendingCompact = undefined;
326
+ await saveThreadMemoryState(threadId, stateBeforeCompact);
289
327
  }
290
328
 
291
329
  const totalMs = Date.now() - t0;
@@ -302,30 +340,84 @@ export async function flushMemoryThenCompact(
302
340
  const timing = { flushMs, compactMs, totalMs, model: usedCompactModel ? flushModel! : "default" };
303
341
  console.log(`[memory] flush+compact done for ${threadId}: ${result.tokensBefore} → ${result.tokensAfter ?? "?"} tokens | flush=${flushMs}ms compact=${compactMs}ms total=${totalMs}ms model=${timing.model}`);
304
342
 
305
- // Persist timing log for debugging (async, fire-and-forget)
306
- const logDir = join(homedir(), ".roundhouse", "logs");
307
- mkdir(logDir, { recursive: true })
308
- .then(() => {
309
- const entry = JSON.stringify({
310
- ts: new Date().toISOString(),
311
- threadId,
312
- level,
313
- tokensBefore: result.tokensBefore,
314
- tokensAfter: result.tokensAfter,
315
- ...timing,
316
- });
317
- return appendFile(join(logDir, "compact-timing.jsonl"), entry + "\n");
318
- })
319
- .catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
343
+ // Persist timing log for debugging (async, fire-and-forget).
344
+ // Schema is intentionally uniform across success and failure entries
345
+ // (status discriminator + same field set) so jsonl parsers don't have
346
+ // to special-case missing fields.
347
+ appendCompactLog({
348
+ threadId,
349
+ level,
350
+ effectiveLevel,
351
+ flushSkipped: skipFlush,
352
+ tokensBefore: result.tokensBefore,
353
+ tokensAfter: result.tokensAfter ?? null,
354
+ ...timing,
355
+ status: "ok",
356
+ error: null,
357
+ });
320
358
 
321
359
  return { ...result, timing };
322
360
  } catch (err) {
323
- console.error(`[memory] flush+compact failed for ${threadId}:`, (err as Error).message);
324
- // Mark pending so we retry on next turn
361
+ const errMsg = (err as Error).message;
362
+ console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
363
+
364
+ // Recovery path: when the session has grown past the model's context
365
+ // window, the summarizer prompt itself overflows and compact() throws
366
+ // "prompt is too long". Threshold tuning prevents *new* sessions from
367
+ // hitting this, but does nothing for sessions already past the line.
368
+ // Trim the on-disk session jsonl to its most recent N user turns and
369
+ // mark the next turn for a fresh memory injection. We do NOT retry
370
+ // compact inline — that would extend the thread lock for another long
371
+ // operation. The trimmed session is small enough that the next user
372
+ // turn proceeds normally; any soft pressure from injected memory will
373
+ // trigger a regular compact later.
374
+ let softResetAttempted = false;
375
+ let softResetSucceeded = false;
376
+ if (isContextOverflowError(err) && agent.softReset) {
377
+ softResetAttempted = true;
378
+ try {
379
+ await onProgress?.("♻️ Session overflowed — soft-resetting to recent turns...");
380
+ const report = await agent.softReset(threadId);
381
+ if (report?.reset) {
382
+ softResetSucceeded = true;
383
+ console.warn(`[memory] soft-reset recovered ${threadId} from overflow`);
384
+ } else {
385
+ console.warn(`[memory] soft-reset returned no-op for ${threadId} (${(report as { reason?: string } | null)?.reason ?? "unknown"})`);
386
+ }
387
+ } catch (resetErr) {
388
+ console.error(`[memory] soft-reset failed for ${threadId}:`, (resetErr as Error).message);
389
+ }
390
+ }
391
+
392
+ appendCompactLog({
393
+ threadId,
394
+ level,
395
+ effectiveLevel,
396
+ flushSkipped: skipFlush,
397
+ tokensBefore: null,
398
+ tokensAfter: null,
399
+ flushMs, // accurate: 0 if skipped or failed before flush completed
400
+ compactMs, // accurate: 0 if failed before/during compact
401
+ totalMs: Date.now() - t0,
402
+ model: flushModel ?? "default",
403
+ status: "failed",
404
+ error: (softResetAttempted
405
+ ? `${softResetSucceeded ? "soft-reset-recovered" : "soft-reset-failed"}: ${errMsg}`
406
+ : errMsg).slice(0, 500),
407
+ });
408
+
325
409
  try {
326
- const state = await loadThreadMemoryState(threadId);
327
- state.pendingCompact = effectiveLevel;
328
- await saveThreadMemoryState(threadId, state);
410
+ if (softResetSucceeded) {
411
+ // Soft reset cleared the overflow. Mark the next turn for memory
412
+ // re-injection so the agent has its durable context, and clear the
413
+ // pendingCompact flag — there's nothing left to compact now.
414
+ stateBeforeCompact.forceInjectReason = "after-soft-reset";
415
+ stateBeforeCompact.pendingCompact = undefined;
416
+ } else {
417
+ // Re-arm pendingCompact so the next turn retries.
418
+ stateBeforeCompact.pendingCompact = effectiveLevel;
419
+ }
420
+ await saveThreadMemoryState(threadId, stateBeforeCompact);
329
421
  } catch {}
330
422
  return null;
331
423
  }
@@ -10,12 +10,26 @@ import { formatDate } from "./files";
10
10
  // ── Defaults ─────────────────────────────────────────
11
11
 
12
12
  const DEFAULT_SOFT_PERCENT = 0.45;
13
- const DEFAULT_SOFT_TOKENS = 180_000;
13
+ const DEFAULT_SOFT_TOKENS = 130_000;
14
14
  const DEFAULT_HARD_PERCENT = 0.50;
15
- const DEFAULT_HARD_TOKENS = 200_000;
15
+ const DEFAULT_HARD_TOKENS = 150_000;
16
16
  const DEFAULT_EMERGENCY_THRESHOLD = 32_768;
17
17
  const DEFAULT_COOLDOWN_MS = 10 * 60_000; // 10 minutes
18
18
 
19
+ // Headroom reserved for the summarization payload itself when compact runs.
20
+ // The summarizer prompt serializes ALL discarded history (everything older
21
+ // than ~20k of recent tokens) plus scaffolding plus previous summary, then
22
+ // asks the model to summarize. If the prompt itself overflows the model
23
+ // context, compact() throws. 50k is the empirical headroom that fits a
24
+ // typical summarization prompt on Claude family.
25
+ const COMPACT_HEADROOM_TOKENS = 50_000;
26
+
27
+ // Why 130k/150k as the default absolute thresholds against a 200k window:
28
+ // see COMPACT_HEADROOM_TOKENS above and
29
+ // ~/.roundhouse/workspace/compaction-loop-diagnosis.md (Bug B).
30
+ // For smaller-window models, classifyContextPressure() clamps the absolute
31
+ // thresholds to `window - HEADROOM` so they never exceed the window.
32
+
19
33
  // ── Injection policy ─────────────────────────────────
20
34
 
21
35
  export interface InjectionDecision {
@@ -87,14 +101,21 @@ export function classifyContextPressure(
87
101
 
88
102
  const pctDecimal = percent != null ? percent / 100 : tokens / window;
89
103
 
104
+ // Clamp absolute thresholds so they never exceed `window - HEADROOM`.
105
+ // Defends against future smaller-window models where the configured
106
+ // 150k/130k absolute thresholds would otherwise sit above the window.
107
+ // The percent thresholds already scale with window naturally.
108
+ const headroom = COMPACT_HEADROOM_TOKENS;
109
+ const ceiling = Math.max(0, window - headroom);
110
+
90
111
  // Hard threshold
91
112
  const hardPct = config?.hardPercent ?? DEFAULT_HARD_PERCENT;
92
- const hardTok = config?.hardTokens ?? DEFAULT_HARD_TOKENS;
113
+ const hardTok = Math.min(config?.hardTokens ?? DEFAULT_HARD_TOKENS, ceiling);
93
114
  if (pctDecimal >= hardPct || tokens >= hardTok) return "hard";
94
115
 
95
- // Soft threshold
116
+ // Soft threshold (clamped one step below hard so soft fires first).
96
117
  const softPct = config?.softPercent ?? DEFAULT_SOFT_PERCENT;
97
- const softTok = config?.softTokens ?? DEFAULT_SOFT_TOKENS;
118
+ const softTok = Math.min(config?.softTokens ?? DEFAULT_SOFT_TOKENS, Math.max(0, hardTok - 1));
98
119
  if (pctDecimal >= softPct || tokens >= softTok) return "soft";
99
120
 
100
121
  return "none";
@@ -56,7 +56,7 @@ export interface ThreadMemoryState {
56
56
  /** Local date when memory was last injected (detects day boundary) */
57
57
  lastSeenLocalDate?: string;
58
58
  /** Force re-injection on next turn */
59
- forceInjectReason?: "new-session" | "after-compact" | "manual";
59
+ forceInjectReason?: "new-session" | "after-compact" | "after-soft-reset" | "manual";
60
60
  /** When last compaction happened */
61
61
  lastCompactAt?: string;
62
62
  /** Pending compaction level (from interrupted flush) */
package/src/types.ts CHANGED
@@ -122,6 +122,17 @@ export interface AgentAdapter {
122
122
  /** Compact with a specific model. */
123
123
  compactWithModel?(threadId: string, modelId: string): Promise<{ tokensBefore: number; tokensAfter: number | null } | null>;
124
124
 
125
+ /**
126
+ * Soft-reset an overflowed session by trimming on-disk history to the
127
+ * most-recent few turns. Called by memory lifecycle when compact() fails
128
+ * because the session itself is too large for the model's context window.
129
+ *
130
+ * Returns a report describing what was trimmed (shape is adapter-specific
131
+ * but always has `reset: boolean`), or null if not applicable.
132
+ * Adapters without on-disk sessions (in-memory only) should return null.
133
+ */
134
+ softReset?(threadId: string): Promise<{ reset: boolean } | null>;
135
+
125
136
  /** Abort the current agent run for a thread. */
126
137
  abort?(threadId: string): Promise<void>;
127
138