@inceptionstack/roundhouse 0.5.28 → 0.5.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  All notable changes to `@inceptionstack/roundhouse` are documented here.
4
4
 
5
+ ## [0.5.30] — 2026-05-14
6
+
7
+ ### Fixed
8
+ - **Soft-reset robustness fixes from codex review of v0.5.29:**
9
+ - **P1 — byte-cap could cut mid-turn.** When `findSoftResetCutIndex()` hit the byte budget before reaching `keepRecentUserTurns`, it returned `i + 1` which could land on an assistant reply or toolResult whose user prompt was about to be dropped. The kept tail then started mid-turn and tool-pairing repair didn't fix that (only orphans, not turn boundaries). Fixed: byte-cap path now snaps to the most-recent user-message boundary we've walked through.
10
+ - **P2 — byte cap measured in JS code units, not real bytes.** `JSON.stringify(e).length` counts UTF-16 code units; non-ASCII content (emoji, CJK) overshot the advertised 250k ceiling 2–3x. Now uses `Buffer.byteLength(..., 'utf8')` end-to-end so reported `bytesAfter` and the cap decision both reflect actual file bytes.
11
+ - **P2 — trim + repair was not atomic end-to-end.** Old flow wrote the trimmed file, then called `repairSessionFile()` which re-backed-up the *already-trimmed* file and rewrote it again. A crash between the two writes left a partial state and lost the true original. Refactored: extracted `repairEntriesInMemory()` so trim + tool-pair repair compose in memory and land as a single backup + atomic rename.
12
+ - **P2 — `isContextOverflowError()` only inspected top-level `.message`.** Wrapped provider errors (`err.cause.message`, Bedrock `ValidationException` carrying overflow text in nested SDK fields) fell through to re-arming `pendingCompact` instead of triggering recovery. Now mirrors `isToolPairingError()`'s nested handling: walks the `cause` chain (bounded, cycle-safe) and stringify-searches gated on a 4xx/`ValidationException` shape so we don't false-positive on unrelated 5xx noise.
13
+ - 7 regression tests added (534 total passing): byte-cap user-boundary snap, UTF-8 byte accounting, single-atomic-write backup integrity, wrapped-cause classification, Bedrock validation classification, false-positive gating, circular-cause safety.
14
+
15
+ ## [0.5.29] — 2026-05-14
16
+
17
+ ### Added
18
+ - **Soft-reset recovery for already-overflowed sessions.** When a session has grown past the model's context window, normal compact cannot recover — the summarizer prompt itself overflows and `compact()` throws `prompt is too long: N > max`. v0.5.28's threshold tuning prevents *new* sessions from hitting this; this release adds graceful recovery for sessions that already crossed the line. On context-overflow detection, the memory lifecycle calls a new `agent.softReset(threadId)` capability that trims the on-disk session jsonl to its most-recent N user turns (default 8, byte-capped at 250k), reloads the session, and queues a memory re-injection on the next turn. The agent loses verbatim message history for older turns but retains its durable context (MEMORY.md, daily front-page, soul.md). No more manual surgery on stuck sessions.
19
+ - New module exports: `softResetSessionFile()` and `isContextOverflowError()` in `src/agents/shared/session-repair.ts`. New optional `softReset?(threadId)` method on `AgentAdapter` interface (no-op when not implemented — backward-compatible). PiAdapter implements it via the existing `reloadSession` path.
20
+ - 20 new tests across `session-repair.test.ts` (file-level cut/preserve/repair semantics, error classifier) and `memory.test.ts` (lifecycle wiring — success/no-op/missing-capability/non-overflow-error/throws-during-recovery). 527 tests total.
21
+
5
22
  ## [0.5.28] — 2026-05-14
6
23
 
7
24
  ### Fixed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inceptionstack/roundhouse",
3
- "version": "0.5.28",
3
+ "version": "0.5.30",
4
4
  "type": "module",
5
5
  "description": "Multi-platform chat gateway that routes messages through a configured AI agent",
6
6
  "license": "MIT",
@@ -28,7 +28,7 @@ import {
28
28
 
29
29
  import type { AgentAdapter, AgentAdapterFactory, AgentMessage, AgentResponse, AgentStreamEvent, MessageContext } from "../../types";
30
30
  import { formatMessage, extractCustomMessage, customContentToText } from "./message-format";
31
- import { isToolPairingError, repairSessionFile } from "../shared/session-repair";
31
+ import { isToolPairingError, repairSessionFile, softResetSessionFile, type SoftResetReport } from "../shared/session-repair";
32
32
  import { SESSIONS_DIR } from "../../config";
33
33
  import { DEBUG_STREAM, threadIdToDir } from "../../util";
34
34
 
@@ -662,6 +662,58 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
662
662
  });
663
663
  },
664
664
 
665
+ /**
666
+ * Soft-reset an overflowed session: trim the on-disk jsonl to its most
667
+ * recent N user turns, then reload the session in place. Used by the
668
+ * memory-lifecycle layer when compact fails with "prompt is too long"
669
+ * — the session has grown past the model's context window and the
670
+ * summarizer prompt itself can no longer fit.
671
+ *
672
+ * Returns the soft-reset report (or null if no session for threadId).
673
+ * Behavior:
674
+ * - In-memory session: returns null (nothing to trim on disk).
675
+ * - Already-trimmed session: report.reset === false, no reload.
676
+ * - Otherwise: trims file, reloads session, returns report.
677
+ *
678
+ * On reload failure, the SessionEntry is dropped from the cache so the
679
+ * next prompt() recreates it cleanly.
680
+ */
681
+ async softReset(threadId: string): Promise<SoftResetReport | null> {
682
+ return enqueue(threadId, async () => {
683
+ const entry = sessions.get(threadId);
684
+ if (!entry) return null;
685
+ const sessionFile = entry.session.sessionFile;
686
+ if (!sessionFile) {
687
+ console.warn(`[pi-agent] softReset: ${threadId} has no on-disk session file, skipping`);
688
+ return null;
689
+ }
690
+
691
+ console.warn(`[pi-agent] softReset: trimming overflowed session ${sessionFile}`);
692
+ const report = softResetSessionFile(sessionFile);
693
+ if (!report.reset) {
694
+ console.log(`[pi-agent] softReset: nothing to trim (${report.reason})`);
695
+ return report;
696
+ }
697
+ console.warn(
698
+ `[pi-agent] softReset: ${report.entriesBefore} → ${report.entriesAfter} entries, ` +
699
+ `${report.bytesBefore} → ${report.bytesAfter} bytes (${report.reason}). Backup: ${report.backupPath}`
700
+ );
701
+
702
+ // Reload the session so pi-ai re-reads the trimmed file. Drop the
703
+ // cache entry on failure so the next prompt() recreates from scratch
704
+ // rather than running against the disposed session.
705
+ try {
706
+ const reloaded = await reloadSession(entry, sessionFile);
707
+ await entry.session.dispose();
708
+ entry.session = reloaded.session;
709
+ } catch (err) {
710
+ console.error(`[pi-agent] softReset reload failed for ${threadId}:`, (err as Error).message);
711
+ sessions.delete(threadId);
712
+ }
713
+ return report;
714
+ });
715
+ },
716
+
665
717
  async abort(threadId: string): Promise<void> {
666
718
  const entry = sessions.get(threadId);
667
719
  if (entry) {
@@ -11,6 +11,8 @@ import {
11
11
  inspectSessionFile,
12
12
  repairSessionFile,
13
13
  isToolPairingError,
14
+ softResetSessionFile,
15
+ isContextOverflowError,
14
16
  } from './session-repair';
15
17
 
16
18
  // ---------- fixtures ----------
@@ -376,3 +378,315 @@ describe('session-repair', () => {
376
378
  });
377
379
  });
378
380
  });
381
+
382
+ // ============================================================
383
+ // softResetSessionFile
384
+ // ============================================================
385
+
386
+ describe('softResetSessionFile', () => {
387
+ function userTurn(idPrefix: string, parentId: string | null) {
388
+ // A user turn = user msg + assistant text reply (no tool calls, so cuts
389
+ // are clean; tool-pairing edge cases are covered by repair tests).
390
+ return [
391
+ userMsg(`${idPrefix}u`, parentId, `text-${idPrefix}`),
392
+ {
393
+ type: 'message',
394
+ id: `${idPrefix}a`,
395
+ parentId: `${idPrefix}u`,
396
+ timestamp: '2026-05-01T00:00:04Z',
397
+ message: {
398
+ role: 'assistant',
399
+ content: [{ type: 'text', text: `reply-${idPrefix}` }],
400
+ api: 'bedrock-converse-stream',
401
+ provider: 'amazon-bedrock',
402
+ model: 'claude',
403
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
404
+ stopReason: 'endTurn',
405
+ timestamp: 4,
406
+ },
407
+ },
408
+ ];
409
+ }
410
+
411
+ it('softResetSessionFile_OnSessionWithMoreTurnsThanTarget_KeepsHeaderAndRecentTurns', () => {
412
+ // Arrange: 10 user turns, target keepRecentUserTurns=3.
413
+ const entries: object[] = [HEADER, MODEL_CHANGE];
414
+ let parent: string | null = 'mc-1';
415
+ for (let i = 1; i <= 10; i++) {
416
+ const turn = userTurn(`t${i}`, parent);
417
+ entries.push(...turn);
418
+ parent = `t${i}a`;
419
+ }
420
+ const path = tmpJsonl(entries);
421
+
422
+ // Act
423
+ const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });
424
+
425
+ // Assert: report indicates reset, file shrunk, header preserved, last 3 user msgs present.
426
+ expect(report.reset).toBe(true);
427
+ expect(report.entriesAfter).toBeLessThan(report.entriesBefore);
428
+ expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
429
+ expect(report.backupPath).toBeDefined();
430
+ expect(existsSync(report.backupPath!)).toBe(true);
431
+
432
+ const trimmed = parseSessionFile(path);
433
+ // Header always preserved.
434
+ expect(trimmed[0].type).toBe('session');
435
+ // Last 3 user turns present.
436
+ const userIds = trimmed.filter(e => e.message?.role === 'user').map(e => e.id);
437
+ expect(userIds).toEqual(['t8u', 't9u', 't10u']);
438
+ // First kept entry's parentId reset to null (no dangling pointer).
439
+ const firstAfterHeader = trimmed[1];
440
+ expect(firstAfterHeader.parentId).toBeNull();
441
+ });
442
+
443
+ it('softResetSessionFile_OnSessionSmallerThanTarget_ReturnsResetFalseAndDoesNotMutate', () => {
444
+ // Arrange: 2 user turns, target keepRecentUserTurns=8.
445
+ const entries: object[] = [HEADER, MODEL_CHANGE, ...userTurn('a', 'mc-1'), ...userTurn('b', 'aa')];
446
+ const path = tmpJsonl(entries);
447
+ const before = readFileSync(path, 'utf8');
448
+
449
+ // Act
450
+ const report = softResetSessionFile(path, { keepRecentUserTurns: 8 });
451
+
452
+ // Assert: no reset, file untouched, no backup.
453
+ expect(report.reset).toBe(false);
454
+ expect(report.backupPath).toBeUndefined();
455
+ expect(readFileSync(path, 'utf8')).toBe(before);
456
+ });
457
+
458
+ it('softResetSessionFile_OnTinySession_ReturnsResetFalseWithReason', () => {
459
+ // Arrange: only header.
460
+ const path = tmpJsonl([HEADER]);
461
+
462
+ // Act
463
+ const report = softResetSessionFile(path);
464
+
465
+ // Assert
466
+ expect(report.reset).toBe(false);
467
+ expect(report.reason).toContain('too-small');
468
+ });
469
+
470
+ it('softResetSessionFile_OnSessionWithOrphanedToolPairsAfterCut_AlsoRunsRepair', () => {
471
+ // Arrange: a session where the tail contains a toolResult whose toolCall
472
+ // sits in the older (dropped) section. After the cut the toolResult is
473
+ // orphaned — soft-reset must clean it up via the post-cut repair.
474
+ const oldToolCall = assistantToolCall('a-old', 'mc-1', 'call-X');
475
+ const orphanedResult = {
476
+ type: 'message',
477
+ id: 'tr-1',
478
+ parentId: 'a-old',
479
+ timestamp: '2026-05-01T00:00:05Z',
480
+ message: { role: 'toolResult', toolCallId: 'call-X', content: 'ok', timestamp: 5 },
481
+ };
482
+ const entries: object[] = [HEADER, MODEL_CHANGE, userMsg('u-old', 'mc-1', 'old'), oldToolCall];
483
+ let parent: string | null = 'a-old';
484
+ // Push 5 fresh turns so the cut leaves us in tail.
485
+ for (let i = 1; i <= 5; i++) {
486
+ entries.push(...userTurn(`f${i}`, parent));
487
+ parent = `f${i}a`;
488
+ }
489
+ // Insert the orphaned result mid-tail (kept by cut, but call is dropped).
490
+ entries.splice(6, 0, orphanedResult);
491
+ const path = tmpJsonl(entries);
492
+
493
+ // Act
494
+ const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });
495
+
496
+ // Assert: reset succeeded AND post-cut repair fired.
497
+ expect(report.reset).toBe(true);
498
+ expect(report.postRepair).toBeDefined();
499
+ // Final file is internally consistent (no orphans).
500
+ expect(inspectSessionFile(path).hasOrphans).toBe(false);
501
+ });
502
+
503
+ it('softResetSessionFile_OnNonexistentFile_Throws', () => {
504
+ // Arrange/Act/Assert: documents the precondition.
505
+ expect(() => softResetSessionFile('/nonexistent/path.jsonl')).toThrow(/not found/);
506
+ });
507
+
508
+ it('softResetSessionFile_ByteCapHit_SnapsToUserTurnBoundary_NeverStartsMidTurn', () => {
509
+ // Regression test for codex P1: byte-cap path used to return `i + 1`
510
+ // which could land mid-turn (assistant reply or toolResult with no user
511
+ // prompt above it). Fixed to snap to the most-recent user-message index.
512
+ // Arrange: many small turns, byte cap forces an early cut.
513
+ const entries: object[] = [HEADER, MODEL_CHANGE];
514
+ let parent: string | null = 'mc-1';
515
+ for (let i = 1; i <= 30; i++) {
516
+ entries.push(...userTurn(`t${i}`, parent));
517
+ parent = `t${i}a`;
518
+ }
519
+ const path = tmpJsonl(entries);
520
+
521
+ // Act: very tight byte budget so cap fires before keepRecentUserTurns reached.
522
+ const report = softResetSessionFile(path, { keepRecentUserTurns: 100, maxBytes: 600 });
523
+
524
+ // Assert: reset happened AND first kept entry is a user message.
525
+ expect(report.reset).toBe(true);
526
+ const trimmed = parseSessionFile(path);
527
+ expect(trimmed[0].type).toBe('session'); // header preserved
528
+ expect(trimmed[1].message?.role).toBe('user'); // first kept = user turn
529
+ expect(trimmed[1].parentId).toBeNull(); // re-parented
530
+ });
531
+
532
+ it('softResetSessionFile_NonAsciiContent_ReportedBytesMatchActualFileBytes', () => {
533
+ // Regression test for codex P2: trim used JSON.stringify(e).length
534
+ // (UTF-16 code units) but reported bytesAfter from real file bytes.
535
+ // After fix, both use Buffer.byteLength(..., 'utf8').
536
+ // Arrange: turns containing multi-byte UTF-8 (most of these emoji are 4 bytes
537
+ // and 2 UTF-16 code units; ✨ is 3 bytes and 1 code unit — a 2–3x discrepancy).
538
+ const entries: object[] = [HEADER, MODEL_CHANGE];
539
+ const emojis = '🚀🔥🎉✨💡'.repeat(20); // 100 emoji ≈ 380 UTF-8 bytes (180 UTF-16 code units) per message
540
+ let parent: string | null = 'mc-1';
541
+ for (let i = 1; i <= 20; i++) {
542
+ entries.push(
543
+ userMsg(`t${i}u`, parent, `${emojis} text-${i}`),
544
+ {
545
+ type: 'message', id: `t${i}a`, parentId: `t${i}u`,
546
+ timestamp: '2026-05-01T00:00:04Z',
547
+ message: {
548
+ role: 'assistant',
549
+ content: [{ type: 'text', text: `${emojis} reply-${i}` }],
550
+ api: 'bedrock-converse-stream', provider: 'amazon-bedrock', model: 'claude',
551
+ usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
552
+ stopReason: 'endTurn', timestamp: 4,
553
+ },
554
+ },
555
+ );
556
+ parent = `t${i}a`;
557
+ }
558
+ const path = tmpJsonl(entries);
559
+
560
+ // Act
561
+ const report = softResetSessionFile(path, { keepRecentUserTurns: 100, maxBytes: 2000 });
562
+
563
+ // Assert: reported bytesAfter matches actual file bytes (true UTF-8 size).
564
+ expect(report.reset).toBe(true);
565
+ const actualBytes = readFileSync(path).length;
566
+ expect(report.bytesAfter).toBe(actualBytes);
567
+ // And we honored the cap (allow some slack for snap-to-user-boundary).
568
+ expect(report.bytesAfter).toBeLessThan(4000);
569
+ });
570
+
571
+ it('softResetSessionFile_OnSingleAtomicWrite_OriginalBackupIsRecoverable', () => {
572
+ // Regression test for codex P2: previously trim wrote once, then
573
+ // repairSessionFile() wrote again with its OWN backup of the
574
+ // already-trimmed file. After fix, only one backup exists and it's
575
+ // the true original.
576
+ // Arrange: session with orphaned tool pair so post-cut repair fires.
577
+ const oldToolCall = assistantToolCall('a-old', 'mc-1', 'call-X');
578
+ const orphanedResult = {
579
+ type: 'message', id: 'tr-1', parentId: 'a-old',
580
+ timestamp: '2026-05-01T00:00:05Z',
581
+ message: { role: 'toolResult', toolCallId: 'call-X', content: 'ok', timestamp: 5 },
582
+ };
583
+ const entries: object[] = [HEADER, MODEL_CHANGE, userMsg('u-old', 'mc-1', 'old'), oldToolCall];
584
+ let parent: string | null = 'a-old';
585
+ for (let i = 1; i <= 5; i++) {
586
+ entries.push(...userTurn(`f${i}`, parent));
587
+ parent = `f${i}a`;
588
+ }
589
+ entries.splice(6, 0, orphanedResult);
590
+ const path = tmpJsonl(entries);
591
+ const originalBytes = readFileSync(path);
592
+
593
+ // Act
594
+ const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });
595
+
596
+ // Assert: backup contents = TRUE original (pre-trim, pre-repair),
597
+ // not an intermediate trimmed-but-unrepaired state.
598
+ expect(report.reset).toBe(true);
599
+ expect(report.backupPath).toBeDefined();
600
+ const backup = readFileSync(report.backupPath!);
601
+ expect(backup.equals(originalBytes)).toBe(true);
602
+ // Final on-disk file is internally consistent.
603
+ expect(inspectSessionFile(path).hasOrphans).toBe(false);
604
+ });
605
+
606
+ it('softResetSessionFile_BytesCapHonored_StopsCutAtCap', () => {
607
+ // Arrange: each turn is small but we set a tiny byte cap so we cut early.
608
+ const entries: object[] = [HEADER, MODEL_CHANGE];
609
+ let parent: string | null = 'mc-1';
610
+ for (let i = 1; i <= 20; i++) {
611
+ entries.push(...userTurn(`t${i}`, parent));
612
+ parent = `t${i}a`;
613
+ }
614
+ const path = tmpJsonl(entries);
615
+
616
+ // Act
617
+ const report = softResetSessionFile(path, { keepRecentUserTurns: 100, maxBytes: 800 });
618
+
619
+ // Assert: reset triggered by byte cap (we asked for 100 turns we don't have,
620
+ // but byte cap kicks in first).
621
+ expect(report.reset).toBe(true);
622
+ expect(report.reason).toMatch(/byte-cap|fewer-turns/);
623
+ expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
624
+ });
625
+ });
626
+
627
+ // ============================================================
628
+ // isContextOverflowError
629
+ // ============================================================
630
+
631
+ describe('isContextOverflowError', () => {
632
+ it.each([
633
+ ['prompt is too long: 212776 tokens > 200000 maximum', true],
634
+ ['Validation error: input is too long', true],
635
+ ['context length exceeded for this model', true],
636
+ ['maximum context length reached', true],
637
+ ['tokens > 200000 maximum', true],
638
+ ['toolUse without toolResult', false], // pairing error — different recovery
639
+ ['random network failure', false],
640
+ ['', false],
641
+ ])('classifies %p as overflow=%p', (msg, expected) => {
642
+ expect(isContextOverflowError(new Error(msg))).toBe(expected);
643
+ });
644
+
645
+ it('returns false for null/undefined/non-Error inputs', () => {
646
+ expect(isContextOverflowError(null)).toBe(false);
647
+ expect(isContextOverflowError(undefined)).toBe(false);
648
+ expect(isContextOverflowError({})).toBe(false);
649
+ });
650
+
651
+ it('classifies overflow when text lives in err.cause.message (wrapped SDK error)', () => {
652
+ // Regression test for codex P2: wrapped provider errors used to fall
653
+ // through to re-arming pendingCompact. After fix, cause-chain is walked.
654
+ const inner = new Error('prompt is too long: 212776 tokens > 200000 maximum');
655
+ const outer = new Error('Summarization failed');
656
+ (outer as { cause?: unknown }).cause = inner;
657
+ expect(isContextOverflowError(outer)).toBe(true);
658
+ });
659
+
660
+ it('classifies overflow on Bedrock ValidationException with nested overflow text', () => {
661
+ // Regression test: Bedrock SDK can carry the useful text in nested
662
+ // $metadata or stringify-only fields. We only stringify-search when
663
+ // the error LOOKS like a 4xx validation (mirrors isToolPairingError).
664
+ const err = Object.assign(new Error('validation failed'), {
665
+ name: 'ValidationException',
666
+ $metadata: { httpStatusCode: 400 },
667
+ detail: { reason: 'prompt is too long' },
668
+ });
669
+ expect(isContextOverflowError(err)).toBe(true);
670
+ });
671
+
672
+ it('does NOT stringify-search arbitrary errors that contain overflow keywords', () => {
673
+ // Negative case: gating prevents false-positives on unrelated 5xx errors
674
+ // whose payload happens to contain trigger phrases.
675
+ const err = Object.assign(new Error('internal error'), {
676
+ name: 'InternalServerError',
677
+ $metadata: { httpStatusCode: 500 },
678
+ diagnostics: 'log line: prompt is too long check disabled',
679
+ });
680
+ expect(isContextOverflowError(err)).toBe(false);
681
+ });
682
+
683
+ it('does not loop forever on circular cause chains', () => {
684
+ // Safety: cause walk is bounded.
685
+ const a = new Error('outer');
686
+ const b = new Error('inner');
687
+ (a as { cause?: unknown }).cause = b;
688
+ (b as { cause?: unknown }).cause = a; // cycle
689
+ expect(() => isContextOverflowError(a)).not.toThrow();
690
+ expect(isContextOverflowError(a)).toBe(false);
691
+ });
692
+ });
@@ -246,46 +246,292 @@ export function inspectSessionFile(path: string): {
246
246
  *
247
247
  * @returns report describing what was repaired
248
248
  */
249
- export function repairSessionFile(path: string): SessionRepairReport {
250
- if (!existsSync(path)) {
251
- throw new Error(`Session file not found: ${path}`);
252
- }
253
-
254
- const entries = parseSessionFile(path);
249
+ /**
250
+ * Pure in-memory tool-pairing repair. Takes entries, returns repaired entries
251
+ * + a report. Does not touch the filesystem. Used directly by
252
+ * `softResetSessionFile` so trim + repair land as a single atomic write, and
253
+ * via a thin wrapper by `repairSessionFile` for on-disk repair.
254
+ */
255
+ function repairEntriesInMemory(entries: SessionFileEntry[]): {
256
+ entries: SessionFileEntry[];
257
+ report: SessionRepairReport;
258
+ } {
255
259
  const { messages } = extractMessages(entries);
256
260
  const validation = validateToolPairing(messages);
257
261
 
258
262
  if (validation.isValid) {
259
263
  return {
260
- repaired: false,
261
- droppedEntryIds: [],
262
- droppedToolCallIds: [],
263
- droppedToolResultIds: [],
264
- totalEntries: entries.length,
264
+ entries,
265
+ report: {
266
+ repaired: false,
267
+ droppedEntryIds: [],
268
+ droppedToolCallIds: [],
269
+ droppedToolResultIds: [],
270
+ totalEntries: entries.length,
271
+ },
265
272
  };
266
273
  }
267
274
 
268
275
  const orphanedCalls = new Set(validation.orphanedToolCallIds);
269
276
  const orphanedResults = new Set(validation.orphanedToolResultIds);
270
-
271
277
  const { entriesToDrop, entriesToEdit } = findEntriesToDrop(entries, orphanedCalls, orphanedResults);
272
278
  const edited = applyEntryEdits(entries, entriesToEdit);
273
279
  const kept = reparentDroppedEntries(edited, entriesToDrop);
274
280
 
281
+ return {
282
+ entries: kept,
283
+ report: {
284
+ repaired: true,
285
+ droppedEntryIds: Array.from(entriesToDrop),
286
+ droppedToolCallIds: validation.orphanedToolCallIds,
287
+ droppedToolResultIds: validation.orphanedToolResultIds,
288
+ totalEntries: entries.length,
289
+ },
290
+ };
291
+ }
292
+
293
+ export function repairSessionFile(path: string): SessionRepairReport {
294
+ if (!existsSync(path)) {
295
+ throw new Error(`Session file not found: ${path}`);
296
+ }
297
+
298
+ const entries = parseSessionFile(path);
299
+ const { entries: repaired, report } = repairEntriesInMemory(entries);
300
+
301
+ if (!report.repaired) return report;
302
+
303
+ const backupPath = backupFile(path);
304
+ const newContent = repaired.map(e => JSON.stringify(e)).join('\n') + '\n';
305
+ atomicWrite(path, newContent);
306
+
307
+ return { ...report, backupPath };
308
+ }
309
+
310
+ // ── Soft reset (recovery from already-overflowed sessions) ──────────────
311
+
312
+ /**
313
+ * When a session has grown past the model's context window, normal compact
314
+ * cannot recover — the summarizer prompt itself overflows. Soft reset trims
315
+ * the session jsonl on disk to its most-recent N user turns, drops everything
316
+ * older, and re-runs the tool-pairing repair so what's left is internally
317
+ * consistent.
318
+ *
319
+ * Trade-off: loses fidelity for older turns. The roundhouse memory layer
320
+ * (MEMORY.md, daily front-page) re-injects on the next turn, so the agent
321
+ * still has its durable context — just not the verbatim message history.
322
+ *
323
+ * Conservative defaults aim for ~30–40% of a 200k window so the next compact
324
+ * has ample room to summarize.
325
+ */
326
+ export interface SoftResetOptions {
327
+ /** Keep at most this many user turns from the tail (default: 8). */
328
+ keepRecentUserTurns?: number;
329
+ /** Hard cap on jsonl bytes after trim (default: 250_000 ≈ 60–80k tokens). */
330
+ maxBytes?: number;
331
+ }
332
+
333
+ export interface SoftResetReport {
334
+ reset: boolean;
335
+ reason: string;
336
+ entriesBefore: number;
337
+ entriesAfter: number;
338
+ bytesBefore: number;
339
+ bytesAfter: number;
340
+ backupPath?: string;
341
+ /** Tool-pairing repair report on the trimmed file (orphans created by the cut). */
342
+ postRepair?: SessionRepairReport;
343
+ }
344
+
345
+ /**
346
+ * Find a safe cut index in the entries array. Walk backwards from the end
347
+ * looking for user message entries; the cut sits *at* the Nth most-recent
348
+ * user message we encounter (so the kept tail starts on a user turn).
349
+ * Returns the index of the first entry to KEEP (i.e. all entries[0..cutIdx)
350
+ * are dropped).
351
+ *
352
+ * Byte-cap path: if we exceed the byte budget before reaching N user turns,
353
+ * we still snap the cut to the most-recent user-message boundary we've seen.
354
+ * That guarantees the kept tail always starts with a user message — never an
355
+ * orphaned assistant reply or toolResult whose user prompt was dropped.
356
+ *
357
+ * If the scan ends without reaching N user turns or tripping the byte cap
358
+ * (including when there are no user messages at all), returns cutIdx 1, which
359
+ * the caller's `cutIdx <= 1` gate treats as a no-op (reset: false).
360
+ */
361
+ function findSoftResetCutIndex(
362
+ entries: SessionFileEntry[],
363
+ keepRecentUserTurns: number,
364
+ maxBytes: number,
365
+ ): { cutIdx: number; reason: string } {
366
+ let userTurnsSeen = 0;
367
+ let bytesAccumulated = 0;
368
+ /** Most recent user-message index we've walked through, or -1 if none yet. */
369
+ let lastUserIdx = -1;
370
+ // Scan tail-to-head, stop when we've collected enough user turns OR exceeded byte budget.
371
+ for (let i = entries.length - 1; i >= 0; i--) {
372
+ const e = entries[i];
373
+ bytesAccumulated += Buffer.byteLength(JSON.stringify(e), 'utf8') + 1; // +1 for newline
374
+ if (e.type === 'message' && e.message?.role === 'user') {
375
+ userTurnsSeen++;
376
+ lastUserIdx = i;
377
+ if (userTurnsSeen >= keepRecentUserTurns) {
378
+ return { cutIdx: i, reason: `kept-${userTurnsSeen}-user-turns` };
379
+ }
380
+ }
381
+ // Byte cap is a safety net for sessions where a single turn is enormous
382
+ // (e.g. one turn dumped a 200k file). When we hit it we MUST snap the cut
383
+ // to the most recent user-message boundary — otherwise the kept tail could
384
+ // start mid-turn (assistant/toolResult with no user prompt above it), and
385
+ // tool-pairing repair won't fix that.
386
+ if (bytesAccumulated > maxBytes && userTurnsSeen > 0) {
387
+ return { cutIdx: lastUserIdx, reason: `byte-cap-${bytesAccumulated}b` };
388
+ }
389
+ }
390
+ // Fewer user turns than target — treat as no-op. Soft-reset is recovery
391
+ // from overflow; if the session has fewer turns than our target it isn't
392
+ // overflowed and we shouldn't mutate it. Returning 1 means "keep everything
393
+ // after the header", which the caller's `cutIdx <= 1` gate maps to reset:false.
394
+ return { cutIdx: 1, reason: 'fewer-turns-than-target' };
395
+ }
396
+
397
+ /**
398
+ * Soft-reset a pi-ai session jsonl: keep the most-recent N user turns + their
399
+ * surrounding messages, drop everything older. Always preserves the session
400
+ * header (entries[0]). Re-parents the first kept entry to null so the tree
401
+ * remains valid. Runs tool-pairing repair in memory on the trimmed entries,
402
+ * because the cut can orphan toolCall/toolResult pairs.
403
+ *
404
+ * Atomic + backup: the original file is backed up once, then trim + repair land in a single atomicWrite() call.
405
+ *
406
+ * @returns report describing what was reset, or `{reset:false}` if nothing to do.
407
+ */
408
+ export function softResetSessionFile(
409
+ path: string,
410
+ options: SoftResetOptions = {},
411
+ ): SoftResetReport {
412
+ if (!existsSync(path)) {
413
+ throw new Error(`Session file not found: ${path}`);
414
+ }
415
+
416
+ const keepRecentUserTurns = options.keepRecentUserTurns ?? 8;
417
+ const maxBytes = options.maxBytes ?? 250_000;
418
+
419
+ const entries = parseSessionFile(path);
420
+ const bytesBefore = readFileSync(path).length;
421
+
422
+ // Need at least header + a couple of messages to be worth resetting.
423
+ if (entries.length < 4) {
424
+ return {
425
+ reset: false,
426
+ reason: 'session-too-small',
427
+ entriesBefore: entries.length,
428
+ entriesAfter: entries.length,
429
+ bytesBefore,
430
+ bytesAfter: bytesBefore,
431
+ };
432
+ }
433
+
434
+ const { cutIdx, reason } = findSoftResetCutIndex(entries, keepRecentUserTurns, maxBytes);
435
+
436
+ // No-op if cut is already at the start (nothing to drop besides header).
437
+ if (cutIdx <= 1) {
438
+ return {
439
+ reset: false,
440
+ reason: `cut-at-start (${reason})`,
441
+ entriesBefore: entries.length,
442
+ entriesAfter: entries.length,
443
+ bytesBefore,
444
+ bytesAfter: bytesBefore,
445
+ };
446
+ }
447
+
448
+ // Build trimmed entries: header + tail.
449
+ // Re-parent the first kept tail entry to null so the tree root is intact.
450
+ const header = entries[0];
451
+ const tail = entries.slice(cutIdx);
452
+ if (tail.length > 0 && tail[0].parentId !== undefined) {
453
+ tail[0] = { ...tail[0], parentId: null };
454
+ }
455
+ const trimmed = [header, ...tail];
456
+
457
+ // Run tool-pair repair *in memory* on the trimmed entries before writing,
458
+ // so the on-disk update is a single atomic backup + atomic rename. Doing
459
+ // disk-write → repairSessionFile() (another disk-write) would mean a crash
460
+ // between the two leaves a partially-processed file AND a backup of the
461
+ // already-trimmed file rather than the true original.
462
+ const repaired = repairEntriesInMemory(trimmed);
463
+
275
464
  const backupPath = backupFile(path);
276
- const newContent = kept.map(e => JSON.stringify(e)).join('\n') + '\n';
465
+ const newContent = repaired.entries.map(e => JSON.stringify(e)).join('\n') + '\n';
277
466
  atomicWrite(path, newContent);
278
467
 
468
+ const bytesAfter = Buffer.byteLength(newContent, 'utf8');
279
469
  return {
280
- repaired: true,
281
- droppedEntryIds: Array.from(entriesToDrop),
282
- droppedToolCallIds: validation.orphanedToolCallIds,
283
- droppedToolResultIds: validation.orphanedToolResultIds,
470
+ reset: true,
471
+ reason,
472
+ entriesBefore: entries.length,
473
+ entriesAfter: repaired.entries.length,
474
+ bytesBefore,
475
+ bytesAfter,
284
476
  backupPath,
285
- totalEntries: entries.length,
477
+ postRepair: repaired.report,
286
478
  };
287
479
  }
288
480
 
481
+ // ── Error classifiers ────────────────────────────────────────────────────
482
+
483
+ /**
484
+ * Detect whether an error from pi-ai / the model provider indicates the
485
+ * session has grown past the model's context window (input > max).
486
+ *
487
+ * Triggers soft-reset recovery in the memory lifecycle. Intentionally narrow:
488
+ * only matches the well-known overflow phrasings, not generic 4xx errors.
489
+ *
490
+ * Mirrors `isToolPairingError`'s nested-error handling: provider SDKs commonly
491
+ * wrap the useful text under `cause.message` or in serialized fields on
492
+ * Bedrock ValidationException. Stringify-search is gated on a 4xx / validation
493
+ * shape so we don't false-positive on noisy unrelated errors.
494
+ */
495
+ export function isContextOverflowError(err: unknown): boolean {
496
+ if (!err) return false;
497
+ const patterns = [
498
+ /prompt is too long/i,
499
+ /tokens?\s*[>>]\s*\d+\s*maximum/i,
500
+ /input is too long/i,
501
+ /context length exceeded/i,
502
+ /maximum context length/i,
503
+ ];
504
+
505
+ // 1. Top-level message.
506
+ const msg = (err as { message?: string }).message ?? String(err);
507
+ if (patterns.some(p => p.test(msg))) return true;
508
+
509
+ // 2. Walk the cause chain (a few hops — don't loop forever on circular).
510
+ let cur: unknown = (err as { cause?: unknown }).cause;
511
+ for (let hop = 0; hop < 5 && cur; hop++) {
512
+ const causeMsg = (cur as { message?: string }).message ?? String(cur);
513
+ if (patterns.some(p => p.test(causeMsg))) return true;
514
+ cur = (cur as { cause?: unknown }).cause;
515
+ }
516
+
517
+ // 3. Bedrock ValidationException sometimes carries the overflow text in
518
+ // nested SDK fields. Only stringify-search when the error LOOKS like a 4xx
519
+ // validation error — mirrors the gating in isToolPairingError.
520
+ const name = (err as { name?: string }).name ?? '';
521
+ const httpStatus =
522
+ (err as { $metadata?: { httpStatusCode?: number } }).$metadata?.httpStatusCode;
523
+ if (name === 'ValidationException' || httpStatus === 400) {
524
+ try {
525
+ const full = JSON.stringify(err);
526
+ if (patterns.some(p => p.test(full))) return true;
527
+ } catch {
528
+ /* circular structure — give up */
529
+ }
530
+ }
531
+
532
+ return false;
533
+ }
534
+
289
535
  /**
290
536
  * Detect whether an error from pi-ai / the model provider indicates a
291
537
  * tool-pairing mismatch that can be recovered by session repair.
@@ -16,6 +16,7 @@ import { shouldInjectMemory, classifyContextPressure, isSoftFlushOnCooldown } fr
16
16
  import { buildMemoryInjection, injectMemoryIntoMessage } from "./inject";
17
17
  import { buildFlushPrompt } from "./prompts";
18
18
  import { bootstrapMemoryFiles } from "./bootstrap";
19
+ import { isContextOverflowError } from "../agents/shared/session-repair";
19
20
  import { appendFile, mkdir } from "node:fs/promises";
20
21
  import { join } from "node:path";
21
22
  import { homedir } from "node:os";
@@ -359,6 +360,35 @@ export async function flushMemoryThenCompact(
359
360
  } catch (err) {
360
361
  const errMsg = (err as Error).message;
361
362
  console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
363
+
364
+ // Recovery path: when the session has grown past the model's context
365
+ // window, the summarizer prompt itself overflows and compact() throws
366
+ // "prompt is too long". Threshold tuning prevents *new* sessions from
367
+ // hitting this, but does nothing for sessions already past the line.
368
+ // Trim the on-disk session jsonl to its most recent N user turns and
369
+ // mark the next turn for a fresh memory injection. We do NOT retry
370
+ // compact inline — that would extend the thread lock for another long
371
+ // operation. The trimmed session is small enough that the next user
372
+ // turn proceeds normally; any soft pressure from injected memory will
373
+ // trigger a regular compact later.
374
+ let softResetAttempted = false;
375
+ let softResetSucceeded = false;
376
+ if (isContextOverflowError(err) && agent.softReset) {
377
+ softResetAttempted = true;
378
+ try {
379
+ await onProgress?.("♻️ Session overflowed — soft-resetting to recent turns...");
380
+ const report = await agent.softReset(threadId);
381
+ if (report?.reset) {
382
+ softResetSucceeded = true;
383
+ console.warn(`[memory] soft-reset recovered ${threadId} from overflow`);
384
+ } else {
385
+ console.warn(`[memory] soft-reset returned no-op for ${threadId} (${(report as { reason?: string } | null)?.reason ?? "unknown"})`);
386
+ }
387
+ } catch (resetErr) {
388
+ console.error(`[memory] soft-reset failed for ${threadId}:`, (resetErr as Error).message);
389
+ }
390
+ }
391
+
362
392
  appendCompactLog({
363
393
  threadId,
364
394
  level,
@@ -371,11 +401,22 @@ export async function flushMemoryThenCompact(
371
401
  totalMs: Date.now() - t0,
372
402
  model: flushModel ?? "default",
373
403
  status: "failed",
374
- error: errMsg.slice(0, 500),
404
+ error: (softResetAttempted
405
+ ? `${softResetSucceeded ? "soft-reset-recovered" : "soft-reset-failed"}: ${errMsg}`
406
+ : errMsg).slice(0, 500),
375
407
  });
376
- // Mark pending so we retry on next turn. Reuse the state we already loaded.
408
+
377
409
  try {
378
- stateBeforeCompact.pendingCompact = effectiveLevel;
410
+ if (softResetSucceeded) {
411
+ // Soft reset cleared the overflow. Mark the next turn for memory
412
+ // re-injection so the agent has its durable context, and clear the
413
+ // pendingCompact flag — there's nothing left to compact now.
414
+ stateBeforeCompact.forceInjectReason = "after-soft-reset";
415
+ stateBeforeCompact.pendingCompact = undefined;
416
+ } else {
417
+ // Re-arm pendingCompact so the next turn retries.
418
+ stateBeforeCompact.pendingCompact = effectiveLevel;
419
+ }
379
420
  await saveThreadMemoryState(threadId, stateBeforeCompact);
380
421
  } catch {}
381
422
  return null;
@@ -56,7 +56,7 @@ export interface ThreadMemoryState {
56
56
  /** Local date when memory was last injected (detects day boundary) */
57
57
  lastSeenLocalDate?: string;
58
58
  /** Force re-injection on next turn */
59
- forceInjectReason?: "new-session" | "after-compact" | "manual";
59
+ forceInjectReason?: "new-session" | "after-compact" | "after-soft-reset" | "manual";
60
60
  /** When last compaction happened */
61
61
  lastCompactAt?: string;
62
62
  /** Pending compaction level (from interrupted flush) */
package/src/types.ts CHANGED
@@ -122,6 +122,17 @@ export interface AgentAdapter {
122
122
  /** Compact with a specific model. */
123
123
  compactWithModel?(threadId: string, modelId: string): Promise<{ tokensBefore: number; tokensAfter: number | null } | null>;
124
124
 
125
+ /**
126
+ * Soft-reset an overflowed session by trimming on-disk history to the
127
+ * most-recent few turns. Called by memory lifecycle when compact() fails
128
+ * because the session itself is too large for the model's context window.
129
+ *
130
+ * Returns a report describing what was trimmed (shape is adapter-specific
131
+ * but always has `reset: boolean`), or null if not applicable.
132
+ * Adapters without on-disk sessions (in-memory only) should return null.
133
+ */
134
+ softReset?(threadId: string): Promise<{ reset: boolean } | null>;
135
+
125
136
  /** Abort the current agent run for a thread. */
126
137
  abort?(threadId: string): Promise<void>;
127
138