@inceptionstack/roundhouse 0.5.27 → 0.5.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/package.json +1 -1
- package/src/agents/pi/pi-adapter.ts +71 -1
- package/src/agents/shared/session-repair.test.ts +174 -0
- package/src/agents/shared/session-repair.ts +175 -0
- package/src/memory/lifecycle.ts +121 -29
- package/src/memory/policy.ts +26 -5
- package/src/memory/types.ts +1 -1
- package/src/types.ts +11 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to `@inceptionstack/roundhouse` are documented here.
|
|
4
4
|
|
|
5
|
+
## [0.5.29] — 2026-05-14
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- **Soft-reset recovery for already-overflowed sessions.** When a session has grown past the model's context window, normal compact cannot recover — the summarizer prompt itself overflows and `compact()` throws `prompt is too long: N > max`. v0.5.28's threshold tuning prevents *new* sessions from hitting this; this release adds graceful recovery for sessions that already crossed the line. On context-overflow detection, the memory lifecycle calls a new `agent.softReset(threadId)` capability that trims the on-disk session jsonl to its most-recent N user turns (default 8, byte-capped at 250k), reloads the session, and queues a memory re-injection on the next turn. The agent loses verbatim message history for older turns but retains its durable context (MEMORY.md, daily front-page, soul.md). No more manual surgery on stuck sessions.
|
|
9
|
+
- New module exports: `softResetSessionFile()` and `isContextOverflowError()` in `src/agents/shared/session-repair.ts`. New optional `softReset?(threadId)` method on `AgentAdapter` interface (no-op when not implemented — backward-compatible). PiAdapter implements it via the existing `reloadSession` path.
|
|
10
|
+
- 20 new tests across `session-repair.test.ts` (file-level cut/preserve/repair semantics, error classifier) and `memory.test.ts` (lifecycle wiring — success/no-op/missing-capability/non-overflow-error/throws-during-recovery). 527 tests total.
|
|
11
|
+
|
|
12
|
+
## [0.5.28] — 2026-05-14
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- **PR #126 actually shipped this time.** v0.5.26's CHANGELOG advertised the emergency-compact-loop fix, but the underlying PR (`fix/compact-loop-thresholds-and-thinking`) was still OPEN — only the version bump and self-update patch went out. Users on v0.5.26/v0.5.27 still hit `Summarization failed: prompt is too long: 212776 tokens > 200000 maximum` on overflowed sessions because `DEFAULT_HARD_TOKENS` was still 200k with no headroom clamp. This release contains the actual code change: `DEFAULT_HARD_TOKENS=150_000`, `DEFAULT_SOFT_TOKENS=130_000`, `COMPACT_HEADROOM_TOKENS=50_000`, plus `thinkingLevel='off'` forced inside `compactWithModel`. (#126)
|
|
16
|
+
|
|
5
17
|
## [0.5.27] — 2026-05-14
|
|
6
18
|
|
|
7
19
|
### Fixed
|
package/package.json
CHANGED
|
package/src/agents/pi/pi-adapter.ts
CHANGED
|
@@ -28,7 +28,7 @@ import {
|
|
|
28
28
|
|
|
29
29
|
import type { AgentAdapter, AgentAdapterFactory, AgentMessage, AgentResponse, AgentStreamEvent, MessageContext } from "../../types";
|
|
30
30
|
import { formatMessage, extractCustomMessage, customContentToText } from "./message-format";
|
|
31
|
-
import { isToolPairingError, repairSessionFile } from "../shared/session-repair";
|
|
31
|
+
import { isToolPairingError, repairSessionFile, softResetSessionFile, type SoftResetReport } from "../shared/session-repair";
|
|
32
32
|
import { SESSIONS_DIR } from "../../config";
|
|
33
33
|
import { DEBUG_STREAM, threadIdToDir } from "../../util";
|
|
34
34
|
|
|
@@ -608,7 +608,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
|
|
|
608
608
|
|
|
609
609
|
const agentState = (entry.session as any).agent?.state;
|
|
610
610
|
let currentModel: any;
|
|
611
|
+
let currentThinkingLevel: any;
|
|
611
612
|
let modelSwapped = false;
|
|
613
|
+
let thinkingSwapped = false;
|
|
612
614
|
|
|
613
615
|
// Resolve and swap model for compact
|
|
614
616
|
if (!agentState) {
|
|
@@ -627,6 +629,19 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
|
|
|
627
629
|
modelSwapped = true;
|
|
628
630
|
console.log(`[pi-agent] compact using model (in-memory): ${modelId}`);
|
|
629
631
|
}
|
|
632
|
+
|
|
633
|
+
// Force thinking off for compact regardless of agent's default.
|
|
634
|
+
// Summarization doesn't benefit from reasoning, costs more tokens,
|
|
635
|
+
// and complicates the maxTokens math (adjustMaxTokensForThinking adds
|
|
636
|
+
// up to 16k thinking budget). Direct state mutation matches the model
|
|
637
|
+
// swap above and avoids setThinkingLevel(), which would persist to
|
|
638
|
+
// settings.json.
|
|
639
|
+
if (agentState.thinkingLevel && agentState.thinkingLevel !== "off") {
|
|
640
|
+
currentThinkingLevel = agentState.thinkingLevel;
|
|
641
|
+
agentState.thinkingLevel = "off";
|
|
642
|
+
thinkingSwapped = true;
|
|
643
|
+
console.log(`[pi-agent] compact forcing thinkingLevel=off (was ${currentThinkingLevel})`);
|
|
644
|
+
}
|
|
630
645
|
}
|
|
631
646
|
|
|
632
647
|
try {
|
|
@@ -640,7 +655,62 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
|
|
|
640
655
|
if (modelSwapped) {
|
|
641
656
|
agentState.model = currentModel;
|
|
642
657
|
}
|
|
658
|
+
if (thinkingSwapped) {
|
|
659
|
+
agentState.thinkingLevel = currentThinkingLevel;
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
});
|
|
663
|
+
},
|
|
664
|
+
|
|
665
|
+
/**
|
|
666
|
+
* Soft-reset an overflowed session: trim the on-disk jsonl to its most
|
|
667
|
+
* recent N user turns, then reload the session in place. Used by the
|
|
668
|
+
* memory-lifecycle layer when compact fails with "prompt is too long"
|
|
669
|
+
* — the session has grown past the model's context window and the
|
|
670
|
+
* summarizer prompt itself can no longer fit.
|
|
671
|
+
*
|
|
672
|
+
* Returns the soft-reset report (or null if no session for threadId).
|
|
673
|
+
* Behavior:
|
|
674
|
+
* - In-memory session: returns null (nothing to trim on disk).
|
|
675
|
+
* - Already-trimmed session: report.reset === false, no reload.
|
|
676
|
+
* - Otherwise: trims file, reloads session, returns report.
|
|
677
|
+
*
|
|
678
|
+
* On reload failure, the SessionEntry is dropped from the cache so the
|
|
679
|
+
* next prompt() recreates it cleanly.
|
|
680
|
+
*/
|
|
681
|
+
async softReset(threadId: string): Promise<SoftResetReport | null> {
|
|
682
|
+
return enqueue(threadId, async () => {
|
|
683
|
+
const entry = sessions.get(threadId);
|
|
684
|
+
if (!entry) return null;
|
|
685
|
+
const sessionFile = entry.session.sessionFile;
|
|
686
|
+
if (!sessionFile) {
|
|
687
|
+
console.warn(`[pi-agent] softReset: ${threadId} has no on-disk session file, skipping`);
|
|
688
|
+
return null;
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
console.warn(`[pi-agent] softReset: trimming overflowed session ${sessionFile}`);
|
|
692
|
+
const report = softResetSessionFile(sessionFile);
|
|
693
|
+
if (!report.reset) {
|
|
694
|
+
console.log(`[pi-agent] softReset: nothing to trim (${report.reason})`);
|
|
695
|
+
return report;
|
|
696
|
+
}
|
|
697
|
+
console.warn(
|
|
698
|
+
`[pi-agent] softReset: ${report.entriesBefore} → ${report.entriesAfter} entries, ` +
|
|
699
|
+
`${report.bytesBefore} → ${report.bytesAfter} bytes (${report.reason}). Backup: ${report.backupPath}`
|
|
700
|
+
);
|
|
701
|
+
|
|
702
|
+
// Reload the session so pi-ai re-reads the trimmed file. Drop the
|
|
703
|
+
// cache entry on failure so the next prompt() recreates from scratch
|
|
704
|
+
// rather than running against the disposed session.
|
|
705
|
+
try {
|
|
706
|
+
const reloaded = await reloadSession(entry, sessionFile);
|
|
707
|
+
await entry.session.dispose();
|
|
708
|
+
entry.session = reloaded.session;
|
|
709
|
+
} catch (err) {
|
|
710
|
+
console.error(`[pi-agent] softReset reload failed for ${threadId}:`, (err as Error).message);
|
|
711
|
+
sessions.delete(threadId);
|
|
643
712
|
}
|
|
713
|
+
return report;
|
|
644
714
|
});
|
|
645
715
|
},
|
|
646
716
|
|
|
package/src/agents/shared/session-repair.test.ts
CHANGED
|
@@ -11,6 +11,8 @@ import {
|
|
|
11
11
|
inspectSessionFile,
|
|
12
12
|
repairSessionFile,
|
|
13
13
|
isToolPairingError,
|
|
14
|
+
softResetSessionFile,
|
|
15
|
+
isContextOverflowError,
|
|
14
16
|
} from './session-repair';
|
|
15
17
|
|
|
16
18
|
// ---------- fixtures ----------
|
|
@@ -376,3 +378,175 @@ describe('session-repair', () => {
|
|
|
376
378
|
});
|
|
377
379
|
});
|
|
378
380
|
});
|
|
381
|
+
|
|
382
|
+
// ============================================================
|
|
383
|
+
// softResetSessionFile
|
|
384
|
+
// ============================================================
|
|
385
|
+
|
|
386
|
+
describe('softResetSessionFile', () => {
|
|
387
|
+
function userTurn(idPrefix: string, parentId: string | null) {
|
|
388
|
+
// A user turn = user msg + assistant text reply (no tool calls, so cuts
|
|
389
|
+
// are clean; tool-pairing edge cases are covered by repair tests).
|
|
390
|
+
return [
|
|
391
|
+
userMsg(`${idPrefix}u`, parentId, `text-${idPrefix}`),
|
|
392
|
+
{
|
|
393
|
+
type: 'message',
|
|
394
|
+
id: `${idPrefix}a`,
|
|
395
|
+
parentId: `${idPrefix}u`,
|
|
396
|
+
timestamp: '2026-05-01T00:00:04Z',
|
|
397
|
+
message: {
|
|
398
|
+
role: 'assistant',
|
|
399
|
+
content: [{ type: 'text', text: `reply-${idPrefix}` }],
|
|
400
|
+
api: 'bedrock-converse-stream',
|
|
401
|
+
provider: 'amazon-bedrock',
|
|
402
|
+
model: 'claude',
|
|
403
|
+
usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
|
404
|
+
stopReason: 'endTurn',
|
|
405
|
+
timestamp: 4,
|
|
406
|
+
},
|
|
407
|
+
},
|
|
408
|
+
];
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
it('softResetSessionFile_OnSessionWithMoreTurnsThanTarget_KeepsHeaderAndRecentTurns', () => {
|
|
412
|
+
// Arrange: 10 user turns, target keepRecentUserTurns=3.
|
|
413
|
+
const entries: object[] = [HEADER, MODEL_CHANGE];
|
|
414
|
+
let parent: string | null = 'mc-1';
|
|
415
|
+
for (let i = 1; i <= 10; i++) {
|
|
416
|
+
const turn = userTurn(`t${i}`, parent);
|
|
417
|
+
entries.push(...turn);
|
|
418
|
+
parent = `t${i}a`;
|
|
419
|
+
}
|
|
420
|
+
const path = tmpJsonl(entries);
|
|
421
|
+
|
|
422
|
+
// Act
|
|
423
|
+
const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });
|
|
424
|
+
|
|
425
|
+
// Assert: report indicates reset, file shrunk, header preserved, last 3 user msgs present.
|
|
426
|
+
expect(report.reset).toBe(true);
|
|
427
|
+
expect(report.entriesAfter).toBeLessThan(report.entriesBefore);
|
|
428
|
+
expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
|
|
429
|
+
expect(report.backupPath).toBeDefined();
|
|
430
|
+
expect(existsSync(report.backupPath!)).toBe(true);
|
|
431
|
+
|
|
432
|
+
const trimmed = parseSessionFile(path);
|
|
433
|
+
// Header always preserved.
|
|
434
|
+
expect(trimmed[0].type).toBe('session');
|
|
435
|
+
// Last 3 user turns present.
|
|
436
|
+
const userIds = trimmed.filter(e => e.message?.role === 'user').map(e => e.id);
|
|
437
|
+
expect(userIds).toEqual(['t8u', 't9u', 't10u']);
|
|
438
|
+
// First kept entry's parentId reset to null (no dangling pointer).
|
|
439
|
+
const firstAfterHeader = trimmed[1];
|
|
440
|
+
expect(firstAfterHeader.parentId).toBeNull();
|
|
441
|
+
});
|
|
442
|
+
|
|
443
|
+
it('softResetSessionFile_OnSessionSmallerThanTarget_ReturnsResetFalseAndDoesNotMutate', () => {
|
|
444
|
+
// Arrange: 2 user turns, target keepRecentUserTurns=8.
|
|
445
|
+
const entries: object[] = [HEADER, MODEL_CHANGE, ...userTurn('a', 'mc-1'), ...userTurn('b', 'aa')];
|
|
446
|
+
const path = tmpJsonl(entries);
|
|
447
|
+
const before = readFileSync(path, 'utf8');
|
|
448
|
+
|
|
449
|
+
// Act
|
|
450
|
+
const report = softResetSessionFile(path, { keepRecentUserTurns: 8 });
|
|
451
|
+
|
|
452
|
+
// Assert: no reset, file untouched, no backup.
|
|
453
|
+
expect(report.reset).toBe(false);
|
|
454
|
+
expect(report.backupPath).toBeUndefined();
|
|
455
|
+
expect(readFileSync(path, 'utf8')).toBe(before);
|
|
456
|
+
});
|
|
457
|
+
|
|
458
|
+
it('softResetSessionFile_OnTinySession_ReturnsResetFalseWithReason', () => {
|
|
459
|
+
// Arrange: only header.
|
|
460
|
+
const path = tmpJsonl([HEADER]);
|
|
461
|
+
|
|
462
|
+
// Act
|
|
463
|
+
const report = softResetSessionFile(path);
|
|
464
|
+
|
|
465
|
+
// Assert
|
|
466
|
+
expect(report.reset).toBe(false);
|
|
467
|
+
expect(report.reason).toContain('too-small');
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
it('softResetSessionFile_OnSessionWithOrphanedToolPairsAfterCut_AlsoRunsRepair', () => {
|
|
471
|
+
// Arrange: a session where the tail contains a toolResult whose toolCall
|
|
472
|
+
// sits in the older (dropped) section. After the cut the toolResult is
|
|
473
|
+
// orphaned — soft-reset must clean it up via the post-cut repair.
|
|
474
|
+
const oldToolCall = assistantToolCall('a-old', 'mc-1', 'call-X');
|
|
475
|
+
const orphanedResult = {
|
|
476
|
+
type: 'message',
|
|
477
|
+
id: 'tr-1',
|
|
478
|
+
parentId: 'a-old',
|
|
479
|
+
timestamp: '2026-05-01T00:00:05Z',
|
|
480
|
+
message: { role: 'toolResult', toolCallId: 'call-X', content: 'ok', timestamp: 5 },
|
|
481
|
+
};
|
|
482
|
+
const entries: object[] = [HEADER, MODEL_CHANGE, userMsg('u-old', 'mc-1', 'old'), oldToolCall];
|
|
483
|
+
let parent: string | null = 'a-old';
|
|
484
|
+
// Push 5 fresh turns so the cut leaves us in tail.
|
|
485
|
+
for (let i = 1; i <= 5; i++) {
|
|
486
|
+
entries.push(...userTurn(`f${i}`, parent));
|
|
487
|
+
parent = `f${i}a`;
|
|
488
|
+
}
|
|
489
|
+
// Insert the orphaned result mid-tail (kept by cut, but call is dropped).
|
|
490
|
+
entries.splice(6, 0, orphanedResult);
|
|
491
|
+
const path = tmpJsonl(entries);
|
|
492
|
+
|
|
493
|
+
// Act
|
|
494
|
+
const report = softResetSessionFile(path, { keepRecentUserTurns: 3 });
|
|
495
|
+
|
|
496
|
+
// Assert: reset succeeded AND post-cut repair fired.
|
|
497
|
+
expect(report.reset).toBe(true);
|
|
498
|
+
expect(report.postRepair).toBeDefined();
|
|
499
|
+
// Final file is internally consistent (no orphans).
|
|
500
|
+
expect(inspectSessionFile(path).hasOrphans).toBe(false);
|
|
501
|
+
});
|
|
502
|
+
|
|
503
|
+
it('softResetSessionFile_OnNonexistentFile_Throws', () => {
|
|
504
|
+
// Arrange/Act/Assert: documents the precondition.
|
|
505
|
+
expect(() => softResetSessionFile('/nonexistent/path.jsonl')).toThrow(/not found/);
|
|
506
|
+
});
|
|
507
|
+
|
|
508
|
+
it('softResetSessionFile_BytesCapHonored_StopsCutAtCap', () => {
|
|
509
|
+
// Arrange: each turn is small but we set a tiny byte cap so we cut early.
|
|
510
|
+
const entries: object[] = [HEADER, MODEL_CHANGE];
|
|
511
|
+
let parent: string | null = 'mc-1';
|
|
512
|
+
for (let i = 1; i <= 20; i++) {
|
|
513
|
+
entries.push(...userTurn(`t${i}`, parent));
|
|
514
|
+
parent = `t${i}a`;
|
|
515
|
+
}
|
|
516
|
+
const path = tmpJsonl(entries);
|
|
517
|
+
|
|
518
|
+
// Act
|
|
519
|
+
const report = softResetSessionFile(path, { keepRecentUserTurns: 100, maxBytes: 800 });
|
|
520
|
+
|
|
521
|
+
// Assert: reset triggered by byte cap (we asked for 100 turns we don't have,
|
|
522
|
+
// but byte cap kicks in first).
|
|
523
|
+
expect(report.reset).toBe(true);
|
|
524
|
+
expect(report.reason).toMatch(/byte-cap|fewer-turns/);
|
|
525
|
+
expect(report.bytesAfter).toBeLessThan(report.bytesBefore);
|
|
526
|
+
});
|
|
527
|
+
});
|
|
528
|
+
|
|
529
|
+
// ============================================================
|
|
530
|
+
// isContextOverflowError
|
|
531
|
+
// ============================================================
|
|
532
|
+
|
|
533
|
+
describe('isContextOverflowError', () => {
|
|
534
|
+
it.each([
|
|
535
|
+
['prompt is too long: 212776 tokens > 200000 maximum', true],
|
|
536
|
+
['Validation error: input is too long', true],
|
|
537
|
+
['context length exceeded for this model', true],
|
|
538
|
+
['maximum context length reached', true],
|
|
539
|
+
['tokens > 200000 maximum', true],
|
|
540
|
+
['toolUse without toolResult', false], // pairing error — different recovery
|
|
541
|
+
['random network failure', false],
|
|
542
|
+
['', false],
|
|
543
|
+
])('classifies %p as overflow=%p', (msg, expected) => {
|
|
544
|
+
expect(isContextOverflowError(new Error(msg))).toBe(expected);
|
|
545
|
+
});
|
|
546
|
+
|
|
547
|
+
it('returns false for null/undefined/non-Error inputs', () => {
|
|
548
|
+
expect(isContextOverflowError(null)).toBe(false);
|
|
549
|
+
expect(isContextOverflowError(undefined)).toBe(false);
|
|
550
|
+
expect(isContextOverflowError({})).toBe(false);
|
|
551
|
+
});
|
|
552
|
+
});
|
|
package/src/agents/shared/session-repair.ts
CHANGED
|
@@ -286,6 +286,181 @@ export function repairSessionFile(path: string): SessionRepairReport {
|
|
|
286
286
|
};
|
|
287
287
|
}
|
|
288
288
|
|
|
289
|
+
// ── Soft reset (recovery from already-overflowed sessions) ──────────────
|
|
290
|
+
|
|
291
|
+
/**
|
|
292
|
+
* When a session has grown past the model's context window, normal compact
|
|
293
|
+
* cannot recover — the summarizer prompt itself overflows. Soft reset trims
|
|
294
|
+
* the session jsonl on disk to its most-recent N user turns, drops everything
|
|
295
|
+
* older, and re-runs the tool-pairing repair so what's left is internally
|
|
296
|
+
* consistent.
|
|
297
|
+
*
|
|
298
|
+
* Trade-off: loses fidelity for older turns. The roundhouse memory layer
|
|
299
|
+
* (MEMORY.md, daily front-page) re-injects on the next turn, so the agent
|
|
300
|
+
* still has its durable context — just not the verbatim message history.
|
|
301
|
+
*
|
|
302
|
+
* Conservative defaults aim for ~30–40% of a 200k window so the next compact
|
|
303
|
+
* has ample room to summarize.
|
|
304
|
+
*/
|
|
305
|
+
export interface SoftResetOptions {
|
|
306
|
+
/** Keep at most this many user turns from the tail (default: 8). */
|
|
307
|
+
keepRecentUserTurns?: number;
|
|
308
|
+
/** Hard cap on jsonl bytes after trim (default: 250_000 ≈ 60–80k tokens). */
|
|
309
|
+
maxBytes?: number;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
export interface SoftResetReport {
|
|
313
|
+
reset: boolean;
|
|
314
|
+
reason: string;
|
|
315
|
+
entriesBefore: number;
|
|
316
|
+
entriesAfter: number;
|
|
317
|
+
bytesBefore: number;
|
|
318
|
+
bytesAfter: number;
|
|
319
|
+
backupPath?: string;
|
|
320
|
+
/** Tool-pairing repair report on the trimmed file (orphans created by the cut). */
|
|
321
|
+
postRepair?: SessionRepairReport;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Find a safe cut index in the entries array. Walk backwards from the end
|
|
326
|
+
* looking for user message entries; the cut sits *just before* the Nth
|
|
327
|
+
* most-recent user message we encounter. Returns the index of the first
|
|
328
|
+
* entry to KEEP (i.e. all entries[0..cutIdx) are dropped).
|
|
329
|
+
*
|
|
330
|
+
* If we can't find enough user messages, returns 1 to keep everything except
|
|
331
|
+
* the session header (which we preserve separately).
|
|
332
|
+
*/
|
|
333
|
+
function findSoftResetCutIndex(
|
|
334
|
+
entries: SessionFileEntry[],
|
|
335
|
+
keepRecentUserTurns: number,
|
|
336
|
+
maxBytes: number,
|
|
337
|
+
): { cutIdx: number; reason: string } {
|
|
338
|
+
let userTurnsSeen = 0;
|
|
339
|
+
let bytesAccumulated = 0;
|
|
340
|
+
// Scan tail-to-head, stop when we've collected enough user turns OR exceeded byte budget.
|
|
341
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
342
|
+
const e = entries[i];
|
|
343
|
+
bytesAccumulated += JSON.stringify(e).length + 1; // +1 for newline
|
|
344
|
+
if (e.type === 'message' && e.message?.role === 'user') {
|
|
345
|
+
userTurnsSeen++;
|
|
346
|
+
if (userTurnsSeen >= keepRecentUserTurns) {
|
|
347
|
+
return { cutIdx: i, reason: `kept-${userTurnsSeen}-user-turns` };
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
// Byte cap is a safety net for sessions where a single turn is enormous
|
|
351
|
+
// (e.g. one turn dumped a 200k file). Stop once we'd exceed the cap.
|
|
352
|
+
if (bytesAccumulated > maxBytes && userTurnsSeen > 0) {
|
|
353
|
+
return { cutIdx: i + 1, reason: `byte-cap-${bytesAccumulated}b` };
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
// Not enough user turns in the file — keep everything except header.
|
|
357
|
+
// (Header is always at index 0 and is preserved by the writer separately.)
|
|
358
|
+
return { cutIdx: 1, reason: 'fewer-turns-than-target' };
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
/**
|
|
362
|
+
* Soft-reset a pi-ai session jsonl: keep the most-recent N user turns + their
|
|
363
|
+
* surrounding messages, drop everything older. Always preserves the session
|
|
364
|
+
* header (entries[0]). Re-parents the first kept entry to null so the tree
|
|
365
|
+
* remains valid. Re-runs tool-pairing repair on the trimmed file because
|
|
366
|
+
* the cut likely orphaned some toolCall/toolResult pairs.
|
|
367
|
+
*
|
|
368
|
+
* Atomic + backup: same safety pattern as repairSessionFile.
|
|
369
|
+
*
|
|
370
|
+
* @returns report describing what was reset, or `{reset:false}` if nothing to do.
|
|
371
|
+
*/
|
|
372
|
+
export function softResetSessionFile(
|
|
373
|
+
path: string,
|
|
374
|
+
options: SoftResetOptions = {},
|
|
375
|
+
): SoftResetReport {
|
|
376
|
+
if (!existsSync(path)) {
|
|
377
|
+
throw new Error(`Session file not found: ${path}`);
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
const keepRecentUserTurns = options.keepRecentUserTurns ?? 8;
|
|
381
|
+
const maxBytes = options.maxBytes ?? 250_000;
|
|
382
|
+
|
|
383
|
+
const entries = parseSessionFile(path);
|
|
384
|
+
const bytesBefore = readFileSync(path).length;
|
|
385
|
+
|
|
386
|
+
// Need at least header + a couple of messages to be worth resetting.
|
|
387
|
+
if (entries.length < 4) {
|
|
388
|
+
return {
|
|
389
|
+
reset: false,
|
|
390
|
+
reason: 'session-too-small',
|
|
391
|
+
entriesBefore: entries.length,
|
|
392
|
+
entriesAfter: entries.length,
|
|
393
|
+
bytesBefore,
|
|
394
|
+
bytesAfter: bytesBefore,
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
const { cutIdx, reason } = findSoftResetCutIndex(entries, keepRecentUserTurns, maxBytes);
|
|
399
|
+
|
|
400
|
+
// No-op if cut is already at the start (nothing to drop besides header).
|
|
401
|
+
if (cutIdx <= 1) {
|
|
402
|
+
return {
|
|
403
|
+
reset: false,
|
|
404
|
+
reason: `cut-at-start (${reason})`,
|
|
405
|
+
entriesBefore: entries.length,
|
|
406
|
+
entriesAfter: entries.length,
|
|
407
|
+
bytesBefore,
|
|
408
|
+
bytesAfter: bytesBefore,
|
|
409
|
+
};
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// Build trimmed entries: header + tail.
|
|
413
|
+
// Re-parent the first kept tail entry to null so the tree root is intact.
|
|
414
|
+
const header = entries[0];
|
|
415
|
+
const tail = entries.slice(cutIdx);
|
|
416
|
+
if (tail.length > 0 && tail[0].parentId !== undefined) {
|
|
417
|
+
tail[0] = { ...tail[0], parentId: null };
|
|
418
|
+
}
|
|
419
|
+
const trimmed = [header, ...tail];
|
|
420
|
+
|
|
421
|
+
const backupPath = backupFile(path);
|
|
422
|
+
const newContent = trimmed.map(e => JSON.stringify(e)).join('\n') + '\n';
|
|
423
|
+
atomicWrite(path, newContent);
|
|
424
|
+
|
|
425
|
+
// The cut may have orphaned tool pairs (e.g. toolResult kept but its
|
|
426
|
+
// toolCall is now in the dropped section). Run repair to clean those up.
|
|
427
|
+
const postRepair = repairSessionFile(path);
|
|
428
|
+
|
|
429
|
+
const bytesAfter = readFileSync(path).length;
|
|
430
|
+
return {
|
|
431
|
+
reset: true,
|
|
432
|
+
reason,
|
|
433
|
+
entriesBefore: entries.length,
|
|
434
|
+
entriesAfter: trimmed.length - postRepair.droppedEntryIds.length,
|
|
435
|
+
bytesBefore,
|
|
436
|
+
bytesAfter,
|
|
437
|
+
backupPath,
|
|
438
|
+
postRepair,
|
|
439
|
+
};
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// ── Error classifiers ────────────────────────────────────────────────────
|
|
443
|
+
|
|
444
|
+
/**
|
|
445
|
+
* Detect whether an error from pi-ai / the model provider indicates the
|
|
446
|
+
* session has grown past the model's context window (input > max).
|
|
447
|
+
*
|
|
448
|
+
* Triggers soft-reset recovery in the memory lifecycle. Intentionally narrow:
|
|
449
|
+
* only matches the well-known overflow phrasings, not generic 4xx errors.
|
|
450
|
+
*/
|
|
451
|
+
export function isContextOverflowError(err: unknown): boolean {
|
|
452
|
+
if (!err) return false;
|
|
453
|
+
const msg = (err as { message?: string }).message ?? String(err);
|
|
454
|
+
const patterns = [
|
|
455
|
+
/prompt is too long/i,
|
|
456
|
+
/tokens?\s*[>>]\s*\d+\s*maximum/i,
|
|
457
|
+
/input is too long/i,
|
|
458
|
+
/context length exceeded/i,
|
|
459
|
+
/maximum context length/i,
|
|
460
|
+
];
|
|
461
|
+
return patterns.some(p => p.test(msg));
|
|
462
|
+
}
|
|
463
|
+
|
|
289
464
|
/**
|
|
290
465
|
* Detect whether an error from pi-ai / the model provider indicates a
|
|
291
466
|
* tool-pairing mismatch that can be recovered by session repair.
|
package/src/memory/lifecycle.ts
CHANGED
|
@@ -16,10 +16,41 @@ import { shouldInjectMemory, classifyContextPressure, isSoftFlushOnCooldown } fr
|
|
|
16
16
|
import { buildMemoryInjection, injectMemoryIntoMessage } from "./inject";
|
|
17
17
|
import { buildFlushPrompt } from "./prompts";
|
|
18
18
|
import { bootstrapMemoryFiles } from "./bootstrap";
|
|
19
|
+
import { isContextOverflowError } from "../agents/shared/session-repair";
|
|
19
20
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
20
21
|
import { join } from "node:path";
|
|
21
22
|
import { homedir } from "node:os";
|
|
22
23
|
|
|
24
|
+
// ── Telemetry helper ─────────────────────────────────
|
|
25
|
+
|
|
26
|
+
interface CompactLogEntry {
|
|
27
|
+
threadId: string;
|
|
28
|
+
level: string;
|
|
29
|
+
effectiveLevel: string;
|
|
30
|
+
flushSkipped: boolean;
|
|
31
|
+
tokensBefore: number | null;
|
|
32
|
+
tokensAfter: number | null;
|
|
33
|
+
flushMs: number;
|
|
34
|
+
compactMs: number;
|
|
35
|
+
totalMs: number;
|
|
36
|
+
model: string;
|
|
37
|
+
status: "ok" | "failed";
|
|
38
|
+
error: string | null;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Append a compact telemetry entry. Fire-and-forget.
|
|
43
|
+
* Schema is uniform across success/failure (status discriminator) so
|
|
44
|
+
* downstream parsers don't have to handle missing fields.
|
|
45
|
+
*/
|
|
46
|
+
function appendCompactLog(entry: CompactLogEntry): void {
|
|
47
|
+
const logDir = join(homedir(), ".roundhouse", "logs");
|
|
48
|
+
const line = JSON.stringify({ ts: new Date().toISOString(), ...entry }) + "\n";
|
|
49
|
+
mkdir(logDir, { recursive: true })
|
|
50
|
+
.then(() => appendFile(join(logDir, "compact-timing.jsonl"), line))
|
|
51
|
+
.catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
|
|
52
|
+
}
|
|
53
|
+
|
|
23
54
|
// ── Memory mode detection ────────────────────────────
|
|
24
55
|
|
|
25
56
|
/**
|
|
@@ -246,11 +277,16 @@ export async function flushMemoryThenCompact(
|
|
|
246
277
|
// "manual" level, attempting the flush in that condition will hit the same
|
|
247
278
|
// 200k rejection. Deferring flush to a later (successful) turn is the safe
|
|
248
279
|
// recovery path.
|
|
249
|
-
const
|
|
280
|
+
const stateBeforeCompact = await loadThreadMemoryState(threadId);
|
|
281
|
+
const stuckInEmergency = stateBeforeCompact.pendingCompact === "emergency";
|
|
250
282
|
const skipFlush = effectiveLevel === "emergency" || stuckInEmergency;
|
|
251
283
|
|
|
284
|
+
// Hoisted so the catch block can report accurate flush vs compact timing
|
|
285
|
+
// (a failure during compact() would otherwise conflate the two phases).
|
|
286
|
+
let flushMs = 0;
|
|
287
|
+
let compactMs = 0;
|
|
288
|
+
|
|
252
289
|
try {
|
|
253
|
-
let flushMs = 0;
|
|
254
290
|
if (!skipFlush) {
|
|
255
291
|
// Step 1: flush
|
|
256
292
|
const flushText = buildFlushPrompt(mode === "unknown" ? "full" : mode, effectiveLevel);
|
|
@@ -276,16 +312,18 @@ export async function flushMemoryThenCompact(
|
|
|
276
312
|
const result = usedCompactModel
|
|
277
313
|
? await agent.compactWithModel!(threadId, flushModel!)
|
|
278
314
|
: await agent.compact!(threadId);
|
|
279
|
-
|
|
315
|
+
compactMs = Date.now() - t1;
|
|
280
316
|
if (!result) return null;
|
|
281
317
|
|
|
282
|
-
// Step 3: mark force re-inject (Full mode only)
|
|
318
|
+
// Step 3: mark force re-inject (Full mode only). Reuse the state we
|
|
319
|
+
// already loaded above; the compact step doesn't mutate memory-state
|
|
320
|
+
// (it mutates the pi session, a separate file), so the in-memory copy
|
|
321
|
+
// is still authoritative for our fields.
|
|
283
322
|
if (mode !== "complement") {
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
await saveThreadMemoryState(threadId, state);
|
|
323
|
+
stateBeforeCompact.forceInjectReason = "after-compact";
|
|
324
|
+
stateBeforeCompact.lastCompactAt = new Date().toISOString();
|
|
325
|
+
stateBeforeCompact.pendingCompact = undefined;
|
|
326
|
+
await saveThreadMemoryState(threadId, stateBeforeCompact);
|
|
289
327
|
}
|
|
290
328
|
|
|
291
329
|
const totalMs = Date.now() - t0;
|
|
@@ -302,30 +340,84 @@ export async function flushMemoryThenCompact(
|
|
|
302
340
|
const timing = { flushMs, compactMs, totalMs, model: usedCompactModel ? flushModel! : "default" };
|
|
303
341
|
console.log(`[memory] flush+compact done for ${threadId}: ${result.tokensBefore} → ${result.tokensAfter ?? "?"} tokens | flush=${flushMs}ms compact=${compactMs}ms total=${totalMs}ms model=${timing.model}`);
|
|
304
342
|
|
|
305
|
-
// Persist timing log for debugging (async, fire-and-forget)
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
343
|
+
// Persist timing log for debugging (async, fire-and-forget).
|
|
344
|
+
// Schema is intentionally uniform across success and failure entries
|
|
345
|
+
// (status discriminator + same field set) so jsonl parsers don't have
|
|
346
|
+
// to special-case missing fields.
|
|
347
|
+
appendCompactLog({
|
|
348
|
+
threadId,
|
|
349
|
+
level,
|
|
350
|
+
effectiveLevel,
|
|
351
|
+
flushSkipped: skipFlush,
|
|
352
|
+
tokensBefore: result.tokensBefore,
|
|
353
|
+
tokensAfter: result.tokensAfter ?? null,
|
|
354
|
+
...timing,
|
|
355
|
+
status: "ok",
|
|
356
|
+
error: null,
|
|
357
|
+
});
|
|
320
358
|
|
|
321
359
|
return { ...result, timing };
|
|
322
360
|
} catch (err) {
|
|
323
|
-
|
|
324
|
-
|
|
361
|
+
const errMsg = (err as Error).message;
|
|
362
|
+
console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
|
|
363
|
+
|
|
364
|
+
// Recovery path: when the session has grown past the model's context
|
|
365
|
+
// window, the summarizer prompt itself overflows and compact() throws
|
|
366
|
+
// "prompt is too long". Threshold tuning prevents *new* sessions from
|
|
367
|
+
// hitting this, but does nothing for sessions already past the line.
|
|
368
|
+
// Trim the on-disk session jsonl to its most recent N user turns and
|
|
369
|
+
// mark the next turn for a fresh memory injection. We do NOT retry
|
|
370
|
+
// compact inline — that would extend the thread lock for another long
|
|
371
|
+
// operation. The trimmed session is small enough that the next user
|
|
372
|
+
// turn proceeds normally; any soft pressure from injected memory will
|
|
373
|
+
// trigger a regular compact later.
|
|
374
|
+
let softResetAttempted = false;
|
|
375
|
+
let softResetSucceeded = false;
|
|
376
|
+
if (isContextOverflowError(err) && agent.softReset) {
|
|
377
|
+
softResetAttempted = true;
|
|
378
|
+
try {
|
|
379
|
+
await onProgress?.("♻️ Session overflowed — soft-resetting to recent turns...");
|
|
380
|
+
const report = await agent.softReset(threadId);
|
|
381
|
+
if (report?.reset) {
|
|
382
|
+
softResetSucceeded = true;
|
|
383
|
+
console.warn(`[memory] soft-reset recovered ${threadId} from overflow`);
|
|
384
|
+
} else {
|
|
385
|
+
console.warn(`[memory] soft-reset returned no-op for ${threadId} (${(report as { reason?: string } | null)?.reason ?? "unknown"})`);
|
|
386
|
+
}
|
|
387
|
+
} catch (resetErr) {
|
|
388
|
+
console.error(`[memory] soft-reset failed for ${threadId}:`, (resetErr as Error).message);
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
appendCompactLog({
|
|
393
|
+
threadId,
|
|
394
|
+
level,
|
|
395
|
+
effectiveLevel,
|
|
396
|
+
flushSkipped: skipFlush,
|
|
397
|
+
tokensBefore: null,
|
|
398
|
+
tokensAfter: null,
|
|
399
|
+
flushMs, // accurate: 0 if skipped or failed before flush completed
|
|
400
|
+
compactMs, // accurate: 0 if failed before/during compact
|
|
401
|
+
totalMs: Date.now() - t0,
|
|
402
|
+
model: flushModel ?? "default",
|
|
403
|
+
status: "failed",
|
|
404
|
+
error: (softResetAttempted
|
|
405
|
+
? `${softResetSucceeded ? "soft-reset-recovered" : "soft-reset-failed"}: ${errMsg}`
|
|
406
|
+
: errMsg).slice(0, 500),
|
|
407
|
+
});
|
|
408
|
+
|
|
325
409
|
try {
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
410
|
+
if (softResetSucceeded) {
|
|
411
|
+
// Soft reset cleared the overflow. Mark the next turn for memory
|
|
412
|
+
// re-injection so the agent has its durable context, and clear the
|
|
413
|
+
// pendingCompact flag — there's nothing left to compact now.
|
|
414
|
+
stateBeforeCompact.forceInjectReason = "after-soft-reset";
|
|
415
|
+
stateBeforeCompact.pendingCompact = undefined;
|
|
416
|
+
} else {
|
|
417
|
+
// Re-arm pendingCompact so the next turn retries.
|
|
418
|
+
stateBeforeCompact.pendingCompact = effectiveLevel;
|
|
419
|
+
}
|
|
420
|
+
await saveThreadMemoryState(threadId, stateBeforeCompact);
|
|
329
421
|
} catch {}
|
|
330
422
|
return null;
|
|
331
423
|
}
|
package/src/memory/policy.ts
CHANGED
|
@@ -10,12 +10,26 @@ import { formatDate } from "./files";
|
|
|
10
10
|
// ── Defaults ─────────────────────────────────────────
|
|
11
11
|
|
|
12
12
|
const DEFAULT_SOFT_PERCENT = 0.45;
|
|
13
|
-
const DEFAULT_SOFT_TOKENS =
|
|
13
|
+
const DEFAULT_SOFT_TOKENS = 130_000;
|
|
14
14
|
const DEFAULT_HARD_PERCENT = 0.50;
|
|
15
|
-
const DEFAULT_HARD_TOKENS =
|
|
15
|
+
const DEFAULT_HARD_TOKENS = 150_000;
|
|
16
16
|
const DEFAULT_EMERGENCY_THRESHOLD = 32_768;
|
|
17
17
|
const DEFAULT_COOLDOWN_MS = 10 * 60_000; // 10 minutes
|
|
18
18
|
|
|
19
|
+
// Headroom reserved for the summarization payload itself when compact runs.
|
|
20
|
+
// The summarizer prompt serializes ALL discarded history (everything older
|
|
21
|
+
// than ~20k of recent tokens) plus scaffolding plus previous summary, then
|
|
22
|
+
// asks the model to summarize. If the prompt itself overflows the model
|
|
23
|
+
// context, compact() throws. 50k is the empirical headroom that fits a
|
|
24
|
+
// typical summarization prompt on the Claude family of models.
|
|
25
|
+
const COMPACT_HEADROOM_TOKENS = 50_000;
|
|
26
|
+
|
|
27
|
+
// Why 130k/150k as the default absolute thresholds against a 200k window:
|
|
28
|
+
// see COMPACT_HEADROOM_TOKENS above and
|
|
29
|
+
// ~/.roundhouse/workspace/compaction-loop-diagnosis.md (Bug B).
|
|
30
|
+
// For smaller-window models, classifyContextPressure() clamps the absolute
|
|
31
|
+
// thresholds to `window - HEADROOM` so they never exceed the window.
|
|
32
|
+
|
|
19
33
|
// ── Injection policy ─────────────────────────────────
|
|
20
34
|
|
|
21
35
|
export interface InjectionDecision {
|
|
@@ -87,14 +101,21 @@ export function classifyContextPressure(
|
|
|
87
101
|
|
|
88
102
|
const pctDecimal = percent != null ? percent / 100 : tokens / window;
|
|
89
103
|
|
|
104
|
+
// Clamp absolute thresholds so they never exceed `window - HEADROOM`.
|
|
105
|
+
// Defends against future smaller-window models where the configured
|
|
106
|
+
// 150k/130k absolute thresholds would otherwise sit above the window.
|
|
107
|
+
// The percent thresholds already scale with window naturally.
|
|
108
|
+
const headroom = COMPACT_HEADROOM_TOKENS;
|
|
109
|
+
const ceiling = Math.max(0, window - headroom);
|
|
110
|
+
|
|
90
111
|
// Hard threshold
|
|
91
112
|
const hardPct = config?.hardPercent ?? DEFAULT_HARD_PERCENT;
|
|
92
|
-
const hardTok = config?.hardTokens ?? DEFAULT_HARD_TOKENS;
|
|
113
|
+
const hardTok = Math.min(config?.hardTokens ?? DEFAULT_HARD_TOKENS, ceiling);
|
|
93
114
|
if (pctDecimal >= hardPct || tokens >= hardTok) return "hard";
|
|
94
115
|
|
|
95
|
-
// Soft threshold
|
|
116
|
+
// Soft threshold (clamped one step below hard so soft fires first).
|
|
96
117
|
const softPct = config?.softPercent ?? DEFAULT_SOFT_PERCENT;
|
|
97
|
-
const softTok = config?.softTokens ?? DEFAULT_SOFT_TOKENS;
|
|
118
|
+
const softTok = Math.min(config?.softTokens ?? DEFAULT_SOFT_TOKENS, Math.max(0, hardTok - 1));
|
|
98
119
|
if (pctDecimal >= softPct || tokens >= softTok) return "soft";
|
|
99
120
|
|
|
100
121
|
return "none";
|
package/src/memory/types.ts
CHANGED
|
@@ -56,7 +56,7 @@ export interface ThreadMemoryState {
|
|
|
56
56
|
/** Local date when memory was last injected (detects day boundary) */
|
|
57
57
|
lastSeenLocalDate?: string;
|
|
58
58
|
/** Force re-injection on next turn */
|
|
59
|
-
forceInjectReason?: "new-session" | "after-compact" | "manual";
|
|
59
|
+
forceInjectReason?: "new-session" | "after-compact" | "after-soft-reset" | "manual";
|
|
60
60
|
/** When last compaction happened */
|
|
61
61
|
lastCompactAt?: string;
|
|
62
62
|
/** Pending compaction level (from interrupted flush) */
|
package/src/types.ts
CHANGED
|
@@ -122,6 +122,17 @@ export interface AgentAdapter {
|
|
|
122
122
|
/** Compact with a specific model. */
|
|
123
123
|
compactWithModel?(threadId: string, modelId: string): Promise<{ tokensBefore: number; tokensAfter: number | null } | null>;
|
|
124
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Soft-reset an overflowed session by trimming on-disk history to the
|
|
127
|
+
* most-recent few turns. Called by memory lifecycle when compact() fails
|
|
128
|
+
* because the session itself is too large for the model's context window.
|
|
129
|
+
*
|
|
130
|
+
* Returns a report describing what was trimmed (shape is adapter-specific
|
|
131
|
+
* but always has `reset: boolean`), or null if not applicable.
|
|
132
|
+
* Adapters without on-disk sessions (in-memory only) should return null.
|
|
133
|
+
*/
|
|
134
|
+
softReset?(threadId: string): Promise<{ reset: boolean } | null>;
|
|
135
|
+
|
|
125
136
|
/** Abort the current agent run for a thread. */
|
|
126
137
|
abort?(threadId: string): Promise<void>;
|
|
127
138
|
|