@inceptionstack/roundhouse 0.5.30 → 0.5.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/package.json +1 -1
- package/src/agents/pi/pi-adapter.ts +3 -1
- package/src/agents/shared/error-classifiers.ts +71 -0
- package/src/agents/shared/session-repair-internal.ts +239 -0
- package/src/agents/shared/session-repair.test.ts +20 -3
- package/src/agents/shared/session-repair.ts +33 -526
- package/src/agents/shared/session-soft-reset.ts +120 -0
- package/src/memory/lifecycle.ts +40 -32
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,26 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to `@inceptionstack/roundhouse` are documented here.
|
|
4
4
|
|
|
5
|
+
## [0.5.32] — 2026-05-14
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- **Soft-reset progress: emit completion message, not just start.** Before, the user saw `♻️ Session overflowed — soft-resetting to recent turns...` and then silence — success/failure outcomes only went to stderr. Now the user always sees a follow-up:
|
|
9
|
+
- ✅ `Soft-reset complete (N → M entries). Durable memory will re-inject on next turn.` on success
|
|
10
|
+
- ⚠️ `Soft-reset no-op (<reason>). Will retry compact next turn.` when nothing to trim
|
|
11
|
+
- ❌ `Soft-reset failed: <msg>. Will retry next turn.` when recovery itself errors
|
|
12
|
+
- 3 new tests verifying onProgress emissions for all three outcomes (`emergency_whenSoftResetSucceeds_emitsCompletionProgressMessage`, `..._emitsNoOpProgressMessage`, `..._emitsFailureProgressMessage`), plus 1 regression test (`..._doesNotMaskWithTypeError`) for non-Error throws inside the recovery catch. **540 tests passing.**
|
|
13
|
+
|
|
14
|
+
## [0.5.31] — 2026-05-14
|
|
15
|
+
|
|
16
|
+
### Internal
|
|
17
|
+
- **Refactor: session-repair module split + DRY shared error-classifier helper.** Pure refactor, zero behavior change. Addresses 7 maintainability findings from the post-v0.5.30 review:
|
|
18
|
+
- Extracted `matchesErrorPatterns()` shared helper so `isContextOverflowError` and `isToolPairingError` no longer duplicate ~80% of their structure. Both classifiers now walk the `cause` chain (previously only the overflow classifier did — fixed divergent-change smell). Both share `looksLikeValidationError()` gating.
|
|
19
|
+
- Extracted `buildTrimmedEntries()` from `softResetSessionFile` and `attemptSoftResetRecovery()` from `flushMemoryThenCompact`. The lifecycle catch block is now ~25 lines of linear flow (classify → recover → log → persist) instead of ~60 lines with a nested try/catch.
|
|
20
|
+
- `MAX_CAUSE_CHAIN_DEPTH = 5` named constant.
|
|
21
|
+
- Split `src/agents/shared/session-repair.ts` (574 lines, two domains) into four focused files: `session-repair.ts` (81 lines, public surface), `session-soft-reset.ts`, `error-classifiers.ts`, `session-repair-internal.ts`. All public exports preserved via re-exports for backward compat.
|
|
22
|
+
- Introduced `SessionRepairResult` named type replacing anonymous `{entries, report}` shape (named to avoid collision with the existing `RepairResult` in `message-validator.ts`).
|
|
23
|
+
- 2 new regression tests for `isToolPairingError`'s now-fixed cause-chain walking. **536 tests passing.**
|
|
24
|
+
|
|
5
25
|
## [0.5.30] — 2026-05-14
|
|
6
26
|
|
|
7
27
|
### Fixed
|
package/package.json
CHANGED
|
@@ -28,7 +28,9 @@ import {
|
|
|
28
28
|
|
|
29
29
|
import type { AgentAdapter, AgentAdapterFactory, AgentMessage, AgentResponse, AgentStreamEvent, MessageContext } from "../../types";
|
|
30
30
|
import { formatMessage, extractCustomMessage, customContentToText } from "./message-format";
|
|
31
|
-
import { isToolPairingError
|
|
31
|
+
import { isToolPairingError } from "../shared/error-classifiers";
|
|
32
|
+
import { repairSessionFile } from "../shared/session-repair";
|
|
33
|
+
import { softResetSessionFile, type SoftResetReport } from "../shared/session-soft-reset";
|
|
32
34
|
import { SESSIONS_DIR } from "../../config";
|
|
33
35
|
import { DEBUG_STREAM, threadIdToDir } from "../../util";
|
|
34
36
|
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
export const MAX_CAUSE_CHAIN_DEPTH = 5;
|
|
2
|
+
|
|
3
|
+
interface ErrorPatternMatchOptions {
|
|
4
|
+
stringifyGate?: (err: unknown) => boolean;
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Stringify-search gate: only walk serialized error fields when the error
|
|
9
|
+
* looks like a 4xx / Bedrock ValidationException. Avoids false-positives
|
|
10
|
+
* from unrelated 5xx noise that happens to contain trigger phrases.
|
|
11
|
+
*/
|
|
12
|
+
function looksLikeValidationError(err: unknown): boolean {
|
|
13
|
+
const name = (err as { name?: string }).name ?? '';
|
|
14
|
+
const httpStatus =
|
|
15
|
+
(err as { $metadata?: { httpStatusCode?: number } }).$metadata?.httpStatusCode;
|
|
16
|
+
return name === 'ValidationException' || httpStatus === 400;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export function matchesErrorPatterns(
|
|
20
|
+
err: unknown,
|
|
21
|
+
patterns: RegExp[],
|
|
22
|
+
options: ErrorPatternMatchOptions = {},
|
|
23
|
+
): boolean {
|
|
24
|
+
if (!err) return false;
|
|
25
|
+
|
|
26
|
+
const matches = (value: unknown): boolean => {
|
|
27
|
+
const message = (value as { message?: string }).message ?? String(value);
|
|
28
|
+
return patterns.some(pattern => pattern.test(message));
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
if (matches(err)) return true;
|
|
32
|
+
|
|
33
|
+
let current: unknown = (err as { cause?: unknown }).cause;
|
|
34
|
+
for (let depth = 0; depth < MAX_CAUSE_CHAIN_DEPTH && current; depth++) {
|
|
35
|
+
if (matches(current)) return true;
|
|
36
|
+
current = (current as { cause?: unknown }).cause;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
if (!options.stringifyGate?.(err)) {
|
|
40
|
+
return false;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
try {
|
|
44
|
+
const serialized = JSON.stringify(err);
|
|
45
|
+
return patterns.some(pattern => pattern.test(serialized));
|
|
46
|
+
} catch {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export function isContextOverflowError(err: unknown): boolean {
|
|
52
|
+
const patterns = [
|
|
53
|
+
/prompt is too long/i,
|
|
54
|
+
/tokens?\s*[>>]\s*\d+\s*maximum/i,
|
|
55
|
+
/input is too long/i,
|
|
56
|
+
/context length exceeded/i,
|
|
57
|
+
/maximum context length/i,
|
|
58
|
+
];
|
|
59
|
+
return matchesErrorPatterns(err, patterns, { stringifyGate: looksLikeValidationError });
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export function isToolPairingError(err: unknown): boolean {
|
|
63
|
+
const patterns = [
|
|
64
|
+
/tool_use.*without.*tool_result/i,
|
|
65
|
+
/tool_result.*without.*tool_use/i,
|
|
66
|
+
/toolUse.*without.*toolResult/i,
|
|
67
|
+
/unmatched.*tool.?use/i,
|
|
68
|
+
/orphan.*tool/i,
|
|
69
|
+
];
|
|
70
|
+
return matchesErrorPatterns(err, patterns, { stringifyGate: looksLikeValidationError });
|
|
71
|
+
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, renameSync, existsSync, copyFileSync } from 'node:fs';
|
|
2
|
+
import { dirname, basename, join } from 'node:path';
|
|
3
|
+
import { validateToolPairing } from './message-validator';
|
|
4
|
+
import type { Message, ToolCall, AssistantMessage, ToolResultMessage } from '@earendil-works/pi-ai';
|
|
5
|
+
|
|
6
|
+
/** Minimal structural type for a pi-ai session file entry (we only touch message entries). */
|
|
7
|
+
export interface SessionFileEntry {
|
|
8
|
+
type: string;
|
|
9
|
+
id?: string;
|
|
10
|
+
parentId?: string | null;
|
|
11
|
+
message?: Message;
|
|
12
|
+
// other fields preserved as-is
|
|
13
|
+
[key: string]: unknown;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export interface SessionRepairReport {
|
|
17
|
+
repaired: boolean;
|
|
18
|
+
droppedEntryIds: string[];
|
|
19
|
+
droppedToolCallIds: string[];
|
|
20
|
+
droppedToolResultIds: string[];
|
|
21
|
+
backupPath?: string;
|
|
22
|
+
totalEntries: number;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export interface SessionRepairResult {
|
|
26
|
+
entries: SessionFileEntry[];
|
|
27
|
+
report: SessionRepairReport;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/** Parse a .jsonl session file. Tolerant of trailing blank lines. Throws on malformed JSON. */
|
|
31
|
+
export function parseSessionFile(path: string): SessionFileEntry[] {
|
|
32
|
+
const raw = readFileSync(path, 'utf8');
|
|
33
|
+
const lines = raw.split('\n');
|
|
34
|
+
const entries: SessionFileEntry[] = [];
|
|
35
|
+
for (let i = 0; i < lines.length; i++) {
|
|
36
|
+
const line = lines[i];
|
|
37
|
+
if (!line.trim()) continue;
|
|
38
|
+
try {
|
|
39
|
+
entries.push(JSON.parse(line) as SessionFileEntry);
|
|
40
|
+
} catch (err) {
|
|
41
|
+
throw new Error(`Session file parse error at line ${i + 1}: ${(err as Error).message}`);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
return entries;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Extract `Message[]` from file entries in the order they appear.
|
|
49
|
+
* Only includes entries of type "message" (skips session header, model_change, etc).
|
|
50
|
+
*/
|
|
51
|
+
function extractMessages(entries: SessionFileEntry[]): { messages: Message[]; entryIndex: number[] } {
|
|
52
|
+
const messages: Message[] = [];
|
|
53
|
+
const entryIndex: number[] = [];
|
|
54
|
+
for (let i = 0; i < entries.length; i++) {
|
|
55
|
+
const entry = entries[i];
|
|
56
|
+
if (entry.type === 'message' && entry.message) {
|
|
57
|
+
messages.push(entry.message);
|
|
58
|
+
entryIndex.push(i);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
return { messages, entryIndex };
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Re-parent children of dropped entries to preserve tree validity.
|
|
66
|
+
* If entry X is dropped and entry Y has parentId=X, set Y.parentId = X.parentId.
|
|
67
|
+
*/
|
|
68
|
+
function reparentDroppedEntries(
|
|
69
|
+
entries: SessionFileEntry[],
|
|
70
|
+
droppedEntryIds: Set<string>
|
|
71
|
+
): SessionFileEntry[] {
|
|
72
|
+
const entryById = new Map<string, SessionFileEntry>();
|
|
73
|
+
for (const entry of entries) {
|
|
74
|
+
if (entry.id) entryById.set(entry.id, entry);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const remap = new Map<string, string | null>();
|
|
78
|
+
const resolveAncestor = (id: string, visited: Set<string> = new Set()): string | null => {
|
|
79
|
+
if (remap.has(id)) return remap.get(id)!;
|
|
80
|
+
if (!droppedEntryIds.has(id)) return id;
|
|
81
|
+
if (visited.has(id)) {
|
|
82
|
+
remap.set(id, null);
|
|
83
|
+
return null;
|
|
84
|
+
}
|
|
85
|
+
visited.add(id);
|
|
86
|
+
const entry = entryById.get(id);
|
|
87
|
+
const parent = entry?.parentId ?? null;
|
|
88
|
+
const resolved = parent === null ? null : resolveAncestor(parent, visited);
|
|
89
|
+
remap.set(id, resolved);
|
|
90
|
+
return resolved;
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
const kept: SessionFileEntry[] = [];
|
|
94
|
+
for (const entry of entries) {
|
|
95
|
+
if (entry.id && droppedEntryIds.has(entry.id)) continue;
|
|
96
|
+
if (entry.parentId && droppedEntryIds.has(entry.parentId)) {
|
|
97
|
+
kept.push({ ...entry, parentId: resolveAncestor(entry.parentId) });
|
|
98
|
+
} else {
|
|
99
|
+
kept.push(entry);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
return kept;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Compute the set of entry IDs to drop based on orphaned tool IDs.
|
|
107
|
+
*
|
|
108
|
+
* - Orphaned toolResult message → drop the whole entry
|
|
109
|
+
* - Orphaned toolCall inside an assistant message → drop the entry only if the
|
|
110
|
+
* toolCall was the *only* content block (otherwise keep the entry with the
|
|
111
|
+
* block stripped; handled separately in applyEntryEdits)
|
|
112
|
+
*/
|
|
113
|
+
function findEntriesToDrop(
|
|
114
|
+
entries: SessionFileEntry[],
|
|
115
|
+
orphanedToolCallIds: Set<string>,
|
|
116
|
+
orphanedToolResultIds: Set<string>
|
|
117
|
+
): { entriesToDrop: Set<string>; entriesToEdit: Map<string, string[]> } {
|
|
118
|
+
const entriesToDrop = new Set<string>();
|
|
119
|
+
const entriesToEdit = new Map<string, string[]>();
|
|
120
|
+
|
|
121
|
+
for (const entry of entries) {
|
|
122
|
+
if (entry.type !== 'message' || !entry.message || !entry.id) continue;
|
|
123
|
+
const message = entry.message;
|
|
124
|
+
|
|
125
|
+
if (message.role === 'toolResult') {
|
|
126
|
+
const toolResult = message as ToolResultMessage;
|
|
127
|
+
if (orphanedToolResultIds.has(toolResult.toolCallId)) {
|
|
128
|
+
entriesToDrop.add(entry.id);
|
|
129
|
+
}
|
|
130
|
+
continue;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
if (message.role === 'assistant') {
|
|
134
|
+
const assistantMessage = message as AssistantMessage;
|
|
135
|
+
const orphanCallIds: string[] = [];
|
|
136
|
+
let hasNonOrphanContent = false;
|
|
137
|
+
for (const block of assistantMessage.content) {
|
|
138
|
+
if ((block as ToolCall).type === 'toolCall') {
|
|
139
|
+
const callId = (block as ToolCall).id;
|
|
140
|
+
if (orphanedToolCallIds.has(callId)) {
|
|
141
|
+
orphanCallIds.push(callId);
|
|
142
|
+
} else {
|
|
143
|
+
hasNonOrphanContent = true;
|
|
144
|
+
}
|
|
145
|
+
} else {
|
|
146
|
+
hasNonOrphanContent = true;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
if (orphanCallIds.length === 0) continue;
|
|
150
|
+
if (hasNonOrphanContent) {
|
|
151
|
+
entriesToEdit.set(entry.id, orphanCallIds);
|
|
152
|
+
} else {
|
|
153
|
+
entriesToDrop.add(entry.id);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
return { entriesToDrop, entriesToEdit };
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/** Apply in-place edits to assistant entries: strip orphaned toolCall blocks. */
|
|
162
|
+
function applyEntryEdits(
|
|
163
|
+
entries: SessionFileEntry[],
|
|
164
|
+
entriesToEdit: Map<string, string[]>
|
|
165
|
+
): SessionFileEntry[] {
|
|
166
|
+
if (entriesToEdit.size === 0) return entries;
|
|
167
|
+
return entries.map(entry => {
|
|
168
|
+
if (!entry.id || !entriesToEdit.has(entry.id)) return entry;
|
|
169
|
+
const orphanIds = new Set(entriesToEdit.get(entry.id)!);
|
|
170
|
+
const message = entry.message as AssistantMessage;
|
|
171
|
+
const cleanedContent = message.content.filter(block => {
|
|
172
|
+
if ((block as ToolCall).type === 'toolCall') {
|
|
173
|
+
return !orphanIds.has((block as ToolCall).id);
|
|
174
|
+
}
|
|
175
|
+
return true;
|
|
176
|
+
});
|
|
177
|
+
return { ...entry, message: { ...message, content: cleanedContent } };
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
/** Atomic write: tmp file + rename. Preserves partial-failure safety. */
|
|
182
|
+
export function atomicWrite(path: string, content: string): void {
|
|
183
|
+
const tmp = `${path}.tmp-${process.pid}-${Date.now()}`;
|
|
184
|
+
writeFileSync(tmp, content, { encoding: 'utf8' });
|
|
185
|
+
renameSync(tmp, path);
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
/** Back up the original file before mutation. Returns the backup path. */
|
|
189
|
+
export function backupFile(path: string): string {
|
|
190
|
+
const ts = Date.now();
|
|
191
|
+
const backupPath = join(dirname(path), `${basename(path)}.bak-${ts}`);
|
|
192
|
+
copyFileSync(path, backupPath);
|
|
193
|
+
return backupPath;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Pure in-memory tool-pairing repair. Takes entries, returns repaired entries
|
|
198
|
+
* + a report. Does not touch the filesystem.
|
|
199
|
+
*/
|
|
200
|
+
export function repairEntriesInMemory(entries: SessionFileEntry[]): SessionRepairResult {
|
|
201
|
+
const { messages } = extractMessages(entries);
|
|
202
|
+
const validation = validateToolPairing(messages);
|
|
203
|
+
|
|
204
|
+
if (validation.isValid) {
|
|
205
|
+
return {
|
|
206
|
+
entries,
|
|
207
|
+
report: {
|
|
208
|
+
repaired: false,
|
|
209
|
+
droppedEntryIds: [],
|
|
210
|
+
droppedToolCallIds: [],
|
|
211
|
+
droppedToolResultIds: [],
|
|
212
|
+
totalEntries: entries.length,
|
|
213
|
+
},
|
|
214
|
+
};
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
const orphanedCalls = new Set(validation.orphanedToolCallIds);
|
|
218
|
+
const orphanedResults = new Set(validation.orphanedToolResultIds);
|
|
219
|
+
const { entriesToDrop, entriesToEdit } = findEntriesToDrop(entries, orphanedCalls, orphanedResults);
|
|
220
|
+
const edited = applyEntryEdits(entries, entriesToEdit);
|
|
221
|
+
const kept = reparentDroppedEntries(edited, entriesToDrop);
|
|
222
|
+
|
|
223
|
+
return {
|
|
224
|
+
entries: kept,
|
|
225
|
+
report: {
|
|
226
|
+
repaired: true,
|
|
227
|
+
droppedEntryIds: Array.from(entriesToDrop),
|
|
228
|
+
droppedToolCallIds: validation.orphanedToolCallIds,
|
|
229
|
+
droppedToolResultIds: validation.orphanedToolResultIds,
|
|
230
|
+
totalEntries: entries.length,
|
|
231
|
+
},
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
export function assertSessionFileExists(path: string): void {
|
|
236
|
+
if (!existsSync(path)) {
|
|
237
|
+
throw new Error(`Session file not found: ${path}`);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
@@ -10,10 +10,9 @@ import {
|
|
|
10
10
|
parseSessionFile,
|
|
11
11
|
inspectSessionFile,
|
|
12
12
|
repairSessionFile,
|
|
13
|
-
isToolPairingError,
|
|
14
|
-
softResetSessionFile,
|
|
15
|
-
isContextOverflowError,
|
|
16
13
|
} from './session-repair';
|
|
14
|
+
import { isToolPairingError, isContextOverflowError } from './error-classifiers';
|
|
15
|
+
import { softResetSessionFile } from './session-soft-reset';
|
|
17
16
|
|
|
18
17
|
// ---------- fixtures ----------
|
|
19
18
|
|
|
@@ -306,6 +305,24 @@ describe('session-repair', () => {
|
|
|
306
305
|
expect(isToolPairingError(err)).toBe(true);
|
|
307
306
|
});
|
|
308
307
|
|
|
308
|
+
it('matches wrapped Bedrock ValidationException through cause chain', () => {
|
|
309
|
+
const err = new Error('session resume failed', {
|
|
310
|
+
cause: Object.assign(new Error('Request failed with status 400'), {
|
|
311
|
+
name: 'ValidationException',
|
|
312
|
+
$metadata: { httpStatusCode: 400 },
|
|
313
|
+
cause: { message: 'messages.3: `tool_use` ids were found without `tool_result` blocks immediately after' },
|
|
314
|
+
}),
|
|
315
|
+
});
|
|
316
|
+
expect(isToolPairingError(err)).toBe(true);
|
|
317
|
+
});
|
|
318
|
+
|
|
319
|
+
it('matches wrapped tool pairing text from a nested cause without stringify fallback', () => {
|
|
320
|
+
const err = new Error('session resume failed', {
|
|
321
|
+
cause: new Error('toolUse id abc123 without matching toolResult'),
|
|
322
|
+
});
|
|
323
|
+
expect(isToolPairingError(err)).toBe(true);
|
|
324
|
+
});
|
|
325
|
+
|
|
309
326
|
it('does not match unrelated 400s', () => {
|
|
310
327
|
const err = new Error('Invalid model ID');
|
|
311
328
|
expect(isToolPairingError(err)).toBe(false);
|
|
@@ -1,216 +1,33 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* session-repair.ts — File-level
|
|
3
|
-
*
|
|
4
|
-
* Pi-ai persists sessions as JSONL at ~/.roundhouse/sessions/<thread>/<id>.jsonl.
|
|
5
|
-
* Each line is a `FileEntry` in a tree (parentId links). Message entries wrap
|
|
6
|
-
* pi-ai `Message` objects (role: user | assistant | toolResult).
|
|
7
|
-
*
|
|
8
|
-
* Corruption scenarios (mid-session):
|
|
9
|
-
* - Tool execution aborted → toolCall entry written, toolResult never lands
|
|
10
|
-
* - Process crash between tool completion and result persist
|
|
11
|
-
* - Manual Ctrl-C mid-tool
|
|
12
|
-
*
|
|
13
|
-
* On next resume, pi-ai loads these entries → sends history to the model →
|
|
14
|
-
* model rejects with "toolUse without toolResult" (Bedrock/Anthropic 400).
|
|
15
|
-
*
|
|
16
|
-
* This module detects and repairs orphaned tool pairs at the file level,
|
|
17
|
-
* preserving the parentId tree by re-parenting children of dropped entries.
|
|
18
|
-
*
|
|
19
|
-
* Delegates tool-pairing logic to message-validator.ts.
|
|
20
|
-
*/
|
|
21
|
-
|
|
22
|
-
import { readFileSync, writeFileSync, renameSync, existsSync, copyFileSync } from 'node:fs';
|
|
23
|
-
import { dirname, basename, join } from 'node:path';
|
|
24
|
-
import { validateToolPairing } from './message-validator.js';
|
|
25
|
-
import type { Message, ToolCall, AssistantMessage, ToolResultMessage } from '@earendil-works/pi-ai';
|
|
26
|
-
|
|
27
|
-
/** Minimal structural type for a pi-ai session file entry (we only touch message entries). */
|
|
28
|
-
interface SessionFileEntry {
|
|
29
|
-
type: string;
|
|
30
|
-
id?: string;
|
|
31
|
-
parentId?: string | null;
|
|
32
|
-
message?: Message;
|
|
33
|
-
// other fields preserved as-is
|
|
34
|
-
[key: string]: unknown;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
export interface SessionRepairReport {
|
|
38
|
-
repaired: boolean;
|
|
39
|
-
droppedEntryIds: string[];
|
|
40
|
-
droppedToolCallIds: string[];
|
|
41
|
-
droppedToolResultIds: string[];
|
|
42
|
-
backupPath?: string;
|
|
43
|
-
totalEntries: number;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
/** Parse a .jsonl session file. Tolerant of trailing blank lines. Throws on malformed JSON. */
|
|
47
|
-
export function parseSessionFile(path: string): SessionFileEntry[] {
|
|
48
|
-
const raw = readFileSync(path, 'utf8');
|
|
49
|
-
const lines = raw.split('\n');
|
|
50
|
-
const entries: SessionFileEntry[] = [];
|
|
51
|
-
for (let i = 0; i < lines.length; i++) {
|
|
52
|
-
const line = lines[i];
|
|
53
|
-
if (!line.trim()) continue;
|
|
54
|
-
try {
|
|
55
|
-
entries.push(JSON.parse(line) as SessionFileEntry);
|
|
56
|
-
} catch (err) {
|
|
57
|
-
throw new Error(`Session file parse error at line ${i + 1}: ${(err as Error).message}`);
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
return entries;
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
/**
|
|
64
|
-
* Extract `Message[]` from file entries in the order they appear.
|
|
65
|
-
* Only includes entries of type "message" (skips session header, model_change, etc).
|
|
66
|
-
*/
|
|
67
|
-
function extractMessages(entries: SessionFileEntry[]): { messages: Message[]; entryIndex: number[] } {
|
|
68
|
-
const messages: Message[] = [];
|
|
69
|
-
const entryIndex: number[] = []; // parallel array: messages[i] came from entries[entryIndex[i]]
|
|
70
|
-
for (let i = 0; i < entries.length; i++) {
|
|
71
|
-
const e = entries[i];
|
|
72
|
-
if (e.type === 'message' && e.message) {
|
|
73
|
-
messages.push(e.message);
|
|
74
|
-
entryIndex.push(i);
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
return { messages, entryIndex };
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
/**
|
|
81
|
-
* Re-parent children of dropped entries to preserve tree validity.
|
|
82
|
-
* If entry X is dropped and entry Y has parentId=X, set Y.parentId = X.parentId.
|
|
83
|
-
*/
|
|
84
|
-
function reparentDroppedEntries(
|
|
85
|
-
entries: SessionFileEntry[],
|
|
86
|
-
droppedEntryIds: Set<string>
|
|
87
|
-
): SessionFileEntry[] {
|
|
88
|
-
// Build a map: droppedId → nearest non-dropped ancestor (walk up the tree)
|
|
89
|
-
const entryById = new Map<string, SessionFileEntry>();
|
|
90
|
-
for (const e of entries) {
|
|
91
|
-
if (e.id) entryById.set(e.id, e);
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
const remap = new Map<string, string | null>();
|
|
95
|
-
const resolveAncestor = (id: string, visited: Set<string> = new Set()): string | null => {
|
|
96
|
-
if (remap.has(id)) return remap.get(id)!;
|
|
97
|
-
if (!droppedEntryIds.has(id)) return id;
|
|
98
|
-
if (visited.has(id)) {
|
|
99
|
-
// Cycle in parentId chain (self-parent or loop) — bail with null rather than
|
|
100
|
-
// blow the stack. Should never happen in a well-formed session file.
|
|
101
|
-
remap.set(id, null);
|
|
102
|
-
return null;
|
|
103
|
-
}
|
|
104
|
-
visited.add(id);
|
|
105
|
-
const e = entryById.get(id);
|
|
106
|
-
const parent = e?.parentId ?? null;
|
|
107
|
-
const resolved = parent === null ? null : resolveAncestor(parent, visited);
|
|
108
|
-
remap.set(id, resolved);
|
|
109
|
-
return resolved;
|
|
110
|
-
};
|
|
111
|
-
|
|
112
|
-
const kept: SessionFileEntry[] = [];
|
|
113
|
-
for (const e of entries) {
|
|
114
|
-
if (e.id && droppedEntryIds.has(e.id)) continue;
|
|
115
|
-
if (e.parentId && droppedEntryIds.has(e.parentId)) {
|
|
116
|
-
kept.push({ ...e, parentId: resolveAncestor(e.parentId) });
|
|
117
|
-
} else {
|
|
118
|
-
kept.push(e);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
return kept;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
/**
|
|
125
|
-
* Compute the set of entry IDs to drop based on orphaned tool IDs.
|
|
126
|
-
*
|
|
127
|
-
* - Orphaned toolResult message → drop the whole entry
|
|
128
|
-
* - Orphaned toolCall inside an assistant message → drop the entry only if the
|
|
129
|
-
* toolCall was the *only* content block (otherwise keep the entry with the
|
|
130
|
-
* block stripped; handled separately in applyEntryEdits)
|
|
2
|
+
* session-repair.ts — File-level repair for orphaned toolCall/toolResult pairs.
|
|
131
3
|
*/
|
|
132
|
-
function findEntriesToDrop(
|
|
133
|
-
entries: SessionFileEntry[],
|
|
134
|
-
orphanedToolCallIds: Set<string>,
|
|
135
|
-
orphanedToolResultIds: Set<string>
|
|
136
|
-
): { entriesToDrop: Set<string>; entriesToEdit: Map<string, string[]> } {
|
|
137
|
-
const entriesToDrop = new Set<string>();
|
|
138
|
-
const entriesToEdit = new Map<string, string[]>(); // entryId → toolCallIds to strip
|
|
139
|
-
|
|
140
|
-
for (const e of entries) {
|
|
141
|
-
if (e.type !== 'message' || !e.message || !e.id) continue;
|
|
142
|
-
const msg = e.message;
|
|
143
|
-
|
|
144
|
-
if (msg.role === 'toolResult') {
|
|
145
|
-
const tr = msg as ToolResultMessage;
|
|
146
|
-
if (orphanedToolResultIds.has(tr.toolCallId)) {
|
|
147
|
-
entriesToDrop.add(e.id);
|
|
148
|
-
}
|
|
149
|
-
continue;
|
|
150
|
-
}
|
|
151
4
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
/** Apply in-place edits to assistant entries: strip orphaned toolCall blocks. */
|
|
181
|
-
function applyEntryEdits(
|
|
182
|
-
entries: SessionFileEntry[],
|
|
183
|
-
entriesToEdit: Map<string, string[]>
|
|
184
|
-
): SessionFileEntry[] {
|
|
185
|
-
if (entriesToEdit.size === 0) return entries;
|
|
186
|
-
return entries.map(e => {
|
|
187
|
-
if (!e.id || !entriesToEdit.has(e.id)) return e;
|
|
188
|
-
const orphanIds = new Set(entriesToEdit.get(e.id)!);
|
|
189
|
-
const msg = e.message as AssistantMessage;
|
|
190
|
-
const cleanedContent = msg.content.filter(block => {
|
|
191
|
-
if ((block as ToolCall).type === 'toolCall') {
|
|
192
|
-
return !orphanIds.has((block as ToolCall).id);
|
|
193
|
-
}
|
|
194
|
-
return true;
|
|
195
|
-
});
|
|
196
|
-
return { ...e, message: { ...msg, content: cleanedContent } };
|
|
197
|
-
});
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
/** Atomic write: tmp file + rename. Preserves partial-failure safety. */
|
|
201
|
-
function atomicWrite(path: string, content: string): void {
|
|
202
|
-
const tmp = `${path}.tmp-${process.pid}-${Date.now()}`;
|
|
203
|
-
writeFileSync(tmp, content, { encoding: 'utf8' });
|
|
204
|
-
renameSync(tmp, path);
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
/** Back up the original file before mutation. Returns the backup path. */
|
|
208
|
-
function backupFile(path: string): string {
|
|
209
|
-
const ts = Date.now();
|
|
210
|
-
const backupPath = join(dirname(path), `${basename(path)}.bak-${ts}`);
|
|
211
|
-
copyFileSync(path, backupPath);
|
|
212
|
-
return backupPath;
|
|
213
|
-
}
|
|
5
|
+
import { validateToolPairing } from './message-validator';
|
|
6
|
+
import {
|
|
7
|
+
assertSessionFileExists,
|
|
8
|
+
atomicWrite,
|
|
9
|
+
backupFile,
|
|
10
|
+
parseSessionFile,
|
|
11
|
+
repairEntriesInMemory,
|
|
12
|
+
} from './session-repair-internal';
|
|
13
|
+
|
|
14
|
+
export { parseSessionFile } from './session-repair-internal';
|
|
15
|
+
export type {
|
|
16
|
+
SessionRepairResult,
|
|
17
|
+
SessionFileEntry,
|
|
18
|
+
SessionRepairReport,
|
|
19
|
+
} from './session-repair-internal';
|
|
20
|
+
export {
|
|
21
|
+
MAX_CAUSE_CHAIN_DEPTH,
|
|
22
|
+
isContextOverflowError,
|
|
23
|
+
isToolPairingError,
|
|
24
|
+
matchesErrorPatterns,
|
|
25
|
+
} from './error-classifiers';
|
|
26
|
+
export {
|
|
27
|
+
softResetSessionFile,
|
|
28
|
+
type SoftResetOptions,
|
|
29
|
+
type SoftResetReport,
|
|
30
|
+
} from './session-soft-reset';
|
|
214
31
|
|
|
215
32
|
/**
|
|
216
33
|
* Validate a session file for orphaned tool pairs without modifying it.
|
|
@@ -224,7 +41,9 @@ export function inspectSessionFile(path: string): {
|
|
|
224
41
|
totalMessages: number;
|
|
225
42
|
} {
|
|
226
43
|
const entries = parseSessionFile(path);
|
|
227
|
-
const
|
|
44
|
+
const messages = entries
|
|
45
|
+
.filter(entry => entry.type === 'message' && entry.message)
|
|
46
|
+
.map(entry => entry.message!);
|
|
228
47
|
const validation = validateToolPairing(messages);
|
|
229
48
|
return {
|
|
230
49
|
hasOrphans: !validation.isValid,
|
|
@@ -246,54 +65,8 @@ export function inspectSessionFile(path: string): {
|
|
|
246
65
|
*
|
|
247
66
|
* @returns report describing what was repaired
|
|
248
67
|
*/
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
* + a report. Does not touch the filesystem. Used directly by
|
|
252
|
-
* `softResetSessionFile` so trim + repair land as a single atomic write, and
|
|
253
|
-
* via a thin wrapper by `repairSessionFile` for on-disk repair.
|
|
254
|
-
*/
|
|
255
|
-
function repairEntriesInMemory(entries: SessionFileEntry[]): {
|
|
256
|
-
entries: SessionFileEntry[];
|
|
257
|
-
report: SessionRepairReport;
|
|
258
|
-
} {
|
|
259
|
-
const { messages } = extractMessages(entries);
|
|
260
|
-
const validation = validateToolPairing(messages);
|
|
261
|
-
|
|
262
|
-
if (validation.isValid) {
|
|
263
|
-
return {
|
|
264
|
-
entries,
|
|
265
|
-
report: {
|
|
266
|
-
repaired: false,
|
|
267
|
-
droppedEntryIds: [],
|
|
268
|
-
droppedToolCallIds: [],
|
|
269
|
-
droppedToolResultIds: [],
|
|
270
|
-
totalEntries: entries.length,
|
|
271
|
-
},
|
|
272
|
-
};
|
|
273
|
-
}
|
|
274
|
-
|
|
275
|
-
const orphanedCalls = new Set(validation.orphanedToolCallIds);
|
|
276
|
-
const orphanedResults = new Set(validation.orphanedToolResultIds);
|
|
277
|
-
const { entriesToDrop, entriesToEdit } = findEntriesToDrop(entries, orphanedCalls, orphanedResults);
|
|
278
|
-
const edited = applyEntryEdits(entries, entriesToEdit);
|
|
279
|
-
const kept = reparentDroppedEntries(edited, entriesToDrop);
|
|
280
|
-
|
|
281
|
-
return {
|
|
282
|
-
entries: kept,
|
|
283
|
-
report: {
|
|
284
|
-
repaired: true,
|
|
285
|
-
droppedEntryIds: Array.from(entriesToDrop),
|
|
286
|
-
droppedToolCallIds: validation.orphanedToolCallIds,
|
|
287
|
-
droppedToolResultIds: validation.orphanedToolResultIds,
|
|
288
|
-
totalEntries: entries.length,
|
|
289
|
-
},
|
|
290
|
-
};
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
export function repairSessionFile(path: string): SessionRepairReport {
|
|
294
|
-
if (!existsSync(path)) {
|
|
295
|
-
throw new Error(`Session file not found: ${path}`);
|
|
296
|
-
}
|
|
68
|
+
export function repairSessionFile(path: string) {
|
|
69
|
+
assertSessionFileExists(path);
|
|
297
70
|
|
|
298
71
|
const entries = parseSessionFile(path);
|
|
299
72
|
const { entries: repaired, report } = repairEntriesInMemory(entries);
|
|
@@ -301,274 +74,8 @@ export function repairSessionFile(path: string): SessionRepairReport {
|
|
|
301
74
|
if (!report.repaired) return report;
|
|
302
75
|
|
|
303
76
|
const backupPath = backupFile(path);
|
|
304
|
-
const newContent = repaired.map(
|
|
77
|
+
const newContent = repaired.map(entry => JSON.stringify(entry)).join('\n') + '\n';
|
|
305
78
|
atomicWrite(path, newContent);
|
|
306
79
|
|
|
307
80
|
return { ...report, backupPath };
|
|
308
81
|
}
|
|
309
|
-
|
|
310
|
-
// ── Soft reset (recovery from already-overflowed sessions) ──────────────
|
|
311
|
-
|
|
312
|
-
/**
|
|
313
|
-
* When a session has grown past the model's context window, normal compact
|
|
314
|
-
* cannot recover — the summarizer prompt itself overflows. Soft reset trims
|
|
315
|
-
* the session jsonl on disk to its most-recent N user turns, drops everything
|
|
316
|
-
* older, and re-runs the tool-pairing repair so what's left is internally
|
|
317
|
-
* consistent.
|
|
318
|
-
*
|
|
319
|
-
* Trade-off: loses fidelity for older turns. The roundhouse memory layer
|
|
320
|
-
* (MEMORY.md, daily front-page) re-injects on the next turn, so the agent
|
|
321
|
-
* still has its durable context — just not the verbatim message history.
|
|
322
|
-
*
|
|
323
|
-
* Conservative defaults aim for ~30–40% of a 200k window so the next compact
|
|
324
|
-
* has ample room to summarize.
|
|
325
|
-
*/
|
|
326
|
-
export interface SoftResetOptions {
|
|
327
|
-
/** Keep at most this many user turns from the tail (default: 8). */
|
|
328
|
-
keepRecentUserTurns?: number;
|
|
329
|
-
/** Hard cap on jsonl bytes after trim (default: 250_000 ≈ 60–80k tokens). */
|
|
330
|
-
maxBytes?: number;
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
export interface SoftResetReport {
|
|
334
|
-
reset: boolean;
|
|
335
|
-
reason: string;
|
|
336
|
-
entriesBefore: number;
|
|
337
|
-
entriesAfter: number;
|
|
338
|
-
bytesBefore: number;
|
|
339
|
-
bytesAfter: number;
|
|
340
|
-
backupPath?: string;
|
|
341
|
-
/** Tool-pairing repair report on the trimmed file (orphans created by the cut). */
|
|
342
|
-
postRepair?: SessionRepairReport;
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
/**
|
|
346
|
-
* Find a safe cut index in the entries array. Walk backwards from the end
|
|
347
|
-
* looking for user message entries; the cut sits *at* the Nth most-recent
|
|
348
|
-
* user message we encounter (so the kept tail starts on a user turn).
|
|
349
|
-
* Returns the index of the first entry to KEEP (i.e. all entries[0..cutIdx)
|
|
350
|
-
* are dropped).
|
|
351
|
-
*
|
|
352
|
-
* Byte-cap path: if we exceed the byte budget before reaching N user turns,
|
|
353
|
-
* we still snap the cut to the most-recent user-message boundary we've seen.
|
|
354
|
-
* That guarantees the kept tail always starts with a user message — never an
|
|
355
|
-
* orphaned assistant reply or toolResult whose user prompt was dropped.
|
|
356
|
-
*
|
|
357
|
-
* If we can't find ANY user messages, returns entries.length (drop everything
|
|
358
|
-
* but header) so the caller produces a header-only no-op session rather than
|
|
359
|
-
* a malformed tail.
|
|
360
|
-
*/
|
|
361
|
-
function findSoftResetCutIndex(
|
|
362
|
-
entries: SessionFileEntry[],
|
|
363
|
-
keepRecentUserTurns: number,
|
|
364
|
-
maxBytes: number,
|
|
365
|
-
): { cutIdx: number; reason: string } {
|
|
366
|
-
let userTurnsSeen = 0;
|
|
367
|
-
let bytesAccumulated = 0;
|
|
368
|
-
/** Most recent user-message index we've walked through, or -1 if none yet. */
|
|
369
|
-
let lastUserIdx = -1;
|
|
370
|
-
// Scan tail-to-head, stop when we've collected enough user turns OR exceeded byte budget.
|
|
371
|
-
for (let i = entries.length - 1; i >= 0; i--) {
|
|
372
|
-
const e = entries[i];
|
|
373
|
-
bytesAccumulated += Buffer.byteLength(JSON.stringify(e), 'utf8') + 1; // +1 for newline
|
|
374
|
-
if (e.type === 'message' && e.message?.role === 'user') {
|
|
375
|
-
userTurnsSeen++;
|
|
376
|
-
lastUserIdx = i;
|
|
377
|
-
if (userTurnsSeen >= keepRecentUserTurns) {
|
|
378
|
-
return { cutIdx: i, reason: `kept-${userTurnsSeen}-user-turns` };
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
// Byte cap is a safety net for sessions where a single turn is enormous
|
|
382
|
-
// (e.g. one turn dumped a 200k file). When we hit it we MUST snap the cut
|
|
383
|
-
// to the most recent user-message boundary — otherwise the kept tail could
|
|
384
|
-
// start mid-turn (assistant/toolResult with no user prompt above it), and
|
|
385
|
-
// tool-pairing repair won't fix that.
|
|
386
|
-
if (bytesAccumulated > maxBytes && userTurnsSeen > 0) {
|
|
387
|
-
return { cutIdx: lastUserIdx, reason: `byte-cap-${bytesAccumulated}b` };
|
|
388
|
-
}
|
|
389
|
-
}
|
|
390
|
-
// Fewer user turns than target — treat as no-op. Soft-reset is recovery
|
|
391
|
-
// from overflow; if the session has fewer turns than our target it isn't
|
|
392
|
-
// overflowed and we shouldn't mutate it. Returning 1 means "keep everything
|
|
393
|
-
// after the header", which the caller's `cutIdx <= 1` gate maps to reset:false.
|
|
394
|
-
return { cutIdx: 1, reason: 'fewer-turns-than-target' };
|
|
395
|
-
}
|
|
396
|
-
|
|
397
|
-
/**
|
|
398
|
-
* Soft-reset a pi-ai session jsonl: keep the most-recent N user turns + their
|
|
399
|
-
* surrounding messages, drop everything older. Always preserves the session
|
|
400
|
-
* header (entries[0]). Re-parents the first kept entry to null so the tree
|
|
401
|
-
* remains valid. Re-runs tool-pairing repair on the trimmed file because
|
|
402
|
-
* the cut likely orphaned some toolCall/toolResult pairs.
|
|
403
|
-
*
|
|
404
|
-
* Atomic + backup: same safety pattern as repairSessionFile.
|
|
405
|
-
*
|
|
406
|
-
* @returns report describing what was reset, or `{reset:false}` if nothing to do.
|
|
407
|
-
*/
|
|
408
|
-
export function softResetSessionFile(
|
|
409
|
-
path: string,
|
|
410
|
-
options: SoftResetOptions = {},
|
|
411
|
-
): SoftResetReport {
|
|
412
|
-
if (!existsSync(path)) {
|
|
413
|
-
throw new Error(`Session file not found: ${path}`);
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
const keepRecentUserTurns = options.keepRecentUserTurns ?? 8;
|
|
417
|
-
const maxBytes = options.maxBytes ?? 250_000;
|
|
418
|
-
|
|
419
|
-
const entries = parseSessionFile(path);
|
|
420
|
-
const bytesBefore = readFileSync(path).length;
|
|
421
|
-
|
|
422
|
-
// Need at least header + a couple of messages to be worth resetting.
|
|
423
|
-
if (entries.length < 4) {
|
|
424
|
-
return {
|
|
425
|
-
reset: false,
|
|
426
|
-
reason: 'session-too-small',
|
|
427
|
-
entriesBefore: entries.length,
|
|
428
|
-
entriesAfter: entries.length,
|
|
429
|
-
bytesBefore,
|
|
430
|
-
bytesAfter: bytesBefore,
|
|
431
|
-
};
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
const { cutIdx, reason } = findSoftResetCutIndex(entries, keepRecentUserTurns, maxBytes);
|
|
435
|
-
|
|
436
|
-
// No-op if cut is already at the start (nothing to drop besides header).
|
|
437
|
-
if (cutIdx <= 1) {
|
|
438
|
-
return {
|
|
439
|
-
reset: false,
|
|
440
|
-
reason: `cut-at-start (${reason})`,
|
|
441
|
-
entriesBefore: entries.length,
|
|
442
|
-
entriesAfter: entries.length,
|
|
443
|
-
bytesBefore,
|
|
444
|
-
bytesAfter: bytesBefore,
|
|
445
|
-
};
|
|
446
|
-
}
|
|
447
|
-
|
|
448
|
-
// Build trimmed entries: header + tail.
|
|
449
|
-
// Re-parent the first kept tail entry to null so the tree root is intact.
|
|
450
|
-
const header = entries[0];
|
|
451
|
-
const tail = entries.slice(cutIdx);
|
|
452
|
-
if (tail.length > 0 && tail[0].parentId !== undefined) {
|
|
453
|
-
tail[0] = { ...tail[0], parentId: null };
|
|
454
|
-
}
|
|
455
|
-
const trimmed = [header, ...tail];
|
|
456
|
-
|
|
457
|
-
// Run tool-pair repair *in memory* on the trimmed entries before writing,
|
|
458
|
-
// so the on-disk update is a single atomic backup + atomic rename. Doing
|
|
459
|
-
// disk-write → repairSessionFile() (another disk-write) would mean a crash
|
|
460
|
-
// between the two leaves a partially-processed file AND a backup of the
|
|
461
|
-
// already-trimmed file rather than the true original.
|
|
462
|
-
const repaired = repairEntriesInMemory(trimmed);
|
|
463
|
-
|
|
464
|
-
const backupPath = backupFile(path);
|
|
465
|
-
const newContent = repaired.entries.map(e => JSON.stringify(e)).join('\n') + '\n';
|
|
466
|
-
atomicWrite(path, newContent);
|
|
467
|
-
|
|
468
|
-
const bytesAfter = Buffer.byteLength(newContent, 'utf8');
|
|
469
|
-
return {
|
|
470
|
-
reset: true,
|
|
471
|
-
reason,
|
|
472
|
-
entriesBefore: entries.length,
|
|
473
|
-
entriesAfter: repaired.entries.length,
|
|
474
|
-
bytesBefore,
|
|
475
|
-
bytesAfter,
|
|
476
|
-
backupPath,
|
|
477
|
-
postRepair: repaired.report,
|
|
478
|
-
};
|
|
479
|
-
}
|
|
480
|
-
|
|
481
|
-
// ── Error classifiers ────────────────────────────────────────────────────
|
|
482
|
-
|
|
483
|
-
/**
|
|
484
|
-
* Detect whether an error from pi-ai / the model provider indicates the
|
|
485
|
-
* session has grown past the model's context window (input > max).
|
|
486
|
-
*
|
|
487
|
-
* Triggers soft-reset recovery in the memory lifecycle. Intentionally narrow:
|
|
488
|
-
* only matches the well-known overflow phrasings, not generic 4xx errors.
|
|
489
|
-
*
|
|
490
|
-
* Mirrors `isToolPairingError`'s nested-error handling: provider SDKs commonly
|
|
491
|
-
* wrap the useful text under `cause.message` or in serialized fields on
|
|
492
|
-
* Bedrock ValidationException. Stringify-search is gated on a 4xx / validation
|
|
493
|
-
* shape so we don't false-positive on noisy unrelated errors.
|
|
494
|
-
*/
|
|
495
|
-
export function isContextOverflowError(err: unknown): boolean {
|
|
496
|
-
if (!err) return false;
|
|
497
|
-
const patterns = [
|
|
498
|
-
/prompt is too long/i,
|
|
499
|
-
/tokens?\s*[>>]\s*\d+\s*maximum/i,
|
|
500
|
-
/input is too long/i,
|
|
501
|
-
/context length exceeded/i,
|
|
502
|
-
/maximum context length/i,
|
|
503
|
-
];
|
|
504
|
-
|
|
505
|
-
// 1. Top-level message.
|
|
506
|
-
const msg = (err as { message?: string }).message ?? String(err);
|
|
507
|
-
if (patterns.some(p => p.test(msg))) return true;
|
|
508
|
-
|
|
509
|
-
// 2. Walk the cause chain (a few hops — don't loop forever on circular).
|
|
510
|
-
let cur: unknown = (err as { cause?: unknown }).cause;
|
|
511
|
-
for (let hop = 0; hop < 5 && cur; hop++) {
|
|
512
|
-
const causeMsg = (cur as { message?: string }).message ?? String(cur);
|
|
513
|
-
if (patterns.some(p => p.test(causeMsg))) return true;
|
|
514
|
-
cur = (cur as { cause?: unknown }).cause;
|
|
515
|
-
}
|
|
516
|
-
|
|
517
|
-
// 3. Bedrock ValidationException sometimes carries the overflow text in
|
|
518
|
-
// nested SDK fields. Only stringify-search when the error LOOKS like a 4xx
|
|
519
|
-
// validation error — mirrors the gating in isToolPairingError.
|
|
520
|
-
const name = (err as { name?: string }).name ?? '';
|
|
521
|
-
const httpStatus =
|
|
522
|
-
(err as { $metadata?: { httpStatusCode?: number } }).$metadata?.httpStatusCode;
|
|
523
|
-
if (name === 'ValidationException' || httpStatus === 400) {
|
|
524
|
-
try {
|
|
525
|
-
const full = JSON.stringify(err);
|
|
526
|
-
if (patterns.some(p => p.test(full))) return true;
|
|
527
|
-
} catch {
|
|
528
|
-
/* circular structure — give up */
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
|
|
532
|
-
return false;
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
/**
|
|
536
|
-
* Detect whether an error from pi-ai / the model provider indicates a
|
|
537
|
-
* tool-pairing mismatch that can be recovered by session repair.
|
|
538
|
-
*
|
|
539
|
-
* Matches Bedrock Converse and Anthropic error shapes. Intentionally narrow —
|
|
540
|
-
* we don't want to repair on unrelated 400s.
|
|
541
|
-
*/
|
|
542
|
-
export function isToolPairingError(err: unknown): boolean {
|
|
543
|
-
if (!err) return false;
|
|
544
|
-
const msg = (err as { message?: string }).message ?? String(err);
|
|
545
|
-
const name = (err as { name?: string }).name ?? '';
|
|
546
|
-
|
|
547
|
-
// Bedrock Converse: "messages.N: `tool_use` ids were found without `tool_result` blocks..."
|
|
548
|
-
// Anthropic direct: similar phrasing
|
|
549
|
-
const patterns = [
|
|
550
|
-
/tool_use.*without.*tool_result/i,
|
|
551
|
-
/tool_result.*without.*tool_use/i,
|
|
552
|
-
/toolUse.*without.*toolResult/i,
|
|
553
|
-
/unmatched.*tool.?use/i,
|
|
554
|
-
/orphan.*tool/i,
|
|
555
|
-
];
|
|
556
|
-
|
|
557
|
-
if (patterns.some(p => p.test(msg))) return true;
|
|
558
|
-
|
|
559
|
-
// Bedrock ValidationException may carry the pairing text in nested fields
|
|
560
|
-
// (e.g. err.cause.message, $metadata). Only stringify-search when the error
|
|
561
|
-
// *looks* like a Bedrock validation error — avoid noisy matches on unrelated
|
|
562
|
-
// messages that happen to contain '400'.
|
|
563
|
-
const httpStatus =
|
|
564
|
-
(err as { $metadata?: { httpStatusCode?: number } }).$metadata?.httpStatusCode;
|
|
565
|
-
if (name === 'ValidationException' || httpStatus === 400) {
|
|
566
|
-
try {
|
|
567
|
-
const full = JSON.stringify(err);
|
|
568
|
-
if (patterns.some(p => p.test(full))) return true;
|
|
569
|
-
} catch {
|
|
570
|
-
/* circular structure — give up */
|
|
571
|
-
}
|
|
572
|
-
}
|
|
573
|
-
return false;
|
|
574
|
-
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import {
|
|
3
|
+
assertSessionFileExists,
|
|
4
|
+
atomicWrite,
|
|
5
|
+
backupFile,
|
|
6
|
+
parseSessionFile,
|
|
7
|
+
repairEntriesInMemory,
|
|
8
|
+
type SessionFileEntry,
|
|
9
|
+
type SessionRepairReport,
|
|
10
|
+
} from './session-repair-internal';
|
|
11
|
+
|
|
12
|
+
/** Tuning knobs for softResetSessionFile. */
export interface SoftResetOptions {
  /** Keep at most this many user turns from the tail (default: 8). */
  keepRecentUserTurns?: number;
  /** Hard cap on jsonl bytes after trim (default: 250_000 ≈ 60–80k tokens). */
  maxBytes?: number;
}
|
|
18
|
+
|
|
19
|
+
/** Outcome of a soft reset; `reset: false` means the file was left untouched. */
export interface SoftResetReport {
  /** Whether the on-disk session was actually trimmed and rewritten. */
  reset: boolean;
  /** Why the reset happened (or why it was a no-op). */
  reason: string;
  /** Entry count parsed from the file before any trimming. */
  entriesBefore: number;
  /** Entry count written back (unchanged when `reset` is false). */
  entriesAfter: number;
  /** File size in bytes before the trim. */
  bytesBefore: number;
  /** File size in bytes after the trim (equals `bytesBefore` on no-op). */
  bytesAfter: number;
  /** Backup location of the original file; set only when a rewrite happened. */
  backupPath?: string;
  /** Tool-pairing repair report on the trimmed file (orphans created by the cut). */
  postRepair?: SessionRepairReport;
}
|
|
30
|
+
|
|
31
|
+
function findSoftResetCutIndex(
|
|
32
|
+
entries: SessionFileEntry[],
|
|
33
|
+
keepRecentUserTurns: number,
|
|
34
|
+
maxBytes: number,
|
|
35
|
+
): { cutIdx: number; reason: string } {
|
|
36
|
+
let userTurnsSeen = 0;
|
|
37
|
+
let bytesAccumulated = 0;
|
|
38
|
+
let lastUserIdx = -1;
|
|
39
|
+
|
|
40
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
41
|
+
const entry = entries[i];
|
|
42
|
+
bytesAccumulated += Buffer.byteLength(JSON.stringify(entry), 'utf8') + 1;
|
|
43
|
+
if (entry.type === 'message' && entry.message?.role === 'user') {
|
|
44
|
+
userTurnsSeen++;
|
|
45
|
+
lastUserIdx = i;
|
|
46
|
+
if (userTurnsSeen >= keepRecentUserTurns) {
|
|
47
|
+
return { cutIdx: i, reason: `kept-${userTurnsSeen}-user-turns` };
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
if (bytesAccumulated > maxBytes && userTurnsSeen > 0) {
|
|
51
|
+
return { cutIdx: lastUserIdx, reason: `byte-cap-${bytesAccumulated}b` };
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
return { cutIdx: 1, reason: 'fewer-turns-than-target' };
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
function buildTrimmedEntries(entries: SessionFileEntry[], cutIdx: number): SessionFileEntry[] {
|
|
59
|
+
const header = entries[0];
|
|
60
|
+
const tail = entries.slice(cutIdx);
|
|
61
|
+
if (tail.length > 0 && tail[0].parentId !== undefined) {
|
|
62
|
+
tail[0] = { ...tail[0], parentId: null };
|
|
63
|
+
}
|
|
64
|
+
return [header, ...tail];
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export function softResetSessionFile(
|
|
68
|
+
path: string,
|
|
69
|
+
options: SoftResetOptions = {},
|
|
70
|
+
): SoftResetReport {
|
|
71
|
+
assertSessionFileExists(path);
|
|
72
|
+
|
|
73
|
+
const keepRecentUserTurns = options.keepRecentUserTurns ?? 8;
|
|
74
|
+
const maxBytes = options.maxBytes ?? 250_000;
|
|
75
|
+
|
|
76
|
+
const entries = parseSessionFile(path);
|
|
77
|
+
const bytesBefore = readFileSync(path).length;
|
|
78
|
+
|
|
79
|
+
if (entries.length < 4) {
|
|
80
|
+
return {
|
|
81
|
+
reset: false,
|
|
82
|
+
reason: 'session-too-small',
|
|
83
|
+
entriesBefore: entries.length,
|
|
84
|
+
entriesAfter: entries.length,
|
|
85
|
+
bytesBefore,
|
|
86
|
+
bytesAfter: bytesBefore,
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
const { cutIdx, reason } = findSoftResetCutIndex(entries, keepRecentUserTurns, maxBytes);
|
|
91
|
+
if (cutIdx <= 1) {
|
|
92
|
+
return {
|
|
93
|
+
reset: false,
|
|
94
|
+
reason: `cut-at-start (${reason})`,
|
|
95
|
+
entriesBefore: entries.length,
|
|
96
|
+
entriesAfter: entries.length,
|
|
97
|
+
bytesBefore,
|
|
98
|
+
bytesAfter: bytesBefore,
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
const trimmed = buildTrimmedEntries(entries, cutIdx);
|
|
103
|
+
const repaired = repairEntriesInMemory(trimmed);
|
|
104
|
+
|
|
105
|
+
const backupPath = backupFile(path);
|
|
106
|
+
const newContent = repaired.entries.map(entry => JSON.stringify(entry)).join('\n') + '\n';
|
|
107
|
+
atomicWrite(path, newContent);
|
|
108
|
+
|
|
109
|
+
const bytesAfter = Buffer.byteLength(newContent, 'utf8');
|
|
110
|
+
return {
|
|
111
|
+
reset: true,
|
|
112
|
+
reason,
|
|
113
|
+
entriesBefore: entries.length,
|
|
114
|
+
entriesAfter: repaired.entries.length,
|
|
115
|
+
bytesBefore,
|
|
116
|
+
bytesAfter,
|
|
117
|
+
backupPath,
|
|
118
|
+
postRepair: repaired.report,
|
|
119
|
+
};
|
|
120
|
+
}
|
package/src/memory/lifecycle.ts
CHANGED
|
@@ -16,7 +16,7 @@ import { shouldInjectMemory, classifyContextPressure, isSoftFlushOnCooldown } fr
|
|
|
16
16
|
import { buildMemoryInjection, injectMemoryIntoMessage } from "./inject";
|
|
17
17
|
import { buildFlushPrompt } from "./prompts";
|
|
18
18
|
import { bootstrapMemoryFiles } from "./bootstrap";
|
|
19
|
-
import { isContextOverflowError } from "../agents/shared/
|
|
19
|
+
import { isContextOverflowError } from "../agents/shared/error-classifiers";
|
|
20
20
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
21
21
|
import { join } from "node:path";
|
|
22
22
|
import { homedir } from "node:os";
|
|
@@ -51,6 +51,41 @@ function appendCompactLog(entry: CompactLogEntry): void {
|
|
|
51
51
|
.catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
async function attemptSoftResetRecovery(
|
|
55
|
+
err: unknown,
|
|
56
|
+
threadId: string,
|
|
57
|
+
agent: AgentAdapter,
|
|
58
|
+
onProgress?: (step: string) => void | Promise<void>,
|
|
59
|
+
): Promise<{ attempted: boolean; succeeded: boolean }> {
|
|
60
|
+
if (!isContextOverflowError(err) || !agent.softReset) {
|
|
61
|
+
return { attempted: false, succeeded: false };
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
try {
|
|
65
|
+
await onProgress?.("♻️ Session overflowed — soft-resetting to recent turns...");
|
|
66
|
+
const report = await agent.softReset(threadId);
|
|
67
|
+
if (report?.reset) {
|
|
68
|
+
console.warn(`[memory] soft-reset recovered ${threadId} from overflow`);
|
|
69
|
+
const { entriesBefore, entriesAfter } = (report as { entriesBefore?: number; entriesAfter?: number });
|
|
70
|
+
const detail = typeof entriesBefore === "number" && typeof entriesAfter === "number"
|
|
71
|
+
? ` (${entriesBefore} → ${entriesAfter} entries)`
|
|
72
|
+
: "";
|
|
73
|
+
await onProgress?.(`✅ Soft-reset complete${detail}. Durable memory will re-inject on next turn.`);
|
|
74
|
+
return { attempted: true, succeeded: true };
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const reason = (report as { reason?: string } | null)?.reason ?? "unknown";
|
|
78
|
+
console.warn(`[memory] soft-reset returned no-op for ${threadId} (${reason})`);
|
|
79
|
+
await onProgress?.(`⚠️ Soft-reset no-op (${reason}). Will retry compact next turn.`);
|
|
80
|
+
return { attempted: true, succeeded: false };
|
|
81
|
+
} catch (resetErr) {
|
|
82
|
+
const msg = resetErr instanceof Error ? resetErr.message : String(resetErr);
|
|
83
|
+
console.error(`[memory] soft-reset failed for ${threadId}:`, msg);
|
|
84
|
+
await onProgress?.(`❌ Soft-reset failed: ${msg.slice(0, 200)}. Will retry next turn.`);
|
|
85
|
+
return { attempted: true, succeeded: false };
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
54
89
|
// ── Memory mode detection ────────────────────────────
|
|
55
90
|
|
|
56
91
|
/**
|
|
@@ -360,34 +395,7 @@ export async function flushMemoryThenCompact(
|
|
|
360
395
|
} catch (err) {
|
|
361
396
|
const errMsg = (err as Error).message;
|
|
362
397
|
console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
|
|
363
|
-
|
|
364
|
-
// Recovery path: when the session has grown past the model's context
|
|
365
|
-
// window, the summarizer prompt itself overflows and compact() throws
|
|
366
|
-
// "prompt is too long". Threshold tuning prevents *new* sessions from
|
|
367
|
-
// hitting this, but does nothing for sessions already past the line.
|
|
368
|
-
// Trim the on-disk session jsonl to its most recent N user turns and
|
|
369
|
-
// mark the next turn for a fresh memory injection. We do NOT retry
|
|
370
|
-
// compact inline — that would extend the thread lock for another long
|
|
371
|
-
// operation. The trimmed session is small enough that the next user
|
|
372
|
-
// turn proceeds normally; any soft pressure from injected memory will
|
|
373
|
-
// trigger a regular compact later.
|
|
374
|
-
let softResetAttempted = false;
|
|
375
|
-
let softResetSucceeded = false;
|
|
376
|
-
if (isContextOverflowError(err) && agent.softReset) {
|
|
377
|
-
softResetAttempted = true;
|
|
378
|
-
try {
|
|
379
|
-
await onProgress?.("♻️ Session overflowed — soft-resetting to recent turns...");
|
|
380
|
-
const report = await agent.softReset(threadId);
|
|
381
|
-
if (report?.reset) {
|
|
382
|
-
softResetSucceeded = true;
|
|
383
|
-
console.warn(`[memory] soft-reset recovered ${threadId} from overflow`);
|
|
384
|
-
} else {
|
|
385
|
-
console.warn(`[memory] soft-reset returned no-op for ${threadId} (${(report as { reason?: string } | null)?.reason ?? "unknown"})`);
|
|
386
|
-
}
|
|
387
|
-
} catch (resetErr) {
|
|
388
|
-
console.error(`[memory] soft-reset failed for ${threadId}:`, (resetErr as Error).message);
|
|
389
|
-
}
|
|
390
|
-
}
|
|
398
|
+
const recovery = await attemptSoftResetRecovery(err, threadId, agent, onProgress);
|
|
391
399
|
|
|
392
400
|
appendCompactLog({
|
|
393
401
|
threadId,
|
|
@@ -401,13 +409,13 @@ export async function flushMemoryThenCompact(
|
|
|
401
409
|
totalMs: Date.now() - t0,
|
|
402
410
|
model: flushModel ?? "default",
|
|
403
411
|
status: "failed",
|
|
404
|
-
error: (
|
|
405
|
-
? `${
|
|
412
|
+
error: (recovery.attempted
|
|
413
|
+
? `${recovery.succeeded ? "soft-reset-recovered" : "soft-reset-failed"}: ${errMsg}`
|
|
406
414
|
: errMsg).slice(0, 500),
|
|
407
415
|
});
|
|
408
416
|
|
|
409
417
|
try {
|
|
410
|
-
if (
|
|
418
|
+
if (recovery.succeeded) {
|
|
411
419
|
// Soft reset cleared the overflow. Mark the next turn for memory
|
|
412
420
|
// re-injection so the agent has its durable context, and clear the
|
|
413
421
|
// pendingCompact flag — there's nothing left to compact now.
|