@inceptionstack/roundhouse 0.5.27 → 0.5.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/package.json +1 -1
- package/src/agents/pi/pi-adapter.ts +18 -0
- package/src/memory/lifecycle.ts +80 -29
- package/src/memory/policy.ts +26 -5
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to `@inceptionstack/roundhouse` are documented here.
|
|
4
4
|
|
|
5
|
+
## [0.5.28] — 2026-05-14
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- **PR #126 actually shipped this time.** v0.5.26's CHANGELOG advertised the emergency-compact-loop fix, but the underlying PR (`fix/compact-loop-thresholds-and-thinking`) was still OPEN — only the version bump and self-update patch went out. Users on v0.5.26/v0.5.27 still hit `Summarization failed: prompt is too long: 212776 tokens > 200000 maximum` on overflowed sessions because `DEFAULT_HARD_TOKENS` was still 200k with no headroom clamp. This release contains the actual code change: `DEFAULT_HARD_TOKENS=150_000`, `DEFAULT_SOFT_TOKENS=130_000`, `COMPACT_HEADROOM_TOKENS=50_000`, plus `thinkingLevel='off'` forced inside `compactWithModel`. (#126)
|
|
9
|
+
|
|
5
10
|
## [0.5.27] — 2026-05-14
|
|
6
11
|
|
|
7
12
|
### Fixed
|
package/package.json
CHANGED
package/src/agents/pi/pi-adapter.ts
CHANGED
|
@@ -608,7 +608,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
|
|
|
608
608
|
|
|
609
609
|
const agentState = (entry.session as any).agent?.state;
|
|
610
610
|
let currentModel: any;
|
|
611
|
+
let currentThinkingLevel: any;
|
|
611
612
|
let modelSwapped = false;
|
|
613
|
+
let thinkingSwapped = false;
|
|
612
614
|
|
|
613
615
|
// Resolve and swap model for compact
|
|
614
616
|
if (!agentState) {
|
|
@@ -627,6 +629,19 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
|
|
|
627
629
|
modelSwapped = true;
|
|
628
630
|
console.log(`[pi-agent] compact using model (in-memory): ${modelId}`);
|
|
629
631
|
}
|
|
632
|
+
|
|
633
|
+
// Force thinking off for compact regardless of agent's default.
|
|
634
|
+
// Reasoning doesn't improve summarization, costs more tokens,
|
|
635
|
+
// and complicates the maxTokens math (adjustMaxTokensForThinking adds
|
|
636
|
+
// up to 16k thinking budget). Direct state mutation matches the model
|
|
637
|
+
// swap above and avoids setThinkingLevel(), which would persist to
|
|
638
|
+
// settings.json.
|
|
639
|
+
if (agentState.thinkingLevel && agentState.thinkingLevel !== "off") {
|
|
640
|
+
currentThinkingLevel = agentState.thinkingLevel;
|
|
641
|
+
agentState.thinkingLevel = "off";
|
|
642
|
+
thinkingSwapped = true;
|
|
643
|
+
console.log(`[pi-agent] compact forcing thinkingLevel=off (was ${currentThinkingLevel})`);
|
|
644
|
+
}
|
|
630
645
|
}
|
|
631
646
|
|
|
632
647
|
try {
|
|
@@ -640,6 +655,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
|
|
|
640
655
|
if (modelSwapped) {
|
|
641
656
|
agentState.model = currentModel;
|
|
642
657
|
}
|
|
658
|
+
if (thinkingSwapped) {
|
|
659
|
+
agentState.thinkingLevel = currentThinkingLevel;
|
|
660
|
+
}
|
|
643
661
|
}
|
|
644
662
|
});
|
|
645
663
|
},
|
package/src/memory/lifecycle.ts
CHANGED
|
@@ -20,6 +20,36 @@ import { appendFile, mkdir } from "node:fs/promises";
|
|
|
20
20
|
import { join } from "node:path";
|
|
21
21
|
import { homedir } from "node:os";
|
|
22
22
|
|
|
23
|
+
// ── Telemetry helper ─────────────────────────────────
|
|
24
|
+
|
|
25
|
+
interface CompactLogEntry {
|
|
26
|
+
threadId: string;
|
|
27
|
+
level: string;
|
|
28
|
+
effectiveLevel: string;
|
|
29
|
+
flushSkipped: boolean;
|
|
30
|
+
tokensBefore: number | null;
|
|
31
|
+
tokensAfter: number | null;
|
|
32
|
+
flushMs: number;
|
|
33
|
+
compactMs: number;
|
|
34
|
+
totalMs: number;
|
|
35
|
+
model: string;
|
|
36
|
+
status: "ok" | "failed";
|
|
37
|
+
error: string | null;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
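* Append one compaction telemetry entry to ~/.roundhouse/logs/compact-timing.jsonl. Fire-and-forget: the write is not awaited and a failure only logs a warning.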
|
|
42
|
+
* Schema is uniform across success/failure (status discriminator) so
|
|
43
|
+
* downstream parsers don't have to handle missing fields.
|
|
44
|
+
*/
|
|
45
|
+
function appendCompactLog(entry: CompactLogEntry): void {
|
|
46
|
+
const logDir = join(homedir(), ".roundhouse", "logs");
|
|
47
|
+
const line = JSON.stringify({ ts: new Date().toISOString(), ...entry }) + "\n";
|
|
48
|
+
mkdir(logDir, { recursive: true })
|
|
49
|
+
.then(() => appendFile(join(logDir, "compact-timing.jsonl"), line))
|
|
50
|
+
.catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
|
|
51
|
+
}
|
|
52
|
+
|
|
23
53
|
// ── Memory mode detection ────────────────────────────
|
|
24
54
|
|
|
25
55
|
/**
|
|
@@ -246,11 +276,16 @@ export async function flushMemoryThenCompact(
|
|
|
246
276
|
// "manual" level, attempting the flush in that condition will hit the same
|
|
247
277
|
// 200k rejection. Deferring flush to a later (successful) turn is the safe
|
|
248
278
|
// recovery path.
|
|
249
|
-
const
|
|
279
|
+
const stateBeforeCompact = await loadThreadMemoryState(threadId);
|
|
280
|
+
const stuckInEmergency = stateBeforeCompact.pendingCompact === "emergency";
|
|
250
281
|
const skipFlush = effectiveLevel === "emergency" || stuckInEmergency;
|
|
251
282
|
|
|
283
|
+
// Hoisted so the catch block can report accurate flush vs compact timing
|
|
284
|
+
// (a failure during compact() would otherwise conflate the two phases).
|
|
285
|
+
let flushMs = 0;
|
|
286
|
+
let compactMs = 0;
|
|
287
|
+
|
|
252
288
|
try {
|
|
253
|
-
let flushMs = 0;
|
|
254
289
|
if (!skipFlush) {
|
|
255
290
|
// Step 1: flush
|
|
256
291
|
const flushText = buildFlushPrompt(mode === "unknown" ? "full" : mode, effectiveLevel);
|
|
@@ -276,16 +311,18 @@ export async function flushMemoryThenCompact(
|
|
|
276
311
|
const result = usedCompactModel
|
|
277
312
|
? await agent.compactWithModel!(threadId, flushModel!)
|
|
278
313
|
: await agent.compact!(threadId);
|
|
279
|
-
|
|
314
|
+
compactMs = Date.now() - t1;
|
|
280
315
|
if (!result) return null;
|
|
281
316
|
|
|
282
|
-
// Step 3: mark force re-inject (Full mode only)
|
|
317
|
+
// Step 3: mark force re-inject (Full mode only). Reuse the state we
|
|
318
|
+
// already loaded above; the compact step doesn't mutate memory-state
|
|
319
|
+
// (it mutates the pi session, a separate file), so the in-memory copy
|
|
320
|
+
// is still authoritative for our fields.
|
|
283
321
|
if (mode !== "complement") {
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
await saveThreadMemoryState(threadId, state);
|
|
322
|
+
stateBeforeCompact.forceInjectReason = "after-compact";
|
|
323
|
+
stateBeforeCompact.lastCompactAt = new Date().toISOString();
|
|
324
|
+
stateBeforeCompact.pendingCompact = undefined;
|
|
325
|
+
await saveThreadMemoryState(threadId, stateBeforeCompact);
|
|
289
326
|
}
|
|
290
327
|
|
|
291
328
|
const totalMs = Date.now() - t0;
|
|
@@ -302,30 +339,44 @@ export async function flushMemoryThenCompact(
|
|
|
302
339
|
const timing = { flushMs, compactMs, totalMs, model: usedCompactModel ? flushModel! : "default" };
|
|
303
340
|
console.log(`[memory] flush+compact done for ${threadId}: ${result.tokensBefore} → ${result.tokensAfter ?? "?"} tokens | flush=${flushMs}ms compact=${compactMs}ms total=${totalMs}ms model=${timing.model}`);
|
|
304
341
|
|
|
305
|
-
// Persist timing log for debugging (async, fire-and-forget)
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
342
|
+
// Persist timing log for debugging (async, fire-and-forget).
|
|
343
|
+
// Schema is intentionally uniform across success and failure entries
|
|
344
|
+
// (status discriminator + same field set) so jsonl parsers don't have
|
|
345
|
+
// to special-case missing fields.
|
|
346
|
+
appendCompactLog({
|
|
347
|
+
threadId,
|
|
348
|
+
level,
|
|
349
|
+
effectiveLevel,
|
|
350
|
+
flushSkipped: skipFlush,
|
|
351
|
+
tokensBefore: result.tokensBefore,
|
|
352
|
+
tokensAfter: result.tokensAfter ?? null,
|
|
353
|
+
...timing,
|
|
354
|
+
status: "ok",
|
|
355
|
+
error: null,
|
|
356
|
+
});
|
|
320
357
|
|
|
321
358
|
return { ...result, timing };
|
|
322
359
|
} catch (err) {
|
|
323
|
-
|
|
324
|
-
|
|
360
|
+
const errMsg = (err as Error).message;
|
|
361
|
+
console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
|
|
362
|
+
appendCompactLog({
|
|
363
|
+
threadId,
|
|
364
|
+
level,
|
|
365
|
+
effectiveLevel,
|
|
366
|
+
flushSkipped: skipFlush,
|
|
367
|
+
tokensBefore: null,
|
|
368
|
+
tokensAfter: null,
|
|
369
|
+
flushMs, // accurate: 0 if skipped or failed before flush completed
|
|
370
|
+
compactMs, // accurate: 0 if failed before/during compact
|
|
371
|
+
totalMs: Date.now() - t0,
|
|
372
|
+
model: flushModel ?? "default",
|
|
373
|
+
status: "failed",
|
|
374
|
+
error: errMsg.slice(0, 500),
|
|
375
|
+
});
|
|
376
|
+
// Mark pending so we retry on next turn. Reuse the state we already loaded.
|
|
325
377
|
try {
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
await saveThreadMemoryState(threadId, state);
|
|
378
|
+
stateBeforeCompact.pendingCompact = effectiveLevel;
|
|
379
|
+
await saveThreadMemoryState(threadId, stateBeforeCompact);
|
|
329
380
|
} catch {}
|
|
330
381
|
return null;
|
|
331
382
|
}
|
package/src/memory/policy.ts
CHANGED
|
@@ -10,12 +10,26 @@ import { formatDate } from "./files";
|
|
|
10
10
|
// ── Defaults ─────────────────────────────────────────
|
|
11
11
|
|
|
12
12
|
const DEFAULT_SOFT_PERCENT = 0.45;
|
|
13
|
-
const DEFAULT_SOFT_TOKENS =
|
|
13
|
+
const DEFAULT_SOFT_TOKENS = 130_000;
|
|
14
14
|
const DEFAULT_HARD_PERCENT = 0.50;
|
|
15
|
-
const DEFAULT_HARD_TOKENS =
|
|
15
|
+
const DEFAULT_HARD_TOKENS = 150_000;
|
|
16
16
|
const DEFAULT_EMERGENCY_THRESHOLD = 32_768;
|
|
17
17
|
const DEFAULT_COOLDOWN_MS = 10 * 60_000; // 10 minutes
|
|
18
18
|
|
|
19
|
+
// Headroom reserved for the summarization payload itself when compact runs.
|
|
20
|
+
// The summarizer prompt serializes ALL discarded history (everything older
|
|
21
|
+
// than ~20k of recent tokens) plus scaffolding plus previous summary, then
|
|
22
|
+
// asks the model to summarize. If the prompt itself overflows the model
|
|
23
|
+
// context, compact() throws. 50k is the empirical headroom that fits a
|
|
24
|
+
// typical summarization prompt on Claude-family models.
|
|
25
|
+
const COMPACT_HEADROOM_TOKENS = 50_000;
|
|
26
|
+
|
|
27
|
+
// Why 130k/150k as the default absolute thresholds against a 200k window:
|
|
28
|
+
// see COMPACT_HEADROOM_TOKENS above and
|
|
29
|
+
// ~/.roundhouse/workspace/compaction-loop-diagnosis.md (Bug B).
|
|
30
|
+
// For smaller-window models, classifyContextPressure() clamps the absolute
|
|
31
|
+
// thresholds to `window - HEADROOM` so they never exceed the window.
|
|
32
|
+
|
|
19
33
|
// ── Injection policy ─────────────────────────────────
|
|
20
34
|
|
|
21
35
|
export interface InjectionDecision {
|
|
@@ -87,14 +101,21 @@ export function classifyContextPressure(
|
|
|
87
101
|
|
|
88
102
|
const pctDecimal = percent != null ? percent / 100 : tokens / window;
|
|
89
103
|
|
|
104
|
+
// Clamp absolute thresholds so they never exceed `window - HEADROOM`.
|
|
105
|
+
// Defends against future smaller-window models where the configured
|
|
106
|
+
// 150k/130k absolute thresholds would otherwise sit above the window.
|
|
107
|
+
// The percent thresholds already scale with window naturally.
|
|
108
|
+
const headroom = COMPACT_HEADROOM_TOKENS;
|
|
109
|
+
const ceiling = Math.max(0, window - headroom);
|
|
110
|
+
|
|
90
111
|
// Hard threshold
|
|
91
112
|
const hardPct = config?.hardPercent ?? DEFAULT_HARD_PERCENT;
|
|
92
|
-
const hardTok = config?.hardTokens ?? DEFAULT_HARD_TOKENS;
|
|
113
|
+
const hardTok = Math.min(config?.hardTokens ?? DEFAULT_HARD_TOKENS, ceiling);
|
|
93
114
|
if (pctDecimal >= hardPct || tokens >= hardTok) return "hard";
|
|
94
115
|
|
|
95
|
-
// Soft threshold
|
|
116
|
+
// Soft threshold (clamped to at most hardTok - 1 so soft triggers at a lower token count than hard).
|
|
96
117
|
const softPct = config?.softPercent ?? DEFAULT_SOFT_PERCENT;
|
|
97
|
-
const softTok = config?.softTokens ?? DEFAULT_SOFT_TOKENS;
|
|
118
|
+
const softTok = Math.min(config?.softTokens ?? DEFAULT_SOFT_TOKENS, Math.max(0, hardTok - 1));
|
|
98
119
|
if (pctDecimal >= softPct || tokens >= softTok) return "soft";
|
|
99
120
|
|
|
100
121
|
return "none";
|