@inceptionstack/roundhouse 0.5.26 → 0.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,25 @@
2
2
 
3
3
  All notable changes to `@inceptionstack/roundhouse` are documented here.
4
4
 
5
+ ## [0.5.28] — 2026-05-14
6
+
7
+ ### Fixed
8
+ - **PR #126 actually shipped this time.** v0.5.26's CHANGELOG advertised the emergency-compact-loop fix, but the underlying PR (`fix/compact-loop-thresholds-and-thinking`) was still OPEN — only the version bump and self-update patch went out. Users on v0.5.26/v0.5.27 still hit `Summarization failed: prompt is too long: 212776 tokens > 200000 maximum` on overflowed sessions because `DEFAULT_HARD_TOKENS` was still 200k with no headroom clamp. This release contains the actual code change: `DEFAULT_HARD_TOKENS=150_000`, `DEFAULT_SOFT_TOKENS=130_000`, `COMPACT_HEADROOM_TOKENS=50_000`, plus `thinkingLevel='off'` forced inside `compactWithModel`. (#126)
9
+
10
+ ## [0.5.27] — 2026-05-14
11
+
12
+ ### Fixed
13
+ - **Self-update no longer falsely fails on mise/nvm hosts** — on systems where Node is managed by mise (or nvm), `npm install -g` triggers a post-install reshim hook that exits 127 when its tool isn't on PATH, causing `execSync` to throw even though the package was written to disk correctly. The user-visible bug: "Self-update failed: Command failed: npm install -g …" plus `/status` continuing to show the old version forever (because the gateway never restarted). Fix: when the install command throws, consult `npm list -g <pkg>` and trust the on-disk version. If it matches the target, treat the install as successful. Same logic applied to extension updates. (#128)
14
+ - **Side effect:** `/update` now fires its existing 'restarting…' branch in this case, so `/status` reflects the new version on next boot.
15
+
16
+ ### Changed
17
+ - **DRY in `cli/update.ts`:** extracted `getInstalledVersion()` helper used by both pre-install version check and post-failure verification; introduced `SELF_PACKAGE` constant; fixed stale `commands/update.ts` header comment.
18
+
19
+ ## [0.5.26] — 2026-05-14
20
+
21
+ ### Fixed
22
+ - **Emergency compact loop — output-cap mismatch + summarization input overflow.** Two compounding bugs caused infinite emergency-compact loops on Haiku 4.5 sessions near the context limit. (1) `reserveTokens=150000` + Haiku's 64k output cap produced `maxTokens=120000`, which Bedrock rejected. (2) `hardTokens=200k`/`softTokens=180k` against a 200k window left no headroom for the summarizer prompt itself. Fix: lower thresholds to 150k/130k, add `COMPACT_HEADROOM_TOKENS=50k`, force `thinkingLevel:off` in `compactWithModel`, drop `reserveTokens` to 78k. State is now loaded once and reused; phase timing is hoisted; telemetry is accurate on failure. (#126)
23
+
5
24
  ## [0.5.25] — 2026-05-12
6
25
 
7
26
  ### Fixed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inceptionstack/roundhouse",
3
- "version": "0.5.26",
3
+ "version": "0.5.28",
4
4
  "type": "module",
5
5
  "description": "Multi-platform chat gateway that routes messages through a configured AI agent",
6
6
  "license": "MIT",
@@ -608,7 +608,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
608
608
 
609
609
  const agentState = (entry.session as any).agent?.state;
610
610
  let currentModel: any;
611
+ let currentThinkingLevel: any;
611
612
  let modelSwapped = false;
613
+ let thinkingSwapped = false;
612
614
 
613
615
  // Resolve and swap model for compact
614
616
  if (!agentState) {
@@ -627,6 +629,19 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
627
629
  modelSwapped = true;
628
630
  console.log(`[pi-agent] compact using model (in-memory): ${modelId}`);
629
631
  }
632
+
633
+ // Force thinking off for compact regardless of agent's default.
634
+ // Summarization doesn't benefit from reasoning, costs more tokens,
635
+ // and complicates the maxTokens math (adjustMaxTokensForThinking adds
636
+ // up to 16k thinking budget). Direct state mutation matches the model
637
+ // swap above and avoids setThinkingLevel(), which would persist to
638
+ // settings.json.
639
+ if (agentState.thinkingLevel && agentState.thinkingLevel !== "off") {
640
+ currentThinkingLevel = agentState.thinkingLevel;
641
+ agentState.thinkingLevel = "off";
642
+ thinkingSwapped = true;
643
+ console.log(`[pi-agent] compact forcing thinkingLevel=off (was ${currentThinkingLevel})`);
644
+ }
630
645
  }
631
646
 
632
647
  try {
@@ -640,6 +655,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
640
655
  if (modelSwapped) {
641
656
  agentState.model = currentModel;
642
657
  }
658
+ if (thinkingSwapped) {
659
+ agentState.thinkingLevel = currentThinkingLevel;
660
+ }
643
661
  }
644
662
  });
645
663
  },
package/src/cli/update.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * commands/update.ts — Handle the /update command
2
+ * cli/update.ts — Handle the /update command
3
3
  *
4
4
  * Transport-agnostic: receives a ProgressReporter interface,
5
5
  * not a Telegram-specific thread object.
@@ -15,6 +15,28 @@ const GLOBAL_PI_EXTENSION_PACKAGES = [
15
15
  "@inceptionstack/pi-branch-enforcer",
16
16
  ];
17
17
 
18
+ const SELF_PACKAGE = "@inceptionstack/roundhouse";
19
+
20
+ /**
21
+ * Read globally-installed version of a package from disk.
22
+ * Returns empty string if the package is not installed or query fails.
23
+ *
24
+ * Used both for pre-install version checks and for post-failure verification
25
+ * (mise/nvm/npm reshim hooks can fail with exit 127 even after `npm install -g`
26
+ * actually wrote the new version to disk — see PR fix/self-update-verify-on-failure).
27
+ */
28
+ function getInstalledVersion(pkg: string): string {
29
+ try {
30
+ const out = execSync(`npm list -g ${pkg} --json --depth=0 2>/dev/null`, {
31
+ timeout: 10_000,
32
+ encoding: "utf8",
33
+ });
34
+ return JSON.parse(out)?.dependencies?.[pkg]?.version ?? "";
35
+ } catch {
36
+ return "";
37
+ }
38
+ }
39
+
18
40
  export interface UpdateProgress {
19
41
  update(text: string): Promise<void>;
20
42
  }
@@ -28,14 +50,11 @@ export interface UpdateResult {
28
50
 
29
51
  export async function updateExtensions(progress: UpdateProgress): Promise<void> {
30
52
  for (const extensionPackage of GLOBAL_PI_EXTENSION_PACKAGES) {
53
+ let latestExtVersion = "";
31
54
  try {
32
55
  // Check if already at latest
33
- const installed = execSync(`npm list -g ${extensionPackage} --json 2>/dev/null`, {
34
- timeout: 10_000,
35
- encoding: "utf8",
36
- });
37
- const installedVersion = JSON.parse(installed)?.dependencies?.[extensionPackage]?.version ?? "";
38
- const latestExtVersion = execSync(`npm view ${extensionPackage} version 2>/dev/null`, {
56
+ const installedVersion = getInstalledVersion(extensionPackage);
57
+ latestExtVersion = execSync(`npm view ${extensionPackage} version 2>/dev/null`, {
39
58
  timeout: 10_000,
40
59
  encoding: "utf8",
41
60
  }).trim();
@@ -57,6 +76,14 @@ export async function updateExtensions(progress: UpdateProgress): Promise<void>
57
76
  await progress.update(`✅ ${extensionPackage} updated`);
58
77
  } catch (e) {
59
78
  const msg = e instanceof Error ? e.message : String(e);
79
+ // Verify-after-fail: post-install reshim hooks (mise/nvm) can exit non-zero
80
+ // even when the package landed on disk correctly.
81
+ const onDisk = getInstalledVersion(extensionPackage);
82
+ if (onDisk && (!latestExtVersion || onDisk === latestExtVersion)) {
83
+ console.warn(`[roundhouse] ${extensionPackage} install reported failure but v${onDisk} is on disk — treating as success:`, msg);
84
+ await progress.update(`✅ ${extensionPackage} updated to v${onDisk} (post-install hook warned, ignored)`);
85
+ continue;
86
+ }
60
87
  console.warn(`[roundhouse] failed to update extension ${extensionPackage}:`, msg);
61
88
  await progress.update(`⚠️ Failed to update ${extensionPackage}: ${msg.slice(0, 150)}`);
62
89
  }
@@ -71,13 +98,20 @@ export async function updateSelf(
71
98
  await progress.update(`📦 Updating v${currentVersion} → v${latestVersion}...`);
72
99
 
73
100
  try {
74
- execSync("npm install -g @inceptionstack/roundhouse@latest 2>&1", {
101
+ execSync(`npm install -g ${SELF_PACKAGE}@latest 2>&1`, {
75
102
  timeout: 120_000,
76
103
  encoding: "utf8",
77
104
  });
78
105
  return undefined;
79
106
  } catch (e) {
80
107
  const msg = e instanceof Error ? e.message : String(e);
108
+ // Verify-after-fail: mise/nvm post-install reshim can exit 127 even when
109
+ // npm wrote the new version to disk. Trust the on-disk state over the exit code.
110
+ const onDisk = getInstalledVersion(SELF_PACKAGE);
111
+ if (onDisk === latestVersion) {
112
+ console.warn(`[roundhouse] self-update install reported failure but v${onDisk} is on disk — treating as success:`, msg);
113
+ return undefined;
114
+ }
81
115
  console.warn("[roundhouse] self-update failed:", msg);
82
116
  return `Self-update failed: ${msg}`;
83
117
  }
@@ -106,7 +140,7 @@ export async function performUpdate(progress: UpdateProgress): Promise<UpdateRes
106
140
 
107
141
  let latestVersion: string;
108
142
  try {
109
- latestVersion = execSync("npm view @inceptionstack/roundhouse version 2>/dev/null", {
143
+ latestVersion = execSync(`npm view ${SELF_PACKAGE} version 2>/dev/null`, {
110
144
  timeout: 30_000,
111
145
  encoding: "utf8",
112
146
  }).trim();
@@ -20,6 +20,36 @@ import { appendFile, mkdir } from "node:fs/promises";
20
20
  import { join } from "node:path";
21
21
  import { homedir } from "node:os";
22
22
 
23
+ // ── Telemetry helper ─────────────────────────────────
24
+
25
+ interface CompactLogEntry {
26
+ threadId: string;
27
+ level: string;
28
+ effectiveLevel: string;
29
+ flushSkipped: boolean;
30
+ tokensBefore: number | null;
31
+ tokensAfter: number | null;
32
+ flushMs: number;
33
+ compactMs: number;
34
+ totalMs: number;
35
+ model: string;
36
+ status: "ok" | "failed";
37
+ error: string | null;
38
+ }
39
+
40
+ /**
41
+ * Append a compact telemetry entry. Fire-and-forget.
42
+ * Schema is uniform across success/failure (status discriminator) so
43
+ * downstream parsers don't have to handle missing fields.
44
+ */
45
+ function appendCompactLog(entry: CompactLogEntry): void {
46
+ const logDir = join(homedir(), ".roundhouse", "logs");
47
+ const line = JSON.stringify({ ts: new Date().toISOString(), ...entry }) + "\n";
48
+ mkdir(logDir, { recursive: true })
49
+ .then(() => appendFile(join(logDir, "compact-timing.jsonl"), line))
50
+ .catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
51
+ }
52
+
23
53
  // ── Memory mode detection ────────────────────────────
24
54
 
25
55
  /**
@@ -246,11 +276,16 @@ export async function flushMemoryThenCompact(
246
276
  // "manual" level, attempting the flush in that condition will hit the same
247
277
  // 200k rejection. Deferring flush to a later (successful) turn is the safe
248
278
  // recovery path.
249
- const stuckInEmergency = (await loadThreadMemoryState(threadId)).pendingCompact === "emergency";
279
+ const stateBeforeCompact = await loadThreadMemoryState(threadId);
280
+ const stuckInEmergency = stateBeforeCompact.pendingCompact === "emergency";
250
281
  const skipFlush = effectiveLevel === "emergency" || stuckInEmergency;
251
282
 
283
+ // Hoisted so the catch block can report accurate flush vs compact timing
284
+ // (a failure during compact() would otherwise conflate the two phases).
285
+ let flushMs = 0;
286
+ let compactMs = 0;
287
+
252
288
  try {
253
- let flushMs = 0;
254
289
  if (!skipFlush) {
255
290
  // Step 1: flush
256
291
  const flushText = buildFlushPrompt(mode === "unknown" ? "full" : mode, effectiveLevel);
@@ -276,16 +311,18 @@ export async function flushMemoryThenCompact(
276
311
  const result = usedCompactModel
277
312
  ? await agent.compactWithModel!(threadId, flushModel!)
278
313
  : await agent.compact!(threadId);
279
- const compactMs = Date.now() - t1;
314
+ compactMs = Date.now() - t1;
280
315
  if (!result) return null;
281
316
 
282
- // Step 3: mark force re-inject (Full mode only)
317
+ // Step 3: mark force re-inject (Full mode only). Reuse the state we
318
+ // already loaded above; the compact step doesn't mutate memory-state
319
+ // (it mutates the pi session, a separate file), so the in-memory copy
320
+ // is still authoritative for our fields.
283
321
  if (mode !== "complement") {
284
- const state = await loadThreadMemoryState(threadId);
285
- state.forceInjectReason = "after-compact";
286
- state.lastCompactAt = new Date().toISOString();
287
- state.pendingCompact = undefined;
288
- await saveThreadMemoryState(threadId, state);
322
+ stateBeforeCompact.forceInjectReason = "after-compact";
323
+ stateBeforeCompact.lastCompactAt = new Date().toISOString();
324
+ stateBeforeCompact.pendingCompact = undefined;
325
+ await saveThreadMemoryState(threadId, stateBeforeCompact);
289
326
  }
290
327
 
291
328
  const totalMs = Date.now() - t0;
@@ -302,30 +339,44 @@ export async function flushMemoryThenCompact(
302
339
  const timing = { flushMs, compactMs, totalMs, model: usedCompactModel ? flushModel! : "default" };
303
340
  console.log(`[memory] flush+compact done for ${threadId}: ${result.tokensBefore} → ${result.tokensAfter ?? "?"} tokens | flush=${flushMs}ms compact=${compactMs}ms total=${totalMs}ms model=${timing.model}`);
304
341
 
305
- // Persist timing log for debugging (async, fire-and-forget)
306
- const logDir = join(homedir(), ".roundhouse", "logs");
307
- mkdir(logDir, { recursive: true })
308
- .then(() => {
309
- const entry = JSON.stringify({
310
- ts: new Date().toISOString(),
311
- threadId,
312
- level,
313
- tokensBefore: result.tokensBefore,
314
- tokensAfter: result.tokensAfter,
315
- ...timing,
316
- });
317
- return appendFile(join(logDir, "compact-timing.jsonl"), entry + "\n");
318
- })
319
- .catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
342
+ // Persist timing log for debugging (async, fire-and-forget).
343
+ // Schema is intentionally uniform across success and failure entries
344
+ // (status discriminator + same field set) so jsonl parsers don't have
345
+ // to special-case missing fields.
346
+ appendCompactLog({
347
+ threadId,
348
+ level,
349
+ effectiveLevel,
350
+ flushSkipped: skipFlush,
351
+ tokensBefore: result.tokensBefore,
352
+ tokensAfter: result.tokensAfter ?? null,
353
+ ...timing,
354
+ status: "ok",
355
+ error: null,
356
+ });
320
357
 
321
358
  return { ...result, timing };
322
359
  } catch (err) {
323
- console.error(`[memory] flush+compact failed for ${threadId}:`, (err as Error).message);
324
- // Mark pending so we retry on next turn
360
+ const errMsg = (err as Error).message;
361
+ console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
362
+ appendCompactLog({
363
+ threadId,
364
+ level,
365
+ effectiveLevel,
366
+ flushSkipped: skipFlush,
367
+ tokensBefore: null,
368
+ tokensAfter: null,
369
+ flushMs, // accurate: 0 if skipped or failed before flush completed
370
+ compactMs, // accurate: 0 if failed before/during compact
371
+ totalMs: Date.now() - t0,
372
+ model: flushModel ?? "default",
373
+ status: "failed",
374
+ error: errMsg.slice(0, 500),
375
+ });
376
+ // Mark pending so we retry on next turn. Reuse the state we already loaded.
325
377
  try {
326
- const state = await loadThreadMemoryState(threadId);
327
- state.pendingCompact = effectiveLevel;
328
- await saveThreadMemoryState(threadId, state);
378
+ stateBeforeCompact.pendingCompact = effectiveLevel;
379
+ await saveThreadMemoryState(threadId, stateBeforeCompact);
329
380
  } catch {}
330
381
  return null;
331
382
  }
@@ -10,12 +10,26 @@ import { formatDate } from "./files";
10
10
  // ── Defaults ─────────────────────────────────────────
11
11
 
12
12
  const DEFAULT_SOFT_PERCENT = 0.45;
13
- const DEFAULT_SOFT_TOKENS = 180_000;
13
+ const DEFAULT_SOFT_TOKENS = 130_000;
14
14
  const DEFAULT_HARD_PERCENT = 0.50;
15
- const DEFAULT_HARD_TOKENS = 200_000;
15
+ const DEFAULT_HARD_TOKENS = 150_000;
16
16
  const DEFAULT_EMERGENCY_THRESHOLD = 32_768;
17
17
  const DEFAULT_COOLDOWN_MS = 10 * 60_000; // 10 minutes
18
18
 
19
+ // Headroom reserved for the summarization payload itself when compact runs.
20
+ // The summarizer prompt serializes ALL discarded history (everything older
21
+ // than ~20k of recent tokens) plus scaffolding plus previous summary, then
22
+ // asks the model to summarize. If the prompt itself overflows the model
23
+ // context, compact() throws. 50k is the empirical headroom that fits a
24
+ // typical summarization prompt on Claude family.
25
+ const COMPACT_HEADROOM_TOKENS = 50_000;
26
+
27
+ // Why 130k/150k as the default absolute thresholds against a 200k window:
28
+ // see COMPACT_HEADROOM_TOKENS above and
29
+ // ~/.roundhouse/workspace/compaction-loop-diagnosis.md (Bug B).
30
+ // For smaller-window models, classifyContextPressure() clamps the absolute
31
+ // thresholds to `window - HEADROOM` so they never exceed the window.
32
+
19
33
  // ── Injection policy ─────────────────────────────────
20
34
 
21
35
  export interface InjectionDecision {
@@ -87,14 +101,21 @@ export function classifyContextPressure(
87
101
 
88
102
  const pctDecimal = percent != null ? percent / 100 : tokens / window;
89
103
 
104
+ // Clamp absolute thresholds so they never exceed `window - HEADROOM`.
105
+ // Defends against future smaller-window models where the configured
106
+ // 150k/130k absolute thresholds would otherwise sit above the window.
107
+ // The percent thresholds already scale with window naturally.
108
+ const headroom = COMPACT_HEADROOM_TOKENS;
109
+ const ceiling = Math.max(0, window - headroom);
110
+
90
111
  // Hard threshold
91
112
  const hardPct = config?.hardPercent ?? DEFAULT_HARD_PERCENT;
92
- const hardTok = config?.hardTokens ?? DEFAULT_HARD_TOKENS;
113
+ const hardTok = Math.min(config?.hardTokens ?? DEFAULT_HARD_TOKENS, ceiling);
93
114
  if (pctDecimal >= hardPct || tokens >= hardTok) return "hard";
94
115
 
95
- // Soft threshold
116
+ // Soft threshold (clamped one step below hard so soft fires first).
96
117
  const softPct = config?.softPercent ?? DEFAULT_SOFT_PERCENT;
97
- const softTok = config?.softTokens ?? DEFAULT_SOFT_TOKENS;
118
+ const softTok = Math.min(config?.softTokens ?? DEFAULT_SOFT_TOKENS, Math.max(0, hardTok - 1));
98
119
  if (pctDecimal >= softPct || tokens >= softTok) return "soft";
99
120
 
100
121
  return "none";