npm - @inceptionstack/roundhouse - Versions diffs - 0.5.27 → 0.5.28 - Mend

@inceptionstack/roundhouse 0.5.27 → 0.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/CHANGELOG.md +5 -0
package/package.json +1 -1
package/src/agents/pi/pi-adapter.ts +18 -0
package/src/memory/lifecycle.ts +80 -29
package/src/memory/policy.ts +26 -5

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,11 @@
 All notable changes to `@inceptionstack/roundhouse` are documented here.
+## [0.5.28] — 2026-05-14
+### Fixed
+- **PR #126 actually shipped this time.** v0.5.26's CHANGELOG advertised the emergency-compact-loop fix, but the underlying PR (`fix/compact-loop-thresholds-and-thinking`) was still OPEN — only the version bump and self-update patch went out. Users on v0.5.26/v0.5.27 still hit `Summarization failed: prompt is too long: 212776 tokens > 200000 maximum` on overflowed sessions because `DEFAULT_HARD_TOKENS` was still 200k with no headroom clamp. This release contains the actual code change: `DEFAULT_HARD_TOKENS=150_000`, `DEFAULT_SOFT_TOKENS=130_000`, `COMPACT_HEADROOM_TOKENS=50_000`, plus `thinkingLevel='off'` forced inside `compactWithModel`. (#126)
 ## [0.5.27] — 2026-05-14
 ### Fixed

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@inceptionstack/roundhouse",
-  "version": "0.5.27",
+  "version": "0.5.28",
   "type": "module",
   "description": "Multi-platform chat gateway that routes messages through a configured AI agent",
   "license": "MIT",

package/src/agents/pi/pi-adapter.ts CHANGED Viewed

@@ -608,7 +608,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
         const agentState = (entry.session as any).agent?.state;
         let currentModel: any;
+        let currentThinkingLevel: any;
         let modelSwapped = false;
+        let thinkingSwapped = false;
         // Resolve and swap model for compact
         if (!agentState) {
@@ -627,6 +629,19 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
             modelSwapped = true;
             console.log(`[pi-agent] compact using model (in-memory): ${modelId}`);
           }
+          // Force thinking off for compact regardless of agent's default.
+          // Summarization doesn't benefit from reasoning, costs more tokens,
+          // and complicates the maxTokens math (adjustMaxTokensForThinking adds
+          // up to 16k thinking budget). Direct state mutation matches the model
+          // swap above and avoids setThinkingLevel(), which would persist to
+          // settings.json.
+          if (agentState.thinkingLevel && agentState.thinkingLevel !== "off") {
+            currentThinkingLevel = agentState.thinkingLevel;
+            agentState.thinkingLevel = "off";
+            thinkingSwapped = true;
+            console.log(`[pi-agent] compact forcing thinkingLevel=off (was ${currentThinkingLevel})`);
+          }
         }
         try {
@@ -640,6 +655,9 @@ export const createPiAgentAdapter: AgentAdapterFactory = (config) => {
           if (modelSwapped) {
             agentState.model = currentModel;
           }
+          if (thinkingSwapped) {
+            agentState.thinkingLevel = currentThinkingLevel;
+          }
         }
       });
     },

package/src/memory/lifecycle.ts CHANGED Viewed

@@ -20,6 +20,36 @@ import { appendFile, mkdir } from "node:fs/promises";
 import { join } from "node:path";
 import { homedir } from "node:os";
+// ── Telemetry helper ─────────────────────────────────
+interface CompactLogEntry { // shape of one record appended to compact-timing.jsonl by appendCompactLog
+  threadId: string; // thread the flush+compact attempt ran for
+  level: string; // the `level` value in flushMemoryThenCompact — presumably the level the caller requested; TODO confirm
+  effectiveLevel: string; // the `effectiveLevel` value in flushMemoryThenCompact; also what gets stored as pendingCompact on failure
+  flushSkipped: boolean; // true when the flush step was bypassed (callers pass `skipFlush`)
+  tokensBefore: number | null; // token count reported by the compact result; null on failed entries
+  tokensAfter: number | null; // token count after compact; null on failure or when the result carries no value
+  flushMs: number; // duration of the flush step in ms; stays 0 if flush was skipped or did not complete
+  compactMs: number; // duration of the compact step in ms; stays 0 if the failure happened before compact finished
+  totalMs: number; // elapsed ms for the whole attempt
+  model: string; // model id used for compact, or the literal "default" when none was selected
+  status: "ok" | "failed"; // discriminator: success and failure entries share the same field set
+  error: string | null; // null when status is "ok"; on failure the error message (callers truncate it to 500 chars)
+}
+/**
+ * Append a compact telemetry entry. Fire-and-forget.
+ * Schema is uniform across success/failure (status discriminator) so
+ * downstream parsers don't have to handle missing fields.
+ *
+ * Serializes `entry` together with an ISO-8601 `ts` timestamp as a single
+ * JSON line and appends it to `~/.roundhouse/logs/compact-timing.jsonl`,
+ * creating the log directory (recursively) first. The promise chain is
+ * neither awaited nor returned, so the caller never blocks on the write
+ * and cannot observe its outcome. A failure in mkdir or appendFile is
+ * caught and reported through console.warn instead of being thrown.
+ *
+ * @param entry - Telemetry record describing one flush+compact attempt.
+ */
+function appendCompactLog(entry: CompactLogEntry): void {
+  const logDir = join(homedir(), ".roundhouse", "logs");
+  const line = JSON.stringify({ ts: new Date().toISOString(), ...entry }) + "\n";
+  mkdir(logDir, { recursive: true })
+    .then(() => appendFile(join(logDir, "compact-timing.jsonl"), line))
+    .catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
+}
 // ── Memory mode detection ────────────────────────────
 /**
@@ -246,11 +276,16 @@ export async function flushMemoryThenCompact(
   // "manual" level, attempting the flush in that condition will hit the same
   // 200k rejection. Deferring flush to a later (successful) turn is the safe
   // recovery path.
-  const stuckInEmergency = (await loadThreadMemoryState(threadId)).pendingCompact === "emergency";
+  const stateBeforeCompact = await loadThreadMemoryState(threadId);
+  const stuckInEmergency = stateBeforeCompact.pendingCompact === "emergency";
   const skipFlush = effectiveLevel === "emergency" || stuckInEmergency;
+  // Hoisted so the catch block can report accurate flush vs compact timing
+  // (a failure during compact() would otherwise conflate the two phases).
+  let flushMs = 0;
+  let compactMs = 0;
   try {
-    let flushMs = 0;
     if (!skipFlush) {
       // Step 1: flush
       const flushText = buildFlushPrompt(mode === "unknown" ? "full" : mode, effectiveLevel);
@@ -276,16 +311,18 @@ export async function flushMemoryThenCompact(
     const result = usedCompactModel
       ? await agent.compactWithModel!(threadId, flushModel!)
       : await agent.compact!(threadId);
-    const compactMs = Date.now() - t1;
+    compactMs = Date.now() - t1;
     if (!result) return null;
-    // Step 3: mark force re-inject (Full mode only)
+    // Step 3: mark force re-inject (Full mode only). Reuse the state we
+    // already loaded above; the compact step doesn't mutate memory-state
+    // (it mutates the pi session, a separate file), so the in-memory copy
+    // is still authoritative for our fields.
     if (mode !== "complement") {
-      const state = await loadThreadMemoryState(threadId);
-      state.forceInjectReason = "after-compact";
-      state.lastCompactAt = new Date().toISOString();
-      state.pendingCompact = undefined;
-      await saveThreadMemoryState(threadId, state);
+      stateBeforeCompact.forceInjectReason = "after-compact";
+      stateBeforeCompact.lastCompactAt = new Date().toISOString();
+      stateBeforeCompact.pendingCompact = undefined;
+      await saveThreadMemoryState(threadId, stateBeforeCompact);
     }
     const totalMs = Date.now() - t0;
@@ -302,30 +339,44 @@ export async function flushMemoryThenCompact(
     const timing = { flushMs, compactMs, totalMs, model: usedCompactModel ? flushModel! : "default" };
     console.log(`[memory] flush+compact done for ${threadId}: ${result.tokensBefore} → ${result.tokensAfter ?? "?"} tokens | flush=${flushMs}ms compact=${compactMs}ms total=${totalMs}ms model=${timing.model}`);
-    // Persist timing log for debugging (async, fire-and-forget)
-    const logDir = join(homedir(), ".roundhouse", "logs");
-    mkdir(logDir, { recursive: true })
-      .then(() => {
-        const entry = JSON.stringify({
-          ts: new Date().toISOString(),
-          threadId,
-          level,
-          tokensBefore: result.tokensBefore,
-          tokensAfter: result.tokensAfter,
-          ...timing,
-        });
-        return appendFile(join(logDir, "compact-timing.jsonl"), entry + "\n");
-      })
-      .catch((err) => console.warn(`[memory] timing log write failed:`, (err as Error).message));
+    // Persist timing log for debugging (async, fire-and-forget).
+    // Schema is intentionally uniform across success and failure entries
+    // (status discriminator + same field set) so jsonl parsers don't have
+    // to special-case missing fields.
+    appendCompactLog({
+      threadId,
+      level,
+      effectiveLevel,
+      flushSkipped: skipFlush,
+      tokensBefore: result.tokensBefore,
+      tokensAfter: result.tokensAfter ?? null,
+      ...timing,
+      status: "ok",
+      error: null,
+    });
     return { ...result, timing };
   } catch (err) {
-    console.error(`[memory] flush+compact failed for ${threadId}:`, (err as Error).message);
-    // Mark pending so we retry on next turn
+    const errMsg = (err as Error).message;
+    console.error(`[memory] flush+compact failed for ${threadId}:`, errMsg);
+    appendCompactLog({
+      threadId,
+      level,
+      effectiveLevel,
+      flushSkipped: skipFlush,
+      tokensBefore: null,
+      tokensAfter: null,
+      flushMs,    // accurate: 0 if skipped or failed before flush completed
+      compactMs,  // accurate: 0 if failed before/during compact
+      totalMs: Date.now() - t0,
+      model: flushModel ?? "default",
+      status: "failed",
+      error: errMsg.slice(0, 500),
+    });
+    // Mark pending so we retry on next turn. Reuse the state we already loaded.
     try {
-      const state = await loadThreadMemoryState(threadId);
-      state.pendingCompact = effectiveLevel;
-      await saveThreadMemoryState(threadId, state);
+      stateBeforeCompact.pendingCompact = effectiveLevel;
+      await saveThreadMemoryState(threadId, stateBeforeCompact);
     } catch {}
     return null;
   }

package/src/memory/policy.ts CHANGED Viewed

@@ -10,12 +10,26 @@ import { formatDate } from "./files";
 // ── Defaults ─────────────────────────────────────────
 const DEFAULT_SOFT_PERCENT = 0.45;
-const DEFAULT_SOFT_TOKENS = 180_000;
+const DEFAULT_SOFT_TOKENS = 130_000;
 const DEFAULT_HARD_PERCENT = 0.50;
-const DEFAULT_HARD_TOKENS = 200_000;
+const DEFAULT_HARD_TOKENS = 150_000;
 const DEFAULT_EMERGENCY_THRESHOLD = 32_768;
 const DEFAULT_COOLDOWN_MS = 10 * 60_000; // 10 minutes
+// Headroom reserved for the summarization payload itself when compact runs.
+// The summarizer prompt serializes ALL discarded history (everything older
+// than ~20k of recent tokens) plus scaffolding plus previous summary, then
+// asks the model to summarize. If the prompt itself overflows the model
+// context, compact() throws. 50k is the empirical headroom that fits a
+// typical summarization prompt on Claude family.
+const COMPACT_HEADROOM_TOKENS = 50_000;
+// Why 130k/150k as the default absolute thresholds against a 200k window:
+// see COMPACT_HEADROOM_TOKENS above and
+// ~/.roundhouse/workspace/compaction-loop-diagnosis.md (Bug B).
+// For smaller-window models, classifyContextPressure() clamps the absolute
+// thresholds to `window - HEADROOM` so they never exceed the window.
 // ── Injection policy ─────────────────────────────────
 export interface InjectionDecision {
@@ -87,14 +101,21 @@ export function classifyContextPressure(
   const pctDecimal = percent != null ? percent / 100 : tokens / window;
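+  // Clamp absolute thresholds so they never exceed `window - HEADROOM`.
+  // Defends against future smaller-window models where the configured
+  // 150k/130k absolute thresholds would otherwise sit above the window.
+  // The percent thresholds already scale with window naturally.
+  // NOTE(review): when `window <= HEADROOM` the ceiling is 0, so the
+  // clamped hard threshold is 0 and the `tokens >= hardTok` check below
+  // passes for any non-negative token count — every call that reaches it
+  // classifies as "hard". Confirm this is intended for such models.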
+  const headroom = COMPACT_HEADROOM_TOKENS;
+  const ceiling = Math.max(0, window - headroom);
   // Hard threshold
   const hardPct = config?.hardPercent ?? DEFAULT_HARD_PERCENT;
-  const hardTok = config?.hardTokens ?? DEFAULT_HARD_TOKENS;
+  const hardTok = Math.min(config?.hardTokens ?? DEFAULT_HARD_TOKENS, ceiling);
   if (pctDecimal >= hardPct || tokens >= hardTok) return "hard";
-  // Soft threshold
+  // Soft threshold (clamped one step below hard so soft fires first).
   const softPct = config?.softPercent ?? DEFAULT_SOFT_PERCENT;
-  const softTok = config?.softTokens ?? DEFAULT_SOFT_TOKENS;
+  const softTok = Math.min(config?.softTokens ?? DEFAULT_SOFT_TOKENS, Math.max(0, hardTok - 1));
   if (pctDecimal >= softPct || tokens >= softTok) return "soft";
   return "none";