npm - create-walle - Versions diffs - 0.9.26 → 0.9.27 - Mend

create-walle 0.9.26 → 0.9.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/package.json +1 -1
package/template/claude-task-manager/api-prompts.js +11 -6
package/template/claude-task-manager/lib/session-messages-projection.js +30 -1
package/template/claude-task-manager/public/index.html +50 -2
package/template/claude-task-manager/server.js +125 -1
package/template/claude-task-manager/workers/read-pool-worker.js +10 -0
package/template/package.json +1 -1
package/template/wall-e/agent.js +77 -24
package/template/wall-e/brain.js +258 -1
package/template/wall-e/chat.js +30 -25
package/template/wall-e/coding/session-plan.js +79 -0
package/template/wall-e/coding-orchestrator.js +9 -3
package/template/wall-e/coding-prompts.js +10 -3
package/template/wall-e/lib/scheduler.js +154 -8
package/template/wall-e/lib/worker-thread-pool.js +9 -1
package/template/wall-e/loops/think.js +26 -3
package/template/wall-e/mcp-server.js +20 -4
package/template/wall-e/sources/jsonl-utils.js +84 -11
package/template/wall-e/tools/local-tools.js +16 -0
package/template/wall-e/workers/runtime-worker.js +24 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "create-walle",
-  "version": "0.9.26",
+  "version": "0.9.27",
   "description": "CTM + Wall-E — AI coding dashboard and personal digital twin agent. Multi-agent terminal for Claude Code, Codex, Gemini, Aider, OpenCode, and more, plus prompt editor, task queue, remote phone and tablet access, code/doc review, and an agent that learns from Slack, email & calendar.",
   "bin": {
     "create-walle": "bin/create-walle.js"

package/template/claude-task-manager/api-prompts.js CHANGED Viewed

@@ -2522,10 +2522,12 @@ const HOTKEY_BUILD_MARKER = path.join(os.homedir(), '.walle', 'bundles', '.ctm-h
 const SCREEN_AUTH_BINARY = path.join(os.homedir(), '.local', 'bin', 'ctm-screen-auth');
 const SCREEN_AUTH_SWIFT_SOURCE = path.join(__dirname, 'bin', 'ctm-screen-auth.swift');
 const SCREEN_AUTH_SIGN_IDENTIFIER = 'com.walle.ctm';
-// Full-screen, non-interactive capture (-x = no camera sound). The hotkey/button should produce a
-// screenshot immediately; the old '-i' popped a region-select crosshair that, when not drag-completed,
-// exited with no file and logged as "cancelled" — the #1 "the screenshot does nothing" report.
-const SCREEN_CAPTURE_NODE_SCRIPT = "require('child_process').execFileSync('/usr/sbin/screencapture',['-x',process.argv[1]],{stdio:'inherit'})";
+// Interactive region capture (-i): pops the crosshair so the user drags a region (or hits Space for
+// a window, or Escape to cancel). This is what users expect from a screenshot hotkey/button — '-x'
+// (full-screen) grabbed everything with no choice. A cancelled selection exits 0 with no file; the
+// capture handler treats a missing file as a clean cancel (HTTP 400 "Screenshot cancelled"), so the
+// old "-i logs as a failure" concern no longer applies.
+const SCREEN_CAPTURE_NODE_SCRIPT = "require('child_process').execFileSync('/usr/sbin/screencapture',['-i',process.argv[1]],{stdio:'inherit'})";
 const SCREEN_AUTH_NODE_SCRIPT = "const cp=require('child_process');const r=cp.spawnSync(process.argv[1],{encoding:'utf8'});if(r.stdout)process.stdout.write(r.stdout);if(r.stderr)process.stderr.write(r.stderr);process.exit(r.status==null?1:r.status);";
 // Like SCREEN_AUTH_NODE_SCRIPT but runs the helper in --preflight mode (report grant, NEVER prompt)
 // so we can probe several node identities and pick one already granted without popping dialogs.
@@ -2935,7 +2937,7 @@ function screenshotCaptureCommand(tmpFile, context = {}) {
       cmdArgs: [context.realNode, '-e', SCREEN_CAPTURE_NODE_SCRIPT, tmpFile],
     };
   }
-  return { cmd: '/usr/sbin/screencapture', cmdArgs: ['-x', tmpFile] };
+  return { cmd: '/usr/sbin/screencapture', cmdArgs: ['-i', tmpFile] };
 }
 // Spawn the Screen Recording permission helper under the same macOS responsible process
@@ -3037,7 +3039,10 @@ async function handleScreenshot(req, res) {
     // remain responsive and allows screencapture to work across all monitors.
     const tCap = Date.now();
     await new Promise((resolve, reject) => {
-      execFile(cmd, cmdArgs, { timeout: 30000, encoding: 'utf8' }, (err, _stdout, stderr) => {
+      // Interactive (-i) keeps the crosshair up until the user drags/clicks/Escapes — give them a
+      // generous window (2 min) so a deliberating user isn't killed mid-selection and mis-reported
+      // as a failure. An idle crosshair costs nothing; the timeout only bounds a truly abandoned one.
+      execFile(cmd, cmdArgs, { timeout: 120000, encoding: 'utf8' }, (err, _stdout, stderr) => {
         if (err) { if (stderr) err._stderr = stderr; reject(err); } else resolve();
       });
     });

package/template/claude-task-manager/lib/session-messages-projection.js CHANGED Viewed

@@ -167,4 +167,33 @@ function buildPaginatedPageResponse({
   return { ...pageMeta, messages: visibleMessages, ...finalExtra };
 }
-module.exports = { applyExclusions, sessionMessageTextHash, projectSessionMessagesPage, buildPaginatedPageResponse };
+// Stage C: the NON-paginated full read. Mirrors server.js sendMessages' projection + response
+// assembly EXACTLY so the worker can project + serialize the full conversation off the main loop
+// (the on-main merge/dedupText + exclusions sha256/msg + image-refs + stringify over a 3k-16k-msg
+// array was the profiled "reading conversational logs" freeze). Two response shapes, matching
+// sendMessages: a BARE ARRAY when the caller passed no extra (the common conversation read), else
+// { messages, ...finalExtra } with the same timeline fields sendMessages adds when hasCallerExtra.
+function buildFullMessagesResponse({
+  baseMessages, streamEvents = [], skipStreamTail = false, exclusionRows = [], filterCodexSynthetic = false,
+  imageManifests = null, extra = {}, publicIdentity = null, bareArray = false,
+} = {}) {
+  const proj = projectSessionMessagesPage({
+    baseMessages, streamEvents, skipStreamTail, exclusionRows, filterCodexSynthetic, imageManifests, allowEmptyBase: true,
+  });
+  const visibleMessages = proj.messages;
+  // Bare array: non-paginated read with no caller extra (sendMessages returns JSON.stringify(arr)).
+  if (bareArray) return visibleMessages;
+  const visibleTailAdded = Math.max(0, (proj.visibleMergedCount || 0) - (proj.visibleBaseCount || 0));
+  const finalExtra = { ...extra };
+  delete finalExtra.skipStreamTail;
+  finalExtra.timelineIdentity = publicIdentity || null;
+  finalExtra.timelineFreshness = visibleTailAdded > 0 ? 'hot-tail' : 'durable';
+  finalExtra.streamEventCount = proj.streamEventCount || 0;
+  if (visibleTailAdded > 0) {
+    finalExtra.source = _sourceWithStreamTail(finalExtra.source, visibleTailAdded);
+    finalExtra.streamTailAdded = visibleTailAdded;
+  }
+  return { messages: visibleMessages, ...finalExtra };
+}
+module.exports = { applyExclusions, sessionMessageTextHash, projectSessionMessagesPage, buildPaginatedPageResponse, buildFullMessagesResponse };

package/template/claude-task-manager/public/index.html CHANGED Viewed

@@ -12327,6 +12327,15 @@ const _CODEX_BUSY_STATUS_LINE_RE = /^(?:(?:waiting\s+for\s+background\s+terminal
 const _CODEX_BUSY_COMPACT_STATUS_LINE_RE = /^working\s+(?:\d+(?::\d+){0,2}|\d+(?:\.\d+)?\s*(?:ms|s|sec|secs|m|min|mins|h|hr|hrs))$/iu;
 const _CODEX_BUSY_WORD = 'working';
 const _CODEX_BUSY_HINT_RE = /esc\s+to\s+interrupt/i;
+// A width-reflow strand can paint the Codex "Working (12s • esc to interrupt)"
+// status line over trailing scrollback content, leaving a merged line such as
+// "Working RSA-1ES046-SHA256:DHE-RSA-AES256-SHA256:HIGH:!aNULL:!…". The strict
+// anchored REs above miss it, so a genuinely-running turn whose PTY has gone
+// briefly quiet gets demoted to Idle. Recognize the surviving "Working" prefix
+// when the trailing remainder is clearly NOT prose (code/cipher-like garble),
+// and never for ordinary "Working <word>" output ("working tree", "working on").
+const _CODEX_BUSY_GARBLE_PROSE_RE = /^working\s+(?:tree|dir|directory|copy|set|version|branch|on|in|with|through|from|for|as|at|to|and|but|so|the|a|an|my|your|our|this|that|it|now|here|hard|fine|well|great|correctly|properly)\b/i;
+const _CODEX_BUSY_GARBLE_SIGNAL_RE = /[:!]|[A-Z0-9]{3,}[-:]|\besc\b/;
 const _CODEX_SKILL_WARNING_RE = /^(?:⚠\s*)?Skipped loading\s+\d+\s+skill\(s\)\s+due to invalid SKILL\.md files\./i;
 const _CODEX_SKILL_DESCRIPTION_RE = /^\/.*\/SKILL\.md:\s+invalid description:\s+exceeds maximum length of\s+\d+\s+characters$/i;
 const _GEMINI_STATUS_FRAGMENT_RE = /^(?:[\s\d•◦·∙●○✦✧◆◇◐◓◑◒|\/\\-]+|Thinking\.{0,3}|Working\.{0,3}|Running\.{0,3}|Responding\.{0,3}|Loading\.{0,3}|esc\s+to\s+(?:cancel|interrupt)|ctrl\+c\s+to\s+(?:quit|cancel)|press\s+enter\s+to\s+send|shift\+enter\s+for\s+newline)$/i;
@@ -12357,11 +12366,18 @@ function _normalizeCodexStatusLineText(text) {
     .replace(/^[\s•◦·∙●○]+\s*/, '');
 }
+function _isGarbledCodexBusyStatusLine(line) {
+  if (!/^working\b/i.test(line)) return false;
+  if (_CODEX_BUSY_GARBLE_PROSE_RE.test(line)) return false;
+  return _CODEX_BUSY_GARBLE_SIGNAL_RE.test(line.slice(_CODEX_BUSY_WORD.length));
+}
 function _isCodexBusyStatusLineText(text) {
   const line = _normalizeCodexStatusLineText(text);
   if (!line || line.length > 240) return false;
   return _CODEX_BUSY_STATUS_LINE_RE.test(line) ||
-    _CODEX_BUSY_COMPACT_STATUS_LINE_RE.test(line);
+    _CODEX_BUSY_COMPACT_STATUS_LINE_RE.test(line) ||
+    _isGarbledCodexBusyStatusLine(line);
 }
 function _inputMayResolveWaiting(data, session) {
@@ -33241,7 +33257,30 @@ function _maybeReconcile(s, id, reason, extra) {
   // Only compare when the server measured at the client's CURRENT dims; otherwise
   // the fingerprints legitimately differ on dims and a resize/reconcile is already
   // in flight elsewhere. Comparing across dims would cause a reconcile loop.
-  if (s._serverFpCols != null && s._serverFpRows != null && (s.term.cols !== s._serverFpCols || s.term.rows !== s._serverFpRows)) return;
+  if (s._serverFpCols != null && s._serverFpRows != null && (s.term.cols !== s._serverFpCols || s.term.rows !== s._serverFpRows)) {
+    // The server's fingerprint was measured at a DIFFERENT width than our grid now
+    // holds — e.g. a background-tab pane-resize refit the grid on switch but no fresh
+    // heartbeat has arrived at the new width. We still skip the cross-dims compare
+    // (it would loop), but a plain skip here leaves an idle session you just switched
+    // to STUCK forever: no output ⇒ no heartbeat ⇒ the onHeartbeat dims-reassert never
+    // runs, so the server keeps serializing its stale-wide frame and the stranded
+    // composer / branch chip never heals (the on-switch "check + redraw" the user
+    // expects). Re-assert our dims ONCE here too (rate-limited via the SAME
+    // _dimsReassertAt the heartbeat path uses) so the PTY resizes, the agent repaints
+    // clean at our width, and the next heartbeat is comparable. Skipped while streaming
+    // (force bypasses the SIGWINCH-during-stream deferral → duplicate scrollback); the
+    // heartbeat reassert covers that case once output quiesces. Uniform for every
+    // provider — unlike the claude-only stale-width detector, this needs no agent type
+    // or stranded-fragment shape, so it also unsticks an idle Codex frame.
+    const nowTs = Date.now();
+    if (!_isSessionStreaming(s)
+        && (!s._dimsReassertAt || nowTs - s._dimsReassertAt > 2000)
+        && Number.isFinite(s.term.cols) && Number.isFinite(s.term.rows)) {
+      s._dimsReassertAt = nowTs;
+      try { _sendTerminalResizeIfChanged(s, id, 'reconcile-dims-reassert', { force: true, cols: s.term.cols, rows: s.term.rows }); } catch {}
+    }
+    return;
+  }
   let clientFp;
   try { clientFp = window.TerminalReconciler.clientFingerprint(s.term); } catch { return; }
   const focusHelper = (typeof _activeTerminalHasFocus === 'function') ? _activeTerminalHasFocus(s) : (document.hasFocus() && state.activeTab === id);
@@ -35986,6 +36025,15 @@ function onSnapshot(msg) {
     _terminalRenderCheckFullyRendered(s, 'snapshot-stale-no-data-skip');
     if (state.activeTab === msg.id) focusTerminalIfSafe(msg.id);
     _primeLatestAnswerForTerminalView(msg.id);
+    // The server skipped this reflow because output arrived after the request — the agent is
+    // still emitting (e.g. a post-/compact burst). Nothing else re-requests promptly, so the
+    // render (and the user's typed-input echo) can lag until the 3s self-check. Schedule an
+    // authoritative recovery so a fresh reflow lands ~1s after the output quiets; it
+    // rate-limits itself, defers while typing, and — because each retry that ALSO stale-skips
+    // re-enters this branch — forms a retry-until-settle loop that self-terminates when a fresh
+    // frame lands (snapshot-done clears _needsAuthoritativeSnapshot at ~36319).
+    s._needsAuthoritativeSnapshot = true;
+    _scheduleAuthoritativeSnapshotRecovery(s, msg.id, 'reflow-stale-skip', { delayMs: 900, minGapMs: 900 });
     return;
   }
   // A hot terminal that already has healthy buffer content does not need a

package/template/claude-task-manager/server.js CHANGED Viewed

@@ -10524,6 +10524,45 @@ function _standupSummaryFor(source, ctmSessionId, agentSessionId, identity = nul
   return null;
 }
+// Per-session timeline-summary cache. The materialized standup snapshot refreshes every
+// STANDUP_MATERIALIZED_REFRESH_AFTER_MS (1.5s) and re-projects EVERY active session — each projection
+// is a full-conversation merge (conversation-tail-merge dedupText) + summarize. With 16 mostly-idle
+// sessions, that re-derived the whole history of every session ~every 1.5s on the main loop (the
+// CPU-profiler's top `dedupText` freeze frame). Cache the per-session summary keyed on a CHEAP
+// content-version — the durable row count (countSessionMessageRows, an indexed COUNT) plus the live
+// stream summary's latest-activity ms — so an UNCHANGED session returns its cached summary instantly
+// and only sessions with new durable rows or new stream output pay the projection. A generous max-age
+// is a safety net against any version signal a change slips past. Disable with
+// CTM_TIMELINE_SUMMARY_CACHE=0; tune staleness with CTM_TIMELINE_SUMMARY_CACHE_MAX_AGE_MS.
+const _timelineStandupSummaryCache = new Map(); // ctmId → { version, summary, at }
+const _TIMELINE_SUMMARY_CACHE_MAX_AGE_MS = Number(process.env.CTM_TIMELINE_SUMMARY_CACHE_MAX_AGE_MS ?? 300000);
+function _timelineSummaryCacheVersion(ctmId, rawSummary) {
+  let count = 0;
+  try { count = dbModule.countSessionMessageRows(ctmId) || 0; } catch {}
+  let streamMs = 0;
+  try {
+    const fn = require('./lib/session-timeline-summary')._private?._summaryActivityMs;
+    streamMs = rawSummary && typeof fn === 'function' ? (fn(rawSummary) || 0) : 0;
+  } catch {}
+  return `${count}:${streamMs}`;
+}
+async function _cachedTimelineSummaryForStandup(sessionId, identity, rawSummary) {
+  if (process.env.CTM_TIMELINE_SUMMARY_CACHE === '0') return _timelineSummaryForStandup(sessionId, identity);
+  const ctmId = _resolveOwnerCtmSessionId(sessionId) || sessionId;
+  const version = _timelineSummaryCacheVersion(ctmId, rawSummary);
+  const now = Date.now();
+  const hit = _timelineStandupSummaryCache.get(ctmId);
+  if (hit && hit.version === version && (now - hit.at) < _TIMELINE_SUMMARY_CACHE_MAX_AGE_MS) {
+    return hit.summary;
+  }
+  const summary = await _timelineSummaryForStandup(sessionId, identity);
+  // Bound memory: this only ever keys active sessions, but a long-lived process churns through many —
+  // drop the whole map if it grows past a sane ceiling (cheap to repopulate).
+  if (_timelineStandupSummaryCache.size > 512) _timelineStandupSummaryCache.clear();
+  _timelineStandupSummaryCache.set(ctmId, { version, summary, at: now });
+  return summary;
+}
 async function _timelineSummaryForStandup(sessionId, identity = null) {
   const resolved = identity || _resolveSessionTimelineIdentity(sessionId);
   let timeline = null;
@@ -11275,7 +11314,7 @@ async function _buildStandupSnapshot(options = {}) {
     const timelineIdentity = _resolveSessionTimelineIdentity(payload.id);
     const rawSummary = _standupSummaryFor(source, payload.id, agentSessionId, timelineIdentity);
     const timelineSummary = includeTimeline
-      ? await _timelineSummaryForStandup(payload.id, timelineIdentity)
+      ? await _cachedTimelineSummaryForStandup(payload.id, timelineIdentity, rawSummary)
       : null;
     const rawMergedSummary = mergeTimelineSummaries(rawSummary, timelineSummary);
     let summary = rawMergedSummary ? {
@@ -12915,6 +12954,28 @@ async function _buildSessionMessagesPageResponseOffThread(payload) {
   });
 }
+// Stage C: the NON-paginated full read — project (merge + exclusions + image-refs) AND serialize the
+// whole conversation OFF the main loop (the on-main projection over a 3k-16k-msg array was the
+// profiled "reading conversational logs" freeze). Same shared projection module → byte-identical
+// on-main fallback (named apiSessionMessages:serializeFullResponse) on pool failure/degrade.
+async function _buildSessionMessagesFullResponseOffThread(payload) {
+  const onMain = () => {
+    const obj = require('./lib/session-messages-projection').buildFullMessagesResponse(payload);
+    const str = _perfTracker.runSyncProbed('apiSessionMessages:serializeFullResponse',
+      () => JSON.stringify(obj),
+      { context: () => ({ msgs: Array.isArray(obj) ? obj.length : (Array.isArray(obj?.messages) ? obj.messages.length : 0), fallback: 1 }) });
+    return Buffer.from(str, 'utf8');
+  };
+  return _offthreadRead({
+    breaker: _sessionMessagesPageBreaker,
+    op: 'buildSessionMessagesFullResponse',
+    timeoutMs: _SESSION_MESSAGES_READPOOL_TIMEOUT_MS,
+    payload,
+    onSuccess: (res) => (res && Buffer.isBuffer(res.__resultBuffer)) ? res.__resultBuffer : onMain(),
+    onFail: () => onMain(),
+  });
+}
 // Wave 3.1: off-thread the durable-tail reconcile's bounded JSONL parse. The cheap gate
 // (resolve the live JSONL path/size + the imported byte HWM) stays on main; only when the
 // importer is lagging (live > imported) do we hit the pool, so the common "caught up" case
@@ -12984,8 +13045,66 @@ async function apiSessionMessages(req, res, url) {
     return;
   }
+  // Off-thread NON-paginated full read: resolve the small projection inputs on main, then merge +
+  // exclude + image-ref + serialize the WHOLE conversation on the read-pool worker (mirrors
+  // sendPaginatedMessagePage, for the full-array case). Removes the on-main projection that froze
+  // the loop while reading a large conversation. Same shared module → byte-identical on-main
+  // fallback on pool failure. Cacheable with a stream-event signature folded into the key (so a hot
+  // tail rebuilds), exactly like the page path — no main-thread merge needed to decide cacheability.
+  const _sendFullMessagesOffThread = async (baseMessages, extra = {}) => {
+    const skipStreamTail = extra.skipStreamTail === true;
+    let identity = null;
+    let streamEvents = [];
+    if (!skipStreamTail) {
+      identity = _resolveSessionTimelineIdentity(sessionId);
+      streamEvents = collectTimelineStreamEvents(identity, {
+        limit: 200,
+        getRecentEvents: (lookupId, lookupLimit) => _getRecentStreamEventsForTimelineId(lookupId, lookupLimit),
+      });
+    }
+    const _extraForKey = { ...extra };
+    delete _extraForKey.skipStreamTail;
+    // Bare array iff the caller passed no extra — matches sendMessages' `JSON.stringify(visibleMessages)`.
+    const bareArray = Object.keys(_extraForKey).length === 0;
+    const _cacheable = CTM_MSG_RESPONSE_CACHE_ENABLED;
+    let _cacheKey = null;
+    if (_cacheable) {
+      _cacheKey = buildResponseCacheKey({
+        sessionId,
+        sourceVersion: _msgBaseSignature(baseMessages),
+        exclusionsVersion: _msgExclusionsSignature(sessionId),
+        offset, limit,
+        mode: `mfull:${skipStreamTail ? '1' : '0'}:s${_streamEventsSignature(streamEvents)}:${JSON.stringify(_extraForKey)}`,
+      });
+      if (!noCache) {
+        const _hit = _msgResponseCache.get(_cacheKey);
+        if (_hit) {
+          res.writeHead(200, { 'Content-Type': 'application/json', 'X-Msg-Cache': 'hit' });
+          res.end(_hit.buffer);
+          return;
+        }
+      }
+    }
+    const exclusionRows = _loadSessionMessageExclusions(sessionId);
+    const filterCodexSynthetic = _shouldFilterCodexSyntheticMessages(sessionId, baseMessages, {});
+    const imageManifests = _resolveImageManifestsForProjection(sessionId, baseMessages, streamEvents);
+    const buffer = await _buildSessionMessagesFullResponseOffThread({
+      baseMessages, streamEvents, skipStreamTail, exclusionRows, filterCodexSynthetic, imageManifests,
+      extra, publicIdentity: _publicTimelineIdentity(identity), bareArray,
+    });
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    if (_cacheable && _cacheKey) _msgResponseCache.set(_cacheKey, { buffer });
+    res.end(buffer);
+  };
   const sendMessages = (messages, extra = {}) => {
     const baseMessages = Array.isArray(messages) ? messages : [];
+    // Large non-paginated read → project + serialize OFF the main loop (the conversation-read freeze).
+    // Small reads stay on main (a worker round-trip + array clone isn't worth it); paginated reads
+    // already have their own off-thread page path. CTM_MSG_OFFTHREAD_FULL=0 forces the on-main path.
+    if (!paginated && CTM_MSG_OFFTHREAD_FULL_ENABLED && baseMessages.length >= CTM_MSG_OFFTHREAD_FULL_MIN) {
+      return _sendFullMessagesOffThread(baseMessages, extra);
+    }
     // Compute the stream-tail merge first (own attribution probe) so we can both name
     // it AND gate the serialized-response cache on whether the live tail adds anything.
     const merge = extra.skipStreamTail
@@ -14936,6 +15055,11 @@ const _sessionMessageExclusionsCache = new Map(); // sessionId -> { at, rows }
 // function of the output and served only when the live stream tail adds nothing
 // (merge.added === 0) — turns a steady-state poll into a Map lookup + res.end().
 const CTM_MSG_RESPONSE_CACHE_ENABLED = process.env.CTM_MSG_RESPONSE_CACHE !== '0';
+// Off-thread the NON-paginated full conversation read (merge + exclusions + image-refs + serialize)
+// once the base is large enough that a worker round-trip + array clone beats blocking the loop.
+// CTM_MSG_OFFTHREAD_FULL=0 disables (force on-main); CTM_MSG_OFFTHREAD_FULL_MIN tunes the threshold.
+const CTM_MSG_OFFTHREAD_FULL_ENABLED = process.env.CTM_MSG_OFFTHREAD_FULL !== '0';
+const CTM_MSG_OFFTHREAD_FULL_MIN = Math.max(1, Number(process.env.CTM_MSG_OFFTHREAD_FULL_MIN) || 800);
 const _msgResponseCache = createResponseCache({
   maxEntries: Math.max(16, Number(process.env.CTM_MSG_RESPONSE_CACHE_MAX) || 256),
 });

package/template/claude-task-manager/workers/read-pool-worker.js CHANGED Viewed

@@ -261,6 +261,16 @@ async function _runOp(op, payload = {}) {
       return { __transfer: true, buffer: Buffer.from(JSON.stringify(obj), 'utf8') };
     }
+    case 'buildSessionMessagesFullResponse': {
+      // Stage C: the NON-paginated full read — project (merge + exclusions + image-refs) AND
+      // serialize the WHOLE conversation in the worker, returning BYTES (zero-copy). Removes the
+      // on-main projection that froze the loop while reading a large conversation. Returns either a
+      // bare array or { messages, ...extra } per payload.bareArray (mirrors server.js sendMessages).
+      const proj = require('../lib/session-messages-projection');
+      const obj = proj.buildFullMessagesResponse(payload);
+      return { __transfer: true, buffer: Buffer.from(JSON.stringify(obj), 'utf8') };
+    }
     case 'parseJsonlTail': {
       // Bounded (≤~1MB) tail read+parse of a live agent JSONL OFF the main loop — the
       // apiSessionMessages durable-tail reconcile (lagging importer on an actively-written

package/template/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "walle",
-  "version": "0.9.26",
+  "version": "0.9.27",
   "private": true,
   "description": "Wall-E — your personal digital twin",
   "scripts": {

package/template/wall-e/agent.js CHANGED Viewed

@@ -557,6 +557,8 @@ async function main() {
         }, {
           timeoutMs: options.timeoutMs || 0,
           heavy: options.heavy !== false,
+          lane: options.lane,
+          priority: options.priority,
           terminateOnTimeout: (options.timeoutMs || 0) > 0,
         });
       } catch (err) {
@@ -734,6 +736,7 @@ async function main() {
     run: () => ingest.runOnce(adapters),
     intervalMs: clampInterval(config.intervals?.ingest_ms, 60000),
     pool: 'io',
+    lane: 0,
     priority: 1,
     startDelayMs: 2000,
     onResult: (r) => {
@@ -750,6 +753,7 @@ async function main() {
     run: () => think.runOnce(),
     intervalMs: clampInterval(config.intervals?.think_ms, 120000),
     pool: 'llm',
+    lane: 0,
     priority: 2,
     startDelayMs: 5000,
     dependsOn: ['ingest'],
@@ -770,6 +774,7 @@ async function main() {
     run: () => runDueTasks(),
     intervalMs: clampInterval(config.intervals?.tasks_ms, 30000),
     pool: 'llm',
+    lane: 0,
     priority: 3,
     startDelayMs: 8000,
     onResult: (r) => {
@@ -793,6 +798,7 @@ async function main() {
     run: ({ modelOverride } = {}) => runDueSkills({ modelOverride }),
     intervalMs: clampInterval(config.intervals?.skills_ms, 300000),
     pool: 'llm',
+    lane: 1,
     priority: 4,
     startDelayMs: 11000,
     onResult: (r) => {
@@ -842,7 +848,9 @@ async function main() {
     },
     intervalMs: clampInterval(config.intervals?.training_ms, 300000),
     pool: 'ollama',
+    lane: 2,
     priority: 5,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 20000,
     shouldRun: () => getShadowConfig().enabled,
     onError: (err) => console.error('[eval] Loop error:', err.message),
@@ -865,7 +873,9 @@ async function main() {
     },
     intervalMs: clampInterval(config.intervals?.replay_ms, 300000),
     pool: 'ollama',
+    lane: 2,
     priority: 5,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 15000,
     shouldRun: () => getShadowConfig().enabled,
     onError: (err) => console.error('[replay] Loop error:', err.message),
@@ -879,6 +889,7 @@ async function main() {
     },
     intervalMs: clampInterval(config.intervals?.initiative_ms, 300000),
     pool: 'llm',
+    lane: 1,
     priority: 6,
     startDelayMs: 25000,
     debounceMs: 30000,
@@ -899,7 +910,9 @@ async function main() {
     run: () => reflect.runOnce(),
     intervalMs: clampInterval(config.intervals?.reflect_ms, 3600000),
     pool: 'llm',
+    lane: 2,
     priority: 7,
+    restartCatchupPolicy: 'defer',
     dependsOn: ['think'],
     startDelayMs: 30000,
     onResult: (r) => {
@@ -924,7 +937,9 @@ async function main() {
     },
     intervalMs: 600000,
     pool: 'io',
+    lane: 2,
     priority: 8,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 30000,
     onError: (err) => console.error('[harvest] Loop error:', err.message),
   });
@@ -947,7 +962,9 @@ async function main() {
     },
     intervalMs: 30000,
     pool: 'embedding',
+    lane: 2,
     priority: 9,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 35000,
     timeoutMs: 90000,
     onError: (err) => {
@@ -961,7 +978,9 @@ async function main() {
     run: async () => require('./loops/brain-optimize').runOnce(),
     intervalMs: 3600000,    // hourly; self-gates to a one-shot via kv (brain:optimize:v1)
     pool: 'io',
+    lane: 2,
     priority: 4,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 180000,   // 3 min after boot — well past the boot storm
     timeoutMs: 120000,
     onError: (err) => console.error('[brain-optimize] Loop error:', err.message),
@@ -975,7 +994,9 @@ async function main() {
     },
     intervalMs: 24 * 60 * 60 * 1000, // daily
     pool: 'embedding',
+    lane: 2,
     priority: 10,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 120000, // wait 2min after startup
     onResult: (r) => {
       if (r.archived > 0) console.log(`[wall-e] Dedup: archived ${r.archived} duplicates`);
@@ -996,8 +1017,10 @@ async function main() {
     },
     intervalMs: 24 * 60 * 60 * 1000, // daily
     pool: 'io',
+    lane: 2,
     priority: 11,
-    startDelayMs: 180000, // 3min after startup — well off the boot/readiness path
+    restartCatchupPolicy: 'defer',
+    startDelayMs: 185000, // just after 3min — well off the boot/readiness path and unique in the IO pool
     onError: (err) => {
       console.error('[wall-e] Daily backup error:', err.message);
       telemetry.trackError('daily-backup', err);
@@ -1011,7 +1034,9 @@ async function main() {
     run: async () => brain.runBrainRetention(),
     intervalMs: 60 * 60 * 1000,
     pool: 'io',
+    lane: 2,
     priority: 11,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 90000,
     onResult: (r) => {
       const nonZero = Object.entries(r || {}).filter(([, v]) => typeof v === 'number' && v > 0);
@@ -1031,7 +1056,9 @@ async function main() {
     },
     intervalMs: 60 * 60 * 1000,
     pool: 'io',
+    lane: 2,
     priority: 11,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 100000,
     onResult: (r) => {
       if (r && r.sent) console.log(`[wall-e] Question digest sent (${r.count}): ${JSON.stringify(r.results)}`);
@@ -1048,6 +1075,7 @@ async function main() {
     },
     intervalMs: 300000,
     pool: 'io',
+    lane: 1,
     priority: 7,
     startDelayMs: 12000,
     shouldRun: () => {
@@ -1072,7 +1100,9 @@ async function main() {
     },
     intervalMs: 24 * 60 * 60 * 1000,
     pool: 'io',
+    lane: 2,
     priority: 10,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 300000,
     onResult: (r) => {
       if (r.cleaned > 0) console.log(`[wall-e] Cleaned ${r.cleaned} old recordings`);
@@ -1096,7 +1126,9 @@ async function main() {
     },
     intervalMs: 24 * 60 * 60 * 1000,
     pool: 'io',
+    lane: 2,
     priority: 10,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 295000,
     shouldRun: () => process.env.WALLE_TELEMETRY_SERVER === '1',
     onResult: (r) => {
@@ -1140,7 +1172,9 @@ async function main() {
     },
     intervalMs: 7 * 24 * 60 * 60 * 1000, // Weekly
     pool: 'llm',
+    lane: 2,
     priority: 10,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 300000, // 5min delay on startup
     shouldRun: () => {
       // Only run if we have coding-agent benchmarks
@@ -1171,7 +1205,9 @@ async function main() {
     },
     intervalMs: 7 * 24 * 60 * 60 * 1000, // Weekly
     pool: 'llm',
+    lane: 2,
     priority: 5,
+    restartCatchupPolicy: 'defer',
     startDelayMs: 600000, // 10min delay on startup
     shouldRun: () => {
       // Run on Sundays only
@@ -1226,31 +1262,48 @@ async function main() {
     telemetry.trackError('boot_check', e);
   }
-  // Restart catch-up — replay jobs whose persisted nextEligibleAt fell
-  // while the daemon was down. Cap of 5 immediate runs (5s stagger);
-  // anything beyond that is deferred to the periodic tick. Borrowed
-  // from OpenClaw's runMissedJobs (src/cron/service/timer.ts:962-1040).
-  const restartCatchupOp = runtimeHealth.beginOperation('agent.restartCatchup');
-  try {
-    const catchup = await scheduler.runMissedJobs();
-    if (catchup.ran > 0 || catchup.deferred > 0) {
-      console.log(
-        `[wall-e] Restart catch-up: ran ${catchup.ran}, deferred ${catchup.deferred}`
-      );
-      telemetry.track('restart_catchup', {
-        ran: catchup.ran,
-        deferred: catchup.deferred,
-      });
-    }
-    restartCatchupOp.end({ ok: true, meta: { ran: catchup.ran || 0, deferred: catchup.deferred || 0 } });
-  } catch (e) {
-    restartCatchupOp.end({ ok: false, error: e });
-    console.warn('[wall-e] runMissedJobs threw (non-fatal):', e.message);
-  }
-  // Start the scheduler
+  // Start the scheduler before restart catch-up. Missed-job replay is useful,
+  // but it must never be startup-critical or block the daemon from serving
+  // chat/MCP requests after a restart.
   scheduler.start();
+  // Restart catch-up — replay only small foreground jobs immediately; defer
+  // expensive harvest/eval/maintenance work and persist that decision to the
+  // runtime work-item queue for observability. This keeps the main process
+  // responsive even after long downtime with many missed jobs.
+  setTimeout(() => {
+    (async () => {
+      const restartCatchupOp = runtimeHealth.beginOperation('agent.restartCatchup');
+      try {
+        const catchup = await scheduler.runMissedJobs({
+          nonBlocking: true,
+          max: 3,
+          staggerMs: 0,
+          maxWorkMs: 5000,
+          deferDelayMs: 120000,
+          enqueueWorkItem: brain.enqueueRuntimeWorkItem,
+        });
+        if (catchup.ran > 0 || catchup.deferred > 0) {
+          console.log(
+            `[wall-e] Restart catch-up: dispatched ${catchup.ran}, deferred ${catchup.deferred}`
+          );
+          telemetry.track('restart_catchup', {
+            ran: catchup.ran,
+            deferred: catchup.deferred,
+            non_blocking: 1,
+          });
+        }
+        restartCatchupOp.end({
+          ok: true,
+          meta: { ran: catchup.ran || 0, deferred: catchup.deferred || 0, non_blocking: 1 },
+        });
+      } catch (e) {
+        restartCatchupOp.end({ ok: false, error: e });
+        console.warn('[wall-e] runMissedJobs threw (non-fatal):', e.message);
+      }
+    })().catch((e) => console.warn('[wall-e] Restart catch-up failed (non-fatal):', e.message));
+  }, 1000).unref?.();
   // Hot-reload config on file changes
   const { ConfigWatcher } = require('./lib/config-watcher');
   const configWatcher = new ConfigWatcher();